threadlens 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,665 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import glob
4
- import json
5
- import os
6
- import sqlite3
7
- import urllib.parse
8
- from collections.abc import Iterator, Mapping
9
- from pathlib import Path
10
- from typing import Any
11
-
12
- from .extract import (
13
- amp_history_messages,
14
- agent_jsonl_messages,
15
- claude_messages,
16
- codex_messages,
17
- compact_text,
18
- content_to_text,
19
- custom_jsonl_messages,
20
- read_jsonl,
21
- timestamp_text,
22
- visible_message_text,
23
- )
24
- from .models import ThreadMessage
25
- from .profiles import SourceProfile
26
-
27
-
28
# NOTE(review): not referenced elsewhere in this module; presumably the default
# source selection used by callers — confirm.
DEFAULT_SOURCE_NAMES = (
    "codex",
    "claude",
    "cursor",
    "pi",
    "omp",
    "amp",
    "droid",
    "opencode",
)
# Every source name ``source_paths`` recognises; ``describe_sources`` iterates
# over this tuple.
SOURCE_NAMES = (
    "codex",
    "claude",
    "cursor",
    "pi",
    "omp",
    "amp",
    "droid",
    "opencode",
)
30
-
31
-
32
def _dedup_paths(paths: list[Path]) -> list[Path]:
    """Return ``paths`` without repeats, keeping each path at its first position."""
    # dict keys are unique and remember insertion order, which gives an
    # order-preserving de-duplication in one pass.
    return list(dict.fromkeys(paths))
41
-
42
-
43
def _xdg_config_home(home: Path, env: Mapping[str, str]) -> Path:
    """Return ``$XDG_CONFIG_HOME`` from ``env``, or ``home/.config`` when unset or empty."""
    configured = env.get("XDG_CONFIG_HOME")
    if not configured:
        return home / ".config"
    return Path(configured)
46
-
47
-
48
def _xdg_data_home(home: Path, env: Mapping[str, str]) -> Path:
    """Return ``$XDG_DATA_HOME`` from ``env``, or ``home/.local/share`` when unset or empty."""
    configured = env.get("XDG_DATA_HOME")
    if not configured:
        return home / ".local" / "share"
    return Path(configured)
51
-
52
-
53
def _appdata_roots(home: Path, env: Mapping[str, str]) -> list[Path]:
    """Return candidate Windows AppData roots: Roaming entries first, then Local.

    The exact Windows store paths for some agents are unverified — see the
    cross-platform note in README and the tracking GitHub issue.
    """
    # Roots taken from the environment are extra candidates, never
    # replacements: some agents ignore APPDATA/LOCALAPPDATA and still write to
    # the conventional AppData/Roaming and AppData/Local directories, so the
    # conventional location is always listed after any override.
    conventional_by_variable = (
        ("APPDATA", home / "AppData" / "Roaming"),
        ("LOCALAPPDATA", home / "AppData" / "Local"),
    )
    candidates: list[Path] = []
    for variable, conventional in conventional_by_variable:
        override = env.get(variable)
        if override:
            candidates.append(Path(override))
        candidates.append(conventional)
    return _dedup_paths(candidates)
72
-
73
-
74
- def source_paths(
75
- source: str,
76
- home: Path | None = None,
77
- environ: Mapping[str, str] | None = None,
78
- ) -> list[Path]:
79
- home = home or Path.home()
80
- env = environ if environ is not None else os.environ
81
- if source == "codex":
82
- return sorted((home / ".codex" / "sessions").glob("**/*.jsonl"))
83
- if source == "claude":
84
- paths = sorted((home / ".claude" / "projects").glob("**/*.jsonl"))
85
- history = home / ".claude" / "history.jsonl"
86
- if history.exists():
87
- paths.append(history)
88
- return paths
89
- if source == "cursor":
90
- # Cursor (a VS Code fork) stores its User dir per OS. XDG/AppData paths are
91
- # *additional* candidates, never replacements — some apps ignore the env vars
92
- # and still write to the conventional location, so always include it too.
93
- user_dirs = _dedup_paths([
94
- home / "Library" / "Application Support" / "Cursor" / "User", # macOS
95
- _xdg_config_home(home, env) / "Cursor" / "User", # Linux ($XDG_CONFIG_HOME)
96
- home / ".config" / "Cursor" / "User", # Linux conventional fallback
97
- *[r / "Cursor" / "User" for r in _appdata_roots(home, env)], # Windows
98
- ])
99
- paths: list[Path] = []
100
- for root in user_dirs:
101
- if not root.exists():
102
- continue
103
- global_state = root / "globalStorage" / "state.vscdb"
104
- if global_state.exists():
105
- paths.append(global_state)
106
- workspace = root / "workspaceStorage"
107
- if workspace.exists():
108
- paths.extend(sorted(workspace.glob("**/state.vscdb")))
109
- return paths
110
- if source == "pi":
111
- return sorted((home / ".pi" / "agent" / "sessions").glob("**/*.jsonl"))
112
- if source == "omp":
113
- return sorted((home / ".omp" / "agent" / "sessions").glob("**/*.jsonl"))
114
- if source == "amp":
115
- amp_dirs = _dedup_paths([
116
- _xdg_data_home(home, env) / "amp", # $XDG_DATA_HOME
117
- home / ".local" / "share" / "amp", # conventional fallback
118
- *[r / "amp" for r in _appdata_roots(home, env)], # Windows (best-effort)
119
- ])
120
- histories: list[Path] = []
121
- for amp_dir in amp_dirs:
122
- history = amp_dir / "history.jsonl"
123
- if history.exists():
124
- histories.append(history)
125
- return _dedup_paths(histories)
126
- if source == "droid":
127
- return sorted((home / ".factory" / "sessions").glob("**/*.jsonl"))
128
- if source == "opencode":
129
- opencode_dirs = _dedup_paths([
130
- _xdg_data_home(home, env) / "opencode", # $XDG_DATA_HOME
131
- home / ".local" / "share" / "opencode", # conventional fallback
132
- *[r / "opencode" for r in _appdata_roots(home, env)], # Windows (best-effort)
133
- ])
134
- dbs: list[Path] = []
135
- for oc_dir in opencode_dirs:
136
- db = oc_dir / "opencode.db"
137
- if db.exists() and opencode_db_has_messages(db):
138
- dbs.append(db)
139
- return _dedup_paths(dbs)
140
- raise ValueError(f"Unknown source: {source}")
141
-
142
-
143
def describe_sources(home: Path | None = None) -> list[tuple[str, int, list[Path]]]:
    """Summarise each name in ``SOURCE_NAMES`` as ``(name, path count, first five paths)``."""
    # The generator is consumed in order, so sources are probed one at a time
    # in ``SOURCE_NAMES`` order.
    located = ((name, source_paths(name, home=home)) for name in SOURCE_NAMES)
    return [(name, len(found), found[:5]) for name, found in located]
149
-
150
-
151
def iter_messages(source: str, *, home: Path | None = None, limit_files: int | None = None) -> Iterator[ThreadMessage]:
    """Yield the messages of every store found for ``source``.

    When ``limit_files`` is not ``None`` the list of stores is cut with the
    plain slice ``[:limit_files]`` before any of them is read.
    """
    stores = source_paths(source, home=home)
    selected = stores if limit_files is None else stores[:limit_files]
    for store in selected:
        yield from iter_path_messages(source, store)
158
-
159
-
160
- def iter_path_messages(source: str, path: Path) -> Iterator[ThreadMessage]:
161
- if source == "codex":
162
- yield from codex_messages(path)
163
- elif source == "claude":
164
- yield from claude_messages(path)
165
- elif source == "cursor":
166
- yield from cursor_messages(path)
167
- elif source in {"pi", "omp", "droid"}:
168
- yield from agent_jsonl_messages(path, source=source)
169
- elif source == "amp":
170
- yield from amp_history_messages(path)
171
- elif source == "opencode":
172
- yield from opencode_messages(path)
173
-
174
-
175
def iter_custom_messages(paths: list[Path]) -> Iterator[ThreadMessage]:
    """Yield messages from each file that ``custom_jsonl_paths`` resolves from ``paths``."""
    for jsonl_file in custom_jsonl_paths(paths):
        for message in custom_jsonl_messages(jsonl_file):
            yield message
178
-
179
-
180
def custom_jsonl_paths(paths: list[Path]) -> list[Path]:
    """Expand user-supplied files and directories into concrete file paths.

    Each entry is ``~``-expanded first. A directory contributes every
    ``*.jsonl`` file beneath it, sorted; an existing file is kept as is;
    anything else is ignored.
    """
    collected: list[Path] = []
    for entry in paths:
        target = entry.expanduser()
        if target.is_dir():
            collected += sorted(target.glob("**/*.jsonl"))
        elif target.is_file():
            collected.append(target)
    return collected
189
-
190
-
191
def source_profile_paths(profile: SourceProfile) -> list[Path]:
    """Resolve the entries of ``profile.paths`` to a sorted, duplicate-free file list.

    Each entry is ``~``-expanded, then treated as the first thing that
    applies: a directory (every ``*.jsonl`` beneath it), an existing file, or
    a glob pattern (matching files only, ``**`` allowed). Entries that are
    none of these are skipped.
    """
    matched: list[Path] = []
    for pattern in profile.paths:
        target = Path(pattern).expanduser()
        if target.is_dir():
            matched.extend(sorted(target.glob("**/*.jsonl")))
        elif target.is_file():
            matched.append(target)
        elif glob.has_magic(str(target)):
            hits = (Path(hit) for hit in glob.glob(str(target), recursive=True))
            matched.extend(sorted(hit for hit in hits if hit.is_file()))
    # Overlapping entries can name the same file more than once.
    unique = list(dict.fromkeys(matched))
    unique.sort()
    return unique
204
-
205
-
206
def source_profile_messages(profile: SourceProfile, path: Path) -> Iterator[ThreadMessage]:
    """Yield one ``ThreadMessage`` per usable row of a profile-described JSONL file.

    Nothing is yielded unless ``profile.format`` is ``"jsonl"``. Rows whose
    text comes out empty are skipped. Field locations are taken from the
    profile's ``*_key`` dotted paths.
    """
    if profile.format != "jsonl":
        return

    def field(row: Any, key_path: str) -> str:
        # Look up a dotted path in the row and flatten the result to text.
        return scalar_text(value_at_path(row, key_path))

    stem = path.stem
    for line_no, row in read_jsonl(path):
        extracted = value_at_path(row, profile.text_key)
        # Fall back to the whole row when the configured text key is absent.
        text = content_to_text(row if extracted is None else extracted)
        if not text:
            continue

        session_id = field(row, profile.session_key) or stem
        message_id = field(row, profile.message_key) or f"{stem}:{line_no}"
        role = field(row, profile.role_key) or scalar_text(row.get("role")) or "unknown"
        timestamp = field(row, profile.timestamp_key)
        cwd = field(row, profile.cwd_key)
        title = field(row, profile.title_key) or text[:120] or stem

        yield ThreadMessage(
            source=profile.name,
            thread_id=session_id,
            message_id=message_id,
            path=path,
            line=line_no,
            timestamp=timestamp,
            role=role,
            cwd=cwd,
            title=compact_text(title, limit=120),
            text=compact_text(text),
            metadata={"profile": profile.name},
        )
236
-
237
-
238
def value_at_path(value: Any, key_path: str) -> Any:
    """Follow a dotted ``key_path`` through nested dicts and lists.

    Each dot-separated segment is used as a dict key when the current value
    is a dict, or as a non-negative decimal index when it is a list.

    Args:
        value: The structure to descend into.
        key_path: Dotted path such as ``"message.content.0.text"``.

    Returns:
        The value found, or ``None`` when ``key_path`` is empty, a dict key
        is missing, a list index is out of range, or a segment cannot be
        applied to the current value.
    """
    if not key_path:
        return None

    current = value
    for part in key_path.split("."):
        if isinstance(current, dict):
            current = current.get(part)
        elif isinstance(current, list) and part.isdecimal():
            # isdecimal(), not isdigit(): isdigit() also accepts characters
            # such as "²" that int() cannot parse, which raised ValueError
            # instead of reporting "not found".
            index = int(part)
            if index >= len(current):
                return None
            current = current[index]
        else:
            return None
    return current
254
-
255
-
256
- def scalar_text(value: Any) -> str:
257
- if value is None:
258
- return ""
259
- if isinstance(value, str):
260
- return value
261
- if isinstance(value, bool | int | float):
262
- return str(value)
263
- return content_to_text(value)
264
-
265
-
266
def connect_sqlite_readonly(path: Path) -> sqlite3.Connection:
    """Open the SQLite database at ``path`` through a ``mode=ro`` URI."""
    # Percent-encode everything except "/" and ":" so characters such as
    # spaces, "?" and "#" in the path are not read as URI syntax.
    quoted = urllib.parse.quote(str(path), safe="/:")
    uri = "file:" + quoted + "?mode=ro"
    return sqlite3.connect(uri, uri=True)
269
-
270
-
271
def decode_sqlite_value(value: Any) -> Any:
    """Decode a raw SQLite cell into a Python value.

    Bytes are decoded as UTF-8 (``None`` if that fails). Text is parsed as
    JSON when possible and otherwise returned unchanged. Any other value,
    including ``None``, is returned as is.
    """
    if isinstance(value, bytes):
        try:
            value = value.decode("utf-8")
        except UnicodeDecodeError:
            return None
    if not isinstance(value, str):
        return value
    try:
        return json.loads(value)
    except json.JSONDecodeError:
        # Not JSON: keep the plain text.
        return value
285
-
286
-
287
def cursor_messages(path: Path) -> Iterator[ThreadMessage]:
    """Yield messages stored in a Cursor SQLite database.

    Reads the ``cursorDiskKV`` table and then the ``ItemTable`` table,
    whichever of them exist. The database is opened read-only. Any
    ``sqlite3.Error`` ends iteration quietly (best-effort read).

    Args:
        path: Location of the SQLite file.

    Yields:
        ``ThreadMessage`` objects produced by the per-table readers.
    """
    try:
        conn = connect_sqlite_readonly(path)
    except sqlite3.Error:
        return

    # A single try/finally covers the schema probe and both table reads, so
    # the connection is closed on every exit path. Previously a failing
    # sqlite_master query (e.g. the file is not a database) returned without
    # closing it.
    try:
        tables = {
            row[0]
            for row in conn.execute("select name from sqlite_master where type = 'table'")
        }
        if "cursorDiskKV" in tables:
            yield from cursor_disk_kv_messages(path, conn)
        if "ItemTable" in tables:
            yield from cursor_item_table_messages(path, conn)
    except sqlite3.Error:
        return
    finally:
        conn.close()
313
-
314
-
315
def cursor_disk_kv_messages(path: Path, conn: sqlite3.Connection) -> Iterator[ThreadMessage]:
    """Yield messages from ``composerData:*`` and ``bubbleId:*`` rows of ``cursorDiskKV``.

    Rows whose value cannot be decoded or that carry no text are skipped.
    ``line`` on each message is the 1-based position of the row in the query
    result. Nothing is yielded if the query itself fails.
    """
    sql = """
        select key, value
        from cursorDiskKV
        where key like 'composerData:%'
           or key like 'bubbleId:%'
    """
    try:
        cursor = conn.execute(sql)
    except sqlite3.Error:
        return

    for row_no, (key, raw_value) in enumerate(cursor, start=1):
        payload = decode_sqlite_value(raw_value)
        if payload is None:
            continue
        body = cursor_message_text(payload, key=str(key))
        if not body:
            continue

        if key.startswith("bubbleId:"):
            # The thread id is the second ":"-separated segment and the
            # message id is the last one.
            segments = key.split(":")
            thread_id = segments[1] if len(segments) > 1 else key
            message_id = segments[-1]
        elif key.startswith("composerData:"):
            # One row per composer: it is both the thread and the message.
            thread_id = message_id = key.removeprefix("composerData:")
        else:
            thread_id = extract_cursor_thread_id(payload) or key
            message_id = key

        yield ThreadMessage(
            source="cursor",
            thread_id=str(thread_id),
            message_id=str(message_id),
            path=path,
            line=row_no,
            timestamp=extract_cursor_timestamp(payload),
            role=extract_cursor_role(payload),
            cwd=extract_cursor_cwd(payload),
            title=extract_cursor_title(payload, fallback=thread_id),
            text=compact_text(body),
            metadata={"cursor_key": key},
        )
364
-
365
-
366
def cursor_item_table_messages(path: Path, conn: sqlite3.Connection) -> Iterator[ThreadMessage]:
    """Yield messages from composer/conversation rows of ``ItemTable``.

    Rows whose value cannot be decoded or that carry no text are skipped.
    The row key is used as the message id, and as the thread id when the
    value names none. Nothing is yielded if the query itself fails.
    """
    sql = """
        select key, value
        from ItemTable
        where key like 'composer.%'
           or key like 'composerData:%'
           or key like 'conversation%'
           or key like 'cursor.composer%'
    """
    try:
        cursor = conn.execute(sql)
    except sqlite3.Error:
        return

    for row_no, (key, raw_value) in enumerate(cursor, start=1):
        payload = decode_sqlite_value(raw_value)
        if payload is None:
            continue
        key_text = str(key)
        body = cursor_message_text(payload, key=key_text)
        if not body:
            continue

        yield ThreadMessage(
            source="cursor",
            thread_id=extract_cursor_thread_id(payload) or key_text,
            message_id=key_text,
            path=path,
            line=row_no,
            timestamp=extract_cursor_timestamp(payload),
            role=extract_cursor_role(payload),
            cwd=extract_cursor_cwd(payload),
            title=extract_cursor_title(payload, fallback=key_text),
            text=compact_text(body),
            metadata={"cursor_key": key, "table": "ItemTable"},
        )
401
-
402
-
403
def cursor_message_text(value: Any, *, key: str) -> str:
    """Return the text of a Cursor record, choosing candidate fields by key prefix.

    Returns ``""`` for non-dict values, for ``agentKv:blob:`` keys, and for
    keys with an unrecognised prefix.
    """
    if not isinstance(value, dict) or key.startswith("agentKv:blob:"):
        return ""

    if key.startswith("bubbleId:"):
        fields = ("text", "richText", "content")
    elif key.startswith(("composerData:", "composer.")):
        fields = ("text", "richText", "name", "title")
    elif key.startswith(("conversation", "cursor.composer")):
        fields = ("text", "richText", "content", "name", "title")
    else:
        return ""
    return first_cursor_text(value, fields)
420
-
421
-
422
def first_cursor_text(value: dict[str, Any], keys: tuple[str, ...]) -> str:
    """Return the first non-empty text found under ``keys`` (tried in order), else ``""``."""
    # The generator is lazy, so later keys are not converted once one matches.
    rendered = (cursor_value_to_text(value.get(name)) for name in keys)
    return next((text for text in rendered if text), "")
428
-
429
-
430
def cursor_value_to_text(value: Any) -> str:
    """Convert a Cursor field to text via ``content_to_text``.

    A string that looks like a JSON object or array (starts with ``{`` or
    ``[`` after stripping) is parsed first and the parsed structure is
    converted instead; if parsing fails the original string is converted.
    """
    if isinstance(value, str):
        candidate = value.strip()
        if candidate.startswith(("{", "[")):
            try:
                decoded = json.loads(candidate)
            except json.JSONDecodeError:
                pass  # only looked like JSON; treat it as plain text below
            else:
                return content_to_text(decoded)
    return content_to_text(value)
442
-
443
-
444
def extract_cursor_thread_id(value: Any) -> str:
    """Return the first non-empty string among the record's id fields, else ``""``.

    Fields are tried in the order ``composerId``, ``conversationId``,
    ``sessionId``, ``id``. Non-dict input yields ``""``.
    """
    if not isinstance(value, dict):
        return ""
    candidates = (
        value.get(name) for name in ("composerId", "conversationId", "sessionId", "id")
    )
    return next((item for item in candidates if isinstance(item, str) and item), "")
451
-
452
-
453
def extract_cursor_role(value: Any) -> str:
    """Return the record's ``role`` (or, failing that, ``type``) string.

    Falls back to ``"cursor"`` when neither field holds a non-empty string
    or when ``value`` is not a dict.
    """
    if not isinstance(value, dict):
        return "cursor"
    candidates = (value.get(name) for name in ("role", "type"))
    return next((item for item in candidates if isinstance(item, str) and item), "cursor")
460
-
461
-
462
- def extract_cursor_timestamp(value: Any) -> str:
463
- if isinstance(value, dict):
464
- for key in ("createdAt", "timestamp", "lastUpdatedAt", "updatedAt"):
465
- candidate = value.get(key)
466
- if isinstance(candidate, str):
467
- return candidate
468
- if isinstance(candidate, int | float):
469
- return str(candidate)
470
- return ""
471
-
472
-
473
def extract_cursor_cwd(value: Any) -> str:
    """Return a working-directory hint from a Cursor record.

    Uses the first non-empty value among ``trackedGitRepos``,
    ``workspaceUris`` and ``workspaceFolders``, and only looks at its first
    element. A string element is returned directly; for a dict element the
    first non-empty string under ``path``, ``uri`` or ``fsPath`` is returned.

    Args:
        value: Decoded record; anything other than a dict yields ``""``.

    Returns:
        The location string, or ``""`` when none can be found.
    """
    if not isinstance(value, dict):
        return ""
    repos = value.get("trackedGitRepos") or value.get("workspaceUris") or value.get("workspaceFolders")
    if not isinstance(repos, list) or not repos:
        return ""

    first = repos[0]
    if isinstance(first, str):
        return first
    if isinstance(first, dict):
        for key in ("path", "uri", "fsPath"):
            candidate = first.get(key)
            # Skip empty strings so a blank "path" does not hide a populated
            # "uri" or "fsPath" on the same entry.
            if isinstance(candidate, str) and candidate:
                return candidate
    return ""
487
-
488
-
489
def extract_cursor_title(value: Any, *, fallback: str) -> str:
    """Return a title of at most 120 characters for a Cursor record.

    The first non-empty text under ``text``, ``richText``, ``name`` or
    ``title`` is compacted and returned; ``fallback`` is returned unchanged
    when the record is not a dict or has no such text.
    """
    if not isinstance(value, dict):
        return fallback
    rendered = (
        cursor_value_to_text(value.get(name)) for name in ("text", "richText", "name", "title")
    )
    chosen = next((text for text in rendered if text), None)
    if chosen is None:
        return fallback
    return compact_text(chosen, limit=120)
496
-
497
-
498
def opencode_db_has_messages(path: Path) -> bool:
    """Report whether an opencode database appears to contain messages.

    The row count of the ``part`` table decides when that table exists;
    otherwise the ``message`` table decides; with neither table the answer
    is ``False``. If the database cannot be opened or queried the answer is
    ``True``, so an unreadable file is not filtered out here.
    """
    try:
        conn = connect_sqlite_readonly(path)
    except sqlite3.Error:
        return True

    try:
        tables = {
            name
            for (name,) in conn.execute("select name from sqlite_master where type = 'table'")
        }
        # Only the first table present is consulted, ``part`` before ``message``.
        for table in ("part", "message"):
            if table in tables:
                row = conn.execute(f"select count(*) from {table}").fetchone()
                return bool(row and int(row[0]) > 0)
        return False
    except sqlite3.Error:
        return True
    finally:
        conn.close()
519
-
520
-
521
def opencode_messages(path: Path) -> Iterator[ThreadMessage]:
    """Yield messages from an opencode SQLite database.

    Reads per-part rows when the ``session``, ``message`` and ``part``
    tables all exist, per-message rows when only ``session`` and ``message``
    exist, and nothing otherwise. Any ``sqlite3.Error`` ends iteration
    quietly. The connection is always closed.
    """
    try:
        conn = connect_sqlite_readonly(path)
    except sqlite3.Error:
        return

    try:
        tables = {
            name
            for (name,) in conn.execute("select name from sqlite_master where type = 'table'")
        }
        if tables >= {"session", "message", "part"}:
            yield from opencode_part_messages(path, conn)
        elif tables >= {"session", "message"}:
            yield from opencode_message_rows(path, conn)
    except sqlite3.Error:
        return
    finally:
        conn.close()
545
-
546
-
547
def opencode_part_messages(path: Path, conn: sqlite3.Connection) -> Iterator[ThreadMessage]:
    """Yield one message per text-bearing ``part`` row, oldest first.

    Each part is joined to its message (for the role) and its session (for
    directory and title). Parts without text, or whose role is neither
    ``"user"`` nor ``"assistant"``, are skipped. ``line`` is the 1-based
    position of the row in the query result.
    """
    sql = """
        select
            p.id as part_id,
            p.message_id as message_id,
            p.session_id as session_id,
            p.time_created as part_time_created,
            p.data as part_data,
            m.data as message_data,
            s.directory as directory,
            s.path as session_path,
            s.title as title
        from part p
        left join message m on m.id = p.message_id
        left join session s on s.id = p.session_id
        order by p.time_created, p.id
    """
    for row_no, (
        part_id,
        message_id,
        session_id,
        created_at,
        raw_part,
        raw_message,
        directory,
        session_path,
        title,
    ) in enumerate(conn.execute(sql), start=1):
        part = decode_sqlite_value(raw_part)
        message = decode_sqlite_value(raw_message)

        body = opencode_part_text(part)
        if not body:
            continue
        role = opencode_role(message, part)
        if role not in {"user", "assistant"}:
            continue

        yield ThreadMessage(
            source="opencode",
            thread_id=str(session_id),
            message_id=str(part_id or message_id or f"{session_id}:{row_no}"),
            path=path,
            line=row_no,
            timestamp=timestamp_text(created_at),
            role=role,
            cwd=str(directory or session_path or ""),
            title=compact_text(str(title or session_id), limit=120),
            text=body,
            metadata={"message_id": message_id},
        )
598
-
599
-
600
def opencode_message_rows(path: Path, conn: sqlite3.Connection) -> Iterator[ThreadMessage]:
    """Yield one message per usable ``message`` row, oldest first.

    Used for databases that have no ``part`` table. Rows whose role is
    neither ``"user"`` nor ``"assistant"``, or that carry no text, are
    skipped. ``line`` is the 1-based position of the row in the query result.
    """
    sql = """
        select
            m.id as message_id,
            m.session_id as session_id,
            m.time_created as time_created,
            m.data as message_data,
            s.directory as directory,
            s.path as session_path,
            s.title as title
        from message m
        left join session s on s.id = m.session_id
        order by m.time_created, m.id
    """
    for row_no, (
        message_id,
        session_id,
        created_at,
        raw_message,
        directory,
        session_path,
        title,
    ) in enumerate(conn.execute(sql), start=1):
        message = decode_sqlite_value(raw_message)

        role = opencode_role(message, None)
        if role not in {"user", "assistant"}:
            continue
        body = opencode_part_text(message)
        if not body:
            continue

        yield ThreadMessage(
            source="opencode",
            thread_id=str(session_id),
            message_id=str(message_id or f"{session_id}:{row_no}"),
            path=path,
            line=row_no,
            timestamp=timestamp_text(created_at),
            role=role,
            cwd=str(directory or session_path or ""),
            title=compact_text(str(title or session_id), limit=120),
            text=body,
            metadata={},
        )
637
-
638
-
639
def opencode_role(message_data: Any, part_data: Any) -> str:
    """Return ``"user"`` or ``"assistant"`` for an opencode row, else ``""``.

    ``message_data`` is inspected before ``part_data``; within each dict the
    ``role``, ``author`` and ``type`` fields are tried in that order. Values
    other than the two accepted role strings are ignored.
    """
    accepted = {"user", "assistant"}
    for record in (message_data, part_data):
        if not isinstance(record, dict):
            continue
        for name in ("role", "author", "type"):
            candidate = record.get(name)
            if isinstance(candidate, str) and candidate in accepted:
                return candidate
    return ""
647
-
648
-
649
def opencode_part_text(value: Any) -> str:
    """Return the user-visible text of an opencode part or message payload.

    Strings go through ``content_to_text``. Dicts whose ``type`` marks tool
    or bookkeeping data yield ``""``; dicts with any other non-empty ``type``
    go through ``visible_message_text`` as a whole; dicts without a ``type``
    yield the first non-empty text among ``text``, ``content`` and
    ``message``. Everything else yields ``""``.
    """
    if isinstance(value, str):
        return content_to_text(value)
    if not isinstance(value, dict):
        return ""

    hidden_types = {"tool", "tool_call", "tool_result", "step-start", "step-finish", "snapshot"}
    part_type = str(value.get("type") or "")
    if part_type in hidden_types:
        return ""
    if part_type:
        return visible_message_text(value)

    # Untyped payload: probe the usual text-bearing fields in order.
    rendered = (visible_message_text(value.get(name)) for name in ("text", "content", "message"))
    return next((text for text in rendered if text), "")