threadlens 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,592 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import glob
4
- import json
5
- import sqlite3
6
- import urllib.parse
7
- from collections.abc import Iterator
8
- from pathlib import Path
9
- from typing import Any
10
-
11
- from .extract import (
12
- amp_history_messages,
13
- agent_jsonl_messages,
14
- claude_messages,
15
- codex_messages,
16
- compact_text,
17
- content_to_text,
18
- custom_jsonl_messages,
19
- read_jsonl,
20
- timestamp_text,
21
- visible_message_text,
22
- )
23
- from .models import ThreadMessage
24
- from .profiles import SourceProfile
25
-
26
-
27
# Names of the built-in sources; describe_sources() iterates this tuple and
# source_paths() accepts exactly these values.
SOURCE_NAMES = ("codex", "claude", "cursor", "pi", "omp", "amp", "droid", "opencode")
# Currently the same tuple as SOURCE_NAMES.
DEFAULT_SOURCE_NAMES = SOURCE_NAMES
29
-
30
-
31
def source_paths(source: str, home: Path | None = None) -> list[Path]:
    """Return the session files found on disk for one built-in source.

    Args:
        source: One of the built-in source names ("codex", "claude",
            "cursor", "pi", "omp", "amp", "droid", "opencode").
        home: Directory to treat as the user's home; defaults to
            ``Path.home()``.

    Returns:
        The matching files. Glob results are sorted; single-file sources
        return a one-element list, or an empty list when the file is absent.

    Raises:
        ValueError: If ``source`` is not a known source name.
    """
    base = home if home else Path.home()

    # Sources that are simply "every *.jsonl below one directory".
    jsonl_roots = {
        "codex": base / ".codex" / "sessions",
        "pi": base / ".pi" / "agent" / "sessions",
        "omp": base / ".omp" / "agent" / "sessions",
        "droid": base / ".factory" / "sessions",
    }
    if source in jsonl_roots:
        return sorted(jsonl_roots[source].glob("**/*.jsonl"))

    if source == "claude":
        found = sorted((base / ".claude" / "projects").glob("**/*.jsonl"))
        history_file = base / ".claude" / "history.jsonl"
        # The history file goes after the (sorted) project sessions.
        return (found + [history_file]) if history_file.exists() else found

    if source == "cursor":
        user_dir = base / "Library" / "Application Support" / "Cursor" / "User"
        found = []
        state_db = user_dir / "globalStorage" / "state.vscdb"
        if state_db.exists():
            found.append(state_db)
        workspace_dir = user_dir / "workspaceStorage"
        if workspace_dir.exists():
            found += sorted(workspace_dir.glob("**/state.vscdb"))
        return found

    if source == "amp":
        history_file = base / ".local" / "share" / "amp" / "history.jsonl"
        return [history_file] if history_file.exists() else []

    if source == "opencode":
        database = base / ".local" / "share" / "opencode" / "opencode.db"
        # Only report the database when it exists and the probe says it has rows.
        if database.exists() and opencode_db_has_messages(database):
            return [database]
        return []

    raise ValueError(f"Unknown source: {source}")
68
-
69
-
70
def describe_sources(home: Path | None = None) -> list[tuple[str, int, list[Path]]]:
    """Summarise what each built-in source has on disk.

    Args:
        home: Forwarded to ``source_paths`` as the home directory.

    Returns:
        One ``(source, file_count, first_five_paths)`` tuple per entry of
        ``SOURCE_NAMES``, in that order.
    """
    per_source = ((name, source_paths(name, home=home)) for name in SOURCE_NAMES)
    # Report the full count but only the first five paths.
    return [(name, len(found), found[:5]) for name, found in per_source]
76
-
77
-
78
def iter_messages(source: str, *, home: Path | None = None, limit_files: int | None = None) -> Iterator[ThreadMessage]:
    """Yield every message from the files of one built-in source.

    Args:
        source: Built-in source name, passed to ``source_paths``.
        home: Optional home directory override, passed to ``source_paths``.
        limit_files: When not ``None``, the path list is sliced with
            ``[:limit_files]`` before reading.

    Yields:
        Messages from ``iter_path_messages`` for each selected file, in
        path order.
    """
    # Slicing with ``None`` keeps the whole list, so no explicit branch is needed.
    for session_file in source_paths(source, home=home)[:limit_files]:
        yield from iter_path_messages(source, session_file)
85
-
86
-
87
- def iter_path_messages(source: str, path: Path) -> Iterator[ThreadMessage]:
88
- if source == "codex":
89
- yield from codex_messages(path)
90
- elif source == "claude":
91
- yield from claude_messages(path)
92
- elif source == "cursor":
93
- yield from cursor_messages(path)
94
- elif source in {"pi", "omp", "droid"}:
95
- yield from agent_jsonl_messages(path, source=source)
96
- elif source == "amp":
97
- yield from amp_history_messages(path)
98
- elif source == "opencode":
99
- yield from opencode_messages(path)
100
-
101
-
102
def iter_custom_messages(paths: list[Path]) -> Iterator[ThreadMessage]:
    """Yield messages from user-supplied JSONL files or directories.

    Args:
        paths: Files and/or directories, expanded by ``custom_jsonl_paths``.

    Yields:
        Messages from ``custom_jsonl_messages`` for each expanded file.
    """
    for jsonl_file in custom_jsonl_paths(paths):
        for message in custom_jsonl_messages(jsonl_file):
            yield message
105
-
106
-
107
def custom_jsonl_paths(paths: list[Path]) -> list[Path]:
    """Expand files and directories into a flat list of files.

    Args:
        paths: Candidate paths; ``~`` is expanded on each.

    Returns:
        For each input in order: the file itself if it is a regular file, or
        the sorted ``**/*.jsonl`` matches if it is a directory. Anything else
        (for example a missing path) contributes nothing.
    """
    collected: list[Path] = []
    for entry in paths:
        resolved = entry.expanduser()
        if resolved.is_file():
            # Explicit files are taken as-is, whatever their suffix.
            collected.append(resolved)
        elif resolved.is_dir():
            collected += sorted(resolved.glob("**/*.jsonl"))
    return collected
116
-
117
-
118
def source_profile_paths(profile: SourceProfile) -> list[Path]:
    """Resolve a profile's path patterns to a sorted, de-duplicated file list.

    Each entry of ``profile.paths`` is ``~``-expanded and then treated as,
    in priority order: a directory (all ``**/*.jsonl`` beneath it), an
    existing file, or a glob pattern (recursive; only regular files kept).
    Entries matching none of these are ignored.

    Args:
        profile: Object whose ``paths`` attribute lists the patterns.

    Returns:
        The unique matching paths in sorted order.
    """
    matches: list[Path] = []
    for pattern in profile.paths:
        candidate = Path(pattern).expanduser()
        if candidate.is_dir():
            matches += sorted(candidate.glob("**/*.jsonl"))
        elif candidate.is_file():
            matches.append(candidate)
        elif glob.has_magic(str(candidate)):
            hits = (Path(hit) for hit in glob.glob(str(candidate), recursive=True))
            matches += sorted(hit for hit in hits if hit.is_file())
    # Overlapping patterns may name the same file twice; keep one copy of each.
    return sorted(set(matches))
131
-
132
-
133
def source_profile_messages(profile: SourceProfile, path: Path) -> Iterator[ThreadMessage]:
    """Yield messages from a JSONL file using the key paths of ``profile``.

    Nothing is yielded unless ``profile.format`` is ``"jsonl"``. Rows whose
    extracted text is empty are skipped.

    Args:
        profile: Supplies the source name and the dotted key paths
            (``text_key``, ``session_key``, ``message_key``, ``role_key``,
            ``timestamp_key``, ``cwd_key``, ``title_key``).
        path: JSONL file to read via ``read_jsonl``.

    Yields:
        One ``ThreadMessage`` per row that has text.
    """
    if profile.format != "jsonl":
        return

    def field(row: Any, key_path: str) -> str:
        # Look a dotted key path up in the row and coerce the hit to text.
        return scalar_text(value_at_path(row, key_path))

    stem = path.stem
    for line_no, row in read_jsonl(path):
        raw_text = value_at_path(row, profile.text_key)
        # With no value at text_key, the whole row is rendered instead.
        text = content_to_text(row if raw_text is None else raw_text)
        if not text:
            continue

        session_id = field(row, profile.session_key) or stem
        message_id = field(row, profile.message_key) or f"{stem}:{line_no}"
        role = field(row, profile.role_key) or scalar_text(row.get("role")) or "unknown"
        timestamp = field(row, profile.timestamp_key)
        cwd = field(row, profile.cwd_key)
        title = field(row, profile.title_key) or text[:120] or stem

        yield ThreadMessage(
            source=profile.name,
            thread_id=session_id,
            message_id=message_id,
            path=path,
            line=line_no,
            timestamp=timestamp,
            role=role,
            cwd=cwd,
            title=compact_text(title, limit=120),
            text=compact_text(text),
            metadata={"profile": profile.name},
        )
163
-
164
-
165
def value_at_path(value: Any, key_path: str) -> Any:
    """Follow a dotted key path through nested dicts and lists.

    Each ``.``-separated segment is used as a dict key, or as a non-negative
    list index when the current node is a list.

    Args:
        value: Root object to walk.
        key_path: Dotted path such as ``"message.content.0.text"``.

    Returns:
        The value found at the path, or ``None`` when the path is empty, a
        segment cannot be applied to the current node, or a list index is
        out of range.
    """
    if not key_path:
        return None

    current = value
    for part in key_path.split("."):
        if isinstance(current, dict):
            current = current.get(part)
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²" that int() rejects with ValueError.
        elif isinstance(current, list) and part.isdecimal():
            index = int(part)
            if index >= len(current):
                return None
            current = current[index]
        else:
            return None
    return current
181
-
182
-
183
def scalar_text(value: Any) -> str:
    """Coerce a looked-up value to a string.

    Args:
        value: Any value.

    Returns:
        ``""`` for ``None``, the string itself for ``str``, ``str(value)`` for
        bool/int/float, and ``content_to_text(value)`` for everything else.
    """
    if isinstance(value, str):
        return value
    if value is None:
        return ""
    if isinstance(value, (bool, int, float)):
        return str(value)
    # Structured values are delegated to the shared text extractor.
    return content_to_text(value)
191
-
192
-
193
def connect_sqlite_readonly(path: Path) -> sqlite3.Connection:
    """Open a SQLite database in read-only mode.

    Args:
        path: Location of the database file.

    Returns:
        A connection opened through a ``file:...?mode=ro`` URI.
    """
    # Percent-encode the path so characters such as "?" or "#" are not read
    # as URI syntax; "/" and ":" are left untouched.
    quoted = urllib.parse.quote(str(path), safe="/:")
    uri = "file:" + quoted + "?mode=ro"
    return sqlite3.connect(uri, uri=True)
196
-
197
-
198
def decode_sqlite_value(value: Any) -> Any:
    """Decode a raw SQLite column value, parsing JSON where possible.

    Args:
        value: Raw column value.

    Returns:
        ``None`` for ``None`` or for bytes that are not valid UTF-8; the
        parsed JSON for text (or UTF-8 bytes) that parses as JSON; the text
        itself when it does not parse; any other value unchanged.
    """
    if isinstance(value, bytes):
        try:
            value = value.decode("utf-8")
        except UnicodeDecodeError:
            return None

    if not isinstance(value, str):
        # Covers None as well as numbers and other non-text values.
        return value

    try:
        return json.loads(value)
    except json.JSONDecodeError:
        return value
212
-
213
-
214
def cursor_messages(path: Path) -> Iterator[ThreadMessage]:
    """Yield messages stored in a Cursor ``state.vscdb`` SQLite database.

    The database is opened read-only. Rows are read from the
    ``cursorDiskKV`` table first and then from ``ItemTable``, for whichever
    of the two exist. Any ``sqlite3.Error`` ends the iteration quietly.

    Args:
        path: Location of the SQLite database.

    Yields:
        Messages from ``cursor_disk_kv_messages`` and
        ``cursor_item_table_messages``.
    """
    try:
        conn = connect_sqlite_readonly(path)
    except sqlite3.Error:
        return

    # A single try/finally covers the table listing as well, so the connection
    # is closed even when that first query fails (e.g. not a database file).
    try:
        tables = {
            row[0]
            for row in conn.execute("select name from sqlite_master where type = 'table'")
        }
        if "cursorDiskKV" in tables:
            yield from cursor_disk_kv_messages(path, conn)
        if "ItemTable" in tables:
            yield from cursor_item_table_messages(path, conn)
    except sqlite3.Error:
        return
    finally:
        conn.close()
240
-
241
-
242
def cursor_disk_kv_messages(path: Path, conn: sqlite3.Connection) -> Iterator[ThreadMessage]:
    """Yield messages from the ``cursorDiskKV`` table of a Cursor database.

    Only rows whose key starts with ``composerData:`` or ``bubbleId:`` are
    selected. Rows whose value cannot be decoded, or that yield no text, are
    skipped. If the query itself fails, nothing is yielded.

    Args:
        path: Database file, recorded on every message.
        conn: Open connection to that database.

    Yields:
        One ``ThreadMessage`` per row with text; ``line`` is the 1-based
        position of the row in the query result.
    """
    query = """
        select key, value
        from cursorDiskKV
        where key like 'composerData:%'
           or key like 'bubbleId:%'
    """
    try:
        cursor = conn.execute(query)
    except sqlite3.Error:
        return

    for row_no, (key, raw_value) in enumerate(cursor, 1):
        payload = decode_sqlite_value(raw_value)
        if payload is None:
            continue
        body = cursor_message_text(payload, key=str(key))
        if not body:
            continue

        if key.startswith("bubbleId:"):
            # The second ":"-separated segment is used as the thread id and
            # the last segment as the message id.
            segments = key.split(":")
            thread_id = segments[1] if len(segments) > 1 else key
            message_id = segments[-1]
        elif key.startswith("composerData:"):
            thread_id = message_id = key.removeprefix("composerData:")
        else:
            thread_id = extract_cursor_thread_id(payload) or key
            message_id = key

        yield ThreadMessage(
            source="cursor",
            thread_id=str(thread_id),
            message_id=str(message_id),
            path=path,
            line=row_no,
            timestamp=extract_cursor_timestamp(payload),
            role=extract_cursor_role(payload),
            cwd=extract_cursor_cwd(payload),
            title=extract_cursor_title(payload, fallback=thread_id),
            text=compact_text(body),
            metadata={"cursor_key": key},
        )
291
-
292
-
293
def cursor_item_table_messages(path: Path, conn: sqlite3.Connection) -> Iterator[ThreadMessage]:
    """Yield messages from the ``ItemTable`` table of a Cursor database.

    Rows are selected by key prefix (``composer.``, ``composerData:``,
    ``conversation``, ``cursor.composer``). Rows whose value cannot be
    decoded, or that yield no text, are skipped. If the query itself fails,
    nothing is yielded.

    Args:
        path: Database file, recorded on every message.
        conn: Open connection to that database.

    Yields:
        One ``ThreadMessage`` per row with text; the row key is the message
        id and also the fallback for thread id and title.
    """
    query = """
        select key, value
        from ItemTable
        where key like 'composer.%'
           or key like 'composerData:%'
           or key like 'conversation%'
           or key like 'cursor.composer%'
    """
    try:
        cursor = conn.execute(query)
    except sqlite3.Error:
        return

    for row_no, (key, raw_value) in enumerate(cursor, 1):
        payload = decode_sqlite_value(raw_value)
        if payload is None:
            continue
        key_text = str(key)
        body = cursor_message_text(payload, key=key_text)
        if not body:
            continue

        yield ThreadMessage(
            source="cursor",
            thread_id=extract_cursor_thread_id(payload) or key_text,
            message_id=key_text,
            path=path,
            line=row_no,
            timestamp=extract_cursor_timestamp(payload),
            role=extract_cursor_role(payload),
            cwd=extract_cursor_cwd(payload),
            title=extract_cursor_title(payload, fallback=key_text),
            text=compact_text(body),
            metadata={"cursor_key": key, "table": "ItemTable"},
        )
328
-
329
-
330
def cursor_message_text(value: Any, *, key: str) -> str:
    """Pick the text of a Cursor record based on its storage key.

    Args:
        value: Decoded record; anything but a dict yields ``""``.
        key: Storage key of the record; its prefix decides which fields are
            consulted and in what order.

    Returns:
        The first non-empty text among the fields configured for the key's
        prefix, or ``""`` when the key is an ``agentKv:blob:`` entry, has no
        recognised prefix, or no field holds text.
    """
    if not isinstance(value, dict) or key.startswith("agentKv:blob:"):
        return ""

    # (key prefixes, fields to try in order); the first matching entry wins.
    field_priority = (
        (("bubbleId:",), ("text", "richText", "content")),
        (("composerData:", "composer."), ("text", "richText", "name", "title")),
        (("conversation", "cursor.composer"), ("text", "richText", "content", "name", "title")),
    )
    for prefixes, fields in field_priority:
        if key.startswith(prefixes):
            return first_cursor_text(value, fields)
    return ""
347
-
348
-
349
def first_cursor_text(value: dict[str, Any], keys: tuple[str, ...]) -> str:
    """Return the first non-empty text found under ``keys``.

    Args:
        value: Record to read from.
        keys: Field names to try, in priority order.

    Returns:
        The text of the first field for which ``cursor_value_to_text`` gives
        a non-empty result, or ``""`` if none does.
    """
    # Lazy generator: later fields are only converted if earlier ones are empty.
    candidates = (cursor_value_to_text(value.get(name)) for name in keys)
    return next((text for text in candidates if text), "")
355
-
356
-
357
def cursor_value_to_text(value: Any) -> str:
    """Convert a Cursor field value to text, unwrapping JSON-in-a-string.

    Args:
        value: Field value of any type.

    Returns:
        ``content_to_text`` applied to the parsed JSON when ``value`` is a
        string that looks like a JSON object/array and parses; otherwise
        ``content_to_text`` applied to ``value`` unchanged.
    """
    if isinstance(value, str):
        trimmed = value.strip()
        if trimmed.startswith(("{", "[")):
            try:
                value = json.loads(trimmed)
            except json.JSONDecodeError:
                # Not JSON after all: fall through with the original string.
                pass
    return content_to_text(value)
369
-
370
-
371
def extract_cursor_thread_id(value: Any) -> str:
    """Return the first non-empty string id found in a Cursor record.

    Args:
        value: Decoded record; non-dicts yield ``""``.

    Returns:
        The value of ``composerId``, ``conversationId``, ``sessionId`` or
        ``id`` (first non-empty string in that order), else ``""``.
    """
    if not isinstance(value, dict):
        return ""
    id_fields = ("composerId", "conversationId", "sessionId", "id")
    candidates = (value.get(field) for field in id_fields)
    return next((c for c in candidates if isinstance(c, str) and c), "")
378
-
379
-
380
def extract_cursor_role(value: Any) -> str:
    """Return the role recorded on a Cursor record.

    Args:
        value: Decoded record.

    Returns:
        The first non-empty string among the ``role`` and ``type`` fields,
        or ``"cursor"`` when neither is present or ``value`` is not a dict.
    """
    if not isinstance(value, dict):
        return "cursor"
    candidates = (value.get(field) for field in ("role", "type"))
    return next((c for c in candidates if isinstance(c, str) and c), "cursor")
387
-
388
-
389
def extract_cursor_timestamp(value: Any) -> str:
    """Return the first usable timestamp found on a Cursor record.

    The fields ``createdAt``, ``timestamp``, ``lastUpdatedAt`` and
    ``updatedAt`` are tried in that order.

    Args:
        value: Decoded record; non-dicts yield ``""``.

    Returns:
        The first non-empty string value as-is, or the first int/float value
        converted with ``str()``; ``""`` when no field qualifies.
    """
    if not isinstance(value, dict):
        return ""
    for key in ("createdAt", "timestamp", "lastUpdatedAt", "updatedAt"):
        candidate = value.get(key)
        if isinstance(candidate, str):
            # An empty string carries no timestamp; keep looking at later keys
            # (the sibling id/role extractors also require non-empty strings).
            if candidate:
                return candidate
            continue
        # bool is a subclass of int, but True/False is never a timestamp.
        if isinstance(candidate, (int, float)) and not isinstance(candidate, bool):
            return str(candidate)
    return ""
398
-
399
-
400
def extract_cursor_cwd(value: Any) -> str:
    """Return a working-directory string taken from a Cursor record.

    The first truthy value among ``trackedGitRepos``, ``workspaceUris`` and
    ``workspaceFolders`` is used; only its first element is inspected.

    Args:
        value: Decoded record; non-dicts yield ``""``.

    Returns:
        The first element if it is a string, or the first string among its
        ``path``, ``uri`` and ``fsPath`` fields if it is a dict; else ``""``.
    """
    if not isinstance(value, dict):
        return ""

    locations = value.get("trackedGitRepos") or value.get("workspaceUris") or value.get("workspaceFolders")
    if not isinstance(locations, list) or not locations:
        return ""

    head = locations[0]
    if isinstance(head, str):
        return head
    if isinstance(head, dict):
        for field in ("path", "uri", "fsPath"):
            candidate = head.get(field)
            if isinstance(candidate, str):
                return candidate
    return ""
414
-
415
-
416
def extract_cursor_title(value: Any, *, fallback: str) -> str:
    """Derive a short title for a Cursor record.

    Args:
        value: Decoded record.
        fallback: Returned when no title text can be found.

    Returns:
        The first non-empty text among ``text``, ``richText``, ``name`` and
        ``title`` passed through ``compact_text(..., limit=120)``, or
        ``fallback`` when ``value`` is not a dict or has no such text.
    """
    if not isinstance(value, dict):
        return fallback
    for field in ("text", "richText", "name", "title"):
        candidate = cursor_value_to_text(value.get(field))
        if candidate:
            return compact_text(candidate, limit=120)
    return fallback
423
-
424
-
425
def opencode_db_has_messages(path: Path) -> bool:
    """Probe whether an opencode database holds any rows to read.

    Args:
        path: Location of the SQLite database.

    Returns:
        ``True`` if the ``part`` table exists and is non-empty; if there is
        no ``part`` table, ``True`` if ``message`` exists and is non-empty.
        ``False`` when the checked table is empty or neither table exists.
        Any ``sqlite3.Error`` (on connect or query) also returns ``True``.
    """
    try:
        conn = connect_sqlite_readonly(path)
    except sqlite3.Error:
        return True

    # Only the first of these tables that exists is counted.
    count_queries = (
        ("part", "select count(*) from part"),
        ("message", "select count(*) from message"),
    )
    try:
        present = {
            row[0]
            for row in conn.execute("select name from sqlite_master where type = 'table'")
        }
        for table, count_query in count_queries:
            if table in present:
                counted = conn.execute(count_query).fetchone()
                return bool(counted and int(counted[0]) > 0)
        return False
    except sqlite3.Error:
        return True
    finally:
        conn.close()
446
-
447
-
448
def opencode_messages(path: Path) -> Iterator[ThreadMessage]:
    """Yield messages stored in an opencode SQLite database.

    The database is opened read-only. When the ``session``, ``message`` and
    ``part`` tables all exist, rows come from ``opencode_part_messages``;
    when only ``session`` and ``message`` exist, from
    ``opencode_message_rows``. Any ``sqlite3.Error`` ends the iteration
    quietly, and the connection is always closed.

    Args:
        path: Location of the SQLite database.

    Yields:
        Messages from whichever reader applies.
    """
    try:
        conn = connect_sqlite_readonly(path)
    except sqlite3.Error:
        return

    try:
        present = {
            row[0]
            for row in conn.execute("select name from sqlite_master where type = 'table'")
        }
        if present >= {"session", "message", "part"}:
            yield from opencode_part_messages(path, conn)
        elif present >= {"session", "message"}:
            yield from opencode_message_rows(path, conn)
    except sqlite3.Error:
        return
    finally:
        conn.close()
472
-
473
-
474
def opencode_part_messages(path: Path, conn: sqlite3.Connection) -> Iterator[ThreadMessage]:
    """Yield one message per text-bearing ``part`` row of an opencode database.

    Each part is left-joined to its message and session and read in
    ``time_created, id`` order. Parts without text, or whose role is not
    ``user``/``assistant``, are skipped.

    Args:
        path: Database file, recorded on every message.
        conn: Open connection to that database.

    Yields:
        One ``ThreadMessage`` per kept part; ``line`` is the 1-based position
        of the row in the query result.
    """
    query = """
        select
            p.id as part_id,
            p.message_id as message_id,
            p.session_id as session_id,
            p.time_created as part_time_created,
            p.data as part_data,
            m.data as message_data,
            s.directory as directory,
            s.path as session_path,
            s.title as title
        from part p
        left join message m on m.id = p.message_id
        left join session s on s.id = p.session_id
        order by p.time_created, p.id
    """
    for row_no, record in enumerate(conn.execute(query), 1):
        part_id, message_id, session_id, created, raw_part, raw_message, directory, session_path, title = record

        part_payload = decode_sqlite_value(raw_part)
        message_payload = decode_sqlite_value(raw_message)

        body = opencode_part_text(part_payload)
        if not body:
            continue
        role = opencode_role(message_payload, part_payload)
        if role not in {"user", "assistant"}:
            continue

        # Prefer the part id; fall back to the message id, then a synthetic id.
        unique_id = part_id or message_id or f"{session_id}:{row_no}"
        yield ThreadMessage(
            source="opencode",
            thread_id=str(session_id),
            message_id=str(unique_id),
            path=path,
            line=row_no,
            timestamp=timestamp_text(created),
            role=role,
            cwd=str(directory or session_path or ""),
            title=compact_text(str(title or session_id), limit=120),
            text=body,
            metadata={"message_id": message_id},
        )
525
-
526
-
527
def opencode_message_rows(path: Path, conn: sqlite3.Connection) -> Iterator[ThreadMessage]:
    """Yield one message per text-bearing ``message`` row of an opencode database.

    Each message is left-joined to its session and read in
    ``time_created, id`` order. Rows whose role is not ``user``/``assistant``,
    or that have no text, are skipped.

    Args:
        path: Database file, recorded on every message.
        conn: Open connection to that database.

    Yields:
        One ``ThreadMessage`` per kept row; ``line`` is the 1-based position
        of the row in the query result.
    """
    query = """
        select
            m.id as message_id,
            m.session_id as session_id,
            m.time_created as time_created,
            m.data as message_data,
            s.directory as directory,
            s.path as session_path,
            s.title as title
        from message m
        left join session s on s.id = m.session_id
        order by m.time_created, m.id
    """
    for row_no, record in enumerate(conn.execute(query), 1):
        message_id, session_id, created, raw_message, directory, session_path, title = record
        payload = decode_sqlite_value(raw_message)

        role = opencode_role(payload, None)
        if role not in {"user", "assistant"}:
            continue
        body = opencode_part_text(payload)
        if not body:
            continue

        # Rows without an id get a synthetic "<session>:<row>" id.
        unique_id = message_id or f"{session_id}:{row_no}"
        yield ThreadMessage(
            source="opencode",
            thread_id=str(session_id),
            message_id=str(unique_id),
            path=path,
            line=row_no,
            timestamp=timestamp_text(created),
            role=role,
            cwd=str(directory or session_path or ""),
            title=compact_text(str(title or session_id), limit=120),
            text=body,
            metadata={},
        )
564
-
565
-
566
def opencode_role(message_data: Any, part_data: Any) -> str:
    """Return ``"user"`` or ``"assistant"`` for an opencode row, if recorded.

    The message payload is searched before the part payload; within each,
    the ``role``, ``author`` and ``type`` fields are tried in that order.

    Args:
        message_data: Decoded message payload (non-dicts are skipped).
        part_data: Decoded part payload (non-dicts are skipped).

    Returns:
        The first field value that is exactly ``"user"`` or ``"assistant"``,
        or ``""`` when there is none.
    """
    for payload in (message_data, part_data):
        if not isinstance(payload, dict):
            continue
        for field in ("role", "author", "type"):
            candidate = payload.get(field)
            if isinstance(candidate, str) and (candidate == "user" or candidate == "assistant"):
                return candidate
    return ""
574
-
575
-
576
def opencode_part_text(value: Any) -> str:
    """Extract the user-visible text of an opencode part or message payload.

    Args:
        value: Decoded payload.

    Returns:
        For a string, ``content_to_text(value)``. For a dict whose ``type``
        is a tool/step/snapshot kind, ``""``; for a dict with any other
        non-empty ``type``, ``visible_message_text(value)``; for a dict with
        no ``type``, the first non-empty ``visible_message_text`` of its
        ``text``, ``content`` or ``message`` field. Anything else gives ``""``.
    """
    if isinstance(value, str):
        return content_to_text(value)
    if not isinstance(value, dict):
        return ""

    hidden_types = {"tool", "tool_call", "tool_result", "step-start", "step-finish", "snapshot"}
    part_type = str(value.get("type") or "")
    if part_type in hidden_types:
        return ""
    if part_type:
        return visible_message_text(value)

    # Untyped payload: try the usual text-bearing fields, lazily and in order.
    rendered = (visible_message_text(value.get(field)) for field in ("text", "content", "message"))
    return next((text for text in rendered if text), "")