causaliq_knowledge-0.2.0-py3-none-any.whl → causaliq_knowledge-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. causaliq_knowledge/__init__.py +6 -3
  2. causaliq_knowledge/action.py +480 -0
  3. causaliq_knowledge/cache/__init__.py +18 -0
  4. causaliq_knowledge/cache/encoders/__init__.py +13 -0
  5. causaliq_knowledge/cache/encoders/base.py +90 -0
  6. causaliq_knowledge/cache/encoders/json_encoder.py +430 -0
  7. causaliq_knowledge/cache/token_cache.py +666 -0
  8. causaliq_knowledge/cli/__init__.py +15 -0
  9. causaliq_knowledge/cli/cache.py +478 -0
  10. causaliq_knowledge/cli/generate.py +410 -0
  11. causaliq_knowledge/cli/main.py +172 -0
  12. causaliq_knowledge/cli/models.py +309 -0
  13. causaliq_knowledge/graph/__init__.py +78 -0
  14. causaliq_knowledge/graph/generator.py +457 -0
  15. causaliq_knowledge/graph/loader.py +222 -0
  16. causaliq_knowledge/graph/models.py +426 -0
  17. causaliq_knowledge/graph/params.py +175 -0
  18. causaliq_knowledge/graph/prompts.py +445 -0
  19. causaliq_knowledge/graph/response.py +392 -0
  20. causaliq_knowledge/graph/view_filter.py +154 -0
  21. causaliq_knowledge/llm/base_client.py +147 -1
  22. causaliq_knowledge/llm/cache.py +443 -0
  23. causaliq_knowledge/py.typed +0 -0
  24. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/METADATA +10 -6
  25. causaliq_knowledge-0.4.0.dist-info/RECORD +42 -0
  26. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/WHEEL +1 -1
  27. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/entry_points.txt +3 -0
  28. causaliq_knowledge/cli.py +0 -414
  29. causaliq_knowledge-0.2.0.dist-info/RECORD +0 -22
  30. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/licenses/LICENSE +0 -0
  31. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/top_level.txt +0 -0
causaliq_knowledge/cli/cache.py
@@ -0,0 +1,478 @@
+ """Cache management CLI commands.
+
+ This module provides commands for managing the LLM response cache:
+ - stats: Show cache statistics
+ - export: Export cache entries to files
+ - import: Import cache entries from files
+ """
+
+ from __future__ import annotations
+
+ import json
+ import sys
+ from typing import Any
+
+ import click
+
+
+ @click.group("cache")
+ def cache_group() -> None:
+     """Manage the LLM response cache.
+
+     Commands for inspecting, exporting, and importing cached LLM responses.
+
+     Examples:
+
+         cqknow cache stats ./llm_cache.db
+
+         cqknow cache export ./llm_cache.db ./export_dir
+
+         cqknow cache import ./llm_cache.db ./import_dir
+     """
+     pass
+
+
+ @cache_group.command("stats")
+ @click.argument("cache_path", type=click.Path(exists=True))
+ @click.option(
+     "--json",
+     "output_json",
+     is_flag=True,
+     help="Output result as JSON.",
+ )
+ def cache_stats(cache_path: str, output_json: bool) -> None:
+     """Show cache statistics.
+
+     CACHE_PATH is the path to the SQLite cache database.
+
+     Shows entry counts, token dictionary size, cache hit statistics,
+     and for LLM caches: breakdown by model with token usage and costs.
+
+     Examples:
+
+         cqknow cache stats ./llm_cache.db
+
+         cqknow cache stats ./llm_cache.db --json
+     """
+     from causaliq_knowledge.cache import TokenCache
+     from causaliq_knowledge.llm.cache import LLMCacheEntry, LLMEntryEncoder
+
+     try:
+         with TokenCache(cache_path) as cache:
+             entry_count = cache.entry_count()
+             token_count = cache.token_count()
+             total_hits = cache.total_hits()
+
+             # Aggregate LLM-specific stats by model
+             model_stats: dict[str, dict[str, Any]] = {}
+             total_cost = 0.0
+             total_input_tokens = 0
+             total_output_tokens = 0
+
+             if cache.has_encoder("llm") or entry_count > 0:
+                 # Register encoder if needed
+                 if not cache.has_encoder("llm"):
+                     cache.register_encoder("llm", LLMEntryEncoder())
+
+                 # Query all LLM entries with hit counts
+                 cursor = cache.conn.execute(
+                     "SELECT data, hit_count FROM cache_entries "
+                     "WHERE entry_type = 'llm'"
+                 )
+                 encoder = LLMEntryEncoder()
+
+                 for row in cursor.fetchall():
+                     try:
+                         data = encoder.decode(row[0], cache)
+                         entry = LLMCacheEntry.from_dict(data)
+                         hit_count = row[1] or 0
+
+                         model = entry.model
+                         if model not in model_stats:
+                             model_stats[model] = {
+                                 "provider": entry.metadata.provider,
+                                 "entries": 0,
+                                 "hits": 0,
+                                 "input_tokens": 0,
+                                 "output_tokens": 0,
+                                 "cost_usd": 0.0,
+                                 "avg_latency_ms": 0,
+                                 "total_latency_ms": 0,
+                             }
+
+                         stats = model_stats[model]
+                         stats["entries"] += 1
+                         stats["hits"] += hit_count
+                         stats["input_tokens"] += entry.metadata.tokens.input
+                         stats["output_tokens"] += entry.metadata.tokens.output
+                         stats["cost_usd"] += entry.metadata.cost_usd
+                         stats["total_latency_ms"] += entry.metadata.latency_ms
+
+                         total_cost += entry.metadata.cost_usd
+                         total_input_tokens += entry.metadata.tokens.input
+                         total_output_tokens += entry.metadata.tokens.output
+                     except Exception:
+                         # Skip entries that can't be decoded
+                         pass
+
+                 # Calculate averages
+                 for stats in model_stats.values():
+                     if stats["entries"] > 0:
+                         stats["avg_latency_ms"] = (
+                             stats["total_latency_ms"] // stats["entries"]
+                         )
+                     del stats["total_latency_ms"]
+
+             # Calculate savings (cost avoided by cache hits)
+             total_requests = sum(
+                 s["entries"] + s["hits"] for s in model_stats.values()
+             )
+             if total_requests > 0 and entry_count > 0:
+                 avg_cost_per_request = total_cost / entry_count
+                 savings = total_hits * avg_cost_per_request
+             else:
+                 savings = 0.0
+
+             if output_json:
+                 output: dict[str, Any] = {
+                     "cache_path": cache_path,
+                     "summary": {
+                         "entry_count": entry_count,
+                         "token_count": token_count,
+                         "total_hits": total_hits,
+                         "total_cost_usd": round(total_cost, 6),
+                         "estimated_savings_usd": round(savings, 6),
+                         "total_input_tokens": total_input_tokens,
+                         "total_output_tokens": total_output_tokens,
+                     },
+                     "by_model": model_stats,
+                 }
+                 click.echo(json.dumps(output, indent=2))
+             else:
+                 click.echo(f"\nCache: {cache_path}")
+                 click.echo("=" * 60)
+                 click.echo(f"Entries: {entry_count:,}")
+                 click.echo(f"Token dictionary: {token_count:,}")
+                 click.echo(f"Total cache hits: {total_hits:,}")
+
+                 if model_stats:
+                     click.echo(f"\nTotal cost: ${total_cost:.4f}")
+                     click.echo(f"Est. savings: ${savings:.4f}")
+                     click.echo(
+                         f"Total tokens: {total_input_tokens:,} in / "
+                         f"{total_output_tokens:,} out"
+                     )
+
+                     # Table header
+                     click.echo()
+                     click.echo(
+                         f"{'Model':<32} {'Entries':>8} {'Hits':>8} "
+                         f"{'Hit Rate':>8} {'Tokens In':>12} "
+                         f"{'Tokens Out':>12} "
+                         f"{'Cost':>10} {'Latency':>10}"
+                     )
+                     click.echo("-" * 114)
+
+                     for model, stats in sorted(model_stats.items()):
+                         hit_rate = (
+                             stats["hits"]
+                             / (stats["entries"] + stats["hits"])
+                             * 100
+                             if (stats["entries"] + stats["hits"]) > 0
+                             else 0
+                         )
+                         # Truncate model name if too long
+                         model_display = (
+                             model[:29] + "..." if len(model) > 32 else model
+                         )
+                         cost_str = f"${stats['cost_usd']:.4f}"
+                         latency_str = f"{stats['avg_latency_ms']:,} ms"
+                         click.echo(
+                             f"{model_display:<32} {stats['entries']:>8,} "
+                             f"{stats['hits']:>8,} {hit_rate:>7.1f}% "
+                             f"{stats['input_tokens']:>12,} "
+                             f"{stats['output_tokens']:>12,} "
+                             f"{cost_str:>10} {latency_str:>10}"
+                         )
+
+                 click.echo()
+     except Exception as e:
+         click.echo(f"Error opening cache: {e}", err=True)
+         sys.exit(1)
+
+
+ @cache_group.command("export")
+ @click.argument("cache_path", type=click.Path(exists=True))
+ @click.argument("output_dir", type=click.Path())
+ @click.option(
+     "--json",
+     "output_json",
+     is_flag=True,
+     help="Output result as JSON.",
+ )
+ def cache_export(cache_path: str, output_dir: str, output_json: bool) -> None:
+     """Export cache entries to human-readable files.
+
+     CACHE_PATH is the path to the SQLite cache database.
+     OUTPUT_DIR is the directory or zip file where files will be written.
+
+     If OUTPUT_DIR ends with .zip, entries are exported to a zip archive.
+     Otherwise, entries are exported to a directory.
+
+     Files are named using a human-readable format:
+         {model}_{node_a}_{node_b}_edge_{hash}.json
+
+     Examples:
+
+         cqknow cache export ./llm_cache.db ./export_dir
+
+         cqknow cache export ./llm_cache.db ./export.zip
+
+         cqknow cache export ./llm_cache.db ./export_dir --json
+     """
+     import tempfile
+     import zipfile
+     from pathlib import Path
+
+     from causaliq_knowledge.cache import TokenCache
+     from causaliq_knowledge.llm.cache import LLMCacheEntry, LLMEntryEncoder
+
+     output_path = Path(output_dir)
+     is_zip = output_path.suffix.lower() == ".zip"
+
+     try:
+         with TokenCache(cache_path) as cache:
+             # Register encoders for decoding
+             encoder = LLMEntryEncoder()
+             cache.register_encoder("llm", encoder)
+
+             # Register generic JsonEncoder for other types
+             from causaliq_knowledge.cache.encoders import JsonEncoder
+
+             json_encoder = JsonEncoder()
+             cache.register_encoder("json", json_encoder)
+
+             # Get entry types in the cache
+             entry_types = cache.list_entry_types()
+
+             if not entry_types:
+                 if output_json:
+                     click.echo(json.dumps({"exported": 0, "error": None}))
+                 else:
+                     click.echo("No entries to export.")
+                 return
+
+             # Determine export directory (temp if zipping)
+             if is_zip:
+                 temp_dir = tempfile.mkdtemp()
+                 export_dir = Path(temp_dir)
+             else:
+                 export_dir = output_path
+                 export_dir.mkdir(parents=True, exist_ok=True)
+
+             # Export entries
+             exported = 0
+             for entry_type in entry_types:
+                 if entry_type == "llm":
+                     # Query all entries of this type
+                     cursor = cache.conn.execute(
+                         "SELECT hash, data FROM cache_entries "
+                         "WHERE entry_type = ?",
+                         (entry_type,),
+                     )
+                     for cache_key, blob in cursor:
+                         data = encoder.decode(blob, cache)
+                         entry = LLMCacheEntry.from_dict(data)
+                         filename = encoder.generate_export_filename(
+                             entry, cache_key
+                         )
+                         file_path = export_dir / filename
+                         encoder.export_entry(entry, file_path)
+                         exported += 1
+                 else:
+                     # For non-LLM types, use generic export
+                     count = cache.export_entries(export_dir, entry_type)
+                     exported += count
+
+             # Create zip archive if requested
+             if is_zip:
+                 output_path.parent.mkdir(parents=True, exist_ok=True)
+                 with zipfile.ZipFile(
+                     output_path, "w", zipfile.ZIP_DEFLATED
+                 ) as zf:
+                     for file_path in export_dir.iterdir():
+                         if file_path.is_file():
+                             zf.write(file_path, file_path.name)
+                 # Clean up temp directory
+                 import shutil
+
+                 shutil.rmtree(temp_dir)
+
+             # Output results
+             if output_json:
+                 output = {
+                     "cache_path": cache_path,
+                     "output_path": str(output_path),
+                     "format": "zip" if is_zip else "directory",
+                     "exported": exported,
+                     "entry_types": entry_types,
+                 }
+                 click.echo(json.dumps(output, indent=2))
+             else:
+                 fmt = "zip archive" if is_zip else "directory"
+                 click.echo(
+                     f"\nExported {exported} entries to {fmt}: {output_path}"
+                 )
+                 click.echo(f"Entry types: {', '.join(entry_types)}")
+                 click.echo()
+
+     except Exception as e:
+         click.echo(f"Error exporting cache: {e}", err=True)
+         sys.exit(1)
+
+
+ def _is_llm_entry(data: Any) -> bool:
+     """Check if JSON data represents an LLM cache entry.
+
+     LLM entries have a specific structure with cache_key containing
+     model and messages, plus a response object.
+     """
+     if not isinstance(data, dict):
+         return False
+     cache_key = data.get("cache_key", {})
+     return (
+         isinstance(cache_key, dict)
+         and "model" in cache_key
+         and "messages" in cache_key
+         and "response" in data
+     )
+
+
+ @cache_group.command("import")
+ @click.argument("cache_path", type=click.Path())
+ @click.argument("input_path", type=click.Path(exists=True))
+ @click.option(
+     "--json",
+     "output_json",
+     is_flag=True,
+     help="Output result as JSON.",
+ )
+ def cache_import(cache_path: str, input_path: str, output_json: bool) -> None:
+     """Import cache entries from files.
+
+     CACHE_PATH is the path to the SQLite cache database (created if needed).
+     INPUT_PATH is a directory or zip file containing JSON files to import.
+
+     Entry types are auto-detected from JSON structure:
+     - LLM entries: contain cache_key.model, cache_key.messages, response
+     - Generic JSON: anything else
+
+     Examples:
+
+         cqknow cache import ./llm_cache.db ./import_dir
+
+         cqknow cache import ./llm_cache.db ./export.zip
+
+         cqknow cache import ./llm_cache.db ./import_dir --json
+     """
+     import hashlib
+     import tempfile
+     import zipfile
+     from pathlib import Path
+
+     from causaliq_knowledge.cache import TokenCache
+     from causaliq_knowledge.cache.encoders import JsonEncoder
+     from causaliq_knowledge.llm.cache import LLMEntryEncoder
+
+     input_file = Path(input_path)
+     is_zip = input_file.suffix.lower() == ".zip"
+
+     try:
+         with TokenCache(cache_path) as cache:
+             # Register encoders
+             llm_encoder = LLMEntryEncoder()
+             json_encoder = JsonEncoder()
+             cache.register_encoder("llm", llm_encoder)
+             cache.register_encoder("json", json_encoder)
+
+             # Determine input directory
+             if is_zip:
+                 temp_dir = tempfile.mkdtemp()
+                 import_dir = Path(temp_dir)
+                 with zipfile.ZipFile(input_file, "r") as zf:
+                     zf.extractall(import_dir)
+             else:
+                 import_dir = input_file
+                 temp_dir = None
+
+             # Import entries
+             imported = 0
+             llm_count = 0
+             json_count = 0
+             skipped = 0
+
+             for file_path in import_dir.iterdir():
+                 if (
+                     not file_path.is_file()
+                     or file_path.suffix.lower() != ".json"
+                 ):
+                     continue
+
+                 try:
+                     data = json.loads(file_path.read_text(encoding="utf-8"))
+                 except (json.JSONDecodeError, UnicodeDecodeError):
+                     skipped += 1
+                     continue
+
+                 # Detect entry type and generate cache key
+                 if _is_llm_entry(data):
+                     # LLM entry - generate hash from cache_key contents
+                     cache_key_data = data.get("cache_key", {})
+                     key_str = json.dumps(cache_key_data, sort_keys=True)
+                     cache_key = hashlib.sha256(key_str.encode()).hexdigest()[
+                         :16
+                     ]
+                     cache.put_data(cache_key, "llm", data)
+                     llm_count += 1
+                 else:
+                     # Generic JSON - use filename stem as key
+                     cache_key = file_path.stem
+                     cache.put_data(cache_key, "json", data)
+                     json_count += 1
+
+                 imported += 1
+
+             # Clean up temp directory
+             if temp_dir:
+                 import shutil
+
+                 shutil.rmtree(temp_dir)
+
+             # Output results
+             if output_json:
+                 output = {
+                     "cache_path": cache_path,
+                     "input_path": str(input_file),
+                     "format": "zip" if is_zip else "directory",
+                     "imported": imported,
+                     "llm_entries": llm_count,
+                     "json_entries": json_count,
+                     "skipped": skipped,
+                 }
+                 click.echo(json.dumps(output, indent=2))
+             else:
+                 fmt = "zip archive" if is_zip else "directory"
+                 click.echo(
+                     f"\nImported {imported} entries from {fmt}: {input_file}"
+                 )
+                 if llm_count:
+                     click.echo(f" LLM entries: {llm_count}")
+                 if json_count:
+                     click.echo(f" JSON entries: {json_count}")
+                 if skipped:
+                     click.echo(f" Skipped: {skipped}")
+                 click.echo()
+
+     except Exception as e:
+         click.echo(f"Error importing cache: {e}", err=True)
+         sys.exit(1)
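
A usage sketch for the new cache commands follows. It is not part of the released package and assumes only what the diff above shows: the Click group is importable as causaliq_knowledge.cli.cache.cache_group, import creates the database if needed, and _is_llm_entry classifies a file by the presence of cache_key.model, cache_key.messages, and a top-level "response" key. The JSON field values are illustrative; a real exported entry also carries provider, token, and cost metadata, so the encoder may reject this stub.

import json
from pathlib import Path

from click.testing import CliRunner

from causaliq_knowledge.cli.cache import cache_group

runner = CliRunner()
with runner.isolated_filesystem():
    # Shape a file so that _is_llm_entry() classifies it as an LLM entry.
    Path("import_dir").mkdir()
    entry = {
        "cache_key": {
            "model": "gpt-4o-mini",  # illustrative value
            "messages": [{"role": "user", "content": "Does X cause Y?"}],
        },
        "response": {"content": "yes"},  # illustrative value
    }
    Path("import_dir/example.json").write_text(
        json.dumps(entry, indent=2), encoding="utf-8"
    )

    # Import auto-creates llm_cache.db; --json emits machine-readable counts.
    result = runner.invoke(
        cache_group, ["import", "llm_cache.db", "import_dir", "--json"]
    )
    print(result.output)  # expect "imported": 1 and "llm_entries": 1

    # Read the statistics back as a human-readable table.
    result = runner.invoke(cache_group, ["stats", "llm_cache.db"])
    print(result.output)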