causaliq-knowledge 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- causaliq_knowledge/__init__.py +3 -3
- causaliq_knowledge/cache/__init__.py +18 -0
- causaliq_knowledge/cache/encoders/__init__.py +13 -0
- causaliq_knowledge/cache/encoders/base.py +90 -0
- causaliq_knowledge/cache/encoders/json_encoder.py +418 -0
- causaliq_knowledge/cache/token_cache.py +632 -0
- causaliq_knowledge/cli.py +588 -38
- causaliq_knowledge/llm/__init__.py +39 -10
- causaliq_knowledge/llm/anthropic_client.py +256 -0
- causaliq_knowledge/llm/base_client.py +360 -0
- causaliq_knowledge/llm/cache.py +380 -0
- causaliq_knowledge/llm/deepseek_client.py +108 -0
- causaliq_knowledge/llm/gemini_client.py +117 -39
- causaliq_knowledge/llm/groq_client.py +115 -40
- causaliq_knowledge/llm/mistral_client.py +122 -0
- causaliq_knowledge/llm/ollama_client.py +240 -0
- causaliq_knowledge/llm/openai_client.py +115 -0
- causaliq_knowledge/llm/openai_compat_client.py +287 -0
- causaliq_knowledge/llm/provider.py +99 -46
- {causaliq_knowledge-0.1.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/METADATA +9 -10
- causaliq_knowledge-0.3.0.dist-info/RECORD +28 -0
- {causaliq_knowledge-0.1.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/WHEEL +1 -1
- causaliq_knowledge-0.1.0.dist-info/RECORD +0 -15
- {causaliq_knowledge-0.1.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/entry_points.txt +0 -0
- {causaliq_knowledge-0.1.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {causaliq_knowledge-0.1.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/top_level.txt +0 -0
causaliq_knowledge/cli.py
CHANGED
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
import sys
|
|
7
|
-
from typing import Optional
|
|
7
|
+
from typing import Any, Optional
|
|
8
8
|
|
|
9
9
|
import click
|
|
10
10
|
|
|
@@ -150,54 +150,604 @@ def query_edge(
|
|
|
150
150
|
|
|
151
151
|
|
|
152
152
|
@cli.command("models")
@click.argument("provider", required=False, default=None)
def list_models(provider: Optional[str]) -> None:
    """List available LLM models from each provider.

    Queries each provider's API to show models accessible with your
    current configuration. Results are filtered by your API key's
    access level or locally installed models.

    Optionally specify PROVIDER to list models from a single provider:
    groq, anthropic, gemini, ollama, openai, deepseek, or mistral.

    Examples:

        cqknow models # List all providers

        cqknow models groq # List only Groq models

        cqknow models mistral # List only Mistral models
    """
    # NOTE: the docstring above is the user-facing `--help` text rendered
    # by click, so maintainer notes live in comments instead.
    #
    # Each provider is queried exactly once; the availability flag from
    # that single query is reused for the "Provider Setup" summary, so no
    # provider API is hit twice per invocation.
    from typing import Any, Callable, Dict, List, Optional, Tuple, TypedDict

    from causaliq_knowledge.llm import (
        AnthropicClient,
        AnthropicConfig,
        DeepSeekClient,
        DeepSeekConfig,
        GeminiClient,
        GeminiConfig,
        GroqClient,
        GroqConfig,
        MistralClient,
        MistralConfig,
        OllamaClient,
        OllamaConfig,
        OpenAIClient,
        OpenAIConfig,
    )

    # Type for get_models functions
    GetModelsFunc = Callable[[], Tuple[bool, List[str], Optional[str]]]

    class ProviderInfo(TypedDict):
        name: str
        prefix: str
        env_var: Optional[str]
        url: str
        get_models: GetModelsFunc

    def make_get_models(
        client_cls: Any,
        config_cls: Any,
        prefix: str,
        env_var: Optional[str],
        empty_msg: Optional[str] = None,
    ) -> GetModelsFunc:
        """Build a fetcher returning (available, models, error_msg).

        When ``env_var`` is given, the client's ``is_available()`` is
        checked first and "<env_var> not set" is reported on failure.
        ``empty_msg`` is the hint reported when the provider answers but
        lists no models.
        """

        def get_models() -> Tuple[bool, List[str], Optional[str]]:
            """Returns (available, models, error_msg)."""
            try:
                client = client_cls(config_cls())
                if env_var is not None and not client.is_available():
                    return False, [], f"{env_var} not set"
                models = [f"{prefix}{m}" for m in client.list_models()]
                if not models and empty_msg is not None:
                    return True, [], empty_msg
                return True, models, None
            except ValueError as e:
                return False, [], str(e)

        return get_models

    # (name, prefix, env_var, url, client class, config class, empty hint)
    provider_specs = [
        (
            "Groq",
            "groq/",
            "GROQ_API_KEY",
            "https://console.groq.com",
            GroqClient,
            GroqConfig,
            None,
        ),
        (
            "Anthropic",
            "anthropic/",
            "ANTHROPIC_API_KEY",
            "https://console.anthropic.com",
            AnthropicClient,
            AnthropicConfig,
            None,
        ),
        (
            "Gemini",
            "gemini/",
            "GEMINI_API_KEY",
            "https://aistudio.google.com",
            GeminiClient,
            GeminiConfig,
            None,
        ),
        (
            "Ollama (Local)",
            "ollama/",
            None,
            "https://ollama.ai",
            OllamaClient,
            OllamaConfig,
            "No models installed. Run: ollama pull <model>",
        ),
        (
            "OpenAI",
            "openai/",
            "OPENAI_API_KEY",
            "https://platform.openai.com",
            OpenAIClient,
            OpenAIConfig,
            None,
        ),
        (
            "DeepSeek",
            "deepseek/",
            "DEEPSEEK_API_KEY",
            "https://platform.deepseek.com",
            DeepSeekClient,
            DeepSeekConfig,
            None,
        ),
        (
            "Mistral",
            "mistral/",
            "MISTRAL_API_KEY",
            "https://console.mistral.ai",
            MistralClient,
            MistralConfig,
            None,
        ),
    ]

    providers: List[ProviderInfo] = [
        {
            "name": name,
            "prefix": prefix,
            "env_var": env_var,
            "url": url,
            "get_models": make_get_models(
                client_cls, config_cls, prefix, env_var, empty_msg
            ),
        }
        for (
            name,
            prefix,
            env_var,
            url,
            client_cls,
            config_cls,
            empty_msg,
        ) in provider_specs
    ]

    # Filter providers if a specific one is requested. The valid names are
    # derived from the table so the two can never drift apart.
    valid_provider_names = [p["prefix"].rstrip("/") for p in providers]
    if provider:
        provider_lower = provider.lower()
        if provider_lower not in valid_provider_names:
            click.echo(
                f"Unknown provider: {provider}. "
                f"Valid options: {', '.join(valid_provider_names)}",
                err=True,
            )
            sys.exit(1)
        providers = [
            p for p in providers if p["prefix"].rstrip("/") == provider_lower
        ]

    click.echo("\nAvailable LLM Models:\n")

    any_available = False
    # Remember each provider's availability for the setup summary below.
    availability: Dict[str, bool] = {}
    for prov in providers:
        available, models, error = prov["get_models"]()
        availability[prov["prefix"]] = available

        if available and models:
            any_available = True
            status = click.style("[OK]", fg="green")
            count = len(models)
            click.echo(f" {status} {prov['name']} ({count} models):")
            for m in models:
                click.echo(f" {m}")
        elif available:
            # Reachable but nothing to list (e.g. no local models pulled).
            status = click.style("[!]", fg="yellow")
            click.echo(f" {status} {prov['name']}:")
            click.echo(f" {error}")
        else:
            status = click.style("[X]", fg="red")
            click.echo(f" {status} {prov['name']}:")
            click.echo(f" {error}")

        click.echo()

    click.echo("Provider Setup:")
    for prov in providers:
        available = availability[prov["prefix"]]
        color = "green" if available else "yellow"
        if prov["env_var"]:
            status = "configured" if available else "not set"
            click.echo(
                f" {prov['env_var']}: "
                f"{click.style(status, fg=color)} - {prov['url']}"
            )
        else:
            # Providers without an API key are local servers (Ollama).
            status = "running" if available else "not running"
            click.echo(
                " Ollama server: "
                f"{click.style(status, fg=color)} - {prov['url']}"
            )

    click.echo()
    click.echo(
        click.style("Note: ", fg="yellow")
        + "Some models may require a paid plan. "
        + "Free tier availability varies by provider."
    )
    click.echo()
    if any_available:
        click.echo("Default model: groq/llama-3.1-8b-instant")
    click.echo()
|
|
199
406
|
|
|
200
407
|
|
|
408
|
+
# ============================================================================
|
|
409
|
+
# Cache Commands
|
|
410
|
+
# ============================================================================
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
@cli.group("cache")
def cache_group() -> None:
    """Manage the LLM response cache.

    Commands for inspecting, exporting, and importing cached LLM responses.

    Examples:

        cqknow cache stats ./llm_cache.db

        cqknow cache export ./llm_cache.db ./export_dir

        cqknow cache import ./llm_cache.db ./import_dir
    """
    # Pure command group: it only hosts the stats/export/import
    # subcommands registered on it and performs no work of its own.
    return None
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
@cache_group.command("stats")
@click.argument("cache_path", type=click.Path(exists=True))
@click.option(
    "--json",
    "output_json",
    is_flag=True,
    help="Output result as JSON.",
)
def cache_stats(cache_path: str, output_json: bool) -> None:
    """Show cache statistics.

    CACHE_PATH is the path to the SQLite cache database.

    Examples:

        cqknow cache stats ./llm_cache.db

        cqknow cache stats ./llm_cache.db --json
    """
    # Imported lazily so the CLI module stays cheap to import.
    from causaliq_knowledge.cache import TokenCache

    try:
        with TokenCache(cache_path) as store:
            n_entries = store.entry_count()
            n_tokens = store.token_count()

            if not output_json:
                # Human-readable report.
                click.echo(f"\nCache: {cache_path}")
                click.echo("=" * 40)
                click.echo(f"Entries: {n_entries:,}")
                click.echo(f"Tokens: {n_tokens:,}")
                click.echo()
            else:
                # Machine-readable report.
                report = {
                    "cache_path": cache_path,
                    "entry_count": n_entries,
                    "token_count": n_tokens,
                }
                click.echo(json.dumps(report, indent=2))
    except Exception as exc:
        # Any failure is reported on stderr and exits with status 1.
        click.echo(f"Error opening cache: {exc}", err=True)
        sys.exit(1)
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
@cache_group.command("export")
@click.argument("cache_path", type=click.Path(exists=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "--json",
    "output_json",
    is_flag=True,
    help="Output result as JSON.",
)
def cache_export(cache_path: str, output_dir: str, output_json: bool) -> None:
    """Export cache entries to human-readable files.

    CACHE_PATH is the path to the SQLite cache database.
    OUTPUT_DIR is the directory or zip file where files will be written.

    If OUTPUT_DIR ends with .zip, entries are exported to a zip archive.
    Otherwise, entries are exported to a directory.

    Files are named using a human-readable format:
    {model}_{node_a}_{node_b}_edge_{hash}.json

    Examples:

        cqknow cache export ./llm_cache.db ./export_dir

        cqknow cache export ./llm_cache.db ./export.zip

        cqknow cache export ./llm_cache.db ./export_dir --json
    """
    # NOTE: the docstring above is the user-facing `--help` text rendered
    # by click, so maintainer notes live in comments instead.
    #
    # For zip output the entries are first written to a temporary staging
    # directory. That directory is removed in the `finally` clause so it
    # is cleaned up even when exporting or zipping fails part-way.
    import shutil
    import tempfile
    import zipfile
    from pathlib import Path

    from causaliq_knowledge.cache import TokenCache
    from causaliq_knowledge.cache.encoders import JsonEncoder
    from causaliq_knowledge.llm.cache import LLMCacheEntry, LLMEntryEncoder

    output_path = Path(output_dir)
    is_zip = output_path.suffix.lower() == ".zip"
    temp_dir: Optional[str] = None  # staging dir, only set for zip output

    try:
        with TokenCache(cache_path) as cache:
            # Register encoders for decoding
            encoder = LLMEntryEncoder()
            cache.register_encoder("llm", encoder)

            # Register generic JsonEncoder for other types
            json_encoder = JsonEncoder()
            cache.register_encoder("json", json_encoder)

            # Get entry types in the cache
            entry_types = cache.list_entry_types()

            if not entry_types:
                if output_json:
                    click.echo(json.dumps({"exported": 0, "error": None}))
                else:
                    click.echo("No entries to export.")
                return

            # Determine export directory (temp if zipping)
            if is_zip:
                temp_dir = tempfile.mkdtemp()
                export_dir = Path(temp_dir)
            else:
                export_dir = output_path
                export_dir.mkdir(parents=True, exist_ok=True)

            # Export entries
            exported = 0
            for entry_type in entry_types:
                if entry_type == "llm":
                    # LLM entries get descriptive filenames, so they are
                    # read and written one by one.
                    cursor = cache.conn.execute(
                        "SELECT hash, data FROM cache_entries "
                        "WHERE entry_type = ?",
                        (entry_type,),
                    )
                    for cache_key, blob in cursor:
                        data = encoder.decode(blob, cache)
                        entry = LLMCacheEntry.from_dict(data)
                        filename = encoder.generate_export_filename(
                            entry, cache_key
                        )
                        encoder.export_entry(entry, export_dir / filename)
                        exported += 1
                else:
                    # For non-LLM types, use generic export
                    exported += cache.export_entries(export_dir, entry_type)

            # Create zip archive if requested
            if is_zip:
                output_path.parent.mkdir(parents=True, exist_ok=True)
                with zipfile.ZipFile(
                    output_path, "w", zipfile.ZIP_DEFLATED
                ) as zf:
                    # Sorted so archive member order is reproducible.
                    for file_path in sorted(export_dir.iterdir()):
                        if file_path.is_file():
                            zf.write(file_path, file_path.name)

            # Output results
            if output_json:
                output = {
                    "cache_path": cache_path,
                    "output_path": str(output_path),
                    "format": "zip" if is_zip else "directory",
                    "exported": exported,
                    "entry_types": entry_types,
                }
                click.echo(json.dumps(output, indent=2))
            else:
                fmt = "zip archive" if is_zip else "directory"
                click.echo(
                    f"\nExported {exported} entries to {fmt}: {output_path}"
                )
                click.echo(f"Entry types: {', '.join(entry_types)}")
                click.echo()

    except Exception as e:
        click.echo(f"Error exporting cache: {e}", err=True)
        sys.exit(1)
    finally:
        # Runs on success, on error and on sys.exit(); a cleanup failure
        # must not mask the real outcome of the export.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def _is_llm_entry(data: Any) -> bool:
|
|
605
|
+
"""Check if JSON data represents an LLM cache entry.
|
|
606
|
+
|
|
607
|
+
LLM entries have a specific structure with cache_key containing
|
|
608
|
+
model and messages, plus a response object.
|
|
609
|
+
"""
|
|
610
|
+
if not isinstance(data, dict):
|
|
611
|
+
return False
|
|
612
|
+
cache_key = data.get("cache_key", {})
|
|
613
|
+
return (
|
|
614
|
+
isinstance(cache_key, dict)
|
|
615
|
+
and "model" in cache_key
|
|
616
|
+
and "messages" in cache_key
|
|
617
|
+
and "response" in data
|
|
618
|
+
)
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
@cache_group.command("import")
@click.argument("cache_path", type=click.Path())
@click.argument("input_path", type=click.Path(exists=True))
@click.option(
    "--json",
    "output_json",
    is_flag=True,
    help="Output result as JSON.",
)
def cache_import(cache_path: str, input_path: str, output_json: bool) -> None:
    """Import cache entries from files.

    CACHE_PATH is the path to the SQLite cache database (created if needed).
    INPUT_PATH is a directory or zip file containing JSON files to import.

    Entry types are auto-detected from JSON structure:
    - LLM entries: contain cache_key.model, cache_key.messages, response
    - Generic JSON: anything else

    Examples:

        cqknow cache import ./llm_cache.db ./import_dir

        cqknow cache import ./llm_cache.db ./export.zip

        cqknow cache import ./llm_cache.db ./import_dir --json
    """
    # NOTE: the docstring above is the user-facing `--help` text rendered
    # by click, so maintainer notes live in comments instead.
    #
    # Zip input is extracted to a temporary directory. That directory is
    # removed in the `finally` clause so it is cleaned up even when
    # extraction or a cache write fails part-way.
    import hashlib
    import shutil
    import tempfile
    import zipfile
    from pathlib import Path

    from causaliq_knowledge.cache import TokenCache
    from causaliq_knowledge.cache.encoders import JsonEncoder
    from causaliq_knowledge.llm.cache import LLMEntryEncoder

    input_file = Path(input_path)
    is_zip = input_file.suffix.lower() == ".zip"
    temp_dir: Optional[str] = None  # extraction dir, only set for zip input

    try:
        with TokenCache(cache_path) as cache:
            # Register encoders
            llm_encoder = LLMEntryEncoder()
            json_encoder = JsonEncoder()
            cache.register_encoder("llm", llm_encoder)
            cache.register_encoder("json", json_encoder)

            # Determine input directory
            if is_zip:
                temp_dir = tempfile.mkdtemp()
                import_dir = Path(temp_dir)
                with zipfile.ZipFile(input_file, "r") as zf:
                    zf.extractall(import_dir)
            else:
                import_dir = input_file

            # Import entries
            imported = 0
            llm_count = 0
            json_count = 0
            skipped = 0

            # Sorted so that files mapping to the same key are always
            # applied in the same order, whatever the filesystem returns.
            for file_path in sorted(import_dir.iterdir()):
                if (
                    not file_path.is_file()
                    or file_path.suffix.lower() != ".json"
                ):
                    continue

                try:
                    data = json.loads(file_path.read_text(encoding="utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Unparseable files are counted, not fatal.
                    skipped += 1
                    continue

                # Detect entry type and generate cache key
                if _is_llm_entry(data):
                    # LLM entry - generate hash from cache_key contents
                    cache_key_data = data.get("cache_key", {})
                    key_str = json.dumps(cache_key_data, sort_keys=True)
                    digest = hashlib.sha256(key_str.encode()).hexdigest()
                    cache_key = digest[:16]
                    cache.put_data(cache_key, "llm", data)
                    llm_count += 1
                else:
                    # Generic JSON - use filename stem as key
                    cache_key = file_path.stem
                    cache.put_data(cache_key, "json", data)
                    json_count += 1

                imported += 1

            # Output results
            if output_json:
                output = {
                    "cache_path": cache_path,
                    "input_path": str(input_file),
                    "format": "zip" if is_zip else "directory",
                    "imported": imported,
                    "llm_entries": llm_count,
                    "json_entries": json_count,
                    "skipped": skipped,
                }
                click.echo(json.dumps(output, indent=2))
            else:
                fmt = "zip archive" if is_zip else "directory"
                click.echo(
                    f"\nImported {imported} entries from {fmt}: {input_file}"
                )
                if llm_count:
                    click.echo(f" LLM entries: {llm_count}")
                if json_count:
                    click.echo(f" JSON entries: {json_count}")
                if skipped:
                    click.echo(f" Skipped: {skipped}")
                click.echo()

    except Exception as e:
        click.echo(f"Error importing cache: {e}", err=True)
        sys.exit(1)
    finally:
        # Runs on success, on error and on sys.exit(); a cleanup failure
        # must not mask the real outcome of the import.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
|
749
|
+
|
|
750
|
+
|
|
201
751
|
def main() -> None:
    """Run the command-line interface by invoking the ``cli`` group."""
    # Kept as a thin wrapper around the click group.
    cli()
|