causaliq-knowledge 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff compares the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
causaliq_knowledge/cli.py CHANGED
@@ -4,7 +4,7 @@ from __future__ import annotations
 
 import json
 import sys
-from typing import Optional
+from typing import Any, Optional
 
 import click
 
@@ -150,54 +150,604 @@ def query_edge(
 
 
 @cli.command("models")
-def list_models() -> None:
-    """List supported LLM models.
+@click.argument("provider", required=False, default=None)
+def list_models(provider: Optional[str]) -> None:
+    """List available LLM models from each provider.
 
-    These are model identifiers that work with our direct API clients.
-    Only models with direct API support are listed.
+    Queries each provider's API to show models accessible with your
+    current configuration. Results are filtered by your API key's
+    access level or locally installed models.
+
+    Optionally specify PROVIDER to list models from a single provider:
+    groq, anthropic, gemini, ollama, openai, deepseek, or mistral.
+
+    Examples:
+
+        cqknow models            # List all providers
+
+        cqknow models groq       # List only Groq models
+
+        cqknow models mistral    # List only Mistral models
     """
-    models = [
-        (
-            "Groq (Fast, Free Tier Available)",
-            [
-                "groq/llama-3.1-8b-instant",
-                "groq/llama-3.1-70b-versatile",
-                "groq/llama-3.2-1b-preview",
-                "groq/llama-3.2-3b-preview",
-                "groq/mixtral-8x7b-32768",
-                "groq/gemma-7b-it",
-                "groq/gemma2-9b-it",
-            ],
-        ),
-        (
-            "Google Gemini (Free Tier Available)",
-            [
-                "gemini/gemini-2.5-flash",
-                "gemini/gemini-1.5-pro",
-                "gemini/gemini-1.5-flash",
-                "gemini/gemini-1.5-flash-8b",
-            ],
-        ),
+    from typing import Callable, List, Optional, Tuple, TypedDict
+
+    from causaliq_knowledge.llm import (
+        AnthropicClient,
+        AnthropicConfig,
+        DeepSeekClient,
+        DeepSeekConfig,
+        GeminiClient,
+        GeminiConfig,
+        GroqClient,
+        GroqConfig,
+        MistralClient,
+        MistralConfig,
+        OllamaClient,
+        OllamaConfig,
+        OpenAIClient,
+        OpenAIConfig,
+    )
+
+    # Type for get_models functions
+    GetModelsFunc = Callable[[], Tuple[bool, List[str], Optional[str]]]
+
+    class ProviderInfo(TypedDict):
+        name: str
+        prefix: str
+        env_var: Optional[str]
+        url: str
+        get_models: GetModelsFunc
+
+    def get_groq_models() -> Tuple[bool, List[str], Optional[str]]:
+        """Returns (available, models, error_msg)."""
+        try:
+            client = GroqClient(GroqConfig())
+            if not client.is_available():
+                return False, [], "GROQ_API_KEY not set"
+            models = [f"groq/{m}" for m in client.list_models()]
+            return True, models, None
+        except ValueError as e:
+            return False, [], str(e)
+
+    def get_anthropic_models() -> Tuple[bool, List[str], Optional[str]]:
+        """Returns (available, models, error_msg)."""
+        try:
+            client = AnthropicClient(AnthropicConfig())
+            if not client.is_available():
+                return False, [], "ANTHROPIC_API_KEY not set"
+            models = [f"anthropic/{m}" for m in client.list_models()]
+            return True, models, None
+        except ValueError as e:
+            return False, [], str(e)
+
+    def get_gemini_models() -> Tuple[bool, List[str], Optional[str]]:
+        """Returns (available, models, error_msg)."""
+        try:
+            client = GeminiClient(GeminiConfig())
+            if not client.is_available():
+                return False, [], "GEMINI_API_KEY not set"
+            models = [f"gemini/{m}" for m in client.list_models()]
+            return True, models, None
+        except ValueError as e:
+            return False, [], str(e)
+
+    def get_ollama_models() -> Tuple[bool, List[str], Optional[str]]:
+        """Returns (available, models, error_msg)."""
+        try:
+            client = OllamaClient(OllamaConfig())
+            models = [f"ollama/{m}" for m in client.list_models()]
+            if not models:
+                msg = "No models installed. Run: ollama pull <model>"
+                return True, [], msg
+            return True, models, None
+        except ValueError as e:
+            return False, [], str(e)
+
+    def get_openai_models() -> Tuple[bool, List[str], Optional[str]]:
+        """Returns (available, models, error_msg)."""
+        try:
+            client = OpenAIClient(OpenAIConfig())
+            if not client.is_available():
+                return False, [], "OPENAI_API_KEY not set"
+            models = [f"openai/{m}" for m in client.list_models()]
+            return True, models, None
+        except ValueError as e:
+            return False, [], str(e)
+
+    def get_deepseek_models() -> Tuple[bool, List[str], Optional[str]]:
+        """Returns (available, models, error_msg)."""
+        try:
+            client = DeepSeekClient(DeepSeekConfig())
+            if not client.is_available():
+                return False, [], "DEEPSEEK_API_KEY not set"
+            models = [f"deepseek/{m}" for m in client.list_models()]
+            return True, models, None
+        except ValueError as e:
+            return False, [], str(e)
+
+    def get_mistral_models() -> Tuple[bool, List[str], Optional[str]]:
+        """Returns (available, models, error_msg)."""
+        try:
+            client = MistralClient(MistralConfig())
+            if not client.is_available():
+                return False, [], "MISTRAL_API_KEY not set"
+            models = [f"mistral/{m}" for m in client.list_models()]
+            return True, models, None
+        except ValueError as e:
+            return False, [], str(e)
+
+    providers: List[ProviderInfo] = [
+        {
+            "name": "Groq",
+            "prefix": "groq/",
+            "env_var": "GROQ_API_KEY",
+            "url": "https://console.groq.com",
+            "get_models": get_groq_models,
+        },
+        {
+            "name": "Anthropic",
+            "prefix": "anthropic/",
+            "env_var": "ANTHROPIC_API_KEY",
+            "url": "https://console.anthropic.com",
+            "get_models": get_anthropic_models,
+        },
+        {
+            "name": "Gemini",
+            "prefix": "gemini/",
+            "env_var": "GEMINI_API_KEY",
+            "url": "https://aistudio.google.com",
+            "get_models": get_gemini_models,
+        },
+        {
+            "name": "Ollama (Local)",
+            "prefix": "ollama/",
+            "env_var": None,
+            "url": "https://ollama.ai",
+            "get_models": get_ollama_models,
+        },
+        {
+            "name": "OpenAI",
+            "prefix": "openai/",
+            "env_var": "OPENAI_API_KEY",
+            "url": "https://platform.openai.com",
+            "get_models": get_openai_models,
+        },
+        {
+            "name": "DeepSeek",
+            "prefix": "deepseek/",
+            "env_var": "DEEPSEEK_API_KEY",
+            "url": "https://platform.deepseek.com",
+            "get_models": get_deepseek_models,
+        },
+        {
+            "name": "Mistral",
+            "prefix": "mistral/",
+            "env_var": "MISTRAL_API_KEY",
+            "url": "https://console.mistral.ai",
+            "get_models": get_mistral_models,
+        },
+    ]
+
+    # Filter providers if a specific one is requested
+    valid_provider_names = [
+        "groq",
+        "anthropic",
+        "gemini",
+        "ollama",
+        "openai",
+        "deepseek",
+        "mistral",
     ]
+    if provider:
+        provider_lower = provider.lower()
+        if provider_lower not in valid_provider_names:
+            click.echo(
+                f"Unknown provider: {provider}. "
+                f"Valid options: {', '.join(valid_provider_names)}",
+                err=True,
+            )
+            sys.exit(1)
+        providers = [
+            p for p in providers if p["prefix"].rstrip("/") == provider_lower
+        ]
+
+    click.echo("\nAvailable LLM Models:\n")
+
+    any_available = False
+    for prov in providers:
+        available, models, error = prov["get_models"]()
+
+        if available and models:
+            any_available = True
+            status = click.style("[OK]", fg="green")
+            count = len(models)
+            click.echo(f" {status} {prov['name']} ({count} models):")
+            for m in models:
+                click.echo(f" {m}")
+        elif available and not models:
+            status = click.style("[!]", fg="yellow")
+            click.echo(f" {status} {prov['name']}:")
+            click.echo(f" {error}")
+        else:
+            status = click.style("[X]", fg="red")
+            click.echo(f" {status} {prov['name']}:")
+            click.echo(f" {error}")
+
+        click.echo()
+
+    click.echo("Provider Setup:")
+    for prov in providers:
+        available, _, _ = prov["get_models"]()
+        if prov["env_var"]:
+            status = "configured" if available else "not set"
+            color = "green" if available else "yellow"
+            click.echo(
+                f" {prov['env_var']}: "
+                f"{click.style(status, fg=color)} - {prov['url']}"
+            )
+        else:
+            status = "running" if available else "not running"
+            color = "green" if available else "yellow"
+            click.echo(
+                f" Ollama server: "
+                f"{click.style(status, fg=color)} - {prov['url']}"
+            )
 
-    click.echo("\nSupported LLM Models (Direct API Access):\n")
-    for provider, model_list in models:
-        click.echo(f" {provider}:")
-        for m in model_list:
-            click.echo(f" - {m}")
     click.echo()
-    click.echo("Required API Keys:")
-    click.echo(
-        " GROQ_API_KEY - Get free API key at https://console.groq.com"
-    )
     click.echo(
-        " GEMINI_API_KEY - Get free API key at https://aistudio.google.com"
+        click.style("Note: ", fg="yellow")
+        + "Some models may require a paid plan. "
+        + "Free tier availability varies by provider."
     )
     click.echo()
-    click.echo("Default model: groq/llama-3.1-8b-instant")
+    if any_available:
+        click.echo("Default model: groq/llama-3.1-8b-instant")
     click.echo()
 
 
+# ============================================================================
+# Cache Commands
+# ============================================================================
+
+
+@cli.group("cache")
+def cache_group() -> None:
+    """Manage the LLM response cache.
+
+    Commands for inspecting, exporting, and importing cached LLM responses.
+
+    Examples:
+
+        cqknow cache stats ./llm_cache.db
+
+        cqknow cache export ./llm_cache.db ./export_dir
+
+        cqknow cache import ./llm_cache.db ./import_dir
+    """
+    pass
+
+
+@cache_group.command("stats")
+@click.argument("cache_path", type=click.Path(exists=True))
+@click.option(
+    "--json",
+    "output_json",
+    is_flag=True,
+    help="Output result as JSON.",
+)
+def cache_stats(cache_path: str, output_json: bool) -> None:
+    """Show cache statistics.
+
+    CACHE_PATH is the path to the SQLite cache database.
+
+    Examples:
+
+        cqknow cache stats ./llm_cache.db
+
+        cqknow cache stats ./llm_cache.db --json
+    """
+    from causaliq_knowledge.cache import TokenCache
+
+    try:
+        with TokenCache(cache_path) as cache:
+            entry_count = cache.entry_count()
+            token_count = cache.token_count()
+
+            if output_json:
+                output = {
+                    "cache_path": cache_path,
+                    "entry_count": entry_count,
+                    "token_count": token_count,
+                }
+                click.echo(json.dumps(output, indent=2))
+            else:
+                click.echo(f"\nCache: {cache_path}")
+                click.echo("=" * 40)
+                click.echo(f"Entries: {entry_count:,}")
+                click.echo(f"Tokens: {token_count:,}")
+                click.echo()
+    except Exception as e:
+        click.echo(f"Error opening cache: {e}", err=True)
+        sys.exit(1)
+
+
+@cache_group.command("export")
+@click.argument("cache_path", type=click.Path(exists=True))
+@click.argument("output_dir", type=click.Path())
+@click.option(
+    "--json",
+    "output_json",
+    is_flag=True,
+    help="Output result as JSON.",
+)
+def cache_export(cache_path: str, output_dir: str, output_json: bool) -> None:
+    """Export cache entries to human-readable files.
+
+    CACHE_PATH is the path to the SQLite cache database.
+    OUTPUT_DIR is the directory or zip file where files will be written.
+
+    If OUTPUT_DIR ends with .zip, entries are exported to a zip archive.
+    Otherwise, entries are exported to a directory.
+
+    Files are named using a human-readable format:
+        {model}_{node_a}_{node_b}_edge_{hash}.json
+
+    Examples:
+
+        cqknow cache export ./llm_cache.db ./export_dir
+
+        cqknow cache export ./llm_cache.db ./export.zip
+
+        cqknow cache export ./llm_cache.db ./export_dir --json
+    """
+    import tempfile
+    import zipfile
+    from pathlib import Path
+
+    from causaliq_knowledge.cache import TokenCache
+    from causaliq_knowledge.llm.cache import LLMCacheEntry, LLMEntryEncoder
+
+    output_path = Path(output_dir)
+    is_zip = output_path.suffix.lower() == ".zip"
+
+    try:
+        with TokenCache(cache_path) as cache:
+            # Register encoders for decoding
+            encoder = LLMEntryEncoder()
+            cache.register_encoder("llm", encoder)
+
+            # Register generic JsonEncoder for other types
+            from causaliq_knowledge.cache.encoders import JsonEncoder
+
+            json_encoder = JsonEncoder()
+            cache.register_encoder("json", json_encoder)
+
+            # Get entry types in the cache
+            entry_types = cache.list_entry_types()
+
+            if not entry_types:
+                if output_json:
+                    click.echo(json.dumps({"exported": 0, "error": None}))
+                else:
+                    click.echo("No entries to export.")
+                return
+
+            # Determine export directory (temp if zipping)
+            if is_zip:
+                temp_dir = tempfile.mkdtemp()
+                export_dir = Path(temp_dir)
+            else:
+                export_dir = output_path
+                export_dir.mkdir(parents=True, exist_ok=True)
+
+            # Export entries
+            exported = 0
+            for entry_type in entry_types:
+                if entry_type == "llm":
+                    # Query all entries of this type
+                    cursor = cache.conn.execute(
+                        "SELECT hash, data FROM cache_entries "
+                        "WHERE entry_type = ?",
+                        (entry_type,),
+                    )
+                    for cache_key, blob in cursor:
+                        data = encoder.decode(blob, cache)
+                        entry = LLMCacheEntry.from_dict(data)
+                        filename = encoder.generate_export_filename(
+                            entry, cache_key
+                        )
+                        file_path = export_dir / filename
+                        encoder.export_entry(entry, file_path)
+                        exported += 1
+                else:
+                    # For non-LLM types, use generic export
+                    count = cache.export_entries(export_dir, entry_type)
+                    exported += count
+
+            # Create zip archive if requested
+            if is_zip:
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                with zipfile.ZipFile(
+                    output_path, "w", zipfile.ZIP_DEFLATED
+                ) as zf:
+                    for file_path in export_dir.iterdir():
+                        if file_path.is_file():
+                            zf.write(file_path, file_path.name)
+                # Clean up temp directory
+                import shutil
+
+                shutil.rmtree(temp_dir)
+
+            # Output results
+            if output_json:
+                output = {
+                    "cache_path": cache_path,
+                    "output_path": str(output_path),
+                    "format": "zip" if is_zip else "directory",
+                    "exported": exported,
+                    "entry_types": entry_types,
+                }
+                click.echo(json.dumps(output, indent=2))
+            else:
+                fmt = "zip archive" if is_zip else "directory"
+                click.echo(
+                    f"\nExported {exported} entries to {fmt}: {output_path}"
+                )
+                click.echo(f"Entry types: {', '.join(entry_types)}")
+                click.echo()
+
+    except Exception as e:
+        click.echo(f"Error exporting cache: {e}", err=True)
+        sys.exit(1)
+
+
+def _is_llm_entry(data: Any) -> bool:
+    """Check if JSON data represents an LLM cache entry.
+
+    LLM entries have a specific structure with cache_key containing
+    model and messages, plus a response object.
+    """
+    if not isinstance(data, dict):
+        return False
+    cache_key = data.get("cache_key", {})
+    return (
+        isinstance(cache_key, dict)
+        and "model" in cache_key
+        and "messages" in cache_key
+        and "response" in data
+    )
+
+
+@cache_group.command("import")
+@click.argument("cache_path", type=click.Path())
+@click.argument("input_path", type=click.Path(exists=True))
+@click.option(
+    "--json",
+    "output_json",
+    is_flag=True,
+    help="Output result as JSON.",
+)
+def cache_import(cache_path: str, input_path: str, output_json: bool) -> None:
+    """Import cache entries from files.
+
+    CACHE_PATH is the path to the SQLite cache database (created if needed).
+    INPUT_PATH is a directory or zip file containing JSON files to import.
+
+    Entry types are auto-detected from JSON structure:
+    - LLM entries: contain cache_key.model, cache_key.messages, response
+    - Generic JSON: anything else
+
+    Examples:
+
+        cqknow cache import ./llm_cache.db ./import_dir
+
+        cqknow cache import ./llm_cache.db ./export.zip
+
+        cqknow cache import ./llm_cache.db ./import_dir --json
+    """
+    import hashlib
+    import tempfile
+    import zipfile
+    from pathlib import Path
+
+    from causaliq_knowledge.cache import TokenCache
+    from causaliq_knowledge.cache.encoders import JsonEncoder
+    from causaliq_knowledge.llm.cache import LLMEntryEncoder
+
+    input_file = Path(input_path)
+    is_zip = input_file.suffix.lower() == ".zip"
+
+    try:
+        with TokenCache(cache_path) as cache:
+            # Register encoders
+            llm_encoder = LLMEntryEncoder()
+            json_encoder = JsonEncoder()
+            cache.register_encoder("llm", llm_encoder)
+            cache.register_encoder("json", json_encoder)
+
+            # Determine input directory
+            if is_zip:
+                temp_dir = tempfile.mkdtemp()
+                import_dir = Path(temp_dir)
+                with zipfile.ZipFile(input_file, "r") as zf:
+                    zf.extractall(import_dir)
+            else:
+                import_dir = input_file
+                temp_dir = None
+
+            # Import entries
+            imported = 0
+            llm_count = 0
+            json_count = 0
+            skipped = 0
+
+            for file_path in import_dir.iterdir():
+                if (
+                    not file_path.is_file()
+                    or file_path.suffix.lower() != ".json"
+                ):
+                    continue
+
+                try:
+                    data = json.loads(file_path.read_text(encoding="utf-8"))
+                except (json.JSONDecodeError, UnicodeDecodeError):
+                    skipped += 1
+                    continue
+
+                # Detect entry type and generate cache key
+                if _is_llm_entry(data):
+                    # LLM entry - generate hash from cache_key contents
+                    cache_key_data = data.get("cache_key", {})
+                    key_str = json.dumps(cache_key_data, sort_keys=True)
+                    cache_key = hashlib.sha256(key_str.encode()).hexdigest()[
+                        :16
+                    ]
+                    cache.put_data(cache_key, "llm", data)
+                    llm_count += 1
+                else:
+                    # Generic JSON - use filename stem as key
+                    cache_key = file_path.stem
+                    cache.put_data(cache_key, "json", data)
+                    json_count += 1
+
+                imported += 1
+
+            # Clean up temp directory
+            if temp_dir:
+                import shutil
+
+                shutil.rmtree(temp_dir)
+
+            # Output results
+            if output_json:
+                output = {
+                    "cache_path": cache_path,
+                    "input_path": str(input_file),
+                    "format": "zip" if is_zip else "directory",
+                    "imported": imported,
+                    "llm_entries": llm_count,
+                    "json_entries": json_count,
+                    "skipped": skipped,
+                }
+                click.echo(json.dumps(output, indent=2))
+            else:
+                fmt = "zip archive" if is_zip else "directory"
+                click.echo(
+                    f"\nImported {imported} entries from {fmt}: {input_file}"
+                )
+                if llm_count:
+                    click.echo(f" LLM entries: {llm_count}")
+                if json_count:
+                    click.echo(f" JSON entries: {json_count}")
+                if skipped:
+                    click.echo(f" Skipped: {skipped}")
+                click.echo()
+
+    except Exception as e:
+        click.echo(f"Error importing cache: {e}", err=True)
+        sys.exit(1)
+
+
 def main() -> None:
     """Entry point for the CLI."""
     cli()
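
The new cache commands are thin wrappers around a small programmatic API (TokenCache, LLMEntryEncoder, JsonEncoder) whose usage is visible in the diff above. The sketch below shows how that API could be driven directly from Python, mirroring what cqknow cache stats, export, and import do. It is a minimal illustration only: the import paths and method calls are taken from the diff, while the file paths and payload values are made up for illustration and are not part of the package.

    # Sketch only: drive the cache API used by the new CLI commands.
    # Classes and methods below appear in the diff; paths and payloads
    # are illustrative.
    import hashlib
    import json
    from pathlib import Path

    from causaliq_knowledge.cache import TokenCache
    from causaliq_knowledge.cache.encoders import JsonEncoder
    from causaliq_knowledge.llm.cache import LLMEntryEncoder

    with TokenCache("./llm_cache.db") as cache:  # illustrative path
        # Same encoder registration the CLI performs before export/import.
        cache.register_encoder("llm", LLMEntryEncoder())
        cache.register_encoder("json", JsonEncoder())

        # Equivalent of `cqknow cache stats`.
        print(f"entries={cache.entry_count():,} tokens={cache.token_count():,}")

        # Store a generic JSON entry keyed by name, as the import command
        # does for files that are not LLM entries.
        cache.put_data("example_entry", "json", {"note": "illustrative"})

        # Store an LLM-shaped entry; _is_llm_entry() expects cache_key.model,
        # cache_key.messages and a top-level response (values illustrative).
        llm_entry = {
            "cache_key": {
                "model": "groq/llama-3.1-8b-instant",
                "messages": [{"role": "user", "content": "hi"}],
            },
            "response": {"content": "hello"},
        }
        key_str = json.dumps(llm_entry["cache_key"], sort_keys=True)
        key = hashlib.sha256(key_str.encode()).hexdigest()[:16]
        cache.put_data(key, "llm", llm_entry)

        # Export generic JSON entries to a directory, as the export command
        # does for non-LLM entry types.
        out_dir = Path("./export_dir")  # illustrative path
        out_dir.mkdir(parents=True, exist_ok=True)
        print(cache.export_entries(out_dir, "json"), "json entries exported")

A round trip through cqknow cache export and cqknow cache import uses the same calls, with the zip handling and entry-type detection shown in the diff.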