causaliq-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- causaliq_knowledge/__init__.py +3 -3
- causaliq_knowledge/cache/__init__.py +18 -0
- causaliq_knowledge/cache/encoders/__init__.py +13 -0
- causaliq_knowledge/cache/encoders/base.py +90 -0
- causaliq_knowledge/cache/encoders/json_encoder.py +418 -0
- causaliq_knowledge/cache/token_cache.py +632 -0
- causaliq_knowledge/cli.py +344 -1
- causaliq_knowledge/llm/base_client.py +141 -1
- causaliq_knowledge/llm/cache.py +380 -0
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/METADATA +2 -2
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/RECORD +15 -9
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/WHEEL +1 -1
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/entry_points.txt +0 -0
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.3.0.dist-info}/top_level.txt +0 -0
causaliq_knowledge/cli.py
CHANGED
@@ -4,7 +4,7 @@ from __future__ import annotations
 
 import json
 import sys
-from typing import Optional
+from typing import Any, Optional
 
 import click
 
@@ -405,6 +405,349 @@ def list_models(provider: Optional[str]) -> None:
     click.echo()
 
 
+# ============================================================================
+# Cache Commands
+# ============================================================================
+
+
+@cli.group("cache")
+def cache_group() -> None:
+    """Manage the LLM response cache.
+
+    Commands for inspecting, exporting, and importing cached LLM responses.
+
+    Examples:
+
+        cqknow cache stats ./llm_cache.db
+
+        cqknow cache export ./llm_cache.db ./export_dir
+
+        cqknow cache import ./llm_cache.db ./import_dir
+    """
+    pass
+
+
+@cache_group.command("stats")
+@click.argument("cache_path", type=click.Path(exists=True))
+@click.option(
+    "--json",
+    "output_json",
+    is_flag=True,
+    help="Output result as JSON.",
+)
+def cache_stats(cache_path: str, output_json: bool) -> None:
+    """Show cache statistics.
+
+    CACHE_PATH is the path to the SQLite cache database.
+
+    Examples:
+
+        cqknow cache stats ./llm_cache.db
+
+        cqknow cache stats ./llm_cache.db --json
+    """
+    from causaliq_knowledge.cache import TokenCache
+
+    try:
+        with TokenCache(cache_path) as cache:
+            entry_count = cache.entry_count()
+            token_count = cache.token_count()
+
+            if output_json:
+                output = {
+                    "cache_path": cache_path,
+                    "entry_count": entry_count,
+                    "token_count": token_count,
+                }
+                click.echo(json.dumps(output, indent=2))
+            else:
+                click.echo(f"\nCache: {cache_path}")
+                click.echo("=" * 40)
+                click.echo(f"Entries: {entry_count:,}")
+                click.echo(f"Tokens: {token_count:,}")
+                click.echo()
+    except Exception as e:
+        click.echo(f"Error opening cache: {e}", err=True)
+        sys.exit(1)
+
+
+@cache_group.command("export")
+@click.argument("cache_path", type=click.Path(exists=True))
+@click.argument("output_dir", type=click.Path())
+@click.option(
+    "--json",
+    "output_json",
+    is_flag=True,
+    help="Output result as JSON.",
+)
+def cache_export(cache_path: str, output_dir: str, output_json: bool) -> None:
+    """Export cache entries to human-readable files.
+
+    CACHE_PATH is the path to the SQLite cache database.
+    OUTPUT_DIR is the directory or zip file where files will be written.
+
+    If OUTPUT_DIR ends with .zip, entries are exported to a zip archive.
+    Otherwise, entries are exported to a directory.
+
+    Files are named using a human-readable format:
+        {model}_{node_a}_{node_b}_edge_{hash}.json
+
+    Examples:
+
+        cqknow cache export ./llm_cache.db ./export_dir
+
+        cqknow cache export ./llm_cache.db ./export.zip
+
+        cqknow cache export ./llm_cache.db ./export_dir --json
+    """
+    import tempfile
+    import zipfile
+    from pathlib import Path
+
+    from causaliq_knowledge.cache import TokenCache
+    from causaliq_knowledge.llm.cache import LLMCacheEntry, LLMEntryEncoder
+
+    output_path = Path(output_dir)
+    is_zip = output_path.suffix.lower() == ".zip"
+
+    try:
+        with TokenCache(cache_path) as cache:
+            # Register encoders for decoding
+            encoder = LLMEntryEncoder()
+            cache.register_encoder("llm", encoder)
+
+            # Register generic JsonEncoder for other types
+            from causaliq_knowledge.cache.encoders import JsonEncoder
+
+            json_encoder = JsonEncoder()
+            cache.register_encoder("json", json_encoder)
+
+            # Get entry types in the cache
+            entry_types = cache.list_entry_types()
+
+            if not entry_types:
+                if output_json:
+                    click.echo(json.dumps({"exported": 0, "error": None}))
+                else:
+                    click.echo("No entries to export.")
+                return
+
+            # Determine export directory (temp if zipping)
+            if is_zip:
+                temp_dir = tempfile.mkdtemp()
+                export_dir = Path(temp_dir)
+            else:
+                export_dir = output_path
+                export_dir.mkdir(parents=True, exist_ok=True)
+
+            # Export entries
+            exported = 0
+            for entry_type in entry_types:
+                if entry_type == "llm":
+                    # Query all entries of this type
+                    cursor = cache.conn.execute(
+                        "SELECT hash, data FROM cache_entries "
+                        "WHERE entry_type = ?",
+                        (entry_type,),
+                    )
+                    for cache_key, blob in cursor:
+                        data = encoder.decode(blob, cache)
+                        entry = LLMCacheEntry.from_dict(data)
+                        filename = encoder.generate_export_filename(
+                            entry, cache_key
+                        )
+                        file_path = export_dir / filename
+                        encoder.export_entry(entry, file_path)
+                        exported += 1
+                else:
+                    # For non-LLM types, use generic export
+                    count = cache.export_entries(export_dir, entry_type)
+                    exported += count
+
+            # Create zip archive if requested
+            if is_zip:
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                with zipfile.ZipFile(
+                    output_path, "w", zipfile.ZIP_DEFLATED
+                ) as zf:
+                    for file_path in export_dir.iterdir():
+                        if file_path.is_file():
+                            zf.write(file_path, file_path.name)
+                # Clean up temp directory
+                import shutil
+
+                shutil.rmtree(temp_dir)
+
+            # Output results
+            if output_json:
+                output = {
+                    "cache_path": cache_path,
+                    "output_path": str(output_path),
+                    "format": "zip" if is_zip else "directory",
+                    "exported": exported,
+                    "entry_types": entry_types,
+                }
+                click.echo(json.dumps(output, indent=2))
+            else:
+                fmt = "zip archive" if is_zip else "directory"
+                click.echo(
+                    f"\nExported {exported} entries to {fmt}: {output_path}"
+                )
+                click.echo(f"Entry types: {', '.join(entry_types)}")
+                click.echo()
+
+    except Exception as e:
+        click.echo(f"Error exporting cache: {e}", err=True)
+        sys.exit(1)
+
+
+def _is_llm_entry(data: Any) -> bool:
+    """Check if JSON data represents an LLM cache entry.
+
+    LLM entries have a specific structure with cache_key containing
+    model and messages, plus a response object.
+    """
+    if not isinstance(data, dict):
+        return False
+    cache_key = data.get("cache_key", {})
+    return (
+        isinstance(cache_key, dict)
+        and "model" in cache_key
+        and "messages" in cache_key
+        and "response" in data
+    )
+
+
+@cache_group.command("import")
+@click.argument("cache_path", type=click.Path())
+@click.argument("input_path", type=click.Path(exists=True))
+@click.option(
+    "--json",
+    "output_json",
+    is_flag=True,
+    help="Output result as JSON.",
+)
+def cache_import(cache_path: str, input_path: str, output_json: bool) -> None:
+    """Import cache entries from files.
+
+    CACHE_PATH is the path to the SQLite cache database (created if needed).
+    INPUT_PATH is a directory or zip file containing JSON files to import.
+
+    Entry types are auto-detected from JSON structure:
+    - LLM entries: contain cache_key.model, cache_key.messages, response
+    - Generic JSON: anything else
+
+    Examples:
+
+        cqknow cache import ./llm_cache.db ./import_dir
+
+        cqknow cache import ./llm_cache.db ./export.zip
+
+        cqknow cache import ./llm_cache.db ./import_dir --json
+    """
+    import hashlib
+    import tempfile
+    import zipfile
+    from pathlib import Path
+
+    from causaliq_knowledge.cache import TokenCache
+    from causaliq_knowledge.cache.encoders import JsonEncoder
+    from causaliq_knowledge.llm.cache import LLMEntryEncoder
+
+    input_file = Path(input_path)
+    is_zip = input_file.suffix.lower() == ".zip"
+
+    try:
+        with TokenCache(cache_path) as cache:
+            # Register encoders
+            llm_encoder = LLMEntryEncoder()
+            json_encoder = JsonEncoder()
+            cache.register_encoder("llm", llm_encoder)
+            cache.register_encoder("json", json_encoder)
+
+            # Determine input directory
+            if is_zip:
+                temp_dir = tempfile.mkdtemp()
+                import_dir = Path(temp_dir)
+                with zipfile.ZipFile(input_file, "r") as zf:
+                    zf.extractall(import_dir)
+            else:
+                import_dir = input_file
+                temp_dir = None
+
+            # Import entries
+            imported = 0
+            llm_count = 0
+            json_count = 0
+            skipped = 0
+
+            for file_path in import_dir.iterdir():
+                if (
+                    not file_path.is_file()
+                    or file_path.suffix.lower() != ".json"
+                ):
+                    continue
+
+                try:
+                    data = json.loads(file_path.read_text(encoding="utf-8"))
+                except (json.JSONDecodeError, UnicodeDecodeError):
+                    skipped += 1
+                    continue
+
+                # Detect entry type and generate cache key
+                if _is_llm_entry(data):
+                    # LLM entry - generate hash from cache_key contents
+                    cache_key_data = data.get("cache_key", {})
+                    key_str = json.dumps(cache_key_data, sort_keys=True)
+                    cache_key = hashlib.sha256(key_str.encode()).hexdigest()[
+                        :16
+                    ]
+                    cache.put_data(cache_key, "llm", data)
+                    llm_count += 1
+                else:
+                    # Generic JSON - use filename stem as key
+                    cache_key = file_path.stem
+                    cache.put_data(cache_key, "json", data)
+                    json_count += 1
+
+                imported += 1
+
+            # Clean up temp directory
+            if temp_dir:
+                import shutil
+
+                shutil.rmtree(temp_dir)
+
+            # Output results
+            if output_json:
+                output = {
+                    "cache_path": cache_path,
+                    "input_path": str(input_file),
+                    "format": "zip" if is_zip else "directory",
+                    "imported": imported,
+                    "llm_entries": llm_count,
+                    "json_entries": json_count,
+                    "skipped": skipped,
+                }
+                click.echo(json.dumps(output, indent=2))
+            else:
+                fmt = "zip archive" if is_zip else "directory"
+                click.echo(
+                    f"\nImported {imported} entries from {fmt}: {input_file}"
+                )
+                if llm_count:
+                    click.echo(f"  LLM entries: {llm_count}")
+                if json_count:
+                    click.echo(f"  JSON entries: {json_count}")
+                if skipped:
+                    click.echo(f"  Skipped: {skipped}")
+                click.echo()
+
+    except Exception as e:
+        click.echo(f"Error importing cache: {e}", err=True)
+        sys.exit(1)
+
+
 def main() -> None:
     """Entry point for the CLI."""
     cli()
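The import command keys LLM entries by a truncated SHA-256 of their cache_key block, and everything else by filename stem. A minimal standalone sketch of that detection and key derivation (detect_and_key is a hypothetical helper written for illustration, not part of the package, and the sample dictionaries are made up):

import hashlib
import json

def detect_and_key(data, filename_stem):
    # Mirrors _is_llm_entry above: an LLM entry is a dict whose cache_key
    # has "model" and "messages", alongside a top-level "response".
    if isinstance(data, dict):
        cache_key = data.get("cache_key", {})
        if (
            isinstance(cache_key, dict)
            and "model" in cache_key
            and "messages" in cache_key
            and "response" in data
        ):
            # Same derivation as cache_import: truncated SHA-256 of the
            # canonically sorted cache_key JSON.
            key_str = json.dumps(cache_key, sort_keys=True)
            return "llm", hashlib.sha256(key_str.encode()).hexdigest()[:16]
    # Anything else is stored as generic JSON, keyed by the filename stem.
    return "json", filename_stem

llm_entry = {
    "cache_key": {"model": "m", "messages": [{"role": "user", "content": "hi"}]},
    "response": {"content": "hello"},
}
print(detect_and_key(llm_entry, "ignored"))  # ('llm', <16-hex-char key>)
print(detect_and_key({"notes": 1}, "memo"))  # ('json', 'memo')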

causaliq_knowledge/llm/base_client.py
CHANGED

@@ -5,11 +5,18 @@ must implement. This provides a consistent API regardless of the
 underlying LLM provider.
 """
 
+from __future__ import annotations
+
+import hashlib
 import json
 import logging
+import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
+
+if TYPE_CHECKING:  # pragma: no cover
+    from causaliq_knowledge.cache import TokenCache
 
 logger = logging.getLogger(__name__)
 
@@ -218,3 +225,136 @@ class BaseLLMClient(ABC):
             Model identifier string.
         """
         return getattr(self, "config", LLMConfig(model="unknown")).model
+
+    def _build_cache_key(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+    ) -> str:
+        """Build a deterministic cache key for the request.
+
+        Creates a SHA-256 hash from the model, messages, temperature, and
+        max_tokens. The hash is truncated to 16 hex characters (64 bits).
+
+        Args:
+            messages: List of message dicts with "role" and "content" keys.
+            temperature: Sampling temperature (defaults to config value).
+            max_tokens: Maximum tokens (defaults to config value).
+
+        Returns:
+            16-character hex string cache key.
+        """
+        config = getattr(self, "config", LLMConfig(model="unknown"))
+        key_data = {
+            "model": config.model,
+            "messages": messages,
+            "temperature": (
+                temperature if temperature is not None else config.temperature
+            ),
+            "max_tokens": (
+                max_tokens if max_tokens is not None else config.max_tokens
+            ),
+        }
+        key_json = json.dumps(key_data, sort_keys=True, separators=(",", ":"))
+        return hashlib.sha256(key_json.encode()).hexdigest()[:16]
+
+    def set_cache(
+        self,
+        cache: Optional["TokenCache"],
+        use_cache: bool = True,
+    ) -> None:
+        """Configure caching for this client.
+
+        Args:
+            cache: TokenCache instance for caching, or None to disable.
+            use_cache: Whether to use the cache (default True).
+        """
+        self._cache = cache
+        self._use_cache = use_cache
+
+    @property
+    def cache(self) -> Optional["TokenCache"]:
+        """Return the configured cache, if any."""
+        return getattr(self, "_cache", None)
+
+    @property
+    def use_cache(self) -> bool:
+        """Return whether caching is enabled."""
+        return getattr(self, "_use_cache", True)
+
+    def cached_completion(
+        self,
+        messages: List[Dict[str, str]],
+        **kwargs: Any,
+    ) -> LLMResponse:
+        """Make a completion request with caching.
+
+        If caching is enabled and a cached response exists, returns
+        the cached response without making an API call. Otherwise,
+        makes the API call and caches the result.
+
+        Args:
+            messages: List of message dicts with "role" and "content" keys.
+            **kwargs: Provider-specific options (temperature, max_tokens, etc.)
+
+        Returns:
+            LLMResponse with the generated content and metadata.
+        """
+        from causaliq_knowledge.llm.cache import LLMCacheEntry, LLMEntryEncoder
+
+        cache = self.cache
+        use_cache = self.use_cache
+
+        # Build cache key
+        temperature = kwargs.get("temperature")
+        max_tokens = kwargs.get("max_tokens")
+        cache_key = self._build_cache_key(messages, temperature, max_tokens)
+
+        # Check cache
+        if use_cache and cache is not None:
+            # Ensure encoder is registered
+            if not cache.has_encoder("llm"):
+                cache.register_encoder("llm", LLMEntryEncoder())
+
+            if cache.exists(cache_key, "llm"):
+                cached_data = cache.get_data(cache_key, "llm")
+                if cached_data is not None:
+                    entry = LLMCacheEntry.from_dict(cached_data)
+                    return LLMResponse(
+                        content=entry.response.content,
+                        model=entry.model,
+                        input_tokens=entry.metadata.tokens.input,
+                        output_tokens=entry.metadata.tokens.output,
+                        cost=entry.metadata.cost_usd or 0.0,
+                    )
+
+        # Make API call with timing
+        start_time = time.perf_counter()
+        response = self.completion(messages, **kwargs)
+        latency_ms = int((time.perf_counter() - start_time) * 1000)
+
+        # Store in cache
+        if use_cache and cache is not None:
+            config = getattr(self, "config", LLMConfig(model="unknown"))
+            entry = LLMCacheEntry.create(
+                model=config.model,
+                messages=messages,
+                content=response.content,
+                temperature=(
+                    temperature
+                    if temperature is not None
+                    else config.temperature
+                ),
+                max_tokens=(
+                    max_tokens if max_tokens is not None else config.max_tokens
+                ),
+                provider=self.provider_name,
+                latency_ms=latency_ms,
+                input_tokens=response.input_tokens,
+                output_tokens=response.output_tokens,
+                cost_usd=response.cost,
+            )
+            cache.put_data(cache_key, "llm", entry.to_dict())
+
+        return response