causaliq-knowledge 0.3.0-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
- causaliq_knowledge/__init__.py +5 -2
- causaliq_knowledge/action.py +480 -0
- causaliq_knowledge/cache/encoders/json_encoder.py +15 -3
- causaliq_knowledge/cache/token_cache.py +36 -2
- causaliq_knowledge/cli/__init__.py +15 -0
- causaliq_knowledge/cli/cache.py +478 -0
- causaliq_knowledge/cli/generate.py +410 -0
- causaliq_knowledge/cli/main.py +172 -0
- causaliq_knowledge/cli/models.py +309 -0
- causaliq_knowledge/graph/__init__.py +78 -0
- causaliq_knowledge/graph/generator.py +457 -0
- causaliq_knowledge/graph/loader.py +222 -0
- causaliq_knowledge/graph/models.py +426 -0
- causaliq_knowledge/graph/params.py +175 -0
- causaliq_knowledge/graph/prompts.py +445 -0
- causaliq_knowledge/graph/response.py +392 -0
- causaliq_knowledge/graph/view_filter.py +154 -0
- causaliq_knowledge/llm/base_client.py +6 -0
- causaliq_knowledge/llm/cache.py +124 -61
- causaliq_knowledge/py.typed +0 -0
- {causaliq_knowledge-0.3.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/METADATA +10 -6
- causaliq_knowledge-0.4.0.dist-info/RECORD +42 -0
- {causaliq_knowledge-0.3.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/entry_points.txt +3 -0
- causaliq_knowledge/cli.py +0 -757
- causaliq_knowledge-0.3.0.dist-info/RECORD +0 -28
- {causaliq_knowledge-0.3.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/WHEEL +0 -0
- {causaliq_knowledge-0.3.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {causaliq_knowledge-0.3.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/top_level.txt +0 -0
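Per the listing, the 757-line monolithic causaliq_knowledge/cli.py (shown deleted below) is replaced by a causaliq_knowledge/cli/ package (main.py, cache.py, generate.py, models.py), with entry_points.txt gaining three lines. A minimal sketch of what that split plausibly looks like, inferred only from the file listing - the module paths are real, but the symbol names and wiring are assumptions, not verified against the 0.4.0 wheel:

    # Hypothetical sketch of cli/main.py after the split: each former
    # command group lives in its own module and is re-registered on a
    # single click group. Imports are commented out because the symbol
    # names are assumptions from the file listing, not the wheel itself.
    import click

    from causaliq_knowledge import __version__

    # from causaliq_knowledge.cli.cache import cache_group    # assumed name
    # from causaliq_knowledge.cli.generate import generate    # assumed name
    # from causaliq_knowledge.cli.models import list_models   # assumed name


    @click.group()
    @click.version_option(version=__version__)
    def cli() -> None:
        """CausalIQ Knowledge - LLM knowledge for causal discovery."""


    # cli.add_command(cache_group)
    # cli.add_command(generate)
    # cli.add_command(list_models)


    def main() -> None:
        """Entry point for the CLI."""
        cli()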
causaliq_knowledge/cli.py
DELETED
@@ -1,757 +0,0 @@
-"""Command-line interface for causaliq-knowledge."""
-
-from __future__ import annotations
-
-import json
-import sys
-from typing import Any, Optional
-
-import click
-
-from causaliq_knowledge import __version__
-
-
-@click.group()
-@click.version_option(version=__version__)
-def cli() -> None:
-    """CausalIQ Knowledge - LLM knowledge for causal discovery.
-
-    Query LLMs about causal relationships between variables.
-    """
-    pass
-
-
-@cli.command("query")
-@click.argument("node_a")
-@click.argument("node_b")
-@click.option(
-    "--model",
-    "-m",
-    multiple=True,
-    default=["groq/llama-3.1-8b-instant"],
-    help="LLM model(s) to query. Can be specified multiple times.",
-)
-@click.option(
-    "--domain",
-    "-d",
-    default=None,
-    help="Domain context (e.g., 'medicine', 'economics').",
-)
-@click.option(
-    "--strategy",
-    "-s",
-    type=click.Choice(["weighted_vote", "highest_confidence"]),
-    default="weighted_vote",
-    help="Consensus strategy for multi-model queries.",
-)
-@click.option(
-    "--json",
-    "output_json",
-    is_flag=True,
-    help="Output result as JSON.",
-)
-@click.option(
-    "--temperature",
-    "-t",
-    type=float,
-    default=0.1,
-    help="LLM temperature (0.0-1.0).",
-)
-def query_edge(
-    node_a: str,
-    node_b: str,
-    model: tuple[str, ...],
-    domain: Optional[str],
-    strategy: str,
-    output_json: bool,
-    temperature: float,
-) -> None:
-    """Query LLMs about a causal relationship between two variables.
-
-    NODE_A and NODE_B are the variable names to query about.
-
-    Examples:
-
-        cqknow query smoking lung_cancer
-
-        cqknow query smoking lung_cancer --domain medicine
-
-        cqknow query X Y --model groq/llama-3.1-8b-instant \
-            --model gemini/gemini-2.5-flash
-    """
-    # Import here to avoid slow startup for --help
-    from causaliq_knowledge.llm import LLMKnowledge
-
-    # Build context
-    context = None
-    if domain:
-        context = {"domain": domain}
-
-    # Create provider
-    try:
-        provider = LLMKnowledge(
-            models=list(model),
-            consensus_strategy=strategy,
-            temperature=temperature,
-        )
-    except Exception as e:
-        click.echo(f"Error creating provider: {e}", err=True)
-        sys.exit(1)
-
-    # Query
-    click.echo(
-        f"Querying {len(model)} model(s) about: {node_a} -> {node_b}",
-        err=True,
-    )
-
-    try:
-        result = provider.query_edge(node_a, node_b, context=context)
-    except Exception as e:
-        click.echo(f"Error querying LLM: {e}", err=True)
-        sys.exit(1)
-
-    # Output
-    if output_json:
-        output = {
-            "node_a": node_a,
-            "node_b": node_b,
-            "exists": result.exists,
-            "direction": result.direction.value if result.direction else None,
-            "confidence": result.confidence,
-            "reasoning": result.reasoning,
-            "model": result.model,
-        }
-        click.echo(json.dumps(output, indent=2))
-    else:
-        # Human-readable output
-        exists_map = {True: "Yes", False: "No", None: "Uncertain"}
-        exists_str = exists_map[result.exists]
-        direction_str = result.direction.value if result.direction else "N/A"
-
-        click.echo(f"\n{'='*60}")
-        click.echo(f"Query: Does '{node_a}' cause '{node_b}'?")
-        click.echo("=" * 60)
-        click.echo(f"Exists: {exists_str}")
-        click.echo(f"Direction: {direction_str}")
-        click.echo(f"Confidence: {result.confidence:.2f}")
-        click.echo(f"Model(s): {result.model or 'unknown'}")
-        click.echo(f"{'='*60}")
-        click.echo(f"Reasoning: {result.reasoning}")
-        click.echo()
-
-    # Show stats
-    stats = provider.get_stats()
-    if stats["total_cost"] > 0:
-        click.echo(
-            f"Cost: ${stats['total_cost']:.6f} "
-            f"({stats['total_calls']} call(s))",
-            err=True,
-        )
-
-
-@cli.command("models")
-@click.argument("provider", required=False, default=None)
-def list_models(provider: Optional[str]) -> None:
-    """List available LLM models from each provider.
-
-    Queries each provider's API to show models accessible with your
-    current configuration. Results are filtered by your API key's
-    access level or locally installed models.
-
-    Optionally specify PROVIDER to list models from a single provider:
-    groq, anthropic, gemini, ollama, openai, deepseek, or mistral.
-
-    Examples:
-
-        cqknow models          # List all providers
-
-        cqknow models groq     # List only Groq models
-
-        cqknow models mistral  # List only Mistral models
-    """
-    from typing import Callable, List, Optional, Tuple, TypedDict
-
-    from causaliq_knowledge.llm import (
-        AnthropicClient,
-        AnthropicConfig,
-        DeepSeekClient,
-        DeepSeekConfig,
-        GeminiClient,
-        GeminiConfig,
-        GroqClient,
-        GroqConfig,
-        MistralClient,
-        MistralConfig,
-        OllamaClient,
-        OllamaConfig,
-        OpenAIClient,
-        OpenAIConfig,
-    )
-
-    # Type for get_models functions
-    GetModelsFunc = Callable[[], Tuple[bool, List[str], Optional[str]]]
-
-    class ProviderInfo(TypedDict):
-        name: str
-        prefix: str
-        env_var: Optional[str]
-        url: str
-        get_models: GetModelsFunc
-
-    def get_groq_models() -> Tuple[bool, List[str], Optional[str]]:
-        """Returns (available, models, error_msg)."""
-        try:
-            client = GroqClient(GroqConfig())
-            if not client.is_available():
-                return False, [], "GROQ_API_KEY not set"
-            models = [f"groq/{m}" for m in client.list_models()]
-            return True, models, None
-        except ValueError as e:
-            return False, [], str(e)
-
-    def get_anthropic_models() -> Tuple[bool, List[str], Optional[str]]:
-        """Returns (available, models, error_msg)."""
-        try:
-            client = AnthropicClient(AnthropicConfig())
-            if not client.is_available():
-                return False, [], "ANTHROPIC_API_KEY not set"
-            models = [f"anthropic/{m}" for m in client.list_models()]
-            return True, models, None
-        except ValueError as e:
-            return False, [], str(e)
-
-    def get_gemini_models() -> Tuple[bool, List[str], Optional[str]]:
-        """Returns (available, models, error_msg)."""
-        try:
-            client = GeminiClient(GeminiConfig())
-            if not client.is_available():
-                return False, [], "GEMINI_API_KEY not set"
-            models = [f"gemini/{m}" for m in client.list_models()]
-            return True, models, None
-        except ValueError as e:
-            return False, [], str(e)
-
-    def get_ollama_models() -> Tuple[bool, List[str], Optional[str]]:
-        """Returns (available, models, error_msg)."""
-        try:
-            client = OllamaClient(OllamaConfig())
-            models = [f"ollama/{m}" for m in client.list_models()]
-            if not models:
-                msg = "No models installed. Run: ollama pull <model>"
-                return True, [], msg
-            return True, models, None
-        except ValueError as e:
-            return False, [], str(e)
-
-    def get_openai_models() -> Tuple[bool, List[str], Optional[str]]:
-        """Returns (available, models, error_msg)."""
-        try:
-            client = OpenAIClient(OpenAIConfig())
-            if not client.is_available():
-                return False, [], "OPENAI_API_KEY not set"
-            models = [f"openai/{m}" for m in client.list_models()]
-            return True, models, None
-        except ValueError as e:
-            return False, [], str(e)
-
-    def get_deepseek_models() -> Tuple[bool, List[str], Optional[str]]:
-        """Returns (available, models, error_msg)."""
-        try:
-            client = DeepSeekClient(DeepSeekConfig())
-            if not client.is_available():
-                return False, [], "DEEPSEEK_API_KEY not set"
-            models = [f"deepseek/{m}" for m in client.list_models()]
-            return True, models, None
-        except ValueError as e:
-            return False, [], str(e)
-
-    def get_mistral_models() -> Tuple[bool, List[str], Optional[str]]:
-        """Returns (available, models, error_msg)."""
-        try:
-            client = MistralClient(MistralConfig())
-            if not client.is_available():
-                return False, [], "MISTRAL_API_KEY not set"
-            models = [f"mistral/{m}" for m in client.list_models()]
-            return True, models, None
-        except ValueError as e:
-            return False, [], str(e)
-
-    providers: List[ProviderInfo] = [
-        {
-            "name": "Groq",
-            "prefix": "groq/",
-            "env_var": "GROQ_API_KEY",
-            "url": "https://console.groq.com",
-            "get_models": get_groq_models,
-        },
-        {
-            "name": "Anthropic",
-            "prefix": "anthropic/",
-            "env_var": "ANTHROPIC_API_KEY",
-            "url": "https://console.anthropic.com",
-            "get_models": get_anthropic_models,
-        },
-        {
-            "name": "Gemini",
-            "prefix": "gemini/",
-            "env_var": "GEMINI_API_KEY",
-            "url": "https://aistudio.google.com",
-            "get_models": get_gemini_models,
-        },
-        {
-            "name": "Ollama (Local)",
-            "prefix": "ollama/",
-            "env_var": None,
-            "url": "https://ollama.ai",
-            "get_models": get_ollama_models,
-        },
-        {
-            "name": "OpenAI",
-            "prefix": "openai/",
-            "env_var": "OPENAI_API_KEY",
-            "url": "https://platform.openai.com",
-            "get_models": get_openai_models,
-        },
-        {
-            "name": "DeepSeek",
-            "prefix": "deepseek/",
-            "env_var": "DEEPSEEK_API_KEY",
-            "url": "https://platform.deepseek.com",
-            "get_models": get_deepseek_models,
-        },
-        {
-            "name": "Mistral",
-            "prefix": "mistral/",
-            "env_var": "MISTRAL_API_KEY",
-            "url": "https://console.mistral.ai",
-            "get_models": get_mistral_models,
-        },
-    ]
-
-    # Filter providers if a specific one is requested
-    valid_provider_names = [
-        "groq",
-        "anthropic",
-        "gemini",
-        "ollama",
-        "openai",
-        "deepseek",
-        "mistral",
-    ]
-    if provider:
-        provider_lower = provider.lower()
-        if provider_lower not in valid_provider_names:
-            click.echo(
-                f"Unknown provider: {provider}. "
-                f"Valid options: {', '.join(valid_provider_names)}",
-                err=True,
-            )
-            sys.exit(1)
-        providers = [
-            p for p in providers if p["prefix"].rstrip("/") == provider_lower
-        ]
-
-    click.echo("\nAvailable LLM Models:\n")
-
-    any_available = False
-    for prov in providers:
-        available, models, error = prov["get_models"]()
-
-        if available and models:
-            any_available = True
-            status = click.style("[OK]", fg="green")
-            count = len(models)
-            click.echo(f"  {status} {prov['name']} ({count} models):")
-            for m in models:
-                click.echo(f"    {m}")
-        elif available and not models:
-            status = click.style("[!]", fg="yellow")
-            click.echo(f"  {status} {prov['name']}:")
-            click.echo(f"    {error}")
-        else:
-            status = click.style("[X]", fg="red")
-            click.echo(f"  {status} {prov['name']}:")
-            click.echo(f"    {error}")
-
-    click.echo()
-
-    click.echo("Provider Setup:")
-    for prov in providers:
-        available, _, _ = prov["get_models"]()
-        if prov["env_var"]:
-            status = "configured" if available else "not set"
-            color = "green" if available else "yellow"
-            click.echo(
-                f"  {prov['env_var']}: "
-                f"{click.style(status, fg=color)} - {prov['url']}"
-            )
-        else:
-            status = "running" if available else "not running"
-            color = "green" if available else "yellow"
-            click.echo(
-                f"  Ollama server: "
-                f"{click.style(status, fg=color)} - {prov['url']}"
-            )
-
-    click.echo()
-    click.echo(
-        click.style("Note: ", fg="yellow")
-        + "Some models may require a paid plan. "
-        + "Free tier availability varies by provider."
-    )
-    click.echo()
-    if any_available:
-        click.echo("Default model: groq/llama-3.1-8b-instant")
-        click.echo()
-
-
-# ============================================================================
-# Cache Commands
-# ============================================================================
-
-
-@cli.group("cache")
-def cache_group() -> None:
-    """Manage the LLM response cache.
-
-    Commands for inspecting, exporting, and importing cached LLM responses.
-
-    Examples:
-
-        cqknow cache stats ./llm_cache.db
-
-        cqknow cache export ./llm_cache.db ./export_dir
-
-        cqknow cache import ./llm_cache.db ./import_dir
-    """
-    pass
-
-
-@cache_group.command("stats")
-@click.argument("cache_path", type=click.Path(exists=True))
-@click.option(
-    "--json",
-    "output_json",
-    is_flag=True,
-    help="Output result as JSON.",
-)
-def cache_stats(cache_path: str, output_json: bool) -> None:
-    """Show cache statistics.
-
-    CACHE_PATH is the path to the SQLite cache database.
-
-    Examples:
-
-        cqknow cache stats ./llm_cache.db
-
-        cqknow cache stats ./llm_cache.db --json
-    """
-    from causaliq_knowledge.cache import TokenCache
-
-    try:
-        with TokenCache(cache_path) as cache:
-            entry_count = cache.entry_count()
-            token_count = cache.token_count()
-
-            if output_json:
-                output = {
-                    "cache_path": cache_path,
-                    "entry_count": entry_count,
-                    "token_count": token_count,
-                }
-                click.echo(json.dumps(output, indent=2))
-            else:
-                click.echo(f"\nCache: {cache_path}")
-                click.echo("=" * 40)
-                click.echo(f"Entries: {entry_count:,}")
-                click.echo(f"Tokens: {token_count:,}")
-                click.echo()
-    except Exception as e:
-        click.echo(f"Error opening cache: {e}", err=True)
-        sys.exit(1)
-
-
-@cache_group.command("export")
-@click.argument("cache_path", type=click.Path(exists=True))
-@click.argument("output_dir", type=click.Path())
-@click.option(
-    "--json",
-    "output_json",
-    is_flag=True,
-    help="Output result as JSON.",
-)
-def cache_export(cache_path: str, output_dir: str, output_json: bool) -> None:
-    """Export cache entries to human-readable files.
-
-    CACHE_PATH is the path to the SQLite cache database.
-    OUTPUT_DIR is the directory or zip file where files will be written.
-
-    If OUTPUT_DIR ends with .zip, entries are exported to a zip archive.
-    Otherwise, entries are exported to a directory.
-
-    Files are named using a human-readable format:
-        {model}_{node_a}_{node_b}_edge_{hash}.json
-
-    Examples:
-
-        cqknow cache export ./llm_cache.db ./export_dir
-
-        cqknow cache export ./llm_cache.db ./export.zip
-
-        cqknow cache export ./llm_cache.db ./export_dir --json
-    """
-    import tempfile
-    import zipfile
-    from pathlib import Path
-
-    from causaliq_knowledge.cache import TokenCache
-    from causaliq_knowledge.llm.cache import LLMCacheEntry, LLMEntryEncoder
-
-    output_path = Path(output_dir)
-    is_zip = output_path.suffix.lower() == ".zip"
-
-    try:
-        with TokenCache(cache_path) as cache:
-            # Register encoders for decoding
-            encoder = LLMEntryEncoder()
-            cache.register_encoder("llm", encoder)
-
-            # Register generic JsonEncoder for other types
-            from causaliq_knowledge.cache.encoders import JsonEncoder
-
-            json_encoder = JsonEncoder()
-            cache.register_encoder("json", json_encoder)
-
-            # Get entry types in the cache
-            entry_types = cache.list_entry_types()
-
-            if not entry_types:
-                if output_json:
-                    click.echo(json.dumps({"exported": 0, "error": None}))
-                else:
-                    click.echo("No entries to export.")
-                return
-
-            # Determine export directory (temp if zipping)
-            if is_zip:
-                temp_dir = tempfile.mkdtemp()
-                export_dir = Path(temp_dir)
-            else:
-                export_dir = output_path
-                export_dir.mkdir(parents=True, exist_ok=True)
-
-            # Export entries
-            exported = 0
-            for entry_type in entry_types:
-                if entry_type == "llm":
-                    # Query all entries of this type
-                    cursor = cache.conn.execute(
-                        "SELECT hash, data FROM cache_entries "
-                        "WHERE entry_type = ?",
-                        (entry_type,),
-                    )
-                    for cache_key, blob in cursor:
-                        data = encoder.decode(blob, cache)
-                        entry = LLMCacheEntry.from_dict(data)
-                        filename = encoder.generate_export_filename(
-                            entry, cache_key
-                        )
-                        file_path = export_dir / filename
-                        encoder.export_entry(entry, file_path)
-                        exported += 1
-                else:
-                    # For non-LLM types, use generic export
-                    count = cache.export_entries(export_dir, entry_type)
-                    exported += count
-
-            # Create zip archive if requested
-            if is_zip:
-                output_path.parent.mkdir(parents=True, exist_ok=True)
-                with zipfile.ZipFile(
-                    output_path, "w", zipfile.ZIP_DEFLATED
-                ) as zf:
-                    for file_path in export_dir.iterdir():
-                        if file_path.is_file():
-                            zf.write(file_path, file_path.name)
-                # Clean up temp directory
-                import shutil
-
-                shutil.rmtree(temp_dir)
-
-        # Output results
-        if output_json:
-            output = {
-                "cache_path": cache_path,
-                "output_path": str(output_path),
-                "format": "zip" if is_zip else "directory",
-                "exported": exported,
-                "entry_types": entry_types,
-            }
-            click.echo(json.dumps(output, indent=2))
-        else:
-            fmt = "zip archive" if is_zip else "directory"
-            click.echo(
-                f"\nExported {exported} entries to {fmt}: {output_path}"
-            )
-            click.echo(f"Entry types: {', '.join(entry_types)}")
-            click.echo()
-
-    except Exception as e:
-        click.echo(f"Error exporting cache: {e}", err=True)
-        sys.exit(1)
-
-
-def _is_llm_entry(data: Any) -> bool:
-    """Check if JSON data represents an LLM cache entry.
-
-    LLM entries have a specific structure with cache_key containing
-    model and messages, plus a response object.
-    """
-    if not isinstance(data, dict):
-        return False
-    cache_key = data.get("cache_key", {})
-    return (
-        isinstance(cache_key, dict)
-        and "model" in cache_key
-        and "messages" in cache_key
-        and "response" in data
-    )
-
-
-@cache_group.command("import")
-@click.argument("cache_path", type=click.Path())
-@click.argument("input_path", type=click.Path(exists=True))
-@click.option(
-    "--json",
-    "output_json",
-    is_flag=True,
-    help="Output result as JSON.",
-)
-def cache_import(cache_path: str, input_path: str, output_json: bool) -> None:
-    """Import cache entries from files.
-
-    CACHE_PATH is the path to the SQLite cache database (created if needed).
-    INPUT_PATH is a directory or zip file containing JSON files to import.
-
-    Entry types are auto-detected from JSON structure:
-    - LLM entries: contain cache_key.model, cache_key.messages, response
-    - Generic JSON: anything else
-
-    Examples:
-
-        cqknow cache import ./llm_cache.db ./import_dir
-
-        cqknow cache import ./llm_cache.db ./export.zip
-
-        cqknow cache import ./llm_cache.db ./import_dir --json
-    """
-    import hashlib
-    import tempfile
-    import zipfile
-    from pathlib import Path
-
-    from causaliq_knowledge.cache import TokenCache
-    from causaliq_knowledge.cache.encoders import JsonEncoder
-    from causaliq_knowledge.llm.cache import LLMEntryEncoder
-
-    input_file = Path(input_path)
-    is_zip = input_file.suffix.lower() == ".zip"
-
-    try:
-        with TokenCache(cache_path) as cache:
-            # Register encoders
-            llm_encoder = LLMEntryEncoder()
-            json_encoder = JsonEncoder()
-            cache.register_encoder("llm", llm_encoder)
-            cache.register_encoder("json", json_encoder)
-
-            # Determine input directory
-            if is_zip:
-                temp_dir = tempfile.mkdtemp()
-                import_dir = Path(temp_dir)
-                with zipfile.ZipFile(input_file, "r") as zf:
-                    zf.extractall(import_dir)
-            else:
-                import_dir = input_file
-                temp_dir = None
-
-            # Import entries
-            imported = 0
-            llm_count = 0
-            json_count = 0
-            skipped = 0
-
-            for file_path in import_dir.iterdir():
-                if (
-                    not file_path.is_file()
-                    or file_path.suffix.lower() != ".json"
-                ):
-                    continue
-
-                try:
-                    data = json.loads(file_path.read_text(encoding="utf-8"))
-                except (json.JSONDecodeError, UnicodeDecodeError):
-                    skipped += 1
-                    continue
-
-                # Detect entry type and generate cache key
-                if _is_llm_entry(data):
-                    # LLM entry - generate hash from cache_key contents
-                    cache_key_data = data.get("cache_key", {})
-                    key_str = json.dumps(cache_key_data, sort_keys=True)
-                    cache_key = hashlib.sha256(key_str.encode()).hexdigest()[
-                        :16
-                    ]
-                    cache.put_data(cache_key, "llm", data)
-                    llm_count += 1
-                else:
-                    # Generic JSON - use filename stem as key
-                    cache_key = file_path.stem
-                    cache.put_data(cache_key, "json", data)
-                    json_count += 1
-
-                imported += 1
-
-            # Clean up temp directory
-            if temp_dir:
-                import shutil
-
-                shutil.rmtree(temp_dir)
-
-        # Output results
-        if output_json:
-            output = {
-                "cache_path": cache_path,
-                "input_path": str(input_file),
-                "format": "zip" if is_zip else "directory",
-                "imported": imported,
-                "llm_entries": llm_count,
-                "json_entries": json_count,
-                "skipped": skipped,
-            }
-            click.echo(json.dumps(output, indent=2))
-        else:
-            fmt = "zip archive" if is_zip else "directory"
-            click.echo(
-                f"\nImported {imported} entries from {fmt}: {input_file}"
-            )
-            if llm_count:
-                click.echo(f"  LLM entries: {llm_count}")
-            if json_count:
-                click.echo(f"  JSON entries: {json_count}")
-            if skipped:
-                click.echo(f"  Skipped: {skipped}")
-            click.echo()
-
-    except Exception as e:
-        click.echo(f"Error importing cache: {e}", err=True)
-        sys.exit(1)
-
-
-def main() -> None:
-    """Entry point for the CLI."""
-    cli()
-
-
-if __name__ == "__main__":  # pragma: no cover
-    main()
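For reference, the cache-inspection path that the deleted cache_stats command implemented reduces to the short pattern below; every name is taken from the removed code above (TokenCache used as a context manager, entry_count(), token_count()). Per the listing, the equivalent command now lives in causaliq_knowledge/cli/cache.py.

    # Minimal sketch of the cache-statistics pattern from the deleted
    # cache_stats command; names come from the removed code above.
    from causaliq_knowledge.cache import TokenCache

    with TokenCache("./llm_cache.db") as cache:
        print(f"Entries: {cache.entry_count():,}")
        print(f"Tokens: {cache.token_count():,}")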