causaliq-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. causaliq_knowledge/__init__.py +6 -3
  2. causaliq_knowledge/action.py +480 -0
  3. causaliq_knowledge/cache/__init__.py +18 -0
  4. causaliq_knowledge/cache/encoders/__init__.py +13 -0
  5. causaliq_knowledge/cache/encoders/base.py +90 -0
  6. causaliq_knowledge/cache/encoders/json_encoder.py +430 -0
  7. causaliq_knowledge/cache/token_cache.py +666 -0
  8. causaliq_knowledge/cli/__init__.py +15 -0
  9. causaliq_knowledge/cli/cache.py +478 -0
  10. causaliq_knowledge/cli/generate.py +410 -0
  11. causaliq_knowledge/cli/main.py +172 -0
  12. causaliq_knowledge/cli/models.py +309 -0
  13. causaliq_knowledge/graph/__init__.py +78 -0
  14. causaliq_knowledge/graph/generator.py +457 -0
  15. causaliq_knowledge/graph/loader.py +222 -0
  16. causaliq_knowledge/graph/models.py +426 -0
  17. causaliq_knowledge/graph/params.py +175 -0
  18. causaliq_knowledge/graph/prompts.py +445 -0
  19. causaliq_knowledge/graph/response.py +392 -0
  20. causaliq_knowledge/graph/view_filter.py +154 -0
  21. causaliq_knowledge/llm/base_client.py +147 -1
  22. causaliq_knowledge/llm/cache.py +443 -0
  23. causaliq_knowledge/py.typed +0 -0
  24. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/METADATA +10 -6
  25. causaliq_knowledge-0.4.0.dist-info/RECORD +42 -0
  26. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/WHEEL +1 -1
  27. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/entry_points.txt +3 -0
  28. causaliq_knowledge/cli.py +0 -414
  29. causaliq_knowledge-0.2.0.dist-info/RECORD +0 -22
  30. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/licenses/LICENSE +0 -0
  31. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,445 @@
1
+ """Prompt templates for LLM graph generation queries.
2
+
3
+ This module provides prompt builders for generating complete causal
4
+ graphs from variable specifications, distinct from the edge-by-edge
5
+ queries in the llm.prompts module.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from enum import Enum
12
+ from typing import TYPE_CHECKING, Any, Optional
13
+
14
+ if TYPE_CHECKING: # pragma: no cover
15
+ from causaliq_knowledge.graph.models import ModelSpec
16
+
17
+ from causaliq_knowledge.graph.view_filter import PromptDetail, ViewFilter
18
+
19
+
20
class OutputFormat(str, Enum):
    """Shape of the JSON document the LLM is asked to return.

    Members are also plain strings (the class mixes in ``str``), so they
    compare equal to their raw values.

    Attributes:
        EDGE_LIST: The graph is returned as a list of edges, each with a
            source, a target and a confidence.
        ADJACENCY_MATRIX: The graph is returned as a square matrix whose
            entry (i,j) is the confidence that variable i causes
            variable j.
    """

    # One JSON object per proposed edge.
    EDGE_LIST = "edge_list"
    # One confidence value per ordered pair of variables.
    ADJACENCY_MATRIX = "adjacency_matrix"
33
+
34
+
35
# System prompt for graph generation (edge list format).
# Instructs the LLM to answer with a JSON object holding an "edges" array
# (source, target, confidence per edge) and a "reasoning" string.
# GraphQueryPrompt.build() falls back to this prompt when no custom system
# prompt is set and the output format is not ADJACENCY_MATRIX.
# A backslash at the end of a line inside the literal joins it with the
# next line, so no newline is emitted at that point.
# NOTE(review): the JSON example shows no indentation in this view; the
# whitespace may have been lost in extraction - confirm against the
# packaged file.
GRAPH_SYSTEM_PROMPT_EDGE_LIST = """\
You are an expert in causal reasoning and domain knowledge.
Your task is to propose a complete causal graph structure given a set of \
variables.

Respond ONLY with valid JSON in this exact format:
{
"edges": [
{
"source": "variable_name_1",
"target": "variable_name_2",
"confidence": 0.0 to 1.0
}
],
"reasoning": "brief explanation of your approach"
}

Guidelines:
- Each edge represents a direct causal relationship (source causes target)
- Include ONLY direct causal relationships, not indirect ones
- confidence: your confidence in each edge from 0.0 (low) to 1.0 (certain)
- Use the exact variable names provided
- Do not add edges between a variable and itself
- Consider domain knowledge and temporal ordering
- Omit edges where no causal relationship exists"""
61
+
62
# System prompt for graph generation (adjacency matrix format).
# Instructs the LLM to answer with a JSON object holding a "variables"
# list, a square "adjacency_matrix" of confidences, and a "reasoning"
# string. GraphQueryPrompt.build() selects this prompt when the output
# format is ADJACENCY_MATRIX and no custom system prompt is set.
# A backslash at the end of a line inside the literal joins it with the
# next line, so no newline is emitted at that point.
# NOTE(review): the JSON example shows no indentation in this view; the
# whitespace may have been lost in extraction - confirm against the
# packaged file.
GRAPH_SYSTEM_PROMPT_ADJACENCY = """\
You are an expert in causal reasoning and domain knowledge.
Your task is to propose a complete causal graph structure given a set of \
variables.

Respond ONLY with valid JSON in this exact format:
{
"variables": ["var1", "var2", "var3"],
"adjacency_matrix": [
[0.0, 0.8, 0.0],
[0.0, 0.0, 0.6],
[0.0, 0.0, 0.0]
],
"reasoning": "brief explanation of your approach"
}

Guidelines:
- List variables in the order you will use in the matrix
- adjacency_matrix(i,j) = confidence that variables(i) causes variables(j)
- Values range from 0.0 (no edge) to 1.0 (certain edge)
- Diagonal elements must be 0.0 (no self-loops)
- Consider domain knowledge and temporal ordering
- Use 0.0 for pairs with no causal relationship"""
86
+
87
# User prompt templates, one per detail level, each with a "_WITH_DOMAIN"
# twin. Every template is filled with str.format: the minimal pair takes
# {variable_names}, the standard and rich pairs take {variable_details},
# and the "_WITH_DOMAIN" twins additionally take {domain}.

# Lead-in that turns a plain template into its "_WITH_DOMAIN" twin.
_USER_PROMPT_DOMAIN_LEAD_IN = "In the domain of {domain}:\n\n"

# Minimal context: variable names only.
USER_PROMPT_MINIMAL = (
    "Propose a causal graph for the following variables:\n"
    "\n"
    "Variables: {variable_names}\n"
    "\n"
    "Based on your domain knowledge, identify which variables "
    "directly cause others."
)

# Minimal context, prefixed with the domain.
USER_PROMPT_MINIMAL_WITH_DOMAIN = (
    _USER_PROMPT_DOMAIN_LEAD_IN + USER_PROMPT_MINIMAL
)

# Standard context: per-variable details block.
USER_PROMPT_STANDARD = (
    "Propose a causal graph for the following variables:\n"
    "\n"
    "{variable_details}\n"
    "\n"
    "Based on the variable types and descriptions, identify which "
    "variables directly cause others."
)

# Standard context, prefixed with the domain.
USER_PROMPT_STANDARD_WITH_DOMAIN = (
    _USER_PROMPT_DOMAIN_LEAD_IN + USER_PROMPT_STANDARD
)

# Rich context: details block plus a checklist of things to consider.
USER_PROMPT_RICH = (
    "Propose a causal graph for the following variables:\n"
    "\n"
    "{variable_details}\n"
    "\n"
    "Consider:\n"
    "- Variable roles (exogenous variables have no parents)\n"
    "- Domain-specific causal mechanisms\n"
    "- Temporal ordering where applicable\n"
    "- Related domain knowledge provided\n"
    "\n"
    "Identify which variables directly cause others."
)

# Rich context, prefixed with the domain.
USER_PROMPT_RICH_WITH_DOMAIN = _USER_PROMPT_DOMAIN_LEAD_IN + USER_PROMPT_RICH
156
+
157
+
158
def _format_variable_details(
    variables: list[dict[str, Any]],
    level: PromptDetail,
) -> str:
    """Render variable dictionaries as a bulleted text block for a prompt.

    Each variable becomes a "- <name>" line followed by one labelled line
    per populated field; which fields are shown depends on ``level``.
    Empty or missing fields are skipped, and a missing name is shown as
    "unknown".

    Args:
        variables: List of filtered variable dictionaries.
        level: The view level. MINIMAL shows names only, STANDARD adds
            type, description and states, and any other level is
            rendered with the full RICH field set.

    Returns:
        The per-variable entries joined by blank lines; an empty string
        when ``variables`` is empty.
    """
    # Each layout row is (dictionary key, label, separator). A separator
    # of None means the value is printed as-is; otherwise the value is
    # iterated and its items are joined with that separator.
    if level == PromptDetail.MINIMAL:
        layout = ()
    elif level == PromptDetail.STANDARD:
        layout = (
            ("type", "Type", None),
            ("short_description", "Description", None),
            ("states", "States", ", "),
        )
    else:  # RICH
        layout = (
            ("type", "Type", None),
            ("role", "Role", None),
            ("category", "Category", None),
            ("short_description", "Description", None),
            ("extended_description", "Extended", None),
            ("states", "States", ", "),
            ("sensitivity_hints", "Causal hints", None),
            ("related_domain_knowledge", "Domain knowledge", "; "),
        )

    entries = []
    for var in variables:
        name = var.get("name", "unknown")
        rows = [f"- {name}"]
        for key, label, separator in layout:
            value = var.get(key)
            if not value:
                continue
            if separator is not None:
                value = separator.join(str(item) for item in value)
            # NOTE(review): leading whitespace of the detail rows is
            # reproduced as seen in this view - confirm it was not
            # collapsed during extraction.
            rows.append(f" {label}: {value}")
        entries.append("\n".join(rows))

    return "\n\n".join(entries)
222
+
223
+
224
@dataclass
class GraphQueryPrompt:
    """Assemble the system/user prompt pair for a whole-graph LLM query.

    The system prompt tells the LLM which JSON structure to return; the
    user prompt lists the variables at the requested level of detail,
    optionally introduced by a domain.

    Attributes:
        variables: List of filtered variable dictionaries.
        level: The view level (minimal, standard, rich).
        domain: Optional domain context; a falsy value selects the
            templates without a domain lead-in.
        output_format: Desired output format (edge_list or
            adjacency_matrix).
        system_prompt: Custom system prompt; a falsy value (None or an
            empty string) selects the built-in prompt for the output
            format.

    Example:
        >>> spec = ModelLoader.load("model.json")
        >>> view_filter = ViewFilter(spec)
        >>> variables = view_filter.filter_variables(PromptDetail.STANDARD)
        >>> prompt = GraphQueryPrompt(
        ...     variables=variables,
        ...     level=PromptDetail.STANDARD,
        ...     domain=spec.domain,
        ... )
        >>> system, user = prompt.build()
    """

    variables: list[dict[str, Any]]
    level: PromptDetail = PromptDetail.STANDARD
    domain: Optional[str] = None
    output_format: OutputFormat = OutputFormat.EDGE_LIST
    system_prompt: Optional[str] = None

    def build(self) -> tuple[str, str]:
        """Return the (system_prompt, user_prompt) pair for the query.

        A custom ``system_prompt`` takes precedence when it is truthy.
        Otherwise the adjacency-matrix prompt is used for
        ``OutputFormat.ADJACENCY_MATRIX`` and the edge-list prompt for
        everything else. The user prompt depends on the view level and
        the domain.

        Returns:
            Tuple of (system_prompt, user_prompt) strings.

        Example:
            >>> prompt = GraphQueryPrompt(
            ...     variables=[{"name": "age"}, {"name": "income"}],
            ...     level=PromptDetail.MINIMAL,
            ... )
            >>> system, user = prompt.build()
        """
        if self.system_prompt:
            instructions = self.system_prompt
        else:
            wants_matrix = self.output_format == OutputFormat.ADJACENCY_MATRIX
            instructions = (
                GRAPH_SYSTEM_PROMPT_ADJACENCY
                if wants_matrix
                else GRAPH_SYSTEM_PROMPT_EDGE_LIST
            )

        return instructions, self._build_user_prompt()

    def _build_user_prompt(self) -> str:
        """Pick the user template for the level/domain and fill it in.

        MINIMAL passes a comma-separated list of names to the template;
        STANDARD and every other level pass the block produced by
        ``_format_variable_details``. When ``domain`` is truthy the
        "_WITH_DOMAIN" variant of the template is used.

        Returns:
            The formatted user prompt string.
        """
        if self.level == PromptDetail.MINIMAL:
            plain = USER_PROMPT_MINIMAL
            with_domain = USER_PROMPT_MINIMAL_WITH_DOMAIN
            # Only the names are needed at this level.
            fields = {
                "variable_names": ", ".join(
                    entry.get("name", "unknown") for entry in self.variables
                )
            }
        else:
            fields = {
                "variable_details": _format_variable_details(
                    self.variables, self.level
                )
            }
            if self.level == PromptDetail.STANDARD:
                plain = USER_PROMPT_STANDARD
                with_domain = USER_PROMPT_STANDARD_WITH_DOMAIN
            else:  # Rich level
                plain = USER_PROMPT_RICH
                with_domain = USER_PROMPT_RICH_WITH_DOMAIN

        if self.domain:
            return with_domain.format(domain=self.domain, **fields)
        return plain.format(**fields)

    def get_variable_names(self) -> list[str]:
        """Return the ``name`` entry of every variable dictionary.

        Useful for validating LLM responses to ensure they reference
        valid variables.

        Returns:
            List of variable names, in the order of ``variables``; a
            variable without a name field contributes "unknown".

        Example:
            >>> prompt = GraphQueryPrompt(
            ...     variables=[{"name": "age"}, {"name": "income"}],
            ...     level=PromptDetail.MINIMAL,
            ... )
            >>> prompt.get_variable_names()
            ['age', 'income']
        """
        collected = []
        for entry in self.variables:
            collected.append(entry.get("name", "unknown"))
        return collected

    @classmethod
    def from_model_spec(
        cls,
        spec: "ModelSpec",
        level: PromptDetail = PromptDetail.STANDARD,
        output_format: OutputFormat = OutputFormat.EDGE_LIST,
        system_prompt: Optional[str] = None,
        use_llm_names: bool = True,
    ) -> "GraphQueryPrompt":
        """Create a GraphQueryPrompt from a ModelSpec.

        Builds a ``ViewFilter`` over ``spec``, asks it for the variables
        at ``level``, and takes the domain from ``spec.domain``.

        Args:
            spec: The model specification containing variable definitions.
            level: The view level determining context depth. Defaults to
                STANDARD.
            output_format: Desired output format for the LLM response.
                Defaults to EDGE_LIST.
            system_prompt: Custom system prompt to override the default.
            use_llm_names: Forwarded to ``ViewFilter``. If True (default),
                use llm_name for variables in the prompt; if False, use
                benchmark names directly.

        Returns:
            GraphQueryPrompt instance configured with the filtered
            variables and the domain from the specification.

        Example:
            >>> spec = ModelLoader.load("model.json")
            >>> prompt = GraphQueryPrompt.from_model_spec(
            ...     spec,
            ...     level=PromptDetail.RICH,
            ... )
            >>> system, user = prompt.build()
        """
        filtered = ViewFilter(
            spec, use_llm_names=use_llm_names
        ).filter_variables(level)

        return cls(
            variables=filtered,
            level=level,
            domain=spec.domain,
            output_format=output_format,
            system_prompt=system_prompt,
        )
401
+
402
+
403
# Expected response schemas for documentation and validation.
# Both are JSON Schema style dictionaries describing the JSON object the
# LLM is asked to return for the corresponding output format.


def _unit_interval_number() -> dict[str, Any]:
    """Return a fresh schema fragment for a number between 0 and 1."""
    # A new dict per call keeps the two schemas free of shared objects.
    return {"type": "number", "minimum": 0, "maximum": 1}


# Edge-list response: "edges" is required; every edge needs a source and
# a target, while confidence is optional.
EDGE_LIST_RESPONSE_SCHEMA = {
    "type": "object",
    "required": ["edges"],
    "properties": {
        "edges": {
            "type": "array",
            "items": {
                "type": "object",
                "required": ["source", "target"],
                "properties": {
                    "source": {"type": "string"},
                    "target": {"type": "string"},
                    "confidence": _unit_interval_number(),
                },
            },
        },
        "reasoning": {"type": "string"},
    },
}

# Adjacency-matrix response: both the variable list and the matrix (an
# array of arrays of numbers) are required.
ADJACENCY_MATRIX_RESPONSE_SCHEMA = {
    "type": "object",
    "required": ["variables", "adjacency_matrix"],
    "properties": {
        "variables": {"type": "array", "items": {"type": "string"}},
        "adjacency_matrix": {
            "type": "array",
            "items": {"type": "array", "items": _unit_interval_number()},
        },
        "reasoning": {"type": "string"},
    },
}