causaliq_knowledge-0.3.0-py3-none-any.whl → causaliq_knowledge-0.4.0-py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -10,6 +10,7 @@ The base cache infrastructure will migrate to causaliq-core.
 
 from __future__ import annotations
 
+import json
 from dataclasses import asdict, dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
@@ -47,6 +48,7 @@ class LLMMetadata:
         tokens: Token usage statistics.
         cost_usd: Estimated cost of the request in USD.
         cache_hit: Whether this was served from cache.
+        request_id: Optional identifier for the request (not in cache key).
     """
 
     provider: str = ""
@@ -55,6 +57,7 @@ class LLMMetadata:
     tokens: LLMTokenUsage = field(default_factory=LLMTokenUsage)
     cost_usd: float = 0.0
     cache_hit: bool = False
+    request_id: str = ""
 
     def to_dict(self) -> dict[str, Any]:
         """Convert to dictionary for JSON serialisation."""
@@ -65,6 +68,7 @@ class LLMMetadata:
             "tokens": asdict(self.tokens),
             "cost_usd": self.cost_usd,
             "cache_hit": self.cache_hit,
+            "request_id": self.request_id,
         }
 
     @classmethod
@@ -82,6 +86,7 @@ class LLMMetadata:
             ),
             cost_usd=data.get("cost_usd", 0.0),
             cache_hit=data.get("cache_hit", False),
+            request_id=data.get("request_id", ""),
         )
 
 
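Because both serialisation paths default `request_id` to an empty string, cache entries written by 0.3.0 (which lack the key) still load cleanly. A minimal round-trip sketch; the import path is an assumption, since the diff does not show which module defines `LLMMetadata`:

```python
# Assumed import path (not confirmed by this diff).
from causaliq_knowledge.llm.cache import LLMMetadata

# New entries carry the identifier through to_dict()/from_dict()...
meta = LLMMetadata(provider="openai", request_id="expt23")
assert LLMMetadata.from_dict(meta.to_dict()).request_id == "expt23"

# ...while 0.3.0-era dicts without the key fall back to "".
legacy = LLMMetadata.from_dict({"provider": "openai", "cache_hit": True})
assert legacy.request_id == ""
```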
@@ -107,11 +112,33 @@ class LLMResponse:
             "model_version": self.model_version,
         }
 
+    def to_export_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for export, parsing JSON content if valid.
+
+        Unlike to_dict(), this attempts to parse the content as JSON
+        for more readable exported files.
+        """
+        # Try to parse content as JSON for cleaner export
+        try:
+            parsed_content = json.loads(self.content)
+        except (json.JSONDecodeError, TypeError):
+            parsed_content = self.content
+
+        return {
+            "content": parsed_content,
+            "finish_reason": self.finish_reason,
+            "model_version": self.model_version,
+        }
+
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> LLMResponse:
         """Create from dictionary."""
+        content = data.get("content", "")
+        # Handle both string and parsed JSON content (from export files)
+        if isinstance(content, dict):
+            content = json.dumps(content)
         return cls(
-            content=data.get("content", ""),
+            content=content,
            finish_reason=data.get("finish_reason", "stop"),
            model_version=data.get("model_version", ""),
         )
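The asymmetry is deliberate: `to_export_dict()` unpacks JSON string content into a nested object for readable export files, and `from_dict()` re-serialises any dict content it finds, so exported entries import back to the internal string form. A self-contained sketch of that content handling (mirroring the logic above, not calling the package):

```python
import json

# Export path: parse the content if it is valid JSON.
content = '{"edges": [["smoking", "cancer"]]}'
try:
    exported = json.loads(content)
except (json.JSONDecodeError, TypeError):
    exported = content

# Import path: dict content from an export file is re-serialised.
restored = json.dumps(exported) if isinstance(exported, dict) else exported

# The round trip preserves the JSON value (formatting may differ).
assert json.loads(restored) == json.loads(content)
```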
@@ -137,6 +164,30 @@ class LLMCacheEntry:
     response: LLMResponse = field(default_factory=LLMResponse)
     metadata: LLMMetadata = field(default_factory=LLMMetadata)
 
+    @staticmethod
+    def _split_message_content(messages: list[dict[str, Any]]) -> list[Any]:
+        """Convert message content with newlines into arrays of lines."""
+        result = []
+        for msg in messages:
+            new_msg = dict(msg)
+            content = new_msg.get("content", "")
+            if isinstance(content, str) and "\n" in content:
+                new_msg["content"] = content.split("\n")
+            result.append(new_msg)
+        return result
+
+    @staticmethod
+    def _join_message_content(messages: list[Any]) -> list[dict[str, Any]]:
+        """Convert message content arrays back into strings with newlines."""
+        result = []
+        for msg in messages:
+            new_msg = dict(msg)
+            content = new_msg.get("content", "")
+            if isinstance(content, list):
+                new_msg["content"] = "\n".join(content)
+            result.append(new_msg)
+        return result
+
     def to_dict(self) -> dict[str, Any]:
         """Convert to dictionary for JSON serialisation."""
         return {
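These helpers exist purely for export readability: a prompt with embedded newlines becomes a JSON array with one element per line, and the inverse is applied on import. A standalone illustration of the round trip:

```python
# One message with multi-line content, as stored internally.
msg = {"role": "user", "content": "What causes what?\nAnswer as JSON."}

# _split_message_content: newline-separated string -> list of lines.
exported = dict(msg)
exported["content"] = msg["content"].split("\n")
assert exported["content"] == ["What causes what?", "Answer as JSON."]

# _join_message_content: list of lines -> newline-separated string.
restored = dict(exported)
restored["content"] = "\n".join(exported["content"])
assert restored == msg
```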
@@ -150,13 +201,37 @@ class LLMCacheEntry:
             "metadata": self.metadata.to_dict(),
         }
 
+    def to_export_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for export with readable formatting.
+
+        - Message content with newlines is split into arrays of lines
+        - Response JSON content is parsed into a proper JSON structure
+        """
+        return {
+            "cache_key": {
+                "model": self.model,
+                "messages": self._split_message_content(self.messages),
+                "temperature": self.temperature,
+                "max_tokens": self.max_tokens,
+            },
+            "response": self.response.to_export_dict(),
+            "metadata": self.metadata.to_dict(),
+        }
+
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> LLMCacheEntry:
-        """Create from dictionary."""
+        """Create from dictionary.
+
+        Handles both internal format (string content) and export format
+        (array of lines for content).
+        """
         cache_key = data.get("cache_key", {})
+        messages = cache_key.get("messages", [])
+        # Handle export format where content is array of lines
+        messages = cls._join_message_content(messages)
         return cls(
             model=cache_key.get("model", ""),
-            messages=cache_key.get("messages", []),
+            messages=messages,
             temperature=cache_key.get("temperature", 0.0),
             max_tokens=cache_key.get("max_tokens"),
             response=LLMResponse.from_dict(data.get("response", {})),
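Putting the two export helpers together, `to_export_dict()` groups the hash inputs under a `cache_key` block, splits prompt lines, and nests the parsed response. An illustrative sketch of the resulting structure; every value below is made up:

```python
# Illustrative shape of an exported cache entry; all values are invented.
exported_entry = {
    "cache_key": {
        "model": "gpt-4",
        "messages": [
            {"role": "user", "content": ["What causes what?", "Answer as JSON."]},
        ],
        "temperature": 0.0,
        "max_tokens": None,
    },
    "response": {
        "content": {"edges": [["smoking", "cancer"]]},  # parsed object, not a string
        "finish_reason": "stop",
        "model_version": "gpt-4",
    },
    "metadata": {"provider": "openai", "request_id": "expt23"},  # other fields omitted
}
```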
@@ -179,6 +254,7 @@ class LLMCacheEntry:
         input_tokens: int = 0,
         output_tokens: int = 0,
         cost_usd: float = 0.0,
+        request_id: str = "",
     ) -> LLMCacheEntry:
         """Create a cache entry with common parameters.
 
@@ -195,6 +271,7 @@ class LLMCacheEntry:
             input_tokens: Number of input tokens.
             output_tokens: Number of output tokens.
             cost_usd: Estimated cost in USD.
+            request_id: Optional identifier for the request (not part of hash).
 
         Returns:
             Configured LLMCacheEntry.
@@ -220,6 +297,7 @@ class LLMCacheEntry:
                 ),
                 cost_usd=cost_usd,
                 cache_hit=False,
+                request_id=request_id,
             ),
         )
 
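With the plumbing above, `create()` threads `request_id` through to the entry's metadata; per the docstrings it is excluded from the cache key, so labelling a run does not change which cached response it matches. A usage sketch, again assuming the import path:

```python
from causaliq_knowledge.llm.cache import LLMCacheEntry  # assumed path

entry = LLMCacheEntry.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "test"}],
    content="Response",
    provider="openai",
    request_id="expt23",  # labels the run; not part of the cache key
)
assert entry.metadata.request_id == "expt23"
assert entry.metadata.cache_hit is False
```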
@@ -285,18 +363,14 @@ class LLMEntryEncoder(JsonEncoder):
     ) -> str:
         """Generate a human-readable filename for export.
 
-        Creates a filename from model name and query details, with a
-        short hash suffix for uniqueness.
+        Creates a filename using request_id, timestamp, and provider:
+        {request_id}_{yyyy-mm-dd-hhmmss}_{provider}.json
 
-        For edge queries, extracts node names for format:
-        {model}_{node_a}_{node_b}_edge_{hash}.json
-
-        For other queries, uses prompt excerpt:
-        {model}_{prompt_excerpt}_{hash}.json
+        If request_id is not set, falls back to a short hash prefix.
 
         Args:
             entry: The cache entry to generate filename for.
-            cache_key: The cache key (hash) for uniqueness suffix.
+            cache_key: The cache key (hash) for fallback uniqueness.
 
         Returns:
             Human-readable filename with .json extension.
@@ -305,67 +379,56 @@ class LLMEntryEncoder(JsonEncoder):
             >>> encoder = LLMEntryEncoder()
             >>> entry = LLMCacheEntry.create(
             ...     model="gpt-4",
-            ...     messages=[{"role": "user", "content": "smoking and lung"}],
-            ...     content="Yes...",
+            ...     messages=[{"role": "user", "content": "test"}],
+            ...     content="Response",
+            ...     provider="openai",
+            ...     request_id="expt23",
             ... )
-            >>> encoder.generate_export_filename(entry, "a1b2c3d4e5f6")
-            'gpt4_smoking_lung_edge_a1b2.json'
+            >>> # Returns something like: expt23_2026-01-29-143052_openai.json
         """
         import re
-
-        # Sanitize model name (alphanumeric only, lowercase)
-        model = re.sub(r"[^a-z0-9]", "", entry.model.lower())
-        if len(model) > 15:
-            model = model[:15]
-
-        # Extract user message content
-        prompt = ""
-        for msg in entry.messages:
-            if msg.get("role") == "user":
-                prompt = msg.get("content", "")
-                break
-
-        # Try to extract node names for edge queries
-        # Look for patterns like "X and Y", "X cause Y", "between X and Y"
-        prompt_lower = prompt.lower()
-        slug = ""
-
-        # Pattern: "between X and Y" or "X and Y"
-        match = re.search(r"(?:between\s+)?(\w+)\s+and\s+(\w+)", prompt_lower)
-        if match:
-            node_a = match.group(1)[:15]
-            node_b = match.group(2)[:15]
-            slug = f"{node_a}_{node_b}_edge"
-
-        # Fallback: extract first significant words from prompt
-        if not slug:
-            # Remove common words, keep alphanumeric
-            cleaned = re.sub(r"[^a-z0-9\s]", "", prompt_lower)
-            words = [
-                w
-                for w in cleaned.split()
-                if w
-                not in ("the", "a", "an", "is", "are", "does", "do", "can")
-            ]
-            slug = "_".join(words[:4])
-            if len(slug) > 30:
-                slug = slug[:30].rstrip("_")
-
-        # Short hash suffix for uniqueness (4 chars)
-        hash_suffix = cache_key[:4] if cache_key else "0000"
-
-        # Build filename
-        parts = [p for p in [model, slug, hash_suffix] if p]
-        return "_".join(parts) + ".json"
+        from datetime import datetime
+
+        # Get request_id or use hash prefix as fallback
+        request_id = entry.metadata.request_id or cache_key[:8]
+        # Sanitise request_id (alphanumeric, hyphens, underscores only)
+        request_id = re.sub(r"[^a-zA-Z0-9_-]", "", request_id)
+        if not request_id:
+            request_id = cache_key[:8] if cache_key else "unknown"
+
+        # Parse timestamp and format as yyyy-mm-dd-hhmmss
+        timestamp_str = entry.metadata.timestamp
+        if timestamp_str:
+            try:
+                # Parse ISO format timestamp
+                dt = datetime.fromisoformat(
+                    timestamp_str.replace("Z", "+00:00")
+                )
+                formatted_ts = dt.strftime("%Y-%m-%d-%H%M%S")
+            except ValueError:
+                formatted_ts = "unknown"
+        else:
+            formatted_ts = "unknown"
+
+        # Get provider, sanitised
+        provider = entry.metadata.provider or "unknown"
+        provider = re.sub(r"[^a-z0-9]", "", provider.lower())
+        if not provider:
+            provider = "unknown"
+
+        # Build filename: id_timestamp_provider.json
+        return f"{request_id}_{formatted_ts}_{provider}.json"
 
     def export_entry(self, entry: LLMCacheEntry, path: Path) -> None:
         """Export an LLMCacheEntry to a JSON file.
 
+        Uses to_export_dict() to parse JSON content for readability.
+
         Args:
             entry: The cache entry to export.
             path: Destination file path.
         """
-        self.export(entry.to_dict(), path)
+        self.export(entry.to_export_dict(), path)
 
     def import_entry(self, path: Path) -> LLMCacheEntry:
         """Import an LLMCacheEntry from a JSON file.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: causaliq-knowledge
-Version: 0.3.0
+Version: 0.4.0
 Summary: Incorporating LLM and human knowledge into causal discovery
 Author-email: CausalIQ <info@causaliq.com>
 Maintainer-email: CausalIQ <info@causaliq.com>
@@ -24,6 +24,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: causaliq-workflow>=0.1.1.dev3
 Requires-Dist: click>=8.0.0
 Requires-Dist: httpx>=0.24.0
 Requires-Dist: pydantic>=2.0.0
@@ -32,7 +33,7 @@ Requires-Dist: causaliq-core>=0.3.0; extra == "dev"
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
 Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
-Requires-Dist: black>=22.0.0; extra == "dev"
+Requires-Dist: black<26.0.0,>=25.0.0; extra == "dev"
 Requires-Dist: isort>=5.10.0; extra == "dev"
 Requires-Dist: flake8>=5.0.0; extra == "dev"
 Requires-Dist: mypy>=1.0.0; extra == "dev"
@@ -42,6 +43,7 @@ Requires-Dist: build>=0.8.0; extra == "dev"
 Requires-Dist: twine>=4.0.0; extra == "dev"
 Provides-Extra: test
 Requires-Dist: causaliq-core>=0.3.0; extra == "test"
+Requires-Dist: causaliq-workflow>=0.1.1.dev3; extra == "test"
 Requires-Dist: pytest>=7.0.0; extra == "test"
 Requires-Dist: pytest-cov>=4.0.0; extra == "test"
 Requires-Dist: pytest-mock>=3.10.0; extra == "test"
@@ -89,13 +91,15 @@ Currently implemented releases:
 
 - **Release v0.1.0 - Foundation LLM**: Simple LLM queries to 1 or 2 LLMs about edge existence and orientation to support graph averaging
 - **Release v0.2.0 - Additional LLMs**: Support for 7 LLM providers (Groq, Gemini, OpenAI, Anthropic, DeepSeek, Mistral, Ollama)
-- **Release v0.3.0 - LLM Caching** *(in development)*: SQLite-based response caching with CLI tools for cache management
+- **Release v0.3.0 - LLM Caching**: SQLite-based response caching with CLI tools for cache management
+- **Release v0.4.0 - Graph Generation**: CLI and CausalIQ workflow action for LLM-generated causal graphs
 
 Planned:
 
-- **Release v0.4.0 - LLM Context**: Variable/role/literature etc context
-- **Release v0.5.0 - Algorithm integration**: Integration into structure learning algorithms
-- **Release v0.6.0 - Legacy Reference**: Support for legacy approaches of deriving knowledge from reference networks
+- **Release v0.5.0 - Graph Caching**: Save generated graphs to Workflow caches
+- **Release v0.6.0 - LLM Cost Tracking**: Query LLM provider APIs for usage and cost statistics
+- **Release v0.7.0 - LLM Context**: Variable/role/literature etc. context
+- **Release v0.8.0 - Algorithm integration**: Integration into structure learning algorithms
 
 ## Implementation Approach
 
@@ -0,0 +1,42 @@
+causaliq_knowledge/__init__.py,sha256=IVlm0G1g-xxJS13SFeC0h1D6LL7rfzX96F7rjfU-wqA,982
+causaliq_knowledge/action.py,sha256=X7EGSTV7IiwnO8cTcz5-ExXpRuwamSaC1jWpAc86i6I,16416
+causaliq_knowledge/base.py,sha256=GBG-sftOKkmUoQzTpm6anDTjP-2nInRZN_36dxoYhvk,2917
+causaliq_knowledge/models.py,sha256=tWGf186ASwO8NHiN97pEOLuBJmJI6Q9jvpU0mYZNdS0,4058
+causaliq_knowledge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+causaliq_knowledge/cache/__init__.py,sha256=Av92YdCdVTRt9TmB2edRsIFDxq3f1Qi0daq0sFV1rp0,549
+causaliq_knowledge/cache/token_cache.py,sha256=o3qYGnc1e7nSJm9BsM6pmp6cbsPzhaCnEM1utCY682E,23085
+causaliq_knowledge/cache/encoders/__init__.py,sha256=gZ7gw96paFDbnJuc4v1aJsEJfVinI4zc03tXyFvfZxo,461
+causaliq_knowledge/cache/encoders/base.py,sha256=jK7--Or3lVp1UkKghKYFo_gKJp0HsMxosL_8eYL7RQQ,2679
+causaliq_knowledge/cache/encoders/json_encoder.py,sha256=7zN0qRdpVa8EZS08F22buyAYoIpnx5lngK5p1wK-3WI,15689
+causaliq_knowledge/cli/__init__.py,sha256=worFcurYE_T5_uYvnM4oL3aP3v_fSWLUhggbCP9TZMc,434
+causaliq_knowledge/cli/cache.py,sha256=w_fF3e8Ru7Sxl3FMgab_x2UWOfPMVq7BDvTUD-2Kayg,17863
+causaliq_knowledge/cli/generate.py,sha256=78c1GUv4w42qtPjy9NV0X1q5kw9ATl2yKcdl-KBTxhI,13187
+causaliq_knowledge/cli/main.py,sha256=MwUmokX9x3bL7E7pZMquWvVnKg3b-qdVk8VMR0ejK5o,4665
+causaliq_knowledge/cli/models.py,sha256=2ga5PWhOOo2vE6e3A3oxvO2FB88zztuRoUMPGlhyE6M,9587
+causaliq_knowledge/graph/__init__.py,sha256=920si3oBsuYIBW8gzHBYQnHCt9KupDdkPqVxTsj_py0,1952
+causaliq_knowledge/graph/generator.py,sha256=tM1KKKgpsiLLziCUKKnAiH9n1yO8zUnSFZ-QbFZKdJU,15971
+causaliq_knowledge/graph/loader.py,sha256=EO5Yj02qRrPY22rvfVk-LfXSZMVNEn37-H4u5kHCY0M,6615
+causaliq_knowledge/graph/models.py,sha256=4f9kaHHs9J_ma95EgV0GItliY-G4BLNNyIwBq8yTiVk,14924
+causaliq_knowledge/graph/params.py,sha256=RPviCO3ZOsOrm_rsysST4Y4hhWDN6jcJt46ajDvSY0M,5828
+causaliq_knowledge/graph/prompts.py,sha256=C29w5LQDf2tF9JeFADRrKSjkP6dVzjsa1FNX_6ndt70,15399
+causaliq_knowledge/graph/response.py,sha256=UaYbnVpfkWDZWMS9wQbEU4QP5In1YAqId2EuJ1V2kho,12437
+causaliq_knowledge/graph/view_filter.py,sha256=-ebhj8cXxgLimAeAZ023YgW6kI-c8jTp_LDKjYf1Kow,5297
+causaliq_knowledge/llm/__init__.py,sha256=30AL0h64zIkXoiqhMY7gjaf7mrtwtwMW38vzhns0My4,1663
+causaliq_knowledge/llm/anthropic_client.py,sha256=dPFHYGWL4xwQCtmQuGwGY4DBKSINOgOS-11ekznaiXo,8719
+causaliq_knowledge/llm/base_client.py,sha256=FJGX5QYawcelc3UScSMwvBJnKrUVR3PrBIY72KYthTU,12544
+causaliq_knowledge/llm/cache.py,sha256=6bpCyBv_bUorKceYc5qpgXi30A0tDRwAtlhxS3TQklE,15404
+causaliq_knowledge/llm/deepseek_client.py,sha256=ZcOpgnYa66XHjiTaF5ekR_BtosRYvVmzlIafp_Gsx_A,3543
+causaliq_knowledge/llm/gemini_client.py,sha256=XJMq9sPo7zExrALSr2rIRHLheSPqKo8ENG0KtdJ1cjw,9924
+causaliq_knowledge/llm/groq_client.py,sha256=PnTXqtMF1Km9DY4HiCZXQ6LeOzdjZtQJaeuGe1GbeME,7531
+causaliq_knowledge/llm/mistral_client.py,sha256=dTAOtymffCM1AJp5-JcfizofYrUA-jhKfHWrhZe2DDI,4187
+causaliq_knowledge/llm/ollama_client.py,sha256=PPU3g-nD8D546zcYB3uGxZ9yVbU4Gngo3snM2tRFeTc,8612
+causaliq_knowledge/llm/openai_client.py,sha256=MJmB6P32TZESMlXhn9d0-b3vFWXmf7ojHQ5CY8mCENI,3835
+causaliq_knowledge/llm/openai_compat_client.py,sha256=L8ZW5csuhUePq4mt3EGOUqhR3tleFmM72UlhPBsgIMQ,9518
+causaliq_knowledge/llm/prompts.py,sha256=bJ9iVGKUfTfLi2eWh-FFM4cNzk5Ux4Z0x8R6Ia27Dbo,6598
+causaliq_knowledge/llm/provider.py,sha256=VDEv-1esT_EgJk_Gwlfl4423ojglOxzPCBCFbOFE4DQ,15184
+causaliq_knowledge-0.4.0.dist-info/licenses/LICENSE,sha256=vUFUzQnti-D-MLSi9NxFlsFYOKwU25sxxH7WgJOQFIs,1084
+causaliq_knowledge-0.4.0.dist-info/METADATA,sha256=ZWEguAYGAWwk73VlIvb2KFXEyf37pTctjYZNlUgHWZM,9038
+causaliq_knowledge-0.4.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+causaliq_knowledge-0.4.0.dist-info/entry_points.txt,sha256=tuHaj0XNw9KySBpHOZeAC5Q08G96ftxocOy2POV1DdA,179
+causaliq_knowledge-0.4.0.dist-info/top_level.txt,sha256=GcxQf4BQAGa38i2-j8ylk2FmnBHtEZ9-8bSt-7Uka7k,19
+causaliq_knowledge-0.4.0.dist-info/RECORD,,
@@ -1,3 +1,6 @@
+[causaliq.actions]
+causaliq-knowledge = causaliq_knowledge:CausalIQAction
+
 [console_scripts]
 causaliq-knowledge = causaliq_knowledge.cli:main
 cqknow = causaliq_knowledge.cli:main
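The new `[causaliq.actions]` group exposes `CausalIQAction` as a discoverable plugin alongside the existing console scripts. A minimal discovery sketch using the Python 3.10+ `importlib.metadata` API (how causaliq-workflow actually consumes this group is not shown in the diff):

```python
from importlib.metadata import entry_points

# Iterate the plugin group this wheel registers (Python 3.10+ API).
for ep in entry_points(group="causaliq.actions"):
    if ep.name == "causaliq-knowledge":
        action_cls = ep.load()  # resolves causaliq_knowledge:CausalIQAction
        print(action_cls)
```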