causaliq_knowledge-0.3.0-py3-none-any.whl → causaliq_knowledge-0.4.0-py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -10,6 +10,7 @@ The base cache infrastructure will migrate to causaliq-core.
 
 from __future__ import annotations
 
+import json
 from dataclasses import asdict, dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
@@ -47,6 +48,7 @@ class LLMMetadata:
         tokens: Token usage statistics.
         cost_usd: Estimated cost of the request in USD.
         cache_hit: Whether this was served from cache.
+        request_id: Optional identifier for the request (not in cache key).
     """
 
     provider: str = ""
@@ -55,6 +57,7 @@ class LLMMetadata:
     tokens: LLMTokenUsage = field(default_factory=LLMTokenUsage)
     cost_usd: float = 0.0
     cache_hit: bool = False
+    request_id: str = ""
 
     def to_dict(self) -> dict[str, Any]:
         """Convert to dictionary for JSON serialisation."""
@@ -65,6 +68,7 @@ class LLMMetadata:
             "tokens": asdict(self.tokens),
             "cost_usd": self.cost_usd,
             "cache_hit": self.cache_hit,
+            "request_id": self.request_id,
         }
 
     @classmethod
@@ -82,6 +86,7 @@ class LLMMetadata:
             ),
             cost_usd=data.get("cost_usd", 0.0),
             cache_hit=data.get("cache_hit", False),
+            request_id=data.get("request_id", ""),
         )
 
 
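Because both serialisation paths default `request_id` to an empty string, cache entries written by 0.3.0 (which lack the key) still load cleanly. A minimal round-trip sketch; the import path is an assumption, since the diff does not show which module defines `LLMMetadata`:

```python
# Assumed import path (not confirmed by this diff).
from causaliq_knowledge.llm.cache import LLMMetadata

# New entries carry the identifier through to_dict()/from_dict()...
meta = LLMMetadata(provider="openai", request_id="expt23")
assert LLMMetadata.from_dict(meta.to_dict()).request_id == "expt23"

# ...while 0.3.0-era dicts without the key fall back to "".
legacy = LLMMetadata.from_dict({"provider": "openai", "cache_hit": True})
assert legacy.request_id == ""
```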
@@ -107,11 +112,33 @@ class LLMResponse:
             "model_version": self.model_version,
         }
 
+    def to_export_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for export, parsing JSON content if valid.
+
+        Unlike to_dict(), this attempts to parse the content as JSON
+        for more readable exported files.
+        """
+        # Try to parse content as JSON for cleaner export
+        try:
+            parsed_content = json.loads(self.content)
+        except (json.JSONDecodeError, TypeError):
+            parsed_content = self.content
+
+        return {
+            "content": parsed_content,
+            "finish_reason": self.finish_reason,
+            "model_version": self.model_version,
+        }
+
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> LLMResponse:
         """Create from dictionary."""
+        content = data.get("content", "")
+        # Handle both string and parsed JSON content (from export files)
+        if isinstance(content, dict):
+            content = json.dumps(content)
         return cls(
-            content=data.get("content", ""),
+            content=content,
            finish_reason=data.get("finish_reason", "stop"),
            model_version=data.get("model_version", ""),
         )
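The asymmetry is deliberate: `to_export_dict()` unpacks JSON string content into a nested object for readable export files, and `from_dict()` re-serialises any dict content it finds, so exported entries import back to the internal string form. A self-contained sketch of that content handling (mirroring the logic above, not calling the package):

```python
import json

# Export path: parse the content if it is valid JSON.
content = '{"edges": [["smoking", "cancer"]]}'
try:
    exported = json.loads(content)
except (json.JSONDecodeError, TypeError):
    exported = content

# Import path: dict content from an export file is re-serialised.
restored = json.dumps(exported) if isinstance(exported, dict) else exported

# The round trip preserves the JSON value (formatting may differ).
assert json.loads(restored) == json.loads(content)
```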
@@ -137,6 +164,30 @@ class LLMCacheEntry:
     response: LLMResponse = field(default_factory=LLMResponse)
     metadata: LLMMetadata = field(default_factory=LLMMetadata)
 
+    @staticmethod
+    def _split_message_content(messages: list[dict[str, Any]]) -> list[Any]:
+        """Convert message content with newlines into arrays of lines."""
+        result = []
+        for msg in messages:
+            new_msg = dict(msg)
+            content = new_msg.get("content", "")
+            if isinstance(content, str) and "\n" in content:
+                new_msg["content"] = content.split("\n")
+            result.append(new_msg)
+        return result
+
+    @staticmethod
+    def _join_message_content(messages: list[Any]) -> list[dict[str, Any]]:
+        """Convert message content arrays back into strings with newlines."""
+        result = []
+        for msg in messages:
+            new_msg = dict(msg)
+            content = new_msg.get("content", "")
+            if isinstance(content, list):
+                new_msg["content"] = "\n".join(content)
+            result.append(new_msg)
+        return result
+
     def to_dict(self) -> dict[str, Any]:
         """Convert to dictionary for JSON serialisation."""
         return {
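These helpers exist purely for export readability: a prompt with embedded newlines becomes a JSON array with one element per line, and the inverse is applied on import. A standalone illustration of the round trip:

```python
# One message with multi-line content, as stored internally.
msg = {"role": "user", "content": "What causes what?\nAnswer as JSON."}

# _split_message_content: newline-separated string -> list of lines.
exported = dict(msg)
exported["content"] = msg["content"].split("\n")
assert exported["content"] == ["What causes what?", "Answer as JSON."]

# _join_message_content: list of lines -> newline-separated string.
restored = dict(exported)
restored["content"] = "\n".join(exported["content"])
assert restored == msg
```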
@@ -150,13 +201,37 @@ class LLMCacheEntry:
             "metadata": self.metadata.to_dict(),
         }
 
+    def to_export_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for export with readable formatting.
+
+        - Message content with newlines is split into arrays of lines
+        - Response JSON content is parsed into a proper JSON structure
+        """
+        return {
+            "cache_key": {
+                "model": self.model,
+                "messages": self._split_message_content(self.messages),
+                "temperature": self.temperature,
+                "max_tokens": self.max_tokens,
+            },
+            "response": self.response.to_export_dict(),
+            "metadata": self.metadata.to_dict(),
+        }
+
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> LLMCacheEntry:
-        """Create from dictionary."""
+        """Create from dictionary.
+
+        Handles both internal format (string content) and export format
+        (array of lines for content).
+        """
         cache_key = data.get("cache_key", {})
+        messages = cache_key.get("messages", [])
+        # Handle export format where content is array of lines
+        messages = cls._join_message_content(messages)
         return cls(
             model=cache_key.get("model", ""),
-            messages=cache_key.get("messages", []),
+            messages=messages,
             temperature=cache_key.get("temperature", 0.0),
             max_tokens=cache_key.get("max_tokens"),
             response=LLMResponse.from_dict(data.get("response", {})),
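Putting the two export helpers together, `to_export_dict()` groups the hash inputs under a `cache_key` block, splits prompt lines, and nests the parsed response. An illustrative sketch of the resulting structure; every value below is made up:

```python
# Illustrative shape of an exported cache entry; all values are invented.
exported_entry = {
    "cache_key": {
        "model": "gpt-4",
        "messages": [
            {"role": "user", "content": ["What causes what?", "Answer as JSON."]},
        ],
        "temperature": 0.0,
        "max_tokens": None,
    },
    "response": {
        "content": {"edges": [["smoking", "cancer"]]},  # parsed object, not a string
        "finish_reason": "stop",
        "model_version": "gpt-4",
    },
    "metadata": {"provider": "openai", "request_id": "expt23"},  # other fields omitted
}
```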
@@ -179,6 +254,7 @@ class LLMCacheEntry:
         input_tokens: int = 0,
         output_tokens: int = 0,
         cost_usd: float = 0.0,
+        request_id: str = "",
     ) -> LLMCacheEntry:
         """Create a cache entry with common parameters.
 
@@ -195,6 +271,7 @@ class LLMCacheEntry:
             input_tokens: Number of input tokens.
             output_tokens: Number of output tokens.
             cost_usd: Estimated cost in USD.
+            request_id: Optional identifier for the request (not part of hash).
 
         Returns:
             Configured LLMCacheEntry.
@@ -220,6 +297,7 @@ class LLMCacheEntry:
                 ),
                 cost_usd=cost_usd,
                 cache_hit=False,
+                request_id=request_id,
             ),
         )
 
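With the plumbing above, `create()` threads `request_id` through to the entry's metadata; per the docstrings it is excluded from the cache key, so labelling a run does not change which cached response it matches. A usage sketch, again assuming the import path:

```python
from causaliq_knowledge.llm.cache import LLMCacheEntry  # assumed path

entry = LLMCacheEntry.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "test"}],
    content="Response",
    provider="openai",
    request_id="expt23",  # labels the run; not part of the cache key
)
assert entry.metadata.request_id == "expt23"
assert entry.metadata.cache_hit is False
```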
@@ -285,18 +363,14 @@ class LLMEntryEncoder(JsonEncoder):
     ) -> str:
         """Generate a human-readable filename for export.
 
-        Creates a filename from model name and query details, with a
-        short hash suffix for uniqueness.
+        Creates a filename using request_id, timestamp, and provider:
+        {request_id}_{yyyy-mm-dd-hhmmss}_{provider}.json
 
-        For edge queries, extracts node names for format:
-        {model}_{node_a}_{node_b}_edge_{hash}.json
-
-        For other queries, uses prompt excerpt:
-        {model}_{prompt_excerpt}_{hash}.json
+        If request_id is not set, falls back to a short hash prefix.
 
         Args:
             entry: The cache entry to generate filename for.
-            cache_key: The cache key (hash) for uniqueness suffix.
+            cache_key: The cache key (hash) for fallback uniqueness.
 
         Returns:
             Human-readable filename with .json extension.
@@ -305,67 +379,56 @@ class LLMEntryEncoder(JsonEncoder):
             >>> encoder = LLMEntryEncoder()
             >>> entry = LLMCacheEntry.create(
             ...     model="gpt-4",
-            ...     messages=[{"role": "user", "content": "smoking and lung"}],
-            ...     content="Yes...",
+            ...     messages=[{"role": "user", "content": "test"}],
+            ...     content="Response",
+            ...     provider="openai",
+            ...     request_id="expt23",
             ... )
-            >>> encoder.generate_export_filename(entry, "a1b2c3d4e5f6")
-            'gpt4_smoking_lung_edge_a1b2.json'
+            >>> # Returns something like: expt23_2026-01-29-143052_openai.json
         """
         import re
-
-        # Sanitize model name (alphanumeric only, lowercase)
-        model = re.sub(r"[^a-z0-9]", "", entry.model.lower())
-        if len(model) > 15:
-            model = model[:15]
-
-        # Extract user message content
-        prompt = ""
-        for msg in entry.messages:
-            if msg.get("role") == "user":
-                prompt = msg.get("content", "")
-                break
-
-        # Try to extract node names for edge queries
-        # Look for patterns like "X and Y", "X cause Y", "between X and Y"
-        prompt_lower = prompt.lower()
-        slug = ""
-
-        # Pattern: "between X and Y" or "X and Y"
-        match = re.search(r"(?:between\s+)?(\w+)\s+and\s+(\w+)", prompt_lower)
-        if match:
-            node_a = match.group(1)[:15]
-            node_b = match.group(2)[:15]
-            slug = f"{node_a}_{node_b}_edge"
-
-        # Fallback: extract first significant words from prompt
-        if not slug:
-            # Remove common words, keep alphanumeric
-            cleaned = re.sub(r"[^a-z0-9\s]", "", prompt_lower)
-            words = [
-                w
-                for w in cleaned.split()
-                if w
-                not in ("the", "a", "an", "is", "are", "does", "do", "can")
-            ]
-            slug = "_".join(words[:4])
-            if len(slug) > 30:
-                slug = slug[:30].rstrip("_")
-
-        # Short hash suffix for uniqueness (4 chars)
-        hash_suffix = cache_key[:4] if cache_key else "0000"
-
-        # Build filename
-        parts = [p for p in [model, slug, hash_suffix] if p]
-        return "_".join(parts) + ".json"
+        from datetime import datetime
+
+        # Get request_id or use hash prefix as fallback
+        request_id = entry.metadata.request_id or cache_key[:8]
+        # Sanitise request_id (alphanumeric, hyphens, underscores only)
+        request_id = re.sub(r"[^a-zA-Z0-9_-]", "", request_id)
+        if not request_id:
+            request_id = cache_key[:8] if cache_key else "unknown"
+
+        # Parse timestamp and format as yyyy-mm-dd-hhmmss
+        timestamp_str = entry.metadata.timestamp
+        if timestamp_str:
+            try:
+                # Parse ISO format timestamp
+                dt = datetime.fromisoformat(
+                    timestamp_str.replace("Z", "+00:00")
+                )
+                formatted_ts = dt.strftime("%Y-%m-%d-%H%M%S")
+            except ValueError:
+                formatted_ts = "unknown"
+        else:
+            formatted_ts = "unknown"
+
+        # Get provider, sanitised
+        provider = entry.metadata.provider or "unknown"
+        provider = re.sub(r"[^a-z0-9]", "", provider.lower())
+        if not provider:
+            provider = "unknown"
+
+        # Build filename: id_timestamp_provider.json
+        return f"{request_id}_{formatted_ts}_{provider}.json"
 
     def export_entry(self, entry: LLMCacheEntry, path: Path) -> None:
         """Export an LLMCacheEntry to a JSON file.
 
+        Uses to_export_dict() to parse JSON content for readability.
+
         Args:
             entry: The cache entry to export.
             path: Destination file path.
         """
-        self.export(entry.to_dict(), path)
+        self.export(entry.to_export_dict(), path)
 
     def import_entry(self, path: Path) -> LLMCacheEntry:
         """Import an LLMCacheEntry from a JSON file.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: causaliq-knowledge
-Version: 0.3.0
+Version: 0.4.0
 Summary: Incorporating LLM and human knowledge into causal discovery
 Author-email: CausalIQ <info@causaliq.com>
 Maintainer-email: CausalIQ <info@causaliq.com>
@@ -24,6 +24,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: causaliq-workflow>=0.1.1.dev3
 Requires-Dist: click>=8.0.0
 Requires-Dist: httpx>=0.24.0
 Requires-Dist: pydantic>=2.0.0
@@ -32,7 +33,7 @@ Requires-Dist: causaliq-core>=0.3.0; extra == "dev"
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
 Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
-Requires-Dist: black>=22.0.0; extra == "dev"
+Requires-Dist: black<26.0.0,>=25.0.0; extra == "dev"
 Requires-Dist: isort>=5.10.0; extra == "dev"
 Requires-Dist: flake8>=5.0.0; extra == "dev"
 Requires-Dist: mypy>=1.0.0; extra == "dev"
@@ -42,6 +43,7 @@ Requires-Dist: build>=0.8.0; extra == "dev"
 Requires-Dist: twine>=4.0.0; extra == "dev"
 Provides-Extra: test
 Requires-Dist: causaliq-core>=0.3.0; extra == "test"
+Requires-Dist: causaliq-workflow>=0.1.1.dev3; extra == "test"
 Requires-Dist: pytest>=7.0.0; extra == "test"
 Requires-Dist: pytest-cov>=4.0.0; extra == "test"
 Requires-Dist: pytest-mock>=3.10.0; extra == "test"
@@ -89,13 +91,15 @@ Currently implemented releases:
 
 - **Release v0.1.0 - Foundation LLM**: Simple LLM queries to 1 or 2 LLMs about edge existence and orientation to support graph averaging
 - **Release v0.2.0 - Additional LLMs**: Support for 7 LLM providers (Groq, Gemini, OpenAI, Anthropic, DeepSeek, Mistral, Ollama)
-- **Release v0.3.0 - LLM Caching** *(in development)*: SQLite-based response caching with CLI tools for cache management
+- **Release v0.3.0 - LLM Caching**: SQLite-based response caching with CLI tools for cache management
+- **Release v0.4.0 - Graph Generation**: CLI and CausalIQ workflow action for LLM-generated causal graphs
 
 Planned:
 
-- **Release v0.4.0 - LLM Context**: Variable/role/literature etc context
-- **Release v0.5.0 - Algorithm integration**: Integration into structure learning algorithms
-- **Release v0.6.0 - Legacy Reference**: Support for legacy approaches of deriving knowledge from reference networks
+- **Release v0.5.0 - Graph Caching**: Save generated graphs to Workflow caches
+- **Release v0.6.0 - LLM Cost Tracking**: Query LLM provider APIs for usage and cost statistics
+- **Release v0.7.0 - LLM Context**: Variable/role/literature etc. context
+- **Release v0.8.0 - Algorithm integration**: Integration into structure learning algorithms
 
 ## Implementation Approach
 
@@ -0,0 +1,42 @@
+causaliq_knowledge/__init__.py,sha256=IVlm0G1g-xxJS13SFeC0h1D6LL7rfzX96F7rjfU-wqA,982
+causaliq_knowledge/action.py,sha256=X7EGSTV7IiwnO8cTcz5-ExXpRuwamSaC1jWpAc86i6I,16416
+causaliq_knowledge/base.py,sha256=GBG-sftOKkmUoQzTpm6anDTjP-2nInRZN_36dxoYhvk,2917
+causaliq_knowledge/models.py,sha256=tWGf186ASwO8NHiN97pEOLuBJmJI6Q9jvpU0mYZNdS0,4058
+causaliq_knowledge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+causaliq_knowledge/cache/__init__.py,sha256=Av92YdCdVTRt9TmB2edRsIFDxq3f1Qi0daq0sFV1rp0,549
+causaliq_knowledge/cache/token_cache.py,sha256=o3qYGnc1e7nSJm9BsM6pmp6cbsPzhaCnEM1utCY682E,23085
+causaliq_knowledge/cache/encoders/__init__.py,sha256=gZ7gw96paFDbnJuc4v1aJsEJfVinI4zc03tXyFvfZxo,461
+causaliq_knowledge/cache/encoders/base.py,sha256=jK7--Or3lVp1UkKghKYFo_gKJp0HsMxosL_8eYL7RQQ,2679
+causaliq_knowledge/cache/encoders/json_encoder.py,sha256=7zN0qRdpVa8EZS08F22buyAYoIpnx5lngK5p1wK-3WI,15689
+causaliq_knowledge/cli/__init__.py,sha256=worFcurYE_T5_uYvnM4oL3aP3v_fSWLUhggbCP9TZMc,434
+causaliq_knowledge/cli/cache.py,sha256=w_fF3e8Ru7Sxl3FMgab_x2UWOfPMVq7BDvTUD-2Kayg,17863
+causaliq_knowledge/cli/generate.py,sha256=78c1GUv4w42qtPjy9NV0X1q5kw9ATl2yKcdl-KBTxhI,13187
+causaliq_knowledge/cli/main.py,sha256=MwUmokX9x3bL7E7pZMquWvVnKg3b-qdVk8VMR0ejK5o,4665
+causaliq_knowledge/cli/models.py,sha256=2ga5PWhOOo2vE6e3A3oxvO2FB88zztuRoUMPGlhyE6M,9587
+causaliq_knowledge/graph/__init__.py,sha256=920si3oBsuYIBW8gzHBYQnHCt9KupDdkPqVxTsj_py0,1952
+causaliq_knowledge/graph/generator.py,sha256=tM1KKKgpsiLLziCUKKnAiH9n1yO8zUnSFZ-QbFZKdJU,15971
+causaliq_knowledge/graph/loader.py,sha256=EO5Yj02qRrPY22rvfVk-LfXSZMVNEn37-H4u5kHCY0M,6615
+causaliq_knowledge/graph/models.py,sha256=4f9kaHHs9J_ma95EgV0GItliY-G4BLNNyIwBq8yTiVk,14924
+causaliq_knowledge/graph/params.py,sha256=RPviCO3ZOsOrm_rsysST4Y4hhWDN6jcJt46ajDvSY0M,5828
+causaliq_knowledge/graph/prompts.py,sha256=C29w5LQDf2tF9JeFADRrKSjkP6dVzjsa1FNX_6ndt70,15399
+causaliq_knowledge/graph/response.py,sha256=UaYbnVpfkWDZWMS9wQbEU4QP5In1YAqId2EuJ1V2kho,12437
+causaliq_knowledge/graph/view_filter.py,sha256=-ebhj8cXxgLimAeAZ023YgW6kI-c8jTp_LDKjYf1Kow,5297
+causaliq_knowledge/llm/__init__.py,sha256=30AL0h64zIkXoiqhMY7gjaf7mrtwtwMW38vzhns0My4,1663
+causaliq_knowledge/llm/anthropic_client.py,sha256=dPFHYGWL4xwQCtmQuGwGY4DBKSINOgOS-11ekznaiXo,8719
+causaliq_knowledge/llm/base_client.py,sha256=FJGX5QYawcelc3UScSMwvBJnKrUVR3PrBIY72KYthTU,12544
+causaliq_knowledge/llm/cache.py,sha256=6bpCyBv_bUorKceYc5qpgXi30A0tDRwAtlhxS3TQklE,15404
+causaliq_knowledge/llm/deepseek_client.py,sha256=ZcOpgnYa66XHjiTaF5ekR_BtosRYvVmzlIafp_Gsx_A,3543
+causaliq_knowledge/llm/gemini_client.py,sha256=XJMq9sPo7zExrALSr2rIRHLheSPqKo8ENG0KtdJ1cjw,9924
+causaliq_knowledge/llm/groq_client.py,sha256=PnTXqtMF1Km9DY4HiCZXQ6LeOzdjZtQJaeuGe1GbeME,7531
+causaliq_knowledge/llm/mistral_client.py,sha256=dTAOtymffCM1AJp5-JcfizofYrUA-jhKfHWrhZe2DDI,4187
+causaliq_knowledge/llm/ollama_client.py,sha256=PPU3g-nD8D546zcYB3uGxZ9yVbU4Gngo3snM2tRFeTc,8612
+causaliq_knowledge/llm/openai_client.py,sha256=MJmB6P32TZESMlXhn9d0-b3vFWXmf7ojHQ5CY8mCENI,3835
+causaliq_knowledge/llm/openai_compat_client.py,sha256=L8ZW5csuhUePq4mt3EGOUqhR3tleFmM72UlhPBsgIMQ,9518
+causaliq_knowledge/llm/prompts.py,sha256=bJ9iVGKUfTfLi2eWh-FFM4cNzk5Ux4Z0x8R6Ia27Dbo,6598
+causaliq_knowledge/llm/provider.py,sha256=VDEv-1esT_EgJk_Gwlfl4423ojglOxzPCBCFbOFE4DQ,15184
+causaliq_knowledge-0.4.0.dist-info/licenses/LICENSE,sha256=vUFUzQnti-D-MLSi9NxFlsFYOKwU25sxxH7WgJOQFIs,1084
+causaliq_knowledge-0.4.0.dist-info/METADATA,sha256=ZWEguAYGAWwk73VlIvb2KFXEyf37pTctjYZNlUgHWZM,9038
+causaliq_knowledge-0.4.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+causaliq_knowledge-0.4.0.dist-info/entry_points.txt,sha256=tuHaj0XNw9KySBpHOZeAC5Q08G96ftxocOy2POV1DdA,179
+causaliq_knowledge-0.4.0.dist-info/top_level.txt,sha256=GcxQf4BQAGa38i2-j8ylk2FmnBHtEZ9-8bSt-7Uka7k,19
+causaliq_knowledge-0.4.0.dist-info/RECORD,,
@@ -1,3 +1,6 @@
+[causaliq.actions]
+causaliq-knowledge = causaliq_knowledge:CausalIQAction
+
 [console_scripts]
 causaliq-knowledge = causaliq_knowledge.cli:main
 cqknow = causaliq_knowledge.cli:main
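The new `[causaliq.actions]` group exposes `CausalIQAction` as a discoverable plugin alongside the existing console scripts. A minimal discovery sketch using the Python 3.10+ `importlib.metadata` API (how causaliq-workflow actually consumes this group is not shown in the diff):

```python
from importlib.metadata import entry_points

# Iterate the plugin group this wheel registers (Python 3.10+ API).
for ep in entry_points(group="causaliq.actions"):
    if ep.name == "causaliq-knowledge":
        action_cls = ep.load()  # resolves causaliq_knowledge:CausalIQAction
        print(action_cls)
```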