causaliq-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- causaliq_knowledge/__init__.py +6 -3
- causaliq_knowledge/action.py +480 -0
- causaliq_knowledge/cache/__init__.py +18 -0
- causaliq_knowledge/cache/encoders/__init__.py +13 -0
- causaliq_knowledge/cache/encoders/base.py +90 -0
- causaliq_knowledge/cache/encoders/json_encoder.py +430 -0
- causaliq_knowledge/cache/token_cache.py +666 -0
- causaliq_knowledge/cli/__init__.py +15 -0
- causaliq_knowledge/cli/cache.py +478 -0
- causaliq_knowledge/cli/generate.py +410 -0
- causaliq_knowledge/cli/main.py +172 -0
- causaliq_knowledge/cli/models.py +309 -0
- causaliq_knowledge/graph/__init__.py +78 -0
- causaliq_knowledge/graph/generator.py +457 -0
- causaliq_knowledge/graph/loader.py +222 -0
- causaliq_knowledge/graph/models.py +426 -0
- causaliq_knowledge/graph/params.py +175 -0
- causaliq_knowledge/graph/prompts.py +445 -0
- causaliq_knowledge/graph/response.py +392 -0
- causaliq_knowledge/graph/view_filter.py +154 -0
- causaliq_knowledge/llm/base_client.py +147 -1
- causaliq_knowledge/llm/cache.py +443 -0
- causaliq_knowledge/py.typed +0 -0
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/METADATA +10 -6
- causaliq_knowledge-0.4.0.dist-info/RECORD +42 -0
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/WHEEL +1 -1
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/entry_points.txt +3 -0
- causaliq_knowledge/cli.py +0 -414
- causaliq_knowledge-0.2.0.dist-info/RECORD +0 -22
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/top_level.txt +0 -0
causaliq_knowledge/__init__.py
CHANGED
@@ -2,10 +2,11 @@
 causaliq-knowledge: LLM and human knowledge for causal discovery.
 """
 
+from causaliq_knowledge.action import CausalIQAction
 from causaliq_knowledge.base import KnowledgeProvider
 from causaliq_knowledge.models import EdgeDirection, EdgeKnowledge
 
-__version__ = "0.2.0"
+__version__ = "0.4.0"
 __author__ = "CausalIQ"
 __email__ = "info@causaliq.com"
 
@@ -16,8 +17,8 @@ __description__ = "LLM and human knowledge for causal discovery"
 __url__ = "https://github.com/causaliq/causaliq-knowledge"
 __license__ = "MIT"
 
-# Version tuple for programmatic access
-VERSION = (0, 2, 0)
+# Version tuple for programmatic access (major, minor, patch)
+VERSION = (0, 4, 0)
 
 __all__ = [
     "__version__",
@@ -29,5 +30,7 @@ __all__ = [
     "EdgeDirection",
     # Abstract interface
     "KnowledgeProvider",
+    # Workflow action (auto-discovered by causaliq-workflow)
+    "CausalIQAction",
     # Note: Import LLMKnowledge from causaliq_knowledge.llm
 ]
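In 0.4.0 the package metadata and the workflow action are exposed at the top level. A quick sketch of that surface, using nothing beyond what the diff above adds:

```python
import causaliq_knowledge
from causaliq_knowledge import CausalIQAction  # auto-discovered by causaliq-workflow

print(causaliq_knowledge.__version__)  # "0.4.0"
print(causaliq_knowledge.VERSION)      # (0, 4, 0)
```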
causaliq_knowledge/action.py
ADDED

@@ -0,0 +1,480 @@
+"""CausalIQ workflow action for graph generation.
+
+This module provides the workflow action integration for causaliq-knowledge,
+allowing graph generation to be used as a step in CausalIQ workflows.
+
+The action is auto-discovered by causaliq-workflow when this package is
+imported, using the convention of exporting a class named 'CausalIQAction'.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Dict, Optional
+
+from causaliq_workflow.action import (
+    ActionExecutionError,
+    ActionInput,
+    ActionValidationError,
+)
+from causaliq_workflow.action import CausalIQAction as BaseCausalIQAction
+from causaliq_workflow.logger import WorkflowLogger
+from causaliq_workflow.registry import WorkflowContext
+from pydantic import ValidationError
+
+from causaliq_knowledge.graph.params import GenerateGraphParams
+
+if TYPE_CHECKING:  # pragma: no cover
+    from causaliq_knowledge.graph.response import GeneratedGraph
+
+logger = logging.getLogger(__name__)
+
+# Re-export for convenience (unused imports are intentional for API surface)
+__all__ = [
+    "ActionExecutionError",
+    "ActionInput",
+    "ActionValidationError",
+    "BaseCausalIQAction",
+    "WorkflowContext",
+    "WorkflowLogger",
+    "GenerateGraphAction",
+    "CausalIQAction",
+    "SUPPORTED_ACTIONS",
+]
+
+
+# Supported actions within this package
+SUPPORTED_ACTIONS = {"generate_graph"}
+
+
+def _create_action_inputs() -> Dict[str, Any]:
+    """Create action input specifications.
+
+    Returns:
+        Dictionary of ActionInput specifications.
+    """
+    return {
+        "action": ActionInput(
+            name="action",
+            description="Action to perform (e.g., 'generate_graph')",
+            required=True,
+            type_hint="str",
+        ),
+        "model_spec": ActionInput(
+            name="model_spec",
+            description="Path to model specification JSON file",
+            required=True,
+            type_hint="str",
+        ),
+        "prompt_detail": ActionInput(
+            name="prompt_detail",
+            description="Detail level for prompts: minimal, standard, or rich",
+            required=False,
+            default="standard",
+            type_hint="str",
+        ),
+        "use_benchmark_names": ActionInput(
+            name="use_benchmark_names",
+            description="Use benchmark names instead of LLM names",
+            required=False,
+            default=False,
+            type_hint="bool",
+        ),
+        "llm_model": ActionInput(
+            name="llm_model",
+            description="LLM model identifier (e.g., groq/llama-3.1-8b)",
+            required=False,
+            default="groq/llama-3.1-8b-instant",
+            type_hint="str",
+        ),
+        "output": ActionInput(
+            name="output",
+            description="Output: .json file path or 'none' for stdout",
+            required=True,
+            type_hint="str",
+        ),
+        "llm_cache": ActionInput(
+            name="llm_cache",
+            description="Path to cache database (.db) or 'none' to disable",
+            required=True,
+            type_hint="str",
+        ),
+        "llm_temperature": ActionInput(
+            name="llm_temperature",
+            description="LLM sampling temperature (0.0-2.0)",
+            required=False,
+            default=0.1,
+            type_hint="float",
+        ),
+    }
+
+
+class GenerateGraphAction(BaseCausalIQAction):
+    """Workflow action for generating causal graphs from model specifications.
+
+    This action integrates causaliq-knowledge graph generation into
+    CausalIQ workflows, allowing LLM-based graph generation to be used
+    as workflow steps.
+
+    The action supports the 'generate_graph' operation, which:
+    - Loads a model specification from a JSON file
+    - Queries an LLM to propose causal relationships
+    - Returns the generated graph structure
+
+    Attributes:
+        name: Action identifier for workflow 'uses' field.
+        version: Action version.
+        description: Human-readable description.
+        inputs: Input parameter specifications.
+
+    Example workflow step:
+        ```yaml
+        steps:
+          - name: Generate causal graph
+            uses: causaliq-knowledge
+            with:
+              action: generate_graph
+              model_spec: "{{data_dir}}/cancer.json"
+              llm_cache: "{{data_dir}}/cancer_llm.db"
+              prompt_detail: standard
+              llm_model: groq/llama-3.1-8b-instant
+        ```
+    """
+
+    name: str = "causaliq-knowledge"
+    version: str = "0.4.0"
+    description: str = "Generate causal graphs using LLM knowledge"
+    author: str = "CausalIQ"
+
+    inputs: Dict[str, Any] = _create_action_inputs()
+    outputs: Dict[str, str] = {
+        "graph": "Generated graph structure as JSON",
+        "edge_count": "Number of edges in generated graph",
+        "variable_count": "Number of variables in the model",
+        "model_used": "LLM model used for generation",
+        "cached": "Whether the result was retrieved from cache",
+    }
+
+    def validate_inputs(self, inputs: Dict[str, Any]) -> bool:
+        """Validate input values against specifications.
+
+        Args:
+            inputs: Dictionary of input values to validate.
+
+        Returns:
+            True if all inputs are valid.
+
+        Raises:
+            ActionValidationError: If validation fails.
+        """
+        # Check required 'action' parameter
+        if "action" not in inputs:
+            raise ActionValidationError(
+                "Missing required input: 'action'. "
+                f"Supported actions: {SUPPORTED_ACTIONS}"
+            )
+
+        action = inputs["action"]
+        if action not in SUPPORTED_ACTIONS:
+            raise ActionValidationError(
+                f"Unknown action: '{action}'. "
+                f"Supported actions: {SUPPORTED_ACTIONS}"
+            )
+
+        # For generate_graph, validate using GenerateGraphParams
+        if action == "generate_graph":
+            # Check required model_spec
+            if "model_spec" not in inputs:
+                raise ActionValidationError(
+                    "Missing required input: 'model_spec' for generate_graph"
+                )
+
+            # Build params dict (excluding 'action' which isn't a param)
+            params_dict = {k: v for k, v in inputs.items() if k != "action"}
+
+            try:
+                # Validate using Pydantic model
+                GenerateGraphParams.from_dict(params_dict)
+            except (ValidationError, ValueError) as e:
+                raise ActionValidationError(
+                    f"Invalid parameters for generate_graph: {e}"
+                )
+
+        return True
+
+    def run(
+        self,
+        inputs: Dict[str, Any],
+        mode: str = "dry-run",
+        context: Optional[Any] = None,
+        logger: Optional[Any] = None,
+    ) -> Dict[str, Any]:
+        """Execute the action with validated inputs.
+
+        Args:
+            inputs: Dictionary of input values keyed by input name.
+            mode: Execution mode ('dry-run', 'run', 'compare').
+            context: Workflow context for optimisation.
+            logger: Optional logger for task execution reporting.
+
+        Returns:
+            Dictionary containing:
+            - status: 'success' or 'skipped' (for dry-run)
+            - graph: Generated graph as JSON (if run mode)
+            - edge_count: Number of edges
+            - variable_count: Number of variables
+            - model_used: LLM model identifier
+            - cached: Whether result was from cache
+
+        Raises:
+            ActionExecutionError: If action execution fails.
+        """
+        # Validate inputs first
+        self.validate_inputs(inputs)
+
+        action = inputs["action"]
+
+        if action == "generate_graph":
+            return self._run_generate_graph(inputs, mode, context, logger)
+        else:  # pragma: no cover
+            # This shouldn't happen after validate_inputs
+            raise ActionExecutionError(f"Unknown action: {action}")
+
+    def _run_generate_graph(
+        self,
+        inputs: Dict[str, Any],
+        mode: str,
+        context: Optional[Any],
+        logger: Optional[Any],
+    ) -> Dict[str, Any]:
+        """Execute the generate_graph action.
+
+        Args:
+            inputs: Validated input parameters.
+            mode: Execution mode.
+            context: Workflow context.
+            logger: Optional workflow logger.
+
+        Returns:
+            Action result dictionary.
+        """
+        # Build params (excluding 'action')
+        params_dict = {k: v for k, v in inputs.items() if k != "action"}
+
+        try:
+            params = GenerateGraphParams.from_dict(params_dict)
+        except (ValidationError, ValueError) as e:
+            raise ActionExecutionError(f"Parameter validation failed: {e}")
+
+        # Check model_spec exists
+        if not params.model_spec.exists():
+            raise ActionExecutionError(
+                f"Model specification not found: {params.model_spec}"
+            )
+
+        # Dry-run mode: validate only, don't execute
+        if mode == "dry-run":
+            return self._dry_run_result(params)
+
+        # Run mode: execute graph generation
+        return self._execute_generate_graph(params)
+
+    def _dry_run_result(self, params: GenerateGraphParams) -> Dict[str, Any]:
+        """Return dry-run result without executing.
+
+        Args:
+            params: Validated parameters.
+
+        Returns:
+            Dry-run result dictionary.
+        """
+        return {
+            "status": "skipped",
+            "message": "Dry-run mode: would generate graph",
+            "model_spec": str(params.model_spec),
+            "llm_model": params.llm_model,
+            "prompt_detail": params.prompt_detail.value,
+            "output": params.output,
+        }
+
+    def _execute_generate_graph(
+        self, params: GenerateGraphParams
+    ) -> Dict[str, Any]:
+        """Execute graph generation.
+
+        Args:
+            params: Validated parameters.
+
+        Returns:
+            Result dictionary with generated graph.
+        """
+        # Import here to avoid slow startup and circular imports
+        from causaliq_knowledge.cache import TokenCache
+        from causaliq_knowledge.graph import ModelLoader
+        from causaliq_knowledge.graph.generator import (
+            GraphGenerator,
+            GraphGeneratorConfig,
+        )
+
+        try:
+            # Load model specification
+            spec = ModelLoader.load(params.model_spec)
+            logger.info(
+                f"Loaded model specification: {spec.dataset_id} "
+                f"({len(spec.variables)} variables)"
+            )
+        except Exception as e:
+            raise ActionExecutionError(
+                f"Failed to load model specification: {e}"
+            )
+
+        # Track mapping for name conversion
+        llm_to_benchmark_mapping: Dict[str, str] = {}
+
+        # Determine naming mode
+        use_llm_names = not params.use_benchmark_names
+        if use_llm_names and spec.uses_distinct_llm_names():
+            llm_to_benchmark_mapping = spec.get_llm_to_name_mapping()
+
+        # Set up cache
+        cache: Optional[TokenCache] = None
+        cache_path = params.get_effective_cache_path()
+        if cache_path is not None:
+            try:
+                cache = TokenCache(str(cache_path))
+                cache.open()
+            except Exception as e:
+                raise ActionExecutionError(f"Failed to open cache: {e}")
+
+        try:
+            # Import OutputFormat for generator config
+            from causaliq_knowledge.graph.prompts import OutputFormat
+
+            # Create generator - always use edge_list format
+            # Derive request_id from output filename stem
+            if params.output.lower() == "none":
+                request_id = "none"
+            else:
+                request_id = Path(params.output).stem
+
+            config = GraphGeneratorConfig(
+                temperature=params.llm_temperature,
+                output_format=OutputFormat.EDGE_LIST,
+                prompt_detail=params.prompt_detail,
+                use_llm_names=use_llm_names,
+                request_id=request_id,
+            )
+            generator = GraphGenerator(
+                model=params.llm_model, config=config, cache=cache
+            )
+
+            # Generate graph
+            graph = generator.generate_from_spec(
+                spec, level=params.prompt_detail
+            )
+
+            # Map LLM names back to benchmark names
+            if llm_to_benchmark_mapping:
+                graph = self._map_graph_names(graph, llm_to_benchmark_mapping)
+
+            # Get stats
+            stats = generator.get_stats()
+
+            # Build result
+            result = {
+                "status": "success",
+                "graph": self._graph_to_dict(graph),
+                "edge_count": len(graph.edges),
+                "variable_count": len(graph.variables),
+                "model_used": params.llm_model,
+                "cached": stats.get("cache_hits", 0) > 0,
+                "outputs": {
+                    "graph": self._graph_to_dict(graph),
+                    "edge_count": len(graph.edges),
+                    "variable_count": len(graph.variables),
+                    "model_used": params.llm_model,
+                    "cached": stats.get("cache_hits", 0) > 0,
+                },
+            }
+
+            # Write output file if specified
+            output_path = params.get_effective_output_path()
+            if output_path:
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                output_path.write_text(
+                    json.dumps(result["graph"], indent=2),
+                    encoding="utf-8",
+                )
+                result["output_file"] = str(output_path)
+
+            return result
+
+        except Exception as e:
+            raise ActionExecutionError(f"Graph generation failed: {e}")
+        finally:
+            if cache:
+                cache.close()
+
+    def _graph_to_dict(self, graph: "GeneratedGraph") -> Dict[str, Any]:
+        """Convert GeneratedGraph to dictionary.
+
+        Args:
+            graph: Generated graph object.
+
+        Returns:
+            Dictionary representation of the graph.
+        """
+        return {
+            "edges": [
+                {
+                    "source": edge.source,
+                    "target": edge.target,
+                    "confidence": edge.confidence,
+                }
+                for edge in graph.edges
+            ],
+            "variables": graph.variables,
+            "reasoning": graph.reasoning,
+        }
+
+    def _map_graph_names(
+        self, graph: "GeneratedGraph", mapping: Dict[str, str]
+    ) -> "GeneratedGraph":
+        """Map variable names in a graph using a mapping dictionary.
+
+        Args:
+            graph: The generated graph with edges to map.
+            mapping: Dictionary mapping old names to new names.
+
+        Returns:
+            New GeneratedGraph with mapped variable names.
+        """
+        from causaliq_knowledge.graph.response import (
+            GeneratedGraph,
+            ProposedEdge,
+        )
+
+        new_edges = []
+        for edge in graph.edges:
+            new_edge = ProposedEdge(
+                source=mapping.get(edge.source, edge.source),
+                target=mapping.get(edge.target, edge.target),
+                confidence=edge.confidence,
+            )
+            new_edges.append(new_edge)
+
+        new_variables = [mapping.get(v, v) for v in graph.variables]
+
+        return GeneratedGraph(
+            edges=new_edges,
+            variables=new_variables,
+            reasoning=graph.reasoning,
+            metadata=graph.metadata,
+        )
+
+
+# Export as CausalIQAction for auto-discovery by causaliq-workflow
+# This name is required by the auto-discovery convention
+CausalIQAction = GenerateGraphAction
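Because run() defaults to mode="dry-run", the action can be exercised without an LLM call. A minimal sketch of driving it directly from Python, assuming the class can be instantiated with no arguments (causaliq-workflow normally discovers and drives it; paths are illustrative only):

```python
from causaliq_knowledge.action import CausalIQAction

action = CausalIQAction()  # assumes a no-argument constructor
result = action.run(
    inputs={
        "action": "generate_graph",
        "model_spec": "data/cancer.json",  # must exist, even in dry-run
        "llm_cache": "none",               # 'none' disables the cache database
        "output": "none",                  # 'none' means stdout, no file written
    },
    mode="dry-run",                        # validates inputs; no LLM is called
)
print(result["status"])                    # "skipped" in dry-run mode
```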
causaliq_knowledge/cache/__init__.py
ADDED

@@ -0,0 +1,18 @@
+"""
+Core caching infrastructure for causaliq.
+
+This module provides a generic caching system with:
+- SQLite-backed storage with concurrency support
+- Pluggable encoders for type-specific compression
+- Shared token dictionary for cross-entry compression
+- Import/export for human-readable formats
+
+Note: This module is designed for future migration to causaliq-core.
+LLM-specific caching code remains in causaliq_knowledge.llm.cache.
+"""
+
+from causaliq_knowledge.cache.token_cache import TokenCache
+
+__all__ = [
+    "TokenCache",
+]
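A minimal sketch of the TokenCache lifecycle as action.py above uses it; only the three calls visible in this diff (construct with a path string, open(), close()) are assumed:

```python
from causaliq_knowledge.cache import TokenCache

cache = TokenCache("data/cancer_llm.db")  # SQLite-backed store; path illustrative
cache.open()
try:
    # Pass `cache` to GraphGenerator(model=..., config=..., cache=cache)
    # so repeated LLM requests are served from the cache.
    ...
finally:
    cache.close()
```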
causaliq_knowledge/cache/encoders/__init__.py
ADDED

@@ -0,0 +1,13 @@
+"""
+Pluggable encoders for type-specific cache entry compression.
+
+Encoders transform data to/from compact binary representations,
+using a shared token dictionary for cross-entry compression.
+
+Note: This submodule is designed for future migration to causaliq-core.
+"""
+
+from causaliq_knowledge.cache.encoders.base import EntryEncoder
+from causaliq_knowledge.cache.encoders.json_encoder import JsonEncoder
+
+__all__ = ["EntryEncoder", "JsonEncoder"]
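Putting the two exports together, a hedged round-trip through JsonEncoder: the encode/decode signatures come from the EntryEncoder interface in the next diff, while JsonEncoder's exact round-trip behaviour is an assumption.

```python
from causaliq_knowledge.cache import TokenCache
from causaliq_knowledge.cache.encoders import JsonEncoder

cache = TokenCache("tokens.db")  # the shared token dictionary lives in the cache
cache.open()
try:
    encoder = JsonEncoder()
    blob = encoder.encode({"variables": ["Smoker", "Cancer"]}, cache)  # -> bytes
    data = encoder.decode(blob, cache)
    assert data == {"variables": ["Smoker", "Cancer"]}  # assumed lossless
finally:
    cache.close()
```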
causaliq_knowledge/cache/encoders/base.py
ADDED

@@ -0,0 +1,90 @@
+"""
+Abstract base class for cache entry encoders.
+
+Encoders transform data to/from compact binary representations,
+optionally using a shared token dictionary for compression.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:  # pragma: no cover
+    from causaliq_knowledge.cache.token_cache import TokenCache
+
+
+class EntryEncoder(ABC):
+    """Abstract base class for type-specific cache entry encoders.
+
+    Encoders handle:
+    - Encoding data to compact binary format for storage
+    - Decoding binary data back to original structure
+    - Exporting to human-readable formats (JSON, GraphML, etc.)
+    - Importing from human-readable formats
+
+    Encoders may use the shared token dictionary in TokenCache
+    for cross-entry compression of repeated strings.
+
+    Example:
+        >>> class MyEncoder(EntryEncoder):
+        ...     def encode(self, data, token_cache):
+        ...         return json.dumps(data).encode()
+        ...     def decode(self, blob, token_cache):
+        ...         return json.loads(blob.decode())
+        ...     # ... export/import methods
+    """
+
+    @property
+    def default_export_format(self) -> str:
+        """Default file extension for exports (e.g. 'json', 'graphml')."""
+        return "json"
+
+    @abstractmethod
+    def encode(self, data: Any, token_cache: TokenCache) -> bytes:
+        """Encode data to binary format.
+
+        Args:
+            data: The data to encode (type depends on encoder).
+            token_cache: Cache instance for shared token dictionary.
+
+        Returns:
+            Compact binary representation.
+        """
+        ...
+
+    @abstractmethod
+    def decode(self, blob: bytes, token_cache: TokenCache) -> Any:
+        """Decode binary data back to original structure.
+
+        Args:
+            blob: Binary data from cache.
+            token_cache: Cache instance for shared token dictionary.
+
+        Returns:
+            Decoded data in original format.
+        """
+        ...
+
+    @abstractmethod
+    def export(self, data: Any, path: Path) -> None:
+        """Export data to human-readable file format.
+
+        Args:
+            data: The data to export (decoded format).
+            path: Destination file path.
+        """
+        ...
+
+    @abstractmethod
+    def import_(self, path: Path) -> Any:
+        """Import data from human-readable file format.
+
+        Args:
+            path: Source file path.
+
+        Returns:
+            Imported data ready for encoding.
+        """
+        ...
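For concreteness, a minimal subclass satisfying the four abstract methods above. This mirrors the MyEncoder sketch in the class docstring and deliberately ignores the token dictionary, which a real encoder such as the package's JsonEncoder would use for compression; the class name is hypothetical.

```python
import json
from pathlib import Path
from typing import Any

from causaliq_knowledge.cache.encoders import EntryEncoder


class PlainJsonEncoder(EntryEncoder):
    """Illustrative encoder: UTF-8 JSON bytes, no token compression."""

    def encode(self, data: Any, token_cache: Any) -> bytes:
        # Token dictionary unused in this simplified sketch
        return json.dumps(data).encode("utf-8")

    def decode(self, blob: bytes, token_cache: Any) -> Any:
        return json.loads(blob.decode("utf-8"))

    def export(self, data: Any, path: Path) -> None:
        # Human-readable export, matching the default 'json' format
        path.write_text(json.dumps(data, indent=2), encoding="utf-8")

    def import_(self, path: Path) -> Any:
        return json.loads(path.read_text(encoding="utf-8"))
```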