ragit 0.3__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragit/logging.py ADDED
@@ -0,0 +1,194 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Structured logging for ragit.
7
+
8
+ Provides consistent logging across all ragit components with:
9
+ - Operation timing
10
+ - Context tracking
11
+ - Configurable log levels
12
+ """
13
+
14
+ import logging
15
+ import time
16
+ from collections.abc import Callable, Generator
17
+ from contextlib import contextmanager
18
+ from functools import wraps
19
+ from typing import Any, TypeVar
20
+
21
+ # Create ragit logger
22
+ logger = logging.getLogger("ragit")
23
+
24
+ # Type variable for decorated functions
25
+ F = TypeVar("F", bound=Callable[..., Any])
26
+
27
+
28
+ def setup_logging(level: str = "INFO", format_string: str | None = None) -> None:
29
+ """Configure ragit logging.
30
+
31
+ Parameters
32
+ ----------
33
+ level : str
34
+ Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
35
+ format_string : str, optional
36
+ Custom format string. If None, uses default format.
37
+
38
+ Examples
39
+ --------
40
+ >>> from ragit.logging import setup_logging
41
+ >>> setup_logging("DEBUG")
42
+ """
43
+ logger.setLevel(level.upper())
44
+
45
+ # Only add handler if none exist
46
+ if not logger.handlers:
47
+ handler = logging.StreamHandler()
48
+ handler.setLevel(level.upper())
49
+
50
+ if format_string is None:
51
+ format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
52
+
53
+ formatter = logging.Formatter(format_string)
54
+ handler.setFormatter(formatter)
55
+ logger.addHandler(handler)
56
+
57
+
58
@contextmanager
def log_operation(operation: str, **context: Any) -> Generator[dict[str, Any], None, None]:
    """Context manager for logging operations with timing.

    Emits ``<operation>.start`` at DEBUG on entry, then either
    ``<operation>.success`` at INFO or ``<operation>.failed`` at ERROR
    (with traceback) on exit, each tagged with the elapsed milliseconds.

    Parameters
    ----------
    operation : str
        Name of the operation being performed.
    **context
        Additional context to include in log messages.

    Yields
    ------
    dict
        Mutable dict to add additional context during the operation.

    Examples
    --------
    >>> with log_operation("embed", model="nomic-embed-text") as ctx:
    ...     result = provider.embed(text, model)
    ...     ctx["dimensions"] = len(result.embedding)
    """

    def render(items: dict[str, Any]) -> str:
        # Format context pairs as "k=v, k=v" for log messages.
        return ", ".join(f"{key}={value}" for key, value in items.items())

    started = time.perf_counter()
    extra_context: dict[str, Any] = {}

    opening = f" [{render(context)}]" if context else ""
    logger.debug(f"{operation}.start{opening}")

    try:
        yield extra_context
    except Exception as exc:
        elapsed = (time.perf_counter() - started) * 1000
        merged = {**context, **extra_context, "duration_ms": f"{elapsed:.2f}", "error": str(exc)}
        logger.error(f"{operation}.failed [{render(merged)}]", exc_info=True)
        raise
    else:
        elapsed = (time.perf_counter() - started) * 1000
        merged = {**context, **extra_context, "duration_ms": f"{elapsed:.2f}"}
        logger.info(f"{operation}.success [{render(merged)}]")
104
+
105
+
106
def log_method(operation: str) -> Callable[[F], F]:
    """Decorator for logging method calls with timing.

    Parameters
    ----------
    operation : str
        Name of the operation for logging.

    Returns
    -------
    Callable
        Decorated function.

    Examples
    --------
    >>> class MyProvider:
    ...     @log_method("embed")
    ...     def embed(self, text: str, model: str):
    ...         ...
    """

    def decorator(func: F) -> F:
        # Delegate to log_operation so timing and message formatting stay
        # uniform across decorated methods and explicit context managers.
        @wraps(func)
        def timed(*args: Any, **kwargs: Any) -> Any:
            with log_operation(operation, method=func.__name__):
                return func(*args, **kwargs)

        return timed  # type: ignore

    return decorator
136
+
137
+
138
+ class LogContext:
139
+ """Context tracker for correlating related log messages.
140
+
141
+ Useful for tracing operations across multiple components.
142
+
143
+ Examples
144
+ --------
145
+ >>> ctx = LogContext("query-123")
146
+ >>> ctx.log("Starting retrieval", top_k=5)
147
+ >>> ctx.log("Retrieved chunks", count=3)
148
+ """
149
+
150
+ def __init__(self, request_id: str | None = None):
151
+ """Initialize log context.
152
+
153
+ Parameters
154
+ ----------
155
+ request_id : str, optional
156
+ Unique identifier for this context. Auto-generated if not provided.
157
+ """
158
+ self.request_id = request_id or f"req-{int(time.time() * 1000) % 100000}"
159
+ self._start_time = time.perf_counter()
160
+
161
+ def log(self, message: str, level: str = "INFO", **context: Any) -> None:
162
+ """Log a message with this context.
163
+
164
+ Parameters
165
+ ----------
166
+ message : str
167
+ Log message.
168
+ level : str
169
+ Log level (DEBUG, INFO, WARNING, ERROR).
170
+ **context
171
+ Additional context key-value pairs.
172
+ """
173
+ elapsed_ms = (time.perf_counter() - self._start_time) * 1000
174
+ ctx_str = ", ".join(f"{k}={v}" for k, v in context.items())
175
+ full_msg = f"[{self.request_id}] {message}" + (f" [{ctx_str}]" if ctx_str else "") + f" (+{elapsed_ms:.0f}ms)"
176
+
177
+ log_level = getattr(logging, level.upper(), logging.INFO)
178
+ logger.log(log_level, full_msg)
179
+
180
+ def debug(self, message: str, **context: Any) -> None:
181
+ """Log debug message."""
182
+ self.log(message, "DEBUG", **context)
183
+
184
+ def info(self, message: str, **context: Any) -> None:
185
+ """Log info message."""
186
+ self.log(message, "INFO", **context)
187
+
188
+ def warning(self, message: str, **context: Any) -> None:
189
+ """Log warning message."""
190
+ self.log(message, "WARNING", **context)
191
+
192
+ def error(self, message: str, **context: Any) -> None:
193
+ """Log error message."""
194
+ self.log(message, "ERROR", **context)
ragit/monitor.py ADDED
@@ -0,0 +1,307 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Execution monitoring with timing and JSON export.
7
+
8
+ Pattern inspired by ai4rag experiment_monitor.py.
9
+
10
+ Provides structured tracking of:
11
+ - Pattern execution times (e.g., experiment configurations)
12
+ - Step execution times within patterns
13
+ - Summary statistics and JSON export
14
+ """
15
+
16
+ import json
17
+ import time
18
+ from collections.abc import Generator
19
+ from contextlib import contextmanager
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+
25
+ @dataclass
26
+ class StepTiming:
27
+ """Timing information for a single step."""
28
+
29
+ name: str
30
+ start_time: float
31
+ end_time: float | None = None
32
+ metadata: dict[str, Any] = field(default_factory=dict)
33
+
34
+ @property
35
+ def duration_ms(self) -> float | None:
36
+ """Duration in milliseconds."""
37
+ if self.end_time is None:
38
+ return None
39
+ return (self.end_time - self.start_time) * 1000
40
+
41
+ def to_dict(self) -> dict[str, Any]:
42
+ """Convert to dictionary for JSON serialization."""
43
+ return {
44
+ "name": self.name,
45
+ "duration_ms": self.duration_ms,
46
+ **self.metadata,
47
+ }
48
+
49
+
50
@dataclass
class PatternTiming:
    """Timing information for a pattern (e.g., experiment configuration)."""

    # Pattern name (e.g., a configuration identifier).
    name: str
    # perf_counter() timestamp when the pattern started.
    start_time: float
    # perf_counter() timestamp when the pattern finished; None while running.
    end_time: float | None = None
    # Steps recorded while this pattern was active.
    steps: list[StepTiming] = field(default_factory=list)
    # Arbitrary caller-supplied context attached to this pattern.
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def duration_ms(self) -> float | None:
        """Elapsed time in milliseconds, or None if the pattern has not finished."""
        return None if self.end_time is None else (self.end_time - self.start_time) * 1000

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization, including steps.

        Metadata keys are merged in last and may shadow the fixed keys.
        """
        payload: dict[str, Any] = {
            "name": self.name,
            "duration_ms": self.duration_ms,
            "steps": [step.to_dict() for step in self.steps],
        }
        payload.update(self.metadata)
        return payload
75
+
76
+
77
class ExecutionMonitor:
    """
    Monitor experiment execution with timing and export.

    Tracks pattern execution times, step timings within patterns,
    and provides summary statistics and JSON export.

    Pattern from ai4rag experiment_monitor.py.

    Notes
    -----
    Steps recorded while no pattern is active are timed but discarded.
    Nested ``pattern``/``step`` contexts are not supported: the inner one
    overwrites the monitor's notion of "current".

    Examples
    --------
    >>> monitor = ExecutionMonitor()
    >>> with monitor.pattern("config-1"):
    ...     with monitor.step("indexing", chunk_size=512):
    ...         # Index documents
    ...         pass
    ...     with monitor.step("retrieval", top_k=3):
    ...         # Retrieve results
    ...         pass
    >>> monitor.print_summary()
    >>> monitor.export_json("timing.json")
    """

    def __init__(self) -> None:
        self._patterns: list[PatternTiming] = []  # completed patterns only
        self._current_pattern: PatternTiming | None = None
        self._current_step: StepTiming | None = None
        self._start_time = time.perf_counter()

    @contextmanager
    def pattern(self, name: str, **metadata: Any) -> Generator[PatternTiming, None, None]:
        """
        Context manager for timing a pattern execution.

        Parameters
        ----------
        name : str
            Pattern name (e.g., configuration identifier).
        **metadata
            Additional metadata to attach to the pattern.

        Yields
        ------
        PatternTiming
            The pattern timing object (can be modified).
        """
        pattern = PatternTiming(name=name, start_time=time.perf_counter(), metadata=metadata)
        self._current_pattern = pattern
        try:
            yield pattern
        finally:
            # Record the pattern even if its body raised, so partial runs
            # still appear in the summary.
            pattern.end_time = time.perf_counter()
            self._patterns.append(pattern)
            self._current_pattern = None

    @contextmanager
    def step(self, name: str, **metadata: Any) -> Generator[StepTiming, None, None]:
        """
        Context manager for timing a step within a pattern.

        Parameters
        ----------
        name : str
            Step name (e.g., "indexing", "retrieval", "evaluation").
        **metadata
            Additional metadata to attach to the step.

        Yields
        ------
        StepTiming
            The step timing object (can be modified).
        """
        step = StepTiming(name=name, start_time=time.perf_counter(), metadata=metadata)
        self._current_step = step
        try:
            yield step
        finally:
            step.end_time = time.perf_counter()
            # Steps are only retained when a pattern is currently active.
            if self._current_pattern is not None:
                self._current_pattern.steps.append(step)
            self._current_step = None

    def on_pattern_start(self, pattern_name: str, **metadata: Any) -> None:
        """Manual pattern start (alternative to context manager)."""
        self._current_pattern = PatternTiming(name=pattern_name, start_time=time.perf_counter(), metadata=metadata)

    def on_pattern_finish(self, **metadata: Any) -> None:
        """Manual pattern finish (alternative to context manager); no-op if none active."""
        if self._current_pattern:
            self._current_pattern.end_time = time.perf_counter()
            self._current_pattern.metadata.update(metadata)
            self._patterns.append(self._current_pattern)
            self._current_pattern = None

    def on_step_start(self, step_name: str, **metadata: Any) -> None:
        """Manual step start (alternative to context manager)."""
        self._current_step = StepTiming(name=step_name, start_time=time.perf_counter(), metadata=metadata)

    def on_step_finish(self, **metadata: Any) -> None:
        """Manual step finish (alternative to context manager); no-op if none active."""
        if self._current_step:
            self._current_step.end_time = time.perf_counter()
            self._current_step.metadata.update(metadata)
            if self._current_pattern is not None:
                self._current_pattern.steps.append(self._current_step)
            self._current_step = None

    @property
    def total_duration_ms(self) -> float:
        """Total duration since monitor creation in milliseconds."""
        return (time.perf_counter() - self._start_time) * 1000

    @property
    def pattern_count(self) -> int:
        """Number of completed patterns."""
        return len(self._patterns)

    def get_summary(self) -> dict[str, Any]:
        """
        Get summary statistics as dictionary.

        Returns
        -------
        dict
            Summary with total duration, pattern count, and pattern details.
        """
        return {
            "total_duration_ms": self.total_duration_ms,
            "pattern_count": self.pattern_count,
            "patterns": [p.to_dict() for p in self._patterns],
        }

    def get_step_aggregates(self) -> dict[str, dict[str, float]]:
        """
        Get aggregated step statistics across all patterns.

        Returns
        -------
        dict
            Step name -> {count, total_ms, avg_ms, min_ms, max_ms}
        """
        durations_by_step: dict[str, list[float]] = {}
        for pattern in self._patterns:
            for step in pattern.steps:
                # Skip steps that never finished (duration unknown).
                if step.duration_ms is not None:
                    durations_by_step.setdefault(step.name, []).append(step.duration_ms)

        return {
            name: {
                "count": len(durations),
                "total_ms": sum(durations),
                "avg_ms": sum(durations) / len(durations),
                "min_ms": min(durations),
                "max_ms": max(durations),
            }
            for name, durations in durations_by_step.items()
        }

    def export_json(self, path: Path | str, indent: int = 2) -> None:
        """
        Export monitoring data to JSON file.

        Parameters
        ----------
        path : Path or str
            Output file path.
        indent : int
            JSON indentation (default: 2).
        """
        data = {
            **self.get_summary(),
            "step_aggregates": self.get_step_aggregates(),
        }
        # Fix: write with explicit UTF-8 so the export does not depend on the
        # platform's default locale encoding (original used open() without one).
        Path(path).write_text(json.dumps(data, indent=indent), encoding="utf-8")

    def print_summary(self, show_steps: bool = True) -> None:
        """
        Print human-readable summary to console.

        Parameters
        ----------
        show_steps : bool
            Include step-level details (default: True).
        """
        summary = self.get_summary()

        print(f"\n{'=' * 60}")
        print(f"Execution Summary (Total: {summary['total_duration_ms']:.0f}ms)")
        print(f"Patterns: {summary['pattern_count']}")
        print(f"{'=' * 60}")

        for pattern in summary["patterns"]:
            duration = pattern.get("duration_ms")
            # Fix: compare against None — a genuine 0ms duration is falsy and
            # was previously misreported as "in progress".
            duration_str = f"{duration:.0f}ms" if duration is not None else "in progress"
            print(f"\n{pattern['name']}: {duration_str}")

            if show_steps:
                for step in pattern.get("steps", []):
                    step_duration = step.get("duration_ms")
                    step_duration_str = f"{step_duration:.0f}ms" if step_duration is not None else "in progress"
                    # Show first few metadata items only, to keep lines short.
                    meta_items = [(k, v) for k, v in step.items() if k not in ("name", "duration_ms")][:3]
                    meta_str = ", ".join(f"{k}={v}" for k, v in meta_items) if meta_items else ""
                    print(f" - {step['name']}: {step_duration_str}" + (f" ({meta_str})" if meta_str else ""))

        # Print step aggregates, largest total time first.
        aggregates = self.get_step_aggregates()
        if aggregates:
            print(f"\n{'-' * 60}")
            print("Step Aggregates:")
            for name, stats in sorted(aggregates.items(), key=lambda x: -x[1]["total_ms"]):
                print(
                    f" {name}: {stats['count']}x, total={stats['total_ms']:.0f}ms, "
                    f"avg={stats['avg_ms']:.0f}ms, range=[{stats['min_ms']:.0f}-{stats['max_ms']:.0f}]ms"
                )

    def reset(self) -> None:
        """Reset the monitor, clearing all recorded patterns."""
        self._patterns.clear()
        self._current_pattern = None
        self._current_step = None
        self._start_time = time.perf_counter()
@@ -0,0 +1,35 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Ragit Providers - LLM and Embedding providers for RAG optimization.
7
+
8
+ Supported providers:
9
+ - OllamaProvider: Connect to local or remote Ollama servers (supports nomic-embed-text)
10
+ - FunctionProvider: Wrap custom embedding/LLM functions
11
+
12
+ Base classes for implementing custom providers:
13
+ - BaseLLMProvider: Abstract base for LLM providers
14
+ - BaseEmbeddingProvider: Abstract base for embedding providers
15
+ """
16
+
17
+ from ragit.providers.base import (
18
+ BaseEmbeddingProvider,
19
+ BaseLLMProvider,
20
+ EmbeddingResponse,
21
+ LLMResponse,
22
+ )
23
+ from ragit.providers.function_adapter import FunctionProvider
24
+ from ragit.providers.ollama import OllamaProvider
25
+
26
# Public API of ragit.providers.
__all__ = [
    # Abstract interfaces and response dataclasses
    "BaseLLMProvider",
    "BaseEmbeddingProvider",
    "LLMResponse",
    "EmbeddingResponse",
    # Concrete provider implementations
    "OllamaProvider",
    "FunctionProvider",
]
@@ -0,0 +1,147 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Base provider interfaces for LLM and Embedding providers.
7
+
8
+ These abstract classes define the interface that all providers must implement,
9
+ making it easy to add new providers (Gemini, Claude, OpenAI, etc.)
10
+ """
11
+
12
+ from abc import ABC, abstractmethod
13
+ from dataclasses import dataclass
14
+
15
+
16
+ @dataclass
17
+ class LLMResponse:
18
+ """Response from an LLM call."""
19
+
20
+ text: str
21
+ model: str
22
+ provider: str
23
+ usage: dict[str, int] | None = None
24
+
25
+
26
@dataclass(frozen=True)
class EmbeddingResponse:
    """Response from an embedding call (immutable)."""

    # Embedding vector as an immutable tuple of floats.
    embedding: tuple[float, ...]
    # Model identifier that produced the embedding.
    model: str
    # Name of the provider that served the call.
    provider: str
    # Length of the embedding vector.
    dimensions: int
34
+
35
+
36
class BaseLLMProvider(ABC):
    """
    Abstract base class for LLM providers.

    Implement this to add support for new LLM providers like Gemini, Claude, etc.
    """

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the provider name (e.g., 'ollama', 'gemini', 'claude')."""
        ...

    @abstractmethod
    def generate(
        self,
        prompt: str,
        model: str,
        system_prompt: str | None = None,
        temperature: float = 0.7,
        max_tokens: int | None = None,
    ) -> LLMResponse:
        """
        Generate text from the LLM.

        Parameters
        ----------
        prompt : str
            The user prompt/query.
        model : str
            Model identifier (e.g., 'llama3', 'qwen3-vl:235b-instruct-cloud').
        system_prompt : str, optional
            System prompt for context/instructions.
        temperature : float
            Sampling temperature (0.0 to 1.0).
        max_tokens : int, optional
            Maximum tokens to generate.

        Returns
        -------
        LLMResponse
            The generated response.
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Check if the provider is available and configured."""
        ...
85
+
86
+
87
class BaseEmbeddingProvider(ABC):
    """
    Abstract base class for embedding providers.

    Implement this to add support for new embedding providers.
    """

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the provider name."""
        ...

    @property
    @abstractmethod
    def dimensions(self) -> int:
        """Return the embedding dimensions for the current model."""
        ...

    @abstractmethod
    def embed(self, text: str, model: str) -> EmbeddingResponse:
        """
        Generate embedding for text.

        Parameters
        ----------
        text : str
            Text to embed.
        model : str
            Model identifier (e.g., 'nomic-embed-text').

        Returns
        -------
        EmbeddingResponse
            The embedding response.
        """
        ...

    @abstractmethod
    def embed_batch(self, texts: list[str], model: str) -> list[EmbeddingResponse]:
        """
        Generate embeddings for multiple texts.

        Parameters
        ----------
        texts : list[str]
            Texts to embed.
        model : str
            Model identifier.

        Returns
        -------
        list[EmbeddingResponse]
            List of embedding responses.
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Check if the provider is available and configured."""
        ...