queryframe 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. queryframe/__init__.py +15 -0
  2. queryframe/cache/__init__.py +0 -0
  3. queryframe/cache/disk.py +135 -0
  4. queryframe/cache/hasher.py +33 -0
  5. queryframe/cache/memory.py +104 -0
  6. queryframe/core/__init__.py +0 -0
  7. queryframe/core/accessor.py +65 -0
  8. queryframe/core/config.py +96 -0
  9. queryframe/core/engine.py +280 -0
  10. queryframe/core/result.py +95 -0
  11. queryframe/core/schema.py +99 -0
  12. queryframe/llm/__init__.py +0 -0
  13. queryframe/llm/anthropic.py +63 -0
  14. queryframe/llm/base.py +39 -0
  15. queryframe/llm/gemini.py +74 -0
  16. queryframe/llm/lmstudio.py +76 -0
  17. queryframe/llm/ollama.py +135 -0
  18. queryframe/llm/openai.py +67 -0
  19. queryframe/llm/prompt/__init__.py +0 -0
  20. queryframe/llm/prompt/builder.py +134 -0
  21. queryframe/llm/prompt/compressor.py +47 -0
  22. queryframe/llm/prompt/templates.py +92 -0
  23. queryframe/llm/registry.py +113 -0
  24. queryframe/memory/__init__.py +0 -0
  25. queryframe/memory/context.py +38 -0
  26. queryframe/memory/conversation.py +70 -0
  27. queryframe/py.typed +0 -0
  28. queryframe/sandbox/__init__.py +0 -0
  29. queryframe/sandbox/executor.py +136 -0
  30. queryframe/sandbox/restricted.py +108 -0
  31. queryframe/sandbox/timeout.py +47 -0
  32. queryframe/sandbox/validator.py +127 -0
  33. queryframe/utils/__init__.py +0 -0
  34. queryframe/utils/dataframe.py +33 -0
  35. queryframe/utils/errors.py +83 -0
  36. queryframe/utils/logger.py +18 -0
  37. queryframe/viz/__init__.py +0 -0
  38. queryframe/viz/altair_renderer.py +215 -0
  39. queryframe/viz/base.py +34 -0
  40. queryframe/viz/chart_types.py +84 -0
  41. queryframe/viz/matplotlib_renderer.py +217 -0
  42. queryframe/viz/plotly_renderer.py +236 -0
  43. queryframe/viz/selector.py +128 -0
  44. queryframe/viz/style.py +94 -0
  45. queryframe/viz/theme.py +126 -0
  46. queryframe-0.1.0.dist-info/METADATA +360 -0
  47. queryframe-0.1.0.dist-info/RECORD +49 -0
  48. queryframe-0.1.0.dist-info/WHEEL +4 -0
  49. queryframe-0.1.0.dist-info/licenses/LICENSE +21 -0
queryframe/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """QueryFrame — Super fast natural language data visualization for pandas."""
2
+
3
+ from queryframe.core.accessor import ask, configure
4
+ from queryframe.core.config import QueryFrameConfig
5
+ from queryframe.core.engine import QueryEngine
6
+ from queryframe.core.result import QueryResult
7
+
8
+ __version__ = "0.1.0"
9
+ __all__ = [
10
+ "QueryEngine",
11
+ "QueryFrameConfig",
12
+ "QueryResult",
13
+ "ask",
14
+ "configure",
15
+ ]
File without changes
@@ -0,0 +1,135 @@
1
+ """SQLite-backed persistent disk cache."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import pickle
8
+ import sqlite3
9
+ import time
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from queryframe.cache.memory import CachedEntry
14
+ from queryframe.utils.logger import get_logger
15
+
16
+ logger = get_logger(__name__)
17
+
18
+ _DEFAULT_CACHE_DIR = os.path.expanduser("~/.queryframe")
19
+ _DEFAULT_CACHE_DB = os.path.join(_DEFAULT_CACHE_DIR, "cache.db")
20
+
21
+
22
class DiskCache:
    """SQLite-backed persistent cache for cross-session result reuse.

    Every public method opens a short-lived connection and closes it
    before returning, so no database handles are held between calls.
    """

    def __init__(
        self,
        db_path: str | None = None,
        default_ttl: float = 86400.0,  # 24 hours
    ) -> None:
        """Initialize the cache.

        Args:
            db_path: Path to the SQLite file; defaults to ~/.queryframe/cache.db.
            default_ttl: Fallback time-to-live in seconds for entries stored
                without an explicit ``ttl``.
        """
        self._db_path = db_path or _DEFAULT_CACHE_DB
        self._default_ttl = default_ttl
        self._ensure_db()

    def _ensure_db(self) -> None:
        """Create the database directory and table if they don't exist."""
        parent = os.path.dirname(self._db_path)
        # A bare filename has no directory component; os.makedirs("") raises.
        if parent:
            os.makedirs(parent, exist_ok=True)
        conn = self._connect()
        try:
            # `with conn` manages the transaction (commit/rollback) only.
            with conn:
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS cache (
                        key TEXT PRIMARY KEY,
                        code TEXT NOT NULL,
                        chart_type TEXT,
                        explanation TEXT,
                        data_pickle BLOB,
                        created_at REAL NOT NULL,
                        ttl_seconds REAL NOT NULL
                    )
                """)
        finally:
            # sqlite3's context manager does NOT close the connection —
            # close explicitly to avoid leaking file handles.
            conn.close()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection to the cache database."""
        return sqlite3.connect(self._db_path, timeout=5.0)

    def get(self, key: str) -> CachedEntry | None:
        """Get a cached entry from disk.

        Returns None on a miss, on expiry (the stale row is removed), or
        when the stored pickle can no longer be loaded.
        """
        conn = self._connect()
        try:
            row = conn.execute(
                "SELECT code, chart_type, explanation, data_pickle, created_at, ttl_seconds "
                "FROM cache WHERE key = ?",
                (key,),
            ).fetchone()
        finally:
            conn.close()

        if row is None:
            return None

        code, chart_type, explanation, data_pickle, created_at, ttl_seconds = row

        # Expired: purge the stale row and report a miss.
        if time.time() - created_at > ttl_seconds:
            self.delete(key)
            return None

        try:
            data = pickle.loads(data_pickle) if data_pickle else None
        except Exception:
            # Unreadable pickle (e.g. class changed between versions): discard.
            self.delete(key)
            return None

        return CachedEntry(
            data=data,
            code=code,
            chart_type=chart_type,
            explanation=explanation or "",
            created_at=created_at,
            ttl_seconds=ttl_seconds,
        )

    def set(
        self,
        key: str,
        data: Any,
        code: str,
        chart_type: str | None = None,
        explanation: str = "",
        ttl: float | None = None,
    ) -> None:
        """Store a result on disk.

        Args:
            ttl: Time-to-live in seconds; ``None`` selects the default TTL.
                An explicit ttl of 0 is honored (entry expires immediately).
        """
        try:
            data_pickle = pickle.dumps(data, protocol=5)
        except Exception:
            logger.warning("Cannot pickle data for cache, skipping disk cache")
            return

        # `ttl or default` would silently replace an explicit ttl=0 with the
        # default; only fall back when ttl was not provided at all.
        ttl_seconds = self._default_ttl if ttl is None else ttl

        conn = self._connect()
        try:
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO cache "
                    "(key, code, chart_type, explanation, data_pickle, created_at, ttl_seconds) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (key, code, chart_type, explanation, data_pickle, time.time(), ttl_seconds),
                )
        finally:
            conn.close()

    def delete(self, key: str) -> None:
        """Delete a specific cache entry."""
        conn = self._connect()
        try:
            with conn:
                conn.execute("DELETE FROM cache WHERE key = ?", (key,))
        finally:
            conn.close()

    def clear(self) -> None:
        """Clear all cached entries."""
        conn = self._connect()
        try:
            with conn:
                conn.execute("DELETE FROM cache")
        finally:
            conn.close()

    def cleanup_expired(self) -> int:
        """Remove expired entries. Returns count of removed entries."""
        now = time.time()
        conn = self._connect()
        try:
            with conn:
                cursor = conn.execute(
                    "DELETE FROM cache WHERE (? - created_at) > ttl_seconds",
                    (now,),
                )
                return cursor.rowcount
        finally:
            conn.close()

    @property
    def size(self) -> int:
        """Number of rows currently stored (including not-yet-purged expired ones)."""
        conn = self._connect()
        try:
            row = conn.execute("SELECT COUNT(*) FROM cache").fetchone()
            return row[0] if row else 0
        finally:
            conn.close()
@@ -0,0 +1,33 @@
1
+ """Query and schema fingerprinting for cache keys."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ import xxhash
8
+
9
+
10
def normalize_query(query: str) -> str:
    """Normalize a query for consistent cache key generation.

    - Lowercase
    - Strip extra whitespace
    - Remove filler words

    Whitespace is re-collapsed after filler removal so that queries that
    differ only by fillers (e.g. "sales please now" vs "sales now")
    produce identical cache keys.
    """
    q = query.lower().strip()
    q = re.sub(r"\s+", " ", q)

    # Remove common filler words that don't change query meaning.
    # A tuple keeps the removal order deterministic across runs.
    fillers = (
        "please", "can you", "could you", "show me", "give me", "tell me",
        "i want to", "i'd like to", "i would like to", "let me see",
    )
    for filler in fillers:
        q = q.replace(filler, "")

    # Filler removal leaves runs of spaces behind ("sales  now");
    # collapse them again so equivalent queries hash identically.
    q = re.sub(r"\s+", " ", q)

    return q.strip()
27
+
28
+
29
+ def hash_query(query: str, schema_fingerprint: str) -> str:
30
+ """Generate a fast hash key from a normalized query + schema fingerprint."""
31
+ normalized = normalize_query(query)
32
+ raw = f"{normalized}|{schema_fingerprint}"
33
+ return xxhash.xxh64(raw.encode()).hexdigest()
@@ -0,0 +1,104 @@
1
+ """In-memory LRU cache."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ import time
7
+ from collections import OrderedDict
8
+ from dataclasses import dataclass
9
+ from typing import Any
10
+
11
+
12
@dataclass(frozen=True)
class CachedEntry:
    """A cached query result."""

    # Arbitrary result payload; may be None (DiskCache stores it pickled).
    data: Any
    # Code string associated with the result (required, never None).
    code: str
    # Optional chart-type label; None when no chart applies.
    chart_type: str | None
    # Human-readable explanation; empty string when absent.
    explanation: str
    # Creation timestamp from time.time().
    created_at: float
    # Time-to-live: the entry is considered expired once
    # time.time() - created_at exceeds this value.
    ttl_seconds: float
22
+
23
+
24
class MemoryCache:
    """Thread-safe in-memory LRU cache with per-entry TTL.

    Entries live in an OrderedDict: a successful get() moves the key to
    the most-recently-used end, and set() evicts from the LRU end while
    the cache exceeds max_size.
    """

    def __init__(self, max_size: int = 100, default_ttl: float = 3600.0) -> None:
        """Initialize the cache.

        Args:
            max_size: Maximum number of entries before LRU eviction.
            default_ttl: TTL in seconds used when set() receives no ttl.
        """
        self._cache: OrderedDict[str, CachedEntry] = OrderedDict()
        self._max_size = max_size
        self._default_ttl = default_ttl
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    def get(self, key: str) -> CachedEntry | None:
        """Get a cached entry by key. Returns None on miss or expiry."""
        with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                self._misses += 1
                return None

            # Expired entries are dropped eagerly and counted as misses.
            if time.time() - entry.created_at > entry.ttl_seconds:
                del self._cache[key]
                self._misses += 1
                return None

            # Move to end (most recently used) to maintain LRU order.
            self._cache.move_to_end(key)
            self._hits += 1
            return entry

    def set(
        self,
        key: str,
        data: Any,
        code: str,
        chart_type: str | None = None,
        explanation: str = "",
        ttl: float | None = None,
    ) -> None:
        """Store a result in the cache.

        Args:
            ttl: Time-to-live in seconds; None selects default_ttl. An
                explicit ttl of 0 is honored (entry expires immediately).
        """
        with self._lock:
            entry = CachedEntry(
                data=data,
                code=code,
                chart_type=chart_type,
                explanation=explanation,
                created_at=time.time(),
                # `ttl or default` would silently replace an explicit ttl=0
                # with the default; only fall back when ttl is None.
                ttl_seconds=self._default_ttl if ttl is None else ttl,
            )
            self._cache[key] = entry
            self._cache.move_to_end(key)

            # Evict least-recently-used entries while over capacity.
            while len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

    def clear(self) -> None:
        """Clear all cached entries and reset hit/miss statistics."""
        with self._lock:
            self._cache.clear()
            self._hits = 0
            self._misses = 0

    @property
    def size(self) -> int:
        """Current number of stored entries (expired-but-unread included)."""
        # Read under the lock to honor the class's thread-safety contract.
        with self._lock:
            return len(self._cache)

    @property
    def hit_rate(self) -> float:
        """Fraction of lookups served from cache; 0.0 before any lookup."""
        with self._lock:
            total = self._hits + self._misses
            return self._hits / total if total > 0 else 0.0

    @property
    def stats(self) -> dict[str, Any]:
        """Snapshot of cache statistics for logging/debugging."""
        # size and hit_rate acquire the (non-reentrant) lock themselves,
        # so this property must not hold it while calling them.
        return {
            "size": self.size,
            "max_size": self._max_size,
            "hits": self._hits,
            "misses": self._misses,
            "hit_rate": f"{self.hit_rate:.1%}",
        }
File without changes
@@ -0,0 +1,65 @@
1
+ """Pandas DataFrame accessor — enables df.qf.ask() and df.ask()."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import pandas as pd
8
+
9
+ from queryframe.core.config import QueryFrameConfig
10
+ from queryframe.core.engine import QueryEngine
11
+ from queryframe.core.result import QueryResult
12
+
13
+ # Global engine instance (lazily created)
14
+ _global_engine: QueryEngine | None = None
15
+
16
+
17
+ def _get_engine() -> QueryEngine:
18
+ """Get or create the global engine instance."""
19
+ global _global_engine
20
+ if _global_engine is None:
21
+ _global_engine = QueryEngine()
22
+ return _global_engine
23
+
24
+
25
+ def configure(**kwargs: Any) -> None:
26
+ """Configure the global QueryFrame engine.
27
+
28
+ Example:
29
+ import queryframe as qf
30
+ qf.configure(provider="openai", model="gpt-4o")
31
+ """
32
+ global _global_engine
33
+ config = QueryFrameConfig.from_env().with_overrides(**kwargs)
34
+ _global_engine = QueryEngine(config=config)
35
+
36
+
37
+ def ask(df: pd.DataFrame, query: str, **kwargs: Any) -> QueryResult:
38
+ """Ask a question about a DataFrame using the global engine.
39
+
40
+ Example:
41
+ import queryframe as qf
42
+ result = qf.ask(df, "what is the average sales by region?")
43
+ """
44
+ return _get_engine().ask(df, query, **kwargs)
45
+
46
+
47
@pd.api.extensions.register_dataframe_accessor("qf")
class QueryFrameAccessor:
    """Pandas accessor that adds .qf.ask() to DataFrames.

    Example:
        result = df.qf.ask("show me sales by region")
        result = df.qf.ask("what is the average price?")
    """

    def __init__(self, pandas_obj: pd.DataFrame) -> None:
        # Hold a reference to the wrapped DataFrame for later queries.
        self._df = pandas_obj

    def ask(self, query: str, **kwargs: Any) -> QueryResult:
        """Ask a natural language question about this DataFrame."""
        engine = _get_engine()
        return engine.ask(self._df, query, **kwargs)

    def config(self, **kwargs: Any) -> None:
        """Configure the global engine."""
        configure(**kwargs)
@@ -0,0 +1,96 @@
1
+ """Global configuration for QueryFrame."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass, field
7
+ from typing import Literal
8
+
9
+ from queryframe.utils.errors import ConfigError
10
+
11
+
12
@dataclass(frozen=True)
class QueryFrameConfig:
    """Immutable configuration for QueryFrame.

    Can be created directly or via `from_env()` to read QF_* environment variables.
    """

    provider: str = "auto"
    model: str | None = None
    api_key: str | None = None
    api_base: str | None = None
    cache_enabled: bool = True
    sandbox_enabled: bool = True
    timeout: int = 30
    viz_mode: Literal["auto", "plotly", "matplotlib", "altair"] = "auto"
    max_retries: int = 2
    verbose: bool = False
    max_sample_rows: int = 3
    max_context_turns: int = 3

    @classmethod
    def from_env(cls) -> QueryFrameConfig:
        """Create config from QF_* environment variables.

        Raises:
            ConfigError: If an integer-valued variable (QF_TIMEOUT,
                QF_MAX_RETRIES) cannot be parsed as an integer.
        """
        kwargs: dict = {}
        env_map = {
            "QF_PROVIDER": "provider",
            "QF_MODEL": "model",
            "QF_API_KEY": "api_key",
            "QF_API_BASE": "api_base",
            "QF_TIMEOUT": "timeout",
            "QF_VIZ": "viz_mode",
            "QF_MAX_RETRIES": "max_retries",
            "QF_VERBOSE": "verbose",
        }
        for env_var, field_name in env_map.items():
            val = os.environ.get(env_var)
            if val is None:
                continue
            if field_name in ("timeout", "max_retries"):
                try:
                    kwargs[field_name] = int(val)
                except ValueError as err:
                    # Surface a package error instead of a bare ValueError so
                    # callers can catch the same exception type as validate().
                    raise ConfigError(
                        f"{env_var} must be an integer, got {val!r}"
                    ) from err
            elif field_name == "verbose":
                kwargs[field_name] = val.lower() in ("1", "true", "yes")
            else:
                kwargs[field_name] = val

        # Auto-detect API keys from standard env vars when QF_API_KEY is unset.
        if "api_key" not in kwargs:
            for env_var in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY"):
                key = os.environ.get(env_var)
                if key:
                    kwargs["api_key"] = key
                    # Infer the provider from the key's origin unless it was
                    # set explicitly via QF_PROVIDER.
                    if "provider" not in kwargs:
                        provider_map = {
                            "OPENAI_API_KEY": "openai",
                            "ANTHROPIC_API_KEY": "anthropic",
                            "GOOGLE_API_KEY": "gemini",
                        }
                        kwargs["provider"] = provider_map[env_var]
                    break

        return cls(**kwargs)

    def with_overrides(self, **kwargs: object) -> QueryFrameConfig:
        """Return a new config with the given overrides applied.

        Unknown field names raise TypeError, as with the constructor.
        """
        from dataclasses import replace

        # replace() merges overrides into a fresh frozen instance without
        # round-tripping every field through asdict().
        return replace(self, **kwargs)

    def validate(self) -> None:
        """Validate the configuration, raising ConfigError if invalid."""
        valid_providers = {"auto", "openai", "anthropic", "gemini", "ollama", "lmstudio"}
        if self.provider not in valid_providers:
            raise ConfigError(
                f"Unknown provider '{self.provider}'. Valid: {', '.join(sorted(valid_providers))}"
            )
        valid_viz = {"auto", "plotly", "matplotlib", "altair"}
        if self.viz_mode not in valid_viz:
            raise ConfigError(
                f"Unknown viz_mode '{self.viz_mode}'. Valid: {', '.join(sorted(valid_viz))}"
            )
        if self.timeout < 1:
            raise ConfigError("timeout must be >= 1")
        if self.max_retries < 0:
            raise ConfigError("max_retries must be >= 0")