queryframe 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. queryframe/__init__.py +15 -0
  2. queryframe/cache/__init__.py +0 -0
  3. queryframe/cache/disk.py +135 -0
  4. queryframe/cache/hasher.py +33 -0
  5. queryframe/cache/memory.py +104 -0
  6. queryframe/core/__init__.py +0 -0
  7. queryframe/core/accessor.py +65 -0
  8. queryframe/core/config.py +96 -0
  9. queryframe/core/engine.py +280 -0
  10. queryframe/core/result.py +95 -0
  11. queryframe/core/schema.py +99 -0
  12. queryframe/llm/__init__.py +0 -0
  13. queryframe/llm/anthropic.py +63 -0
  14. queryframe/llm/base.py +39 -0
  15. queryframe/llm/gemini.py +74 -0
  16. queryframe/llm/lmstudio.py +76 -0
  17. queryframe/llm/ollama.py +135 -0
  18. queryframe/llm/openai.py +67 -0
  19. queryframe/llm/prompt/__init__.py +0 -0
  20. queryframe/llm/prompt/builder.py +134 -0
  21. queryframe/llm/prompt/compressor.py +47 -0
  22. queryframe/llm/prompt/templates.py +92 -0
  23. queryframe/llm/registry.py +113 -0
  24. queryframe/memory/__init__.py +0 -0
  25. queryframe/memory/context.py +38 -0
  26. queryframe/memory/conversation.py +70 -0
  27. queryframe/py.typed +0 -0
  28. queryframe/sandbox/__init__.py +0 -0
  29. queryframe/sandbox/executor.py +136 -0
  30. queryframe/sandbox/restricted.py +108 -0
  31. queryframe/sandbox/timeout.py +47 -0
  32. queryframe/sandbox/validator.py +127 -0
  33. queryframe/utils/__init__.py +0 -0
  34. queryframe/utils/dataframe.py +33 -0
  35. queryframe/utils/errors.py +83 -0
  36. queryframe/utils/logger.py +18 -0
  37. queryframe/viz/__init__.py +0 -0
  38. queryframe/viz/altair_renderer.py +215 -0
  39. queryframe/viz/base.py +34 -0
  40. queryframe/viz/chart_types.py +84 -0
  41. queryframe/viz/matplotlib_renderer.py +217 -0
  42. queryframe/viz/plotly_renderer.py +236 -0
  43. queryframe/viz/selector.py +128 -0
  44. queryframe/viz/style.py +94 -0
  45. queryframe/viz/theme.py +126 -0
  46. queryframe-0.1.0.dist-info/METADATA +360 -0
  47. queryframe-0.1.0.dist-info/RECORD +49 -0
  48. queryframe-0.1.0.dist-info/WHEEL +4 -0
  49. queryframe-0.1.0.dist-info/licenses/LICENSE +21 -0
queryframe/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """QueryFrame — Super fast natural language data visualization for pandas."""
2
+
3
+ from queryframe.core.accessor import ask, configure
4
+ from queryframe.core.config import QueryFrameConfig
5
+ from queryframe.core.engine import QueryEngine
6
+ from queryframe.core.result import QueryResult
7
+
8
+ __version__ = "0.1.0"
9
+ __all__ = [
10
+ "QueryEngine",
11
+ "QueryFrameConfig",
12
+ "QueryResult",
13
+ "ask",
14
+ "configure",
15
+ ]
File without changes
@@ -0,0 +1,135 @@
1
+ """SQLite-backed persistent disk cache."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import pickle
8
+ import sqlite3
9
+ import time
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from queryframe.cache.memory import CachedEntry
14
+ from queryframe.utils.logger import get_logger
15
+
16
+ logger = get_logger(__name__)
17
+
18
+ _DEFAULT_CACHE_DIR = os.path.expanduser("~/.queryframe")
19
+ _DEFAULT_CACHE_DB = os.path.join(_DEFAULT_CACHE_DIR, "cache.db")
20
+
21
+
22
class DiskCache:
    """SQLite-backed persistent cache for cross-session result reuse.

    Every public method opens a short-lived connection and closes it
    before returning, so no database handles are held between calls.
    """

    def __init__(
        self,
        db_path: str | None = None,
        default_ttl: float = 86400.0,  # 24 hours
    ) -> None:
        """Initialize the cache.

        Args:
            db_path: Path to the SQLite file; defaults to ~/.queryframe/cache.db.
            default_ttl: Fallback time-to-live in seconds for entries stored
                without an explicit ``ttl``.
        """
        self._db_path = db_path or _DEFAULT_CACHE_DB
        self._default_ttl = default_ttl
        self._ensure_db()

    def _ensure_db(self) -> None:
        """Create the database directory and table if they don't exist."""
        parent = os.path.dirname(self._db_path)
        # A bare filename has no directory component; os.makedirs("") raises.
        if parent:
            os.makedirs(parent, exist_ok=True)
        conn = self._connect()
        try:
            # `with conn` manages the transaction (commit/rollback) only.
            with conn:
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS cache (
                        key TEXT PRIMARY KEY,
                        code TEXT NOT NULL,
                        chart_type TEXT,
                        explanation TEXT,
                        data_pickle BLOB,
                        created_at REAL NOT NULL,
                        ttl_seconds REAL NOT NULL
                    )
                """)
        finally:
            # sqlite3's context manager does NOT close the connection —
            # close explicitly to avoid leaking file handles.
            conn.close()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection to the cache database."""
        return sqlite3.connect(self._db_path, timeout=5.0)

    def get(self, key: str) -> CachedEntry | None:
        """Get a cached entry from disk.

        Returns None on a miss, on expiry (the stale row is removed), or
        when the stored pickle can no longer be loaded.
        """
        conn = self._connect()
        try:
            row = conn.execute(
                "SELECT code, chart_type, explanation, data_pickle, created_at, ttl_seconds "
                "FROM cache WHERE key = ?",
                (key,),
            ).fetchone()
        finally:
            conn.close()

        if row is None:
            return None

        code, chart_type, explanation, data_pickle, created_at, ttl_seconds = row

        # Expired: purge the stale row and report a miss.
        if time.time() - created_at > ttl_seconds:
            self.delete(key)
            return None

        try:
            data = pickle.loads(data_pickle) if data_pickle else None
        except Exception:
            # Unreadable pickle (e.g. class changed between versions): discard.
            self.delete(key)
            return None

        return CachedEntry(
            data=data,
            code=code,
            chart_type=chart_type,
            explanation=explanation or "",
            created_at=created_at,
            ttl_seconds=ttl_seconds,
        )

    def set(
        self,
        key: str,
        data: Any,
        code: str,
        chart_type: str | None = None,
        explanation: str = "",
        ttl: float | None = None,
    ) -> None:
        """Store a result on disk.

        Args:
            ttl: Time-to-live in seconds; ``None`` selects the default TTL.
                An explicit ttl of 0 is honored (entry expires immediately).
        """
        try:
            data_pickle = pickle.dumps(data, protocol=5)
        except Exception:
            logger.warning("Cannot pickle data for cache, skipping disk cache")
            return

        # `ttl or default` would silently replace an explicit ttl=0 with the
        # default; only fall back when ttl was not provided at all.
        ttl_seconds = self._default_ttl if ttl is None else ttl

        conn = self._connect()
        try:
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO cache "
                    "(key, code, chart_type, explanation, data_pickle, created_at, ttl_seconds) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (key, code, chart_type, explanation, data_pickle, time.time(), ttl_seconds),
                )
        finally:
            conn.close()

    def delete(self, key: str) -> None:
        """Delete a specific cache entry."""
        conn = self._connect()
        try:
            with conn:
                conn.execute("DELETE FROM cache WHERE key = ?", (key,))
        finally:
            conn.close()

    def clear(self) -> None:
        """Clear all cached entries."""
        conn = self._connect()
        try:
            with conn:
                conn.execute("DELETE FROM cache")
        finally:
            conn.close()

    def cleanup_expired(self) -> int:
        """Remove expired entries. Returns count of removed entries."""
        now = time.time()
        conn = self._connect()
        try:
            with conn:
                cursor = conn.execute(
                    "DELETE FROM cache WHERE (? - created_at) > ttl_seconds",
                    (now,),
                )
                return cursor.rowcount
        finally:
            conn.close()

    @property
    def size(self) -> int:
        """Number of rows currently stored (including not-yet-purged expired ones)."""
        conn = self._connect()
        try:
            row = conn.execute("SELECT COUNT(*) FROM cache").fetchone()
            return row[0] if row else 0
        finally:
            conn.close()
@@ -0,0 +1,33 @@
1
+ """Query and schema fingerprinting for cache keys."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ import xxhash
8
+
9
+
10
def normalize_query(query: str) -> str:
    """Normalize a query for consistent cache key generation.

    - Lowercase
    - Strip extra whitespace
    - Remove filler words

    Whitespace is re-collapsed after filler removal so that queries that
    differ only by fillers (e.g. "sales please now" vs "sales now")
    produce identical cache keys.
    """
    q = query.lower().strip()
    q = re.sub(r"\s+", " ", q)

    # Remove common filler words that don't change query meaning.
    # A tuple keeps the removal order deterministic across runs.
    fillers = (
        "please", "can you", "could you", "show me", "give me", "tell me",
        "i want to", "i'd like to", "i would like to", "let me see",
    )
    for filler in fillers:
        q = q.replace(filler, "")

    # Filler removal leaves runs of spaces behind ("sales  now");
    # collapse them again so equivalent queries hash identically.
    q = re.sub(r"\s+", " ", q)

    return q.strip()
27
+
28
+
29
+ def hash_query(query: str, schema_fingerprint: str) -> str:
30
+ """Generate a fast hash key from a normalized query + schema fingerprint."""
31
+ normalized = normalize_query(query)
32
+ raw = f"{normalized}|{schema_fingerprint}"
33
+ return xxhash.xxh64(raw.encode()).hexdigest()
@@ -0,0 +1,104 @@
1
+ """In-memory LRU cache."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ import time
7
+ from collections import OrderedDict
8
+ from dataclasses import dataclass
9
+ from typing import Any
10
+
11
+
12
@dataclass(frozen=True)
class CachedEntry:
    """A cached query result."""

    # Arbitrary result payload; may be None (DiskCache stores it pickled).
    data: Any
    # Code string associated with the result (required, never None).
    code: str
    # Optional chart-type label; None when no chart applies.
    chart_type: str | None
    # Human-readable explanation; empty string when absent.
    explanation: str
    # Creation timestamp from time.time().
    created_at: float
    # Time-to-live: the entry is considered expired once
    # time.time() - created_at exceeds this value.
    ttl_seconds: float
22
+
23
+
24
class MemoryCache:
    """Thread-safe in-memory LRU cache with per-entry TTL.

    Entries live in an OrderedDict: a successful get() moves the key to
    the most-recently-used end, and set() evicts from the LRU end while
    the cache exceeds max_size.
    """

    def __init__(self, max_size: int = 100, default_ttl: float = 3600.0) -> None:
        """Initialize the cache.

        Args:
            max_size: Maximum number of entries before LRU eviction.
            default_ttl: TTL in seconds used when set() receives no ttl.
        """
        self._cache: OrderedDict[str, CachedEntry] = OrderedDict()
        self._max_size = max_size
        self._default_ttl = default_ttl
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    def get(self, key: str) -> CachedEntry | None:
        """Get a cached entry by key. Returns None on miss or expiry."""
        with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                self._misses += 1
                return None

            # Expired entries are dropped eagerly and counted as misses.
            if time.time() - entry.created_at > entry.ttl_seconds:
                del self._cache[key]
                self._misses += 1
                return None

            # Move to end (most recently used) to maintain LRU order.
            self._cache.move_to_end(key)
            self._hits += 1
            return entry

    def set(
        self,
        key: str,
        data: Any,
        code: str,
        chart_type: str | None = None,
        explanation: str = "",
        ttl: float | None = None,
    ) -> None:
        """Store a result in the cache.

        Args:
            ttl: Time-to-live in seconds; None selects default_ttl. An
                explicit ttl of 0 is honored (entry expires immediately).
        """
        with self._lock:
            entry = CachedEntry(
                data=data,
                code=code,
                chart_type=chart_type,
                explanation=explanation,
                created_at=time.time(),
                # `ttl or default` would silently replace an explicit ttl=0
                # with the default; only fall back when ttl is None.
                ttl_seconds=self._default_ttl if ttl is None else ttl,
            )
            self._cache[key] = entry
            self._cache.move_to_end(key)

            # Evict least-recently-used entries while over capacity.
            while len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

    def clear(self) -> None:
        """Clear all cached entries and reset hit/miss statistics."""
        with self._lock:
            self._cache.clear()
            self._hits = 0
            self._misses = 0

    @property
    def size(self) -> int:
        """Current number of stored entries (expired-but-unread included)."""
        # Read under the lock to honor the class's thread-safety contract.
        with self._lock:
            return len(self._cache)

    @property
    def hit_rate(self) -> float:
        """Fraction of lookups served from cache; 0.0 before any lookup."""
        with self._lock:
            total = self._hits + self._misses
            return self._hits / total if total > 0 else 0.0

    @property
    def stats(self) -> dict[str, Any]:
        """Snapshot of cache statistics for logging/debugging."""
        # size and hit_rate acquire the (non-reentrant) lock themselves,
        # so this property must not hold it while calling them.
        return {
            "size": self.size,
            "max_size": self._max_size,
            "hits": self._hits,
            "misses": self._misses,
            "hit_rate": f"{self.hit_rate:.1%}",
        }
File without changes
@@ -0,0 +1,65 @@
1
+ """Pandas DataFrame accessor — enables df.qf.ask() and df.ask()."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import pandas as pd
8
+
9
+ from queryframe.core.config import QueryFrameConfig
10
+ from queryframe.core.engine import QueryEngine
11
+ from queryframe.core.result import QueryResult
12
+
13
+ # Global engine instance (lazily created)
14
+ _global_engine: QueryEngine | None = None
15
+
16
+
17
+ def _get_engine() -> QueryEngine:
18
+ """Get or create the global engine instance."""
19
+ global _global_engine
20
+ if _global_engine is None:
21
+ _global_engine = QueryEngine()
22
+ return _global_engine
23
+
24
+
25
+ def configure(**kwargs: Any) -> None:
26
+ """Configure the global QueryFrame engine.
27
+
28
+ Example:
29
+ import queryframe as qf
30
+ qf.configure(provider="openai", model="gpt-4o")
31
+ """
32
+ global _global_engine
33
+ config = QueryFrameConfig.from_env().with_overrides(**kwargs)
34
+ _global_engine = QueryEngine(config=config)
35
+
36
+
37
+ def ask(df: pd.DataFrame, query: str, **kwargs: Any) -> QueryResult:
38
+ """Ask a question about a DataFrame using the global engine.
39
+
40
+ Example:
41
+ import queryframe as qf
42
+ result = qf.ask(df, "what is the average sales by region?")
43
+ """
44
+ return _get_engine().ask(df, query, **kwargs)
45
+
46
+
47
@pd.api.extensions.register_dataframe_accessor("qf")
class QueryFrameAccessor:
    """Pandas accessor that adds .qf.ask() to DataFrames.

    Example:
        result = df.qf.ask("show me sales by region")
        result = df.qf.ask("what is the average price?")
    """

    def __init__(self, pandas_obj: pd.DataFrame) -> None:
        # Hold a reference to the wrapped DataFrame for later queries.
        self._df = pandas_obj

    def ask(self, query: str, **kwargs: Any) -> QueryResult:
        """Ask a natural language question about this DataFrame."""
        engine = _get_engine()
        return engine.ask(self._df, query, **kwargs)

    def config(self, **kwargs: Any) -> None:
        """Configure the global engine."""
        configure(**kwargs)
@@ -0,0 +1,96 @@
1
+ """Global configuration for QueryFrame."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass, field
7
+ from typing import Literal
8
+
9
+ from queryframe.utils.errors import ConfigError
10
+
11
+
12
@dataclass(frozen=True)
class QueryFrameConfig:
    """Immutable configuration for QueryFrame.

    Can be created directly or via `from_env()` to read QF_* environment variables.
    """

    provider: str = "auto"
    model: str | None = None
    api_key: str | None = None
    api_base: str | None = None
    cache_enabled: bool = True
    sandbox_enabled: bool = True
    timeout: int = 30
    viz_mode: Literal["auto", "plotly", "matplotlib", "altair"] = "auto"
    max_retries: int = 2
    verbose: bool = False
    max_sample_rows: int = 3
    max_context_turns: int = 3

    @classmethod
    def from_env(cls) -> QueryFrameConfig:
        """Create config from QF_* environment variables.

        Raises:
            ConfigError: If an integer-valued variable (QF_TIMEOUT,
                QF_MAX_RETRIES) cannot be parsed as an integer.
        """
        kwargs: dict = {}
        env_map = {
            "QF_PROVIDER": "provider",
            "QF_MODEL": "model",
            "QF_API_KEY": "api_key",
            "QF_API_BASE": "api_base",
            "QF_TIMEOUT": "timeout",
            "QF_VIZ": "viz_mode",
            "QF_MAX_RETRIES": "max_retries",
            "QF_VERBOSE": "verbose",
        }
        for env_var, field_name in env_map.items():
            val = os.environ.get(env_var)
            if val is None:
                continue
            if field_name in ("timeout", "max_retries"):
                try:
                    kwargs[field_name] = int(val)
                except ValueError as err:
                    # Surface a package error instead of a bare ValueError so
                    # callers can catch the same exception type as validate().
                    raise ConfigError(
                        f"{env_var} must be an integer, got {val!r}"
                    ) from err
            elif field_name == "verbose":
                kwargs[field_name] = val.lower() in ("1", "true", "yes")
            else:
                kwargs[field_name] = val

        # Auto-detect API keys from standard env vars when QF_API_KEY is unset.
        if "api_key" not in kwargs:
            for env_var in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY"):
                key = os.environ.get(env_var)
                if key:
                    kwargs["api_key"] = key
                    # Infer the provider from the key's origin unless it was
                    # set explicitly via QF_PROVIDER.
                    if "provider" not in kwargs:
                        provider_map = {
                            "OPENAI_API_KEY": "openai",
                            "ANTHROPIC_API_KEY": "anthropic",
                            "GOOGLE_API_KEY": "gemini",
                        }
                        kwargs["provider"] = provider_map[env_var]
                    break

        return cls(**kwargs)

    def with_overrides(self, **kwargs: object) -> QueryFrameConfig:
        """Return a new config with the given overrides applied.

        Unknown field names raise TypeError, as with the constructor.
        """
        from dataclasses import replace

        # replace() merges overrides into a fresh frozen instance without
        # round-tripping every field through asdict().
        return replace(self, **kwargs)

    def validate(self) -> None:
        """Validate the configuration, raising ConfigError if invalid."""
        valid_providers = {"auto", "openai", "anthropic", "gemini", "ollama", "lmstudio"}
        if self.provider not in valid_providers:
            raise ConfigError(
                f"Unknown provider '{self.provider}'. Valid: {', '.join(sorted(valid_providers))}"
            )
        valid_viz = {"auto", "plotly", "matplotlib", "altair"}
        if self.viz_mode not in valid_viz:
            raise ConfigError(
                f"Unknown viz_mode '{self.viz_mode}'. Valid: {', '.join(sorted(valid_viz))}"
            )
        if self.timeout < 1:
            raise ConfigError("timeout must be >= 1")
        if self.max_retries < 0:
            raise ConfigError("max_retries must be >= 0")