flatseek 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flatseek/__init__.py ADDED
@@ -0,0 +1,41 @@
1
+ """Flatseek — dual-mode client for Flatseek trigram index.
2
+
3
+ Usage (API mode):
4
+ from flatseek import Flatseek
5
+
6
+ client = Flatseek("http://localhost:8000")
7
+ result = client.search(index="solana_txs", q="program:raydium AND amount:>1000000")
8
+
9
+ Usage (direct mode):
10
+ from flatseek import Flatseek
11
+
12
+ qe = Flatseek("./data") # single index
13
+ qe = Flatseek("./data", index="solana_txs") # named sub-index
14
+ result = qe.search(q="program:raydium AND signer:*7xMg*", size=10)
15
+
16
+ # Aggregation (direct mode, with query filter)
17
+ result = qe.aggregate(q="status:active AND country:ID", aggs={
18
+ "by_campaign": {"terms": {"field": "campaign", "size": 10}},
19
+ "bid_stats": {"stats": {"field": "bid"}}
20
+ })
21
+ print(result.aggs["by_campaign"]["buckets"])
22
+ """
23
+
24
+ __version__ = "0.1.0"
25
+
26
+ from flatseek.client import (
27
+ Flatseek,
28
+ Response,
29
+ CountResponse,
30
+ AggsResponse,
31
+ Elasticsearch,
32
+ )
33
+
34
+ __all__ = [
35
+ "Flatseek",
36
+ "Response",
37
+ "CountResponse",
38
+ "AggsResponse",
39
+ "Elasticsearch",
40
+ "__version__",
41
+ ]
@@ -0,0 +1 @@
1
+ # Flatseek API - FastAPI wrapper for trigram inverted index
flatseek/api/deps.py ADDED
@@ -0,0 +1,128 @@
1
+ """Dependency injection for Flatseek API."""
2
+
3
+ import os
4
+ from contextlib import asynccontextmanager
5
+ from typing import TYPE_CHECKING, AsyncGenerator, Any
6
+
7
+ from fastapi import Depends, HTTPException
8
+
9
+ # Default data directory
10
+ DEFAULT_DATA_DIR = os.environ.get("FLATSEEK_DATA_DIR", "data")
11
+
12
+ # Import QueryEngine lazily to avoid circular imports
13
+ if TYPE_CHECKING:
14
+ from flatseek.core.query_engine import QueryEngine
15
+
16
+
17
+ class IndexManager:
18
+ """Manages QueryEngine instances per index (lazy loading)."""
19
+
20
+ def __init__(self, data_dir: str = DEFAULT_DATA_DIR):
21
+ self.data_dir = data_dir
22
+ self._engines: dict[str, Any] = {}
23
+ # Per-index passwords for encrypted indexes
24
+ self._index_passwords: dict[str, str] = {}
25
+
26
+ def set_password(self, index: str, password: str) -> None:
27
+ """Store password for an encrypted index."""
28
+ self._index_passwords[index] = password
29
+
30
+ def get_password(self, index: str) -> str | None:
31
+ """Get stored password for an index, or None."""
32
+ return self._index_passwords.get(index)
33
+
34
+ def clear_password(self, index: str) -> None:
35
+ """Remove stored password for an index."""
36
+ self._index_passwords.pop(index, None)
37
+
38
+ def is_encrypted(self, index: str) -> bool:
39
+ """Check if a specific index is encrypted.
40
+
41
+ Each index stores its own encryption.json inside its folder.
42
+ If that file exists, the index is encrypted.
43
+ """
44
+ possible_paths = [
45
+ os.path.join(self.data_dir, index),
46
+ os.path.join(self.data_dir, index, "..", index),
47
+ ]
48
+ for index_path in possible_paths:
49
+ enc_path = os.path.join(index_path, "encryption.json")
50
+ if os.path.isfile(enc_path):
51
+ return True
52
+ return False
53
+
54
+ def get_engine(self, index: str) -> "QueryEngine":
55
+ """Get or create QueryEngine for an index."""
56
+ if not index.replace("_", "").replace("-", "").replace("/", "").isalnum():
57
+ raise HTTPException(400, f"Invalid index name: {index}")
58
+
59
+ engine = self._engines.get(index)
60
+ if engine is None:
61
+ possible_paths = [
62
+ os.path.join(self.data_dir, index),
63
+ os.path.join(self.data_dir, index, "..", index),
64
+ ]
65
+ from flatseek.core.query_engine import QueryEngine
66
+ for path in possible_paths:
67
+ if os.path.isdir(os.path.join(path, "index")):
68
+ engine = QueryEngine(path)
69
+ self._engines[index] = engine
70
+ break
71
+
72
+ if engine is None:
73
+ raise HTTPException(404, f"Index not found: {index}")
74
+
75
+ return engine
76
+
77
+ def list_indices(self) -> list[str]:
78
+ """List all available indices."""
79
+ if not os.path.isdir(self.data_dir):
80
+ return []
81
+
82
+ indices = []
83
+ for name in os.listdir(self.data_dir):
84
+ path = os.path.join(self.data_dir, name)
85
+ if os.path.isdir(os.path.join(path, "index")):
86
+ indices.append(name)
87
+ elif os.path.isdir(path):
88
+ # Check for sub-indexes
89
+ for sub in os.listdir(path):
90
+ sub_path = os.path.join(path, sub)
91
+ if os.path.isdir(os.path.join(sub_path, "index")):
92
+ indices.append(f"{name}/{sub}")
93
+ return sorted(indices)
94
+
95
+
96
# Global index manager (created on first use by get_index_manager)
_index_manager: IndexManager | None = None


def get_index_manager() -> IndexManager:
    """Get the global IndexManager instance, re-reading FLATSEEK_DATA_DIR each call.

    The environment variable is consulted on the first call as well, so a
    value set after import is honoured immediately. When the directory
    changes, engines and passwords cached for the previous directory are
    dropped: they belong to indexes under the old directory and must not be
    served for same-named indexes under the new one.
    """
    global _index_manager
    if _index_manager is None:
        _index_manager = IndexManager(
            os.environ.get("FLATSEEK_DATA_DIR", DEFAULT_DATA_DIR)
        )
        return _index_manager

    # Pick up changed FLATSEEK_DATA_DIR for tests
    current_dir = os.environ.get("FLATSEEK_DATA_DIR", _index_manager.data_dir)
    if current_dir != _index_manager.data_dir:
        _index_manager.data_dir = current_dir
        _index_manager._engines.clear()
        _index_manager._index_passwords.clear()
    return _index_manager
109
+
110
+
111
async def get_query_engine(
    index: str,
    manager: IndexManager = Depends(get_index_manager),
) -> "QueryEngine":
    """Resolve *index* to its QueryEngine for use as a FastAPI dependency.

    Delegates to ``manager.get_engine``; whatever that call raises
    propagates unchanged.
    """
    engine = manager.get_engine(index)
    return engine
117
+
118
+
119
def require_index(
    index: str,
    manager: IndexManager = Depends(get_index_manager),
) -> str:
    """Dependency that validates index exists.

    Returns *index* unchanged when ``manager.get_engine`` succeeds.

    Raises:
        HTTPException: the one raised by ``get_engine`` (400 for an invalid
            name, 404 for a missing index), or a 404 wrapping any other
            failure.
    """
    try:
        manager.get_engine(index)
    except HTTPException:
        # get_engine already chose the right status; re-wrapping it here
        # would turn a 400 "invalid name" into a 404.
        raise
    except Exception as e:
        raise HTTPException(404, str(e)) from e
    return index
flatseek/api/main.py ADDED
@@ -0,0 +1,264 @@
1
+ """FastAPI application for Flatseek (Elasticsearch-like API)."""
2
+
3
+ import os
4
+ import sys
5
+ import logging
6
+ import re
7
+ import tempfile
8
+ import shutil
9
+ from fastapi.staticfiles import StaticFiles
10
+ from pydantic import BaseModel, Field
11
+
12
# Configure logging: INFO level, timestamped "[LEVEL] logger-name: message" lines.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)

# Module-level logger used by the dashboard helpers in this file.
logger = logging.getLogger(__name__)
19
+
20
+
21
def create_app():
    """Create and return the FastAPI app.

    Returns a FastAPI instance with CORS middleware installed, or a
    ``_MockFastAPI`` stand-in when FastAPI cannot be imported.
    """
    # Guard only the imports: an ImportError raised later, while the app is
    # being built, is a genuine failure and must not be masked by the mock.
    try:
        from fastapi import FastAPI
        from fastapi.middleware.cors import CORSMiddleware
    except ImportError:
        return _MockFastAPI()

    app = FastAPI(
        title="Flatseek API",
        description="Trigram inverted index API — search, aggregate, and index your data. "
                    "Supports Solana blockchain txs, aviation ADS-B, AdTech campaigns, DevOps logs, and more.",
        version="0.1.0",
        terms_of_service="https://flatseek.io/terms",
        contact={"name": "Flatseek", "url": "https://flatseek.io"},
    )

    # NOTE(review): wildcard origins together with allow_credentials=True is
    # the most permissive CORS setup possible. Confirm that credentialed
    # cross-origin requests are really required before exposing this publicly.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    return app
47
+
48
+
49
+ # ─── OpenAPI Schemas ──────────────────────────────────────────────────────────
50
+
51
# Response schema for GET "/": static API identification fields.
# (Documented with comments rather than a docstring so the generated schema
# description is not changed.)
class RootInfo(BaseModel):
    name: str = Field(..., example="Flatseek API")
    # Example matches the version this module reports ("0.1.0"); it
    # previously read "0.10.0".
    version: str = Field(..., example="0.1.0")
    description: str = Field(..., example="Trigram inverted index API")
55
+
56
+
57
# Response schema for GET "/_cluster/health".
class ClusterHealth(BaseModel):
    # Overall status string reported by the endpoint.
    status: str = Field(..., example="green")
    # Size of the `indices` list below.
    number_of_indices: int = Field(..., example=3)
    number_of_nodes: int = Field(..., example=1)
    # Names of all discovered indices.
    indices: list[str]
62
+
63
+
64
# Response schema for GET "/_indices".
class IndicesList(BaseModel):
    # Names of all discovered indices.
    indices: list[str]
    # Number of entries in `indices`.
    count: int
67
+
68
+
69
class _MockFastAPI:
    """Minimal FastAPI mock for testing without FastAPI installed.

    Nothing is served. Registrations are only recorded:

    * ``_routes`` holds ``(path, method, func)`` tuples and included routers,
    * ``_middlewares`` holds ``(args, kwargs)`` of each ``add_middleware`` call,
    * ``_mounts`` holds ``(path, app, name)`` of each ``mount`` call.

    The route decorators accept and ignore arbitrary keyword arguments so
    that call sites written for the real FastAPI (``response_model=...``,
    ``include_in_schema=...``) also work against the mock.
    """

    def __init__(self):
        self._routes = []
        self._middlewares = []
        self._mounts = []

    def add_middleware(self, *args, **kwargs):
        """Record a middleware registration."""
        self._middlewares.append((args, kwargs))

    def include_router(self, router, **kwargs):
        """Record an included router (options are ignored)."""
        self._routes.append(router)

    def mount(self, path, app=None, name=None):
        """Record a sub-application mount."""
        self._mounts.append((path, app, name))

    def _route(self, method, path):
        """Build a decorator recording *func* under (*path*, *method*)."""
        def decorator(func):
            self._routes.append((path, method, func))
            return func
        return decorator

    def get(self, path, **kwargs):
        """Decorator registering a GET handler."""
        return self._route("GET", path)

    def post(self, path, **kwargs):
        """Decorator registering a POST handler."""
        return self._route("POST", path)

    def delete(self, path, **kwargs):
        """Decorator registering a DELETE handler."""
        return self._route("DELETE", path)
99
+
100
+
101
+ # ─── Dashboard (flatlens) mounting ────────────────────────────────────
102
+
103
+ _dashboard_attached = False
104
+ _dashboard_temp_dir = None # holds patched flatlens dir if we rewrite API_BASE
105
+
106
+
107
def _find_flatlens():
    """Return path to flatlens directory or None.

    A directory qualifies when it exists and contains ``index.html``.
    Locations are tried in this order: the development sibling checkout,
    ``FLATSEEK_FLATLENS_DIR``, ``FLATLENS_DIR``, ``~/.local/share/flatlens``
    and ``/opt/flatlens``.
    """
    # Dev layout: this file lives two directories below the package root;
    # the sibling flatlens checkout sits two levels above the package's parent.
    package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    package_parent = os.path.dirname(package_root)
    dev_checkout = os.path.normpath(
        os.path.join(package_parent, "..", "..", "flatlens")
    )

    search_order = (
        dev_checkout,
        os.environ.get("FLATSEEK_FLATLENS_DIR", ""),
        os.environ.get("FLATLENS_DIR", ""),
        os.path.join(os.path.expanduser("~"), ".local", "share", "flatlens"),
        "/opt/flatlens",
    )

    for location in search_order:
        if not location:
            # Unset environment variables show up as empty strings.
            continue
        if os.path.isdir(location) and os.path.isfile(os.path.join(location, "index.html")):
            return location
    return None
126
+
127
+
128
def _rewrite_text_file(path, transform):
    """Replace the UTF-8 text of *path* with ``transform(text)`` if it exists."""
    if not os.path.exists(path):
        return
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()
    with open(path, "w", encoding="utf-8") as f:
        f.write(transform(content))


def _copy_and_patch(flatlens_dir, api_base):
    """Copy flatlens to temp dir with API_BASE rewrite + logo href patch.

    Always copies to a temp dir so we never modify the original flatlens source.
    A temp dir left by a previous call is removed first.

    Args:
        flatlens_dir: Directory holding the unmodified dashboard files.
        api_base: URL written into ``js/api.js`` as the ``API_BASE`` constant.

    Returns:
        Path of the freshly created, patched copy (also remembered in the
        module-level ``_dashboard_temp_dir``).
    """
    global _dashboard_temp_dir
    if _dashboard_temp_dir:
        shutil.rmtree(_dashboard_temp_dir, ignore_errors=True)

    dest = tempfile.mkdtemp(prefix="flatlens_patched_")
    shutil.copytree(flatlens_dir, dest, dirs_exist_ok=True)

    # Escape the value for a single-quoted JS string literal so a backslash
    # or quote in api_base cannot break (or inject into) the generated script.
    js_value = api_base.replace("\\", "\\\\").replace("'", "\\'")
    declaration = f"const API_BASE = '{js_value}'"
    _rewrite_text_file(
        os.path.join(dest, "js", "api.js"),
        # A callable replacement is inserted verbatim; a plain string would
        # have its backslash sequences interpreted by re.sub.
        lambda text: re.sub(
            r"const API_BASE\s*=\s*['\"][^'\"]*['\"]",
            lambda _match: declaration,
            text,
        ),
    )

    # Fix logo href in index.html — "/" would conflict with API docs routes
    _rewrite_text_file(
        os.path.join(dest, "index.html"),
        lambda text: text.replace(
            'href="/" class="dashboard-title"',
            'href="/dashboard" class="dashboard-title"',
        ),
    )

    _dashboard_temp_dir = dest
    return dest
166
+
167
+
168
def attach_dashboard(app, api_base):
    """Mount flatlens dashboard at /dashboard with correct API_BASE.

    Does nothing when the dashboard is already attached. When no flatlens
    directory can be located, two warnings are logged and nothing is mounted.
    """
    global _dashboard_attached
    if _dashboard_attached:
        return

    source_dir = _find_flatlens()
    if source_dir:
        # Always copy to temp and patch — never touch the original
        served_dir = _copy_and_patch(source_dir, api_base)
        static_app = StaticFiles(directory=served_dir, html=True)
        app.mount("/dashboard", static_app, name="flatlens")
        _dashboard_attached = True
        logger.info(f"Flatlens dashboard mounted at /dashboard (API_BASE={api_base})")
    else:
        logger.warning("flatlens dashboard not found — skipping /dashboard mount")
        logger.warning("  Set FLATLENS_DIR or install flatlens to ~/.local/share/flatlens")
186
+
187
+
188
+ # ─── App setup ──────────────────────────────────────────────────────────
189
+
190
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
191
+
192
+ from flatseek.api.deps import get_index_manager, IndexManager
193
+ from fastapi import Depends
194
+
195
app = create_app()

# Mount dashboard BEFORE routers so StaticFiles takes priority over /{index} routes.
# FLATSEEK_API_BASE wins when set and non-empty; otherwise fall back to
# localhost on FLATSEEK_PORT (default 8000).
_api_base = os.environ.get("FLATSEEK_API_BASE") or (
    f"http://localhost:{os.environ.get('FLATSEEK_PORT', '8000')}"
)

attach_dashboard(app, _api_base)
203
# Also add a redirect from /dashboard → /dashboard/ (without trailing slash)
from starlette.responses import RedirectResponse


@app.get("/dashboard", include_in_schema=False)
async def redirect_dashboard():
    # Excluded from the OpenAPI schema; answers with a 302 to the
    # slash-terminated dashboard URL.
    target = "/dashboard/"
    return RedirectResponse(url=target, status_code=302)
208
+
209
from flatseek.api.routes.index import router as index_router
from flatseek.api.routes.search import router as search_router
from flatseek.api.routes.aggregate import router as aggregate_router

# Registration order is kept: index, then search, then aggregate.
for _router in (index_router, search_router, aggregate_router):
    app.include_router(_router)
215
+
216
+
217
+ # ─── Root ───────────────────────────────────────────────────────────────
218
+
219
@app.get("/", response_model=RootInfo)
async def root():
    """Root endpoint — API name and version."""
    # Static payload; the keys mirror the RootInfo response model.
    return dict(
        name="Flatseek API",
        version="0.1.0",
        description="Trigram inverted index API",
    )
227
+
228
+
229
@app.get("/_cluster/health", response_model=ClusterHealth)
async def cluster_health(manager: IndexManager = Depends(get_index_manager)):
    """Cluster health (single node). Returns all indices and their count."""
    # Status and node count are constants; only the index list is computed.
    names = manager.list_indices()
    return dict(
        status="green",
        number_of_indices=len(names),
        number_of_nodes=1,
        indices=names,
    )
239
+
240
+
241
@app.get("/_indices", response_model=IndicesList)
async def list_indices(manager: IndexManager = Depends(get_index_manager)):
    """List all available indices in the cluster."""
    # `count` is always the length of the returned name list.
    names = manager.list_indices()
    return {"indices": names, "count": len(names)}
249
+
250
+
251
+ # ─── Run ─────────────────────────────────────────────────────────────────
252
+
253
if __name__ == "__main__":
    import uvicorn

    # Bind address and port come from the environment, with local defaults.
    port = int(os.environ.get("FLATSEEK_PORT", "8000"))
    host = os.environ.get("FLATSEEK_HOST", "0.0.0.0")

    uvicorn.run(
        # Fully qualified import string, matching the `flatseek.api.*`
        # imports this module already requires. The former "api.main:app"
        # only resolved through the sys.path insertion above and loaded this
        # file under a second module name.
        "flatseek.api.main:app",
        host=host,
        port=port,
        # Auto-reload is opt-in: enabled only when FLATSEEK_RELOAD is "1".
        reload=os.environ.get("FLATSEEK_RELOAD", "0") == "1",
    )
@@ -0,0 +1 @@
1
+ # Routes package
@@ -0,0 +1,136 @@
1
+ """Aggregation endpoints (Elasticsearch-inspired).
2
+
3
+ All business logic is delegated to QueryEngine.aggregate() in core.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import logging
9
+ import os
10
+ import time
11
+ from typing import Any
12
+
13
+ from fastapi import APIRouter, Depends, HTTPException, Query, Request
14
+ from pydantic import BaseModel, Field
15
+
16
+ from flatseek.api.deps import get_index_manager, IndexManager
17
+
18
+ router = APIRouter(tags=["aggregations"])
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ # ─── OpenAPI Schemas ──────────────────────────────────────────────────────────
23
+
24
# Schemas below are documented with comments rather than docstrings so the
# generated schema descriptions are not changed.

# One bucket of a `terms` aggregation: the grouped value and its hit count.
class AggBucket(BaseModel):
    key: str | int = Field(..., example="raydium")
    doc_count: int = Field(..., example=892147)


# Result of a `terms` aggregation.
class TermsAgg(BaseModel):
    buckets: list[AggBucket]


# Result of a `stats` aggregation.
class StatsAgg(BaseModel):
    count: int = Field(..., example=1284)
    min: float = Field(..., example=5000.0)
    max: float = Field(..., example=10000.0)
    avg: float = Field(..., example=5500.0)
    sum: float = Field(..., example=7064000000.0)


# Illustrative aggregation container: two named examples, extra keys allowed.
class Aggregations(BaseModel):
    by_program: TermsAgg | None = None
    fee_stats: StatsAgg | None = None

    class Config:
        extra = "allow"


# Hit summary attached to an aggregate response.
class AggregateHits(BaseModel):
    total: int = Field(..., example=1284)


# Response model of the /{index}/_aggregate endpoints.
class AggregateResponse(BaseModel):
    hits: AggregateHits
    # Free-form mapping; not validated against the Aggregations model.
    aggregations: dict
56
+
57
+
58
def _unlock_encrypted_index(index, request, manager):
    """Load the decryption key for an encrypted index into its engine.

    The passphrase stored on *manager* is used when present; otherwise the
    ``x-index-password`` request header is consulted. On success the
    passphrase is stored on *manager*.

    Raises:
        HTTPException: 403 when no passphrase is available, 401 when the key
            cannot be loaded with it. An HTTPException raised by
            ``manager.get_engine`` (invalid or missing index) passes through
            unchanged.
    """
    passphrase = manager.get_password(index)
    if not passphrase and request:
        passphrase = request.headers.get("x-index-password")
    if not passphrase:
        raise HTTPException(
            403,
            f"Index '{index}' is encrypted. Submit password via POST /{index}/_authenticate"
        )
    try:
        from flatseek.core.query_engine import load_encryption_key
        index_dir = os.path.join(manager.data_dir, index)
        key = load_encryption_key(index_dir, passphrase)
        manager.get_engine(index).set_key(key)
        manager.set_password(index, passphrase)
    except HTTPException:
        # A missing or invalid index is not a passphrase problem.
        raise
    except Exception as e:
        raise HTTPException(401, "Invalid passphrase for encrypted index") from e


@router.post("/{index}/_aggregate", response_model=AggregateResponse)
async def aggregate(
    index: str,
    body: dict[str, Any],
    request: Request = None,
    manager: IndexManager = Depends(get_index_manager),
):
    """Run aggregations (facets, statistics, buckets) over matching documents.

    Supports `terms` (top-N by count), `stats` (count/min/max/avg/sum), and
    `cardinality` (HyperLogLog unique count) aggregations. Combine multiple
    aggregations in one request for rich analytics.

    Example — program breakdown with fee stats:
    ```json
    {
      "query": "status:success",
      "aggs": {
        "by_program": {"terms": {"field": "program", "size": 10}},
        "fee_stats": {"stats": {"field": "fee"}}
      }
    }
    ```
    """
    # Error mapping: 403 missing passphrase, 401 bad passphrase, 400 invalid
    # index name, 404 unknown index, 503 when the engine runs out of memory.
    if manager.is_encrypted(index):
        _unlock_encrypted_index(index, request, manager)

    try:
        engine = manager.get_engine(index)
    except HTTPException:
        # Keep the status chosen by get_engine (400 vs 404).
        raise
    except Exception as e:
        raise HTTPException(404, f"Index not found: {index}") from e

    query = body.get("query", None)
    size = body.get("size", 10)
    aggs = body.get("aggs", {})

    try:
        # The engine call is synchronous; run it in a worker thread so the
        # event loop is not blocked.
        return await asyncio.to_thread(
            engine.aggregate, q=query, aggs=aggs, size=size
        )
    except MemoryError as e:
        raise HTTPException(
            503,
            f"Memory limit exceeded: {e}. Try a narrower query or fewer aggregations."
        ) from e
119
+
120
+
121
@router.get("/{index}/_aggregate", response_model=AggregateResponse)
async def aggregate_get(
    index: str,
    q: str = Query("*", description="Query string"),
    aggs: str = Query(None, description="Aggregations as JSON"),
    request: Request = None,
    manager: IndexManager = Depends(get_index_manager),
):
    """Run aggregations via GET (query params)."""
    # Builds the same body the POST endpoint accepts and delegates to it.
    # An `aggs` value that is not parseable JSON yields a 400.
    body = {"query": q}
    if aggs:
        try:
            body["aggs"] = json.loads(aggs)
        except (ValueError, RecursionError) as e:
            # json.JSONDecodeError subclasses ValueError; RecursionError
            # covers pathologically nested input.
            raise HTTPException(400, "Invalid aggs JSON") from e
    return await aggregate(index, body, request, manager)