flatseek 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flatseek/__init__.py +41 -0
- flatseek/api/__init__.py +1 -0
- flatseek/api/deps.py +128 -0
- flatseek/api/main.py +264 -0
- flatseek/api/routes/__init__.py +1 -0
- flatseek/api/routes/aggregate.py +136 -0
- flatseek/api/routes/index.py +1651 -0
- flatseek/api/routes/search.py +345 -0
- flatseek/api/schemas.py +39 -0
- flatseek/api_server.py +29 -0
- flatseek/cli.py +1825 -0
- flatseek/client.py +443 -0
- flatseek/core/__init__.py +0 -0
- flatseek/core/builder.py +3250 -0
- flatseek/core/chat.py +159 -0
- flatseek/core/classify.py +436 -0
- flatseek/core/query_engine.py +1594 -0
- flatseek/core/query_parser.py +324 -0
- flatseek/core/scanner.py +274 -0
- flatseek/test/__init__.py +0 -0
- flatseek/test/fixtures.py +170 -0
- flatseek/test/test_api.py +347 -0
- flatseek/test/test_cli.py +327 -0
- flatseek-0.1.0.dist-info/METADATA +277 -0
- flatseek-0.1.0.dist-info/RECORD +29 -0
- flatseek-0.1.0.dist-info/WHEEL +5 -0
- flatseek-0.1.0.dist-info/entry_points.txt +3 -0
- flatseek-0.1.0.dist-info/licenses/LICENSE +201 -0
- flatseek-0.1.0.dist-info/top_level.txt +1 -0
flatseek/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Flatseek — dual-mode client for Flatseek trigram index.
|
|
2
|
+
|
|
3
|
+
Usage (API mode):
|
|
4
|
+
from flatseek import Flatseek
|
|
5
|
+
|
|
6
|
+
client = Flatseek("http://localhost:8000")
|
|
7
|
+
result = client.search(index="solana_txs", q="program:raydium AND amount:>1000000")
|
|
8
|
+
|
|
9
|
+
Usage (direct mode):
|
|
10
|
+
from flatseek import Flatseek
|
|
11
|
+
|
|
12
|
+
qe = Flatseek("./data") # single index
|
|
13
|
+
qe = Flatseek("./data", index="solana_txs") # named sub-index
|
|
14
|
+
result = qe.search(q="program:raydium AND signer:*7xMg*", size=10)
|
|
15
|
+
|
|
16
|
+
# Aggregation (direct mode, with query filter)
|
|
17
|
+
result = qe.aggregate(q="status:active AND country:ID", aggs={
|
|
18
|
+
"by_campaign": {"terms": {"field": "campaign", "size": 10}},
|
|
19
|
+
"bid_stats": {"stats": {"field": "bid"}}
|
|
20
|
+
})
|
|
21
|
+
print(result.aggs["by_campaign"]["buckets"])
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
__version__ = "0.1.0"
|
|
25
|
+
|
|
26
|
+
from flatseek.client import (
|
|
27
|
+
Flatseek,
|
|
28
|
+
Response,
|
|
29
|
+
CountResponse,
|
|
30
|
+
AggsResponse,
|
|
31
|
+
Elasticsearch,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"Flatseek",
|
|
36
|
+
"Response",
|
|
37
|
+
"CountResponse",
|
|
38
|
+
"AggsResponse",
|
|
39
|
+
"Elasticsearch",
|
|
40
|
+
"__version__",
|
|
41
|
+
]
|
flatseek/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Flatseek API - FastAPI wrapper for trigram inverted index
|
flatseek/api/deps.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Dependency injection for Flatseek API."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from contextlib import asynccontextmanager
|
|
5
|
+
from typing import TYPE_CHECKING, AsyncGenerator, Any
|
|
6
|
+
|
|
7
|
+
from fastapi import Depends, HTTPException
|
|
8
|
+
|
|
9
|
+
# Default data directory
|
|
10
|
+
DEFAULT_DATA_DIR = os.environ.get("FLATSEEK_DATA_DIR", "data")
|
|
11
|
+
|
|
12
|
+
# Import QueryEngine lazily to avoid circular imports
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from flatseek.core.query_engine import QueryEngine
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class IndexManager:
    """Manage one lazily created QueryEngine per index under a data directory.

    Engines are cached by index name after their first successful lookup.
    Passwords for encrypted indexes are held in memory only, keyed by index
    name.
    """

    def __init__(self, data_dir: str = DEFAULT_DATA_DIR):
        self.data_dir = data_dir
        # index name -> QueryEngine, filled on the first get_engine() call
        self._engines: dict[str, Any] = {}
        # Per-index passwords for encrypted indexes
        self._index_passwords: dict[str, str] = {}

    @staticmethod
    def _is_valid_name(index: str) -> bool:
        """Return True when *index* is an acceptable, relative index name.

        Allowed characters are alphanumerics plus "_", "-" and "/" (the
        latter for "parent/sub" names). A leading "/" is rejected because
        os.path.join() drops data_dir when handed an absolute component,
        which would let a request address directories outside data_dir.
        """
        if index.startswith("/"):
            return False
        return index.replace("_", "").replace("-", "").replace("/", "").isalnum()

    def _candidate_paths(self, index: str) -> list[str]:
        """Return the directories probed for *index*, in lookup order."""
        return [
            os.path.join(self.data_dir, index),
            os.path.join(self.data_dir, index, "..", index),
        ]

    def set_password(self, index: str, password: str) -> None:
        """Store password for an encrypted index."""
        self._index_passwords[index] = password

    def get_password(self, index: str) -> str | None:
        """Get stored password for an index, or None."""
        return self._index_passwords.get(index)

    def clear_password(self, index: str) -> None:
        """Remove stored password for an index."""
        self._index_passwords.pop(index, None)

    def is_encrypted(self, index: str) -> bool:
        """Check if a specific index is encrypted.

        Each index stores its own encryption.json inside its folder.
        If that file exists, the index is encrypted. Names that get_engine()
        would reject are reported as not encrypted without touching the
        filesystem, so they cannot be used to probe paths outside data_dir.
        """
        if not self._is_valid_name(index):
            return False
        return any(
            os.path.isfile(os.path.join(index_path, "encryption.json"))
            for index_path in self._candidate_paths(index)
        )

    def get_engine(self, index: str) -> "QueryEngine":
        """Get or create QueryEngine for an index.

        Raises:
            HTTPException: 400 when the name is not acceptable, 404 when no
                candidate directory contains an "index" sub-directory.
        """
        if not self._is_valid_name(index):
            raise HTTPException(400, f"Invalid index name: {index}")

        engine = self._engines.get(index)
        if engine is not None:
            return engine

        # Imported here rather than at module level to avoid circular imports.
        from flatseek.core.query_engine import QueryEngine

        for path in self._candidate_paths(index):
            if os.path.isdir(os.path.join(path, "index")):
                engine = QueryEngine(path)
                self._engines[index] = engine
                return engine

        raise HTTPException(404, f"Index not found: {index}")

    def list_indices(self) -> list[str]:
        """List all available indices.

        A directory counts as an index when it contains an "index"
        sub-directory; otherwise its children are checked one level down and
        reported as "parent/child". The result is sorted.
        """
        if not os.path.isdir(self.data_dir):
            return []

        indices = []
        for name in os.listdir(self.data_dir):
            path = os.path.join(self.data_dir, name)
            if os.path.isdir(os.path.join(path, "index")):
                indices.append(name)
            elif os.path.isdir(path):
                # Check for sub-indexes
                for sub in os.listdir(path):
                    if os.path.isdir(os.path.join(path, sub, "index")):
                        indices.append(f"{name}/{sub}")
        return sorted(indices)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Global index manager
|
|
97
|
+
_index_manager: IndexManager | None = None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def get_index_manager() -> IndexManager:
    """Get the global IndexManager instance, re-reading FLATSEEK_DATA_DIR each call.

    The manager is created on first use. On every call the current value of
    FLATSEEK_DATA_DIR (when set) is applied, which lets tests repoint the
    manager between cases. When the directory actually changes, the cached
    engines are dropped, because they were opened against the previous
    directory and would otherwise keep being served under the same names.
    """
    global _index_manager
    env_dir = os.environ.get("FLATSEEK_DATA_DIR")
    if _index_manager is None:
        # Honour the environment at call time; fall back to the import-time default.
        _index_manager = IndexManager(env_dir if env_dir is not None else DEFAULT_DATA_DIR)
    elif env_dir is not None and env_dir != _index_manager.data_dir:
        # Pick up changed FLATSEEK_DATA_DIR for tests
        _index_manager.data_dir = env_dir
        _index_manager._engines.clear()
    return _index_manager
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
async def get_query_engine(
    index: str,
    manager: IndexManager = Depends(get_index_manager),
) -> "QueryEngine":
    """Resolve the QueryEngine for *index* through the injected IndexManager.

    Any exception raised by ``manager.get_engine`` propagates unchanged.
    """
    engine = manager.get_engine(index)
    return engine
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def require_index(
    index: str,
    manager: IndexManager = Depends(get_index_manager),
) -> str:
    """Dependency that validates index exists.

    Returns:
        The index name unchanged when an engine can be obtained for it.

    Raises:
        HTTPException: whatever ``manager.get_engine`` raised (it already
            distinguishes an invalid name from a missing index), or 404 for
            any other failure while opening the index.
    """
    try:
        manager.get_engine(index)
    except HTTPException:
        # Keep the status chosen by get_engine instead of rewriting it to 404.
        raise
    except Exception as e:
        raise HTTPException(404, str(e)) from e
    return index
|
flatseek/api/main.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""FastAPI application for Flatseek (Elasticsearch-like API)."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
import tempfile
|
|
8
|
+
import shutil
|
|
9
|
+
from fastapi.staticfiles import StaticFiles
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
# Configure logging
|
|
13
|
+
logging.basicConfig(
|
|
14
|
+
level=logging.INFO,
|
|
15
|
+
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
|
16
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
17
|
+
)
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def create_app():
    """Build the FastAPI application with permissive CORS enabled.

    If importing FastAPI (or anything else inside the guarded section)
    raises ImportError, a ``_MockFastAPI`` instance is returned instead.
    """
    try:
        from fastapi import FastAPI
        from fastapi.middleware.cors import CORSMiddleware

        summary = (
            "Trigram inverted index API — search, aggregate, and index your data. "
            "Supports Solana blockchain txs, aviation ADS-B, AdTech campaigns, DevOps logs, and more."
        )
        application = FastAPI(
            title="Flatseek API",
            description=summary,
            version="0.1.0",
            terms_of_service="https://flatseek.io/terms",
            contact={"name": "Flatseek", "url": "https://flatseek.io"},
        )

        # Every origin, method and header is allowed, with credentials.
        cors_options = {
            "allow_origins": ["*"],
            "allow_credentials": True,
            "allow_methods": ["*"],
            "allow_headers": ["*"],
        }
        application.add_middleware(CORSMiddleware, **cors_options)
    except ImportError:
        return _MockFastAPI()
    return application
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ─── OpenAPI Schemas ──────────────────────────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
class RootInfo(BaseModel):
    # Response schema of the root endpoint ("/"): API name, version, description.
    # Documented with comments rather than a docstring so the generated
    # schema description is not altered.
    name: str = Field(..., example="Flatseek API")
    # Example matches the version string the root endpoint actually returns.
    version: str = Field(..., example="0.1.0")
    description: str = Field(..., example="Trigram inverted index API")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class ClusterHealth(BaseModel):
    # Response schema of GET /_cluster/health.
    status: str = Field(..., example="green")
    number_of_indices: int = Field(..., example=3)
    number_of_nodes: int = Field(..., example=1)
    # Names of the indices counted in number_of_indices.
    indices: list[str]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class IndicesList(BaseModel):
    # Response schema of GET /_indices: the index names and how many there are.
    indices: list[str]
    count: int
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class _MockFastAPI:
    """Minimal FastAPI mock for testing without FastAPI installed.

    It records what is registered instead of serving anything:

    * ``_routes`` holds ``(path, method, func)`` tuples and included routers,
    * ``_middlewares`` holds ``(args, kwargs)`` tuples,
    * ``_mounts`` holds ``(path, app, name)`` tuples.

    The route decorators accept and ignore FastAPI keyword options such as
    ``response_model`` or ``include_in_schema`` so module-level route
    definitions written for the real FastAPI also work against the mock.
    """

    def __init__(self):
        self._routes = []
        self._middlewares = []
        self._mounts = []

    def add_middleware(self, *args, **kwargs):
        """Record a middleware registration."""
        self._middlewares.append((args, kwargs))

    def include_router(self, router):
        """Record an included router."""
        self._routes.append(router)

    def mount(self, path, app=None, name=None):
        """Record a sub-application mount (e.g. a static-files app)."""
        self._mounts.append((path, app, name))

    def _route(self, method, path):
        """Return a decorator that records ``(path, method, func)`` and returns func unchanged."""
        def decorator(func):
            self._routes.append((path, method, func))
            return func
        return decorator

    def get(self, path, **kwargs):
        """Register a GET handler; extra keyword options are ignored."""
        return self._route("GET", path)

    def post(self, path, **kwargs):
        """Register a POST handler; extra keyword options are ignored."""
        return self._route("POST", path)

    def delete(self, path, **kwargs):
        """Register a DELETE handler; extra keyword options are ignored."""
        return self._route("DELETE", path)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ─── Dashboard (flatlens) mounting ────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
_dashboard_attached = False
|
|
104
|
+
_dashboard_temp_dir = None # holds patched flatlens dir if we rewrite API_BASE
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _find_flatlens():
    """Locate a flatlens directory, returning its path or None.

    A location qualifies when it is a directory containing ``index.html``.
    Locations are tried in this order:

    1. ``flatlens`` two levels above the parent of the package directory
       (the package directory being two ``dirname`` steps up from this file),
    2. the ``FLATSEEK_FLATLENS_DIR`` environment variable,
    3. the ``FLATLENS_DIR`` environment variable,
    4. ``~/.local/share/flatlens``,
    5. ``/opt/flatlens``.
    """
    package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    package_parent = os.path.dirname(package_dir)

    search_order = (
        os.path.normpath(os.path.join(package_parent, "..", "..", "flatlens")),
        os.environ.get("FLATSEEK_FLATLENS_DIR", ""),
        os.environ.get("FLATLENS_DIR", ""),
        os.path.join(os.path.expanduser("~"), ".local", "share", "flatlens"),
        "/opt/flatlens",
    )

    for location in search_order:
        # Unset environment variables show up as empty strings.
        if not location:
            continue
        if not os.path.isdir(location):
            continue
        if os.path.isfile(os.path.join(location, "index.html")):
            return location
    return None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _copy_and_patch(flatlens_dir, api_base):
    """Copy flatlens to temp dir with API_BASE rewrite + logo href patch.

    Always copies to a temp dir so we never modify the original flatlens source.
    A temp dir left by a previous call is removed first, and the new one is
    remembered in the module-level ``_dashboard_temp_dir``.

    Args:
        flatlens_dir: directory to copy from.
        api_base: value written into the ``const API_BASE = '...'``
            assignment of ``js/api.js`` (when that file exists).

    Returns:
        Path of the patched copy.
    """
    global _dashboard_temp_dir
    if _dashboard_temp_dir:
        shutil.rmtree(_dashboard_temp_dir, ignore_errors=True)

    dest = tempfile.mkdtemp(prefix="flatlens_patched_")
    shutil.copytree(flatlens_dir, dest, dirs_exist_ok=True)

    api_js = os.path.join(dest, "js", "api.js")
    if os.path.exists(api_js):
        with open(api_js, "r", encoding="utf-8") as f:
            content = f.read()
        replacement = f"const API_BASE = '{api_base}'"
        # A callable replacement is inserted literally; a string replacement
        # would have its backslashes processed as escapes / group references.
        content = re.sub(
            r"const API_BASE\s*=\s*['\"][^'\"]*['\"]",
            lambda _match: replacement,
            content,
        )
        with open(api_js, "w", encoding="utf-8") as f:
            f.write(content)

    # Fix logo href in index.html — "/" would conflict with API docs routes
    index_html = os.path.join(dest, "index.html")
    if os.path.exists(index_html):
        with open(index_html, "r", encoding="utf-8") as f:
            content = f.read()
        content = content.replace(
            'href="/" class="dashboard-title"',
            'href="/dashboard" class="dashboard-title"',
        )
        with open(index_html, "w", encoding="utf-8") as f:
            f.write(content)

    _dashboard_temp_dir = dest
    return dest
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def attach_dashboard(app, api_base):
    """Serve a patched copy of flatlens under /dashboard, at most once.

    Does nothing when the dashboard was already attached. When no flatlens
    directory can be found, two warnings are logged and nothing is mounted.
    """
    global _dashboard_attached
    if _dashboard_attached:
        return

    source_dir = _find_flatlens()
    if source_dir:
        # The original directory is never modified; a temp copy is patched and served.
        served_dir = _copy_and_patch(source_dir, api_base)
        static_app = StaticFiles(directory=served_dir, html=True)
        app.mount("/dashboard", static_app, name="flatlens")
        _dashboard_attached = True
        logger.info(f"Flatlens dashboard mounted at /dashboard (API_BASE={api_base})")
        return

    logger.warning("flatlens dashboard not found — skipping /dashboard mount")
    logger.warning(" Set FLATLENS_DIR or install flatlens to ~/.local/share/flatlens")
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# ─── App setup ──────────────────────────────────────────────────────────
|
|
189
|
+
|
|
190
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
191
|
+
|
|
192
|
+
from flatseek.api.deps import get_index_manager, IndexManager
|
|
193
|
+
from fastapi import Depends
|
|
194
|
+
|
|
195
|
+
app = create_app()
|
|
196
|
+
|
|
197
|
+
# The dashboard is mounted before any router is included so that StaticFiles
# takes priority over the /{index} routes.
# FLATSEEK_API_BASE wins when set and non-empty; otherwise the base URL is
# built from FLATSEEK_PORT (default 8000) on localhost.
_api_base = os.environ.get("FLATSEEK_API_BASE", "") or (
    f"http://localhost:{os.environ.get('FLATSEEK_PORT', '8000')}"
)

attach_dashboard(app, _api_base)
|
|
203
|
+
# Also add a redirect from /dashboard → /dashboard/ (without trailing slash)
|
|
204
|
+
from starlette.responses import RedirectResponse
|
|
205
|
+
@app.get("/dashboard", include_in_schema=False)
async def redirect_dashboard():
    # Send the slash-less URL to the trailing-slash form with a 302.
    target = "/dashboard/"
    return RedirectResponse(url=target, status_code=302)
|
|
208
|
+
|
|
209
|
+
from flatseek.api.routes.index import router as index_router
|
|
210
|
+
from flatseek.api.routes.search import router as search_router
|
|
211
|
+
from flatseek.api.routes.aggregate import router as aggregate_router
|
|
212
|
+
app.include_router(index_router)
|
|
213
|
+
app.include_router(search_router)
|
|
214
|
+
app.include_router(aggregate_router)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ─── Root ───────────────────────────────────────────────────────────────
|
|
218
|
+
|
|
219
|
+
@app.get("/", response_model=RootInfo)
async def root():
    """Root endpoint — API name and version."""
    # Static payload; the keys correspond to the RootInfo response model.
    return dict(
        name="Flatseek API",
        version="0.1.0",
        description="Trigram inverted index API",
    )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
@app.get("/_cluster/health", response_model=ClusterHealth)
async def cluster_health(manager: IndexManager = Depends(get_index_manager)):
    """Cluster health (single node). Returns all indices and their count."""
    names = manager.list_indices()
    # Status and node count are fixed values; only the index list is computed.
    health = dict(
        status="green",
        number_of_indices=len(names),
        number_of_nodes=1,
        indices=names,
    )
    return health
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
@app.get("/_indices", response_model=IndicesList)
async def list_indices(manager: IndexManager = Depends(get_index_manager)):
    """List all available indices in the cluster."""
    names = manager.list_indices()
    return dict(indices=names, count=len(names))
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# ─── Run ─────────────────────────────────────────────────────────────────
|
|
252
|
+
|
|
253
|
+
if __name__ == "__main__":
    import uvicorn

    # Host, port and auto-reload come from FLATSEEK_HOST / FLATSEEK_PORT /
    # FLATSEEK_RELOAD; reload is enabled only when FLATSEEK_RELOAD is "1".
    uvicorn.run(
        "api.main:app",
        host=os.environ.get("FLATSEEK_HOST", "0.0.0.0"),
        port=int(os.environ.get("FLATSEEK_PORT", "8000")),
        reload=os.environ.get("FLATSEEK_RELOAD", "0") == "1",
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Routes package
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Aggregation endpoints (Elasticsearch-inspired).
|
|
2
|
+
|
|
3
|
+
All business logic is delegated to QueryEngine.aggregate() in core.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
|
14
|
+
from pydantic import BaseModel, Field
|
|
15
|
+
|
|
16
|
+
from flatseek.api.deps import get_index_manager, IndexManager
|
|
17
|
+
|
|
18
|
+
router = APIRouter(tags=["aggregations"])
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ─── OpenAPI Schemas ──────────────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
class AggBucket(BaseModel):
    # Schema of one bucket; used as the element type of TermsAgg.buckets.
    key: str | int = Field(..., example="raydium")
    doc_count: int = Field(..., example=892147)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TermsAgg(BaseModel):
    # Schema of a terms aggregation result: a list of AggBucket entries.
    buckets: list[AggBucket]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class StatsAgg(BaseModel):
    # Schema of a stats aggregation result (count/min/max/avg/sum).
    count: int = Field(..., example=1284)
    min: float = Field(..., example=5000.0)
    max: float = Field(..., example=10000.0)
    avg: float = Field(..., example=5500.0)
    sum: float = Field(..., example=7064000000.0)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Aggregations(BaseModel):
    # Example aggregation container: two optional named aggregations, with
    # any further keys permitted through the "allow" extra setting below.
    by_program: TermsAgg | None = None
    fee_stats: StatsAgg | None = None

    class Config:
        extra = "allow"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class AggregateHits(BaseModel):
    # "hits" section of an aggregate response; carries only a total.
    total: int = Field(..., example=1284)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class AggregateResponse(BaseModel):
    # Response schema of the /{index}/_aggregate endpoints.
    hits: AggregateHits
    # Left as a free-form dict; its keys are not constrained by this model.
    aggregations: dict
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@router.post("/{index}/_aggregate", response_model=AggregateResponse)
async def aggregate(
    index: str,
    body: dict[str, Any],
    request: Request = None,
    manager: IndexManager = Depends(get_index_manager),
):
    """Run aggregations (facets, statistics, buckets) over matching documents.

    Supports `terms` (top-N by count), `stats` (count/min/max/avg/sum), and
    `cardinality` (HyperLogLog unique count) aggregations. Combine multiple
    aggregations in one request for rich analytics.

    Example — program breakdown with fee stats:
    ```json
    {
      "query": "status:success",
      "aggs": {
        "by_program": {"terms": {"field": "program", "size": 10}},
        "fee_stats": {"stats": {"field": "fee"}}
      }
    }
    ```
    """
    if manager.is_encrypted(index):
        # Prefer a password already stored for this index; otherwise accept
        # one supplied with this request in the x-index-password header.
        stored_pass = manager.get_password(index)
        if not stored_pass and request:
            stored_pass = request.headers.get("x-index-password")
        if not stored_pass:
            raise HTTPException(
                403,
                f"Index '{index}' is encrypted. Submit password via POST /{index}/_authenticate"
            )
        try:
            from flatseek.core.query_engine import load_encryption_key
            index_dir = os.path.join(manager.data_dir, index)
            key = load_encryption_key(index_dir, stored_pass)
            manager.get_engine(index).set_key(key)
            # Remember the password only after the key was derived and applied.
            manager.set_password(index, stored_pass)
        except HTTPException:
            # get_engine() already picked a status (invalid name / not found);
            # do not disguise it as a passphrase failure.
            raise
        except Exception as e:
            raise HTTPException(401, "Invalid passphrase for encrypted index") from e

    try:
        engine = manager.get_engine(index)
    except HTTPException:
        # Preserve the status chosen by get_engine() (e.g. 400 for a bad name).
        raise
    except Exception as e:
        raise HTTPException(404, f"Index not found: {index}") from e

    query = body.get("query", None)
    size = body.get("size", 10)
    aggs = body.get("aggs", {})

    try:
        # The engine call runs in a worker thread so the event loop is not blocked.
        result = await asyncio.to_thread(
            engine.aggregate, q=query, aggs=aggs, size=size
        )
        return result
    except MemoryError as e:
        raise HTTPException(
            503,
            f"Memory limit exceeded: {e}. Try a narrower query or fewer aggregations."
        ) from e
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@router.get("/{index}/_aggregate", response_model=AggregateResponse)
async def aggregate_get(
    index: str,
    q: str = Query("*", description="Query string"),
    aggs: str | None = Query(None, description="Aggregations as JSON"),
    request: Request = None,
    manager: IndexManager = Depends(get_index_manager),
):
    """Run aggregations via GET (query params)."""
    # Build the same body shape the POST handler expects, then delegate to it.
    body = {"query": q}
    if aggs:
        try:
            body["aggs"] = json.loads(aggs)
        except (ValueError, RecursionError) as e:
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input. Anything else is a real bug and
            # should not be reported as a client error.
            raise HTTPException(400, "Invalid aggs JSON") from e
    return await aggregate(index, body, request, manager)
|