n6k 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: n6k
3
+ Version: 0.1.0
4
+ Summary: Arrow/Parquet REST server
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: fastapi>=0.115
7
+ Requires-Dist: pandas>=2
8
+ Requires-Dist: pyarrow>=19
9
+ Requires-Dist: uvicorn[standard]>=0.34
@@ -0,0 +1,9 @@
1
+ n6k_server/__init__.py,sha256=cRelU0AvkI-kzoA58FvX3TuuEF6cOfJovIL28Qvgmik,44
2
+ n6k_server/app.py,sha256=Gg5lTHEAngRpwX1GyQmk12c8ge42-8mnbfE8EoiWdK4,1296
3
+ n6k_server/formats.py,sha256=O9BFNyoYRB7kBohuFL7Sg_LbV7zF3bZ9CBE98y1Pomk,1635
4
+ n6k_server/pushdown.py,sha256=2Mme6B6GTs8t1gDGWjjrwaPu9L8iau3AkjRFzl6n0NY,1793
5
+ n6k_server/routes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ n6k_server/routes/tables.py,sha256=DKVHtsoCYBIj3HwFxGF5Nd4G7j3h2Yy91D6wmAC7bbw,1827
7
+ n6k-0.1.0.dist-info/METADATA,sha256=g2qVb5btgxr0Zx3OanTT7MFXSAD1vGbHui5QKMrXETk,227
8
+ n6k-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
9
+ n6k-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
n6k_server/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from n6k_server.app import create_app, data
n6k_server/app.py ADDED
@@ -0,0 +1,44 @@
1
+ import pandas as pd
2
+ import pyarrow as pa
3
+ from fastapi import FastAPI, Request
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from starlette.exceptions import HTTPException as StarletteHTTPException
6
+ from starlette.responses import PlainTextResponse
7
+
8
+ from n6k_server.routes import tables
9
+
10
+
11
def create_app() -> FastAPI:
    """Build the FastAPI application that serves the registered tables.

    The returned app starts with an empty ``state.data_sources`` registry,
    accepts cross-origin requests from any origin, reports errors as
    plain text, and mounts the table routes under ``/tables``.

    Returns:
        The configured ``FastAPI`` instance.
    """
    application = FastAPI()
    # Registry of table name -> callable producing the table's data.
    application.state.data_sources = {}

    application.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
    )

    @application.exception_handler(StarletteHTTPException)
    async def http_exception_handler(
        request: Request, exc: StarletteHTTPException
    ) -> PlainTextResponse:
        # ``detail`` is not guaranteed to be a string (FastAPI accepts any
        # object), and PlainTextResponse can only encode text, so stringify.
        # Headers carried by the exception (e.g. ``Allow`` on a 405) are
        # forwarded instead of being dropped.
        return PlainTextResponse(
            str(exc.detail),
            status_code=exc.status_code,
            headers=getattr(exc, "headers", None),
        )

    @application.exception_handler(Exception)
    async def plain_text_exception_handler(
        request: Request, exc: Exception
    ) -> PlainTextResponse:
        # Honour a ``status_code`` attribute when the exception carries one,
        # but only if it is an integer; anything else becomes a 500.
        status = getattr(exc, "status_code", 500)
        if not isinstance(status, int) or isinstance(status, bool):
            status = 500
        return PlainTextResponse(str(exc), status_code=status)

    application.include_router(tables.router, prefix="/tables")

    return application
38
+
39
+
40
def data(app: FastAPI, name: str):
    """Return a decorator that registers a function as data source ``name``.

    The decorated function is stored in ``app.state.data_sources`` under
    ``name`` and is returned unchanged, so it remains directly callable.
    """

    def register(source):
        registry = app.state.data_sources
        registry[name] = source
        return source

    return register
n6k_server/formats.py ADDED
@@ -0,0 +1,58 @@
1
+ from enum import StrEnum
2
+ from io import BytesIO
3
+
4
+ import pyarrow as pa
5
+ import pyarrow.ipc as ipc
6
+ import pyarrow.parquet as pq
7
+ from starlette.requests import Request
8
+ from starlette.responses import Response
9
+
10
+
11
+ class ArrowFormat(StrEnum):
12
+ IPC_STREAM = "application/vnd.apache.arrow.stream"
13
+ IPC_FILE = "application/vnd.apache.arrow.file"
14
+ PARQUET = "application/x-parquet"
15
+
16
+
17
+ _ACCEPT_MAP: dict[str, ArrowFormat] = {f.value: f for f in ArrowFormat}
18
+
19
+
20
+ def negotiate(request: Request) -> ArrowFormat:
21
+ accept = request.headers.get("accept", "")
22
+ for media_type in accept.split(","):
23
+ key = media_type.strip().split(";")[0].strip()
24
+ if key in _ACCEPT_MAP:
25
+ return _ACCEPT_MAP[key]
26
+ return ArrowFormat.IPC_STREAM
27
+
28
+
29
def _serialize_ipc_stream(table: pa.Table) -> bytes:
    """Encode ``table`` in the Arrow IPC streaming format and return the bytes."""
    out = pa.BufferOutputStream()
    stream_writer = ipc.new_stream(out, table.schema)
    # Leaving the ``with`` block closes the writer before the buffer is read.
    with stream_writer:
        stream_writer.write_table(table)
    encoded = out.getvalue()
    return encoded.to_pybytes()
34
+
35
+
36
def _serialize_ipc_file(table: pa.Table) -> bytes:
    """Encode ``table`` in the Arrow IPC file format and return the bytes."""
    out = pa.BufferOutputStream()
    file_writer = ipc.new_file(out, table.schema)
    # Leaving the ``with`` block closes the writer before the buffer is read.
    with file_writer:
        file_writer.write_table(table)
    encoded = out.getvalue()
    return encoded.to_pybytes()
41
+
42
+
43
def _serialize_parquet(table: pa.Table) -> bytes:
    """Write ``table`` as Parquet into memory and return the resulting bytes."""
    target = BytesIO()
    pq.write_table(table, target)
    payload = target.getvalue()
    return payload
47
+
48
+
49
# Dispatch table: one encoder function per supported response format.
_SERIALIZERS = {
    ArrowFormat.IPC_STREAM: _serialize_ipc_stream,
    ArrowFormat.IPC_FILE: _serialize_ipc_file,
    ArrowFormat.PARQUET: _serialize_parquet,
}


def arrow_response(table: pa.Table, fmt: ArrowFormat) -> Response:
    """Serialize ``table`` as ``fmt`` and wrap it in an HTTP response.

    The response's media type is the value of ``fmt``. A ``fmt`` with no
    entry in ``_SERIALIZERS`` raises ``KeyError``.
    """
    encode = _SERIALIZERS[fmt]
    payload = encode(table)
    return Response(content=payload, media_type=fmt.value)
n6k_server/pushdown.py ADDED
@@ -0,0 +1,70 @@
1
+ import json
2
+
3
+ import pyarrow as pa
4
+ import pyarrow.compute as pc
5
+
6
+
7
def apply_columns(table: pa.Table, columns_param: str | None) -> pa.Table:
    """Project ``table`` onto the columns named in ``columns_param``.

    Args:
        table: Table to project.
        columns_param: Comma-separated column names, or ``None``/empty for
            no projection. Surrounding whitespace is ignored, unknown names
            are skipped, and a name listed more than once is selected once
            (at its first position) so the result never contains duplicate
            columns.

    Returns:
        ``table`` itself when nothing usable was requested, otherwise a
        table holding only the requested columns in the requested order.
    """
    if not columns_param:
        return table

    # Build the lookup once instead of re-reading the schema per name.
    available = set(table.schema.names)
    requested = (part.strip() for part in columns_param.split(","))
    # dict.fromkeys removes repeats while preserving first-seen order.
    selected = list(
        dict.fromkeys(name for name in requested if name and name in available)
    )
    if not selected:
        return table
    return table.select(selected)
15
+
16
+
17
# Binary comparison operators accepted in a predicate's "op" field.
_OPS = {
    "=": pc.equal,
    "!=": pc.not_equal,
    ">": pc.greater,
    ">=": pc.greater_equal,
    "<": pc.less,
    "<=": pc.less_equal,
}


def _predicate_mask(table: pa.Table, pred: object):
    """Return the boolean mask for one predicate, or ``None`` if unusable."""
    if not isinstance(pred, dict):
        return None

    col = pred.get("col")
    op = pred.get("op")
    # Both must be strings. A JSON list/dict in "op" is unhashable and would
    # make the ``_OPS`` membership test raise instead of being skipped.
    if not isinstance(col, str) or not isinstance(op, str):
        return None
    if col not in table.schema.names:
        return None

    column = table.column(col)
    value = pred.get("value")

    try:
        if op == "is_null":
            return pc.is_null(column)
        if op == "is_not_null":
            return pc.is_valid(column)
        if op == "in":
            if not isinstance(value, list):
                return None
            return pc.is_in(column, pa.array(value))
        if op in _OPS:
            if value is None:
                return None
            return _OPS[op](column, pa.scalar(value))
    except (pa.ArrowException, TypeError, ValueError):
        # The value cannot be converted to, or compared with, this column.
        # Skip the predicate like every other malformed one rather than
        # failing the whole request.
        return None

    return None


def apply_filters(table: pa.Table, filters_param: str | None) -> pa.Table:
    """Filter ``table`` by a JSON-encoded list of predicates.

    ``filters_param`` is expected to decode to a list of objects of the
    form ``{"col": <name>, "op": <operator>, "value": <operand>}``.
    Supported operators are ``=``, ``!=``, ``>``, ``>=``, ``<``, ``<=``,
    ``in`` (``value`` must be a list), ``is_null`` and ``is_not_null``
    (``value`` is ignored). All usable predicates are AND-ed together.

    Filtering is best-effort: undecodable input, a non-list payload, and
    individual predicates that are malformed, name an unknown column or
    operator, or carry a value incompatible with the column are ignored.

    Args:
        table: Table to filter.
        filters_param: JSON text, or ``None``/empty for no filtering.

    Returns:
        ``table`` itself when no predicate applies, otherwise the rows
        satisfying every applied predicate.
    """
    if not filters_param:
        return table
    try:
        predicates = json.loads(filters_param)
    except (json.JSONDecodeError, TypeError, RecursionError):
        # RecursionError: pathologically nested JSON from the client.
        return table
    if not isinstance(predicates, list):
        return table

    mask = None
    for pred in predicates:
        current = _predicate_mask(table, pred)
        if current is None:
            continue
        mask = current if mask is None else pc.and_(mask, current)

    if mask is None:
        return table
    return table.filter(mask)
File without changes
@@ -0,0 +1,47 @@
1
+ import pandas as pd
2
+ import pyarrow as pa
3
+ from fastapi import APIRouter, Request
4
+ from starlette.responses import Response
5
+
6
+ from n6k_server.formats import arrow_response, negotiate
7
+ from n6k_server.pushdown import apply_columns, apply_filters
8
+
9
# Router holding the table endpoints defined in this module.
router = APIRouter()


@router.get("/")
async def list_tables(request: Request) -> Response:
    """List every registered data source as an Arrow table.

    The result has one row per source with three string columns: ``name``,
    ``path`` (the table's URL path) and ``schema_path`` (the URL path of
    its schema endpoint). The encoding follows the request's Accept header.
    """
    registry = request.app.state.data_sources
    names = list(registry)
    paths = [f"/tables/{n}" for n in names]
    schema_paths = [f"/tables/{n}/_schema" for n in names]

    listing = pa.table({
        "name": pa.array(names, type=pa.utf8()),
        "path": pa.array(paths, type=pa.utf8()),
        "schema_path": pa.array(schema_paths, type=pa.utf8()),
    })
    fmt = negotiate(request)
    return arrow_response(listing, fmt)
21
+
22
+
23
@router.get("/{name}")
async def get_table(name: str, request: Request, columns: str | None = None, filters: str | None = None) -> Response:
    """Return the contents of data source ``name``.

    Args:
        name: Registered data source name, taken from the URL path.
        request: Incoming request; supplies the registry and Accept header.
        columns: Optional comma-separated column projection.
        filters: Optional JSON-encoded list of row predicates.

    Returns:
        The table encoded per the Accept header, or a plain-text 404 when
        no source is registered under ``name``.

    Raises:
        TypeError: If the data source does not return a pandas DataFrame.
    """
    fn = request.app.state.data_sources.get(name)
    if fn is None:
        # The body echoes a client-supplied path segment, so declare it as
        # text/plain explicitly; without a Content-Type a browser may sniff
        # the body and render it as HTML.
        return Response(
            content=f"table not found: {name}",
            status_code=404,
            media_type="text/plain",
        )
    df = fn()
    if not isinstance(df, pd.DataFrame):
        raise TypeError(f"data source '{name}' must return a DataFrame, got {type(df).__name__}")
    table = pa.Table.from_pandas(df)
    # Filter before projecting so predicates may reference columns that are
    # not part of the requested projection.
    table = apply_filters(table, filters)
    table = apply_columns(table, columns)
    return arrow_response(table, negotiate(request))
35
+
36
+
37
@router.get("/{name}/_schema")
async def get_schema(name: str, request: Request) -> Response:
    """Return the schema of data source ``name`` as a zero-row table.

    Args:
        name: Registered data source name, taken from the URL path.
        request: Incoming request; supplies the registry and Accept header.

    Returns:
        An empty table carrying the source's schema, encoded per the Accept
        header, or a plain-text 404 when no source is registered under
        ``name``.

    Raises:
        TypeError: If the data source does not return a pandas DataFrame.
    """
    fn = request.app.state.data_sources.get(name)
    if fn is None:
        # The body echoes a client-supplied path segment, so declare it as
        # text/plain explicitly; without a Content-Type a browser may sniff
        # the body and render it as HTML.
        return Response(
            content=f"table not found: {name}",
            status_code=404,
            media_type="text/plain",
        )
    df = fn()
    if not isinstance(df, pd.DataFrame):
        raise TypeError(f"data source '{name}' must return a DataFrame, got {type(df).__name__}")
    table = pa.Table.from_pandas(df)
    empty = table.schema.empty_table()
    return arrow_response(empty, negotiate(request))