datajunction-query 0.0.1a58__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
@@ -1,6 +1,6 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.4
  Name: datajunction-query
- Version: 0.0.1a58
+ Version: 0.0.28
  Summary: OSS Implementation of a DataJunction Query Service
  Project-URL: repository, https://github.com/DataJunction/dj
  Author-email: DataJunction Authors <roberto@dealmeida.net>
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
- Requires-Python: <4.0,>=3.8
+ Requires-Python: <4.0,>=3.10
  Requires-Dist: accept-types==0.4.1
  Requires-Dist: cachelib>=0.4.0
  Requires-Dist: duckdb-engine
@@ -22,15 +22,16 @@ Requires-Dist: duckdb==0.8.1
  Requires-Dist: fastapi>=0.79.0
  Requires-Dist: importlib-metadata
  Requires-Dist: msgpack>=1.0.3
+ Requires-Dist: psycopg[async,pool]>=3.2.1
+ Requires-Dist: pytest-asyncio>=0.24.0
+ Requires-Dist: pytest-integration>=0.2.3
  Requires-Dist: python-dotenv==0.19.2
  Requires-Dist: pyyaml>=6.0.1
  Requires-Dist: requests<=2.29.0,>=2.28.2
  Requires-Dist: rich>=10.16.2
  Requires-Dist: snowflake-connector-python>=3.3.1
- Requires-Dist: sqlalchemy-utils>=0.37.7
- Requires-Dist: sqlalchemy<2.0.0,>=1.4.41
- Requires-Dist: sqlmodel<1.0.0,>=0.0.8
- Requires-Dist: sqlparse<1.0.0,>=0.4.3
+ Requires-Dist: sqlalchemy>=2.0.34
+ Requires-Dist: tenacity>=9.0.0
  Requires-Dist: toml>=0.10.2
  Requires-Dist: trino>=0.324.0
  Provides-Extra: uvicorn
@@ -0,0 +1,24 @@
+ djqs/__about__.py,sha256=oNvpngKiA251lBU0BHPc_2TiTzMx8pneErlLS5cOHSA,50
+ djqs/__init__.py,sha256=nN5-uJoSVEwuc8n-wMygqeF0Xhxi_zqqbCgutZvAt3E,384
+ djqs/config.py,sha256=2tfHXFbW9HgvIEQ1DT4_WnSfRoxCRHzS_ateekj0I94,6664
+ djqs/constants.py,sha256=WS0uC-cKLQ_ZPhtevcv6tJ8WI6UtxYGZwTqJJtsTep0,485
+ djqs/engine.py,sha256=J5vHXG_xspI0AfJX3Mp0bZ99h5u7zJCVifDMZ-45w6g,7759
+ djqs/enum.py,sha256=dHX8Z_0I_e7HQDUxQdpt18l7tC1P0NZru86ZGmMw23g,591
+ djqs/exceptions.py,sha256=B9RqLG1Y3-5vz-iBiEKPIUBcE6TLpIfRZQ_rCyYFPqk,5983
+ djqs/fixes.py,sha256=TcXnh0I1z4vEAupPatzrvnqyToGihndnxmLJtIn-_Z8,33
+ djqs/typing.py,sha256=TpZHhrK_lzEYg_ZlT5qVCJz8seQBKwrULiTPO-lMxEU,6220
+ djqs/utils.py,sha256=iTctVXY4uSQnGn1O-_CdDXldiOwfufz582Lo8izox9M,906
+ djqs/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ djqs/api/helpers.py,sha256=V-CFmflRYorZ90BI7b8EPWbsP8j7RKn0aDItC3-_QdQ,1359
+ djqs/api/main.py,sha256=SR0vbX0W1v8orfluyfsALrT1G3bF9emH1KBWoO9lcyw,2237
+ djqs/api/queries.py,sha256=mcYI2xTvnBnHiNZcw0n5gjGdlAJbvmGsY6xovx_yjmE,7905
+ djqs/api/tables.py,sha256=c3_VugV5fqGQU0uf5Mn7fx7LJ6ZjzOB5svCW40r_yE0,1626
+ djqs/db/postgres.py,sha256=MnCKlZiX3PgS3qGUvwXHfjrV9Bgjb_css7x7vSlZ6_8,4863
+ djqs/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ djqs/models/query.py,sha256=n02TPSq8cZOuH49FyVieVwhGdrkaZ-4NpqCSuOgL5X4,3324
+ djqs/models/table.py,sha256=EEJigO3RNjn-O9VWD7n3BtcvyKokQUiI5mfwKaGe-Lc,239
+ datajunction_query-0.0.28.dist-info/METADATA,sha256=NcBrrl8KkpQSqw1PIR5dgfnA4cslBKEv6nGqIM00NYw,9523
+ datajunction_query-0.0.28.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ datajunction_query-0.0.28.dist-info/licenses/AUTHORS.rst,sha256=G9YmXPfQ0kAuxqlCwvWVvUnZitP9hAc-rPfZ5q7Pv1U,235
+ datajunction_query-0.0.28.dist-info/licenses/LICENSE.txt,sha256=KuSxhVgPuUGoYWphJig4POcTAIUNLUj8vOx-cqQFMj8,1081
+ datajunction_query-0.0.28.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.25.0
+ Generator: hatchling 1.28.0
  Root-Is-Purelib: true
  Tag: py3-none-any
djqs/__about__.py CHANGED
@@ -1,4 +1,5 @@
  """
  Version for Hatch
  """
- __version__ = "0.0.1a58"
+
+ __version__ = "0.0.28"
djqs/api/helpers.py CHANGED
@@ -1,48 +1,13 @@
  """
  Helper functions for API
  """
- from http import HTTPStatus
+
  from typing import Any, Dict, List, Optional

- from fastapi import HTTPException
- from sqlalchemy import inspect
- from sqlalchemy.exc import NoResultFound, NoSuchTableError, OperationalError
- from sqlmodel import Session, create_engine, select
+ from sqlalchemy import create_engine, inspect
+ from sqlalchemy.exc import NoSuchTableError, OperationalError

  from djqs.exceptions import DJException, DJTableNotFound
- from djqs.models.catalog import Catalog
- from djqs.models.engine import Engine
-
-
- def get_catalog(session: Session, name: str) -> Catalog:
-     """
-     Get a catalog by name
-     """
-     statement = select(Catalog).where(Catalog.name == name)
-     catalog = session.exec(statement).one_or_none()
-     if not catalog:
-         raise DJException(
-             message=f"Catalog with name `{name}` does not exist.",
-             http_status_code=404,
-         )
-     return catalog
-
-
- def get_engine(session: Session, name: str, version: str) -> Engine:
-     """
-     Return an Engine instance given an engine name and version
-     """
-     statement = (
-         select(Engine).where(Engine.name == name).where(Engine.version == version)
-     )
-     try:
-         engine = session.exec(statement).one()
-     except NoResultFound as exc:
-         raise HTTPException(
-             status_code=HTTPStatus.NOT_FOUND,
-             detail=f"Engine not found: `{name}` version `{version}`",
-         ) from exc
-     return engine


  def get_columns(
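
With the sqlmodel helpers (get_catalog, get_engine) gone, column reflection in this module goes through plain SQLAlchemy. The body of get_columns is not part of this diff, so the following is only an illustrative sketch of that style of reflection; the function name reflect_columns and its return shape are assumptions for the example, not the package's API.

# Illustrative sketch only: get_columns itself is not shown in this diff.
from typing import Any, Dict, List

from sqlalchemy import create_engine, inspect
from sqlalchemy.exc import NoSuchTableError


def reflect_columns(uri: str, schema: str, table: str) -> List[Dict[str, Any]]:
    """Reflect a table's columns with a SQLAlchemy inspector."""
    engine = create_engine(uri)
    inspector = inspect(engine)
    try:
        columns = inspector.get_columns(table, schema=schema)
    except NoSuchTableError:
        return []
    return [{"name": col["name"], "type": str(col["type"])} for col in columns]
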
djqs/api/main.py CHANGED
@@ -12,29 +12,41 @@ from contextlib import asynccontextmanager

  from fastapi import FastAPI, Request
  from fastapi.responses import JSONResponse
+ from psycopg.rows import dict_row
+ from psycopg_pool import AsyncConnectionPool

  from djqs import __version__
- from djqs.api import catalogs, engines, queries, tables
- from djqs.config import load_djqs_config
+ from djqs.api import queries, tables
  from djqs.exceptions import DJException
- from djqs.utils import get_session, get_settings
+ from djqs.utils import get_settings

  _logger = logging.getLogger(__name__)

  settings = get_settings()
- session = next(get_session())


  @asynccontextmanager
- async def lifespan(app: FastAPI): # pylint: disable=W0621,W0613
+ async def lifespan(fastapi_app: FastAPI):
      """
-     Load DJQS config on app startup
+     Create a postgres connection pool and store it in the app state
      """
+     _logger.info("Starting PostgreSQL connection pool...")
+     pool = AsyncConnectionPool(
+         settings.index,
+         kwargs={"row_factory": dict_row},
+         check=AsyncConnectionPool.check_connection,
+         min_size=5,
+         max_size=20,
+         timeout=15,
+     )
+     fastapi_app.state.pool = pool
      try:
-         load_djqs_config(settings=settings, session=session)
-     except Exception as e: # pylint: disable=W0718
-         _logger.warning("Could not load DJQS config: %s", e)
-     yield
+         _logger.info("PostgreSQL connection pool started with DSN: %s", settings.index)
+         yield
+     finally:
+         _logger.info("Closing PostgreSQL connection pool")
+         await pool.close()
+         _logger.info("PostgreSQL connection pool closed")


  app = FastAPI(
@@ -47,12 +59,10 @@ app = FastAPI(
      },
      lifespan=lifespan,
  )
- app.include_router(catalogs.get_router)
- app.include_router(engines.get_router)
  app.include_router(queries.router)
  app.include_router(tables.router)
- app.include_router(catalogs.post_router) if settings.enable_dynamic_config else None
- app.include_router(engines.post_router) if settings.enable_dynamic_config else None
+
+ app.router.lifespan_context = lifespan


  @app.exception_handler(DJException)
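
The new lifespan hook creates an async psycopg connection pool and attaches it to app.state; request handlers then obtain it through a FastAPI dependency (djqs.db.postgres.get_postgres_pool, used in the queries module below). A minimal sketch of what such a dependency can look like, assuming the pool is stored exactly as above; the real implementation lives in djqs/db/postgres.py and is not shown in this diff.

# Minimal sketch of a pool dependency; the actual get_postgres_pool in
# djqs/db/postgres.py is not shown in this diff and may differ.
from fastapi import Request
from psycopg_pool import AsyncConnectionPool


async def get_postgres_pool(request: Request) -> AsyncConnectionPool:
    """Return the connection pool stored on app.state by the lifespan hook."""
    return request.app.state.pool
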
djqs/api/queries.py CHANGED
@@ -1,11 +1,13 @@
  """
  Query related APIs.
  """
+
  import json
  import logging
  import uuid
+ from dataclasses import asdict
  from http import HTTPStatus
- from typing import Any, List, Optional
+ from typing import Any, Dict, List, Optional

  import msgpack
  from accept_types import get_best_match
@@ -19,21 +21,21 @@ from fastapi import (
      Request,
      Response,
  )
- from sqlmodel import Session
+ from psycopg_pool import AsyncConnectionPool

  from djqs.config import Settings
+ from djqs.db.postgres import DBQuery, get_postgres_pool
  from djqs.engine import process_query
  from djqs.models.query import (
      Query,
      QueryCreate,
      QueryResults,
      QueryState,
-     Results,
      StatementResults,
      decode_results,
      encode_results,
  )
- from djqs.utils import get_session, get_settings
+ from djqs.utils import get_settings

  _logger = logging.getLogger(__name__)
  router = APIRouter(tags=["SQL Queries"])
@@ -49,32 +51,24 @@ router = APIRouter(tags=["SQL Queries"])
              "description": "Return results as JSON or msgpack",
          },
      },
-     openapi_extra={
-         "requestBody": {
-             "content": {
-                 "application/json": {
-                     "schema": QueryCreate.schema(
-                         ref_template="#/components/schemas/{model}",
-                     ),
-                 },
-                 "application/msgpack": {
-                     "schema": QueryCreate.schema(
-                         ref_template="#/components/schemas/{model}",
-                     ),
-                 },
-             },
-         },
-     },
  )
  async def submit_query( # pylint: disable=too-many-arguments
      accept: Optional[str] = Header(None),
      *,
-     session: Session = Depends(get_session),
      settings: Settings = Depends(get_settings),
      request: Request,
      response: Response,
+     postgres_pool: AsyncConnectionPool = Depends(get_postgres_pool),
      background_tasks: BackgroundTasks,
-     body: Any = Body(...),
+     body: Any = Body(
+         ...,
+         example={
+             "catalog_name": "warehouse",
+             "engine_name": "trino",
+             "engine_version": "451",
+             "submitted_query": "select * from tpch.sf1.customer limit 10",
+         },
+     ),
  ) -> QueryResults:
      """
      Run or schedule a query.
@@ -86,7 +80,7 @@ async def submit_query( # pylint: disable=too-many-arguments
      if content_type == "application/json":
          data = body
      elif content_type == "application/msgpack":
-         data = msgpack.unpackb(body, ext_hook=decode_results)
+         data = json.loads(msgpack.unpackb(body, ext_hook=decode_results))
      elif content_type is None:
          raise HTTPException(
              status_code=HTTPStatus.BAD_REQUEST,
@@ -97,14 +91,25 @@
              status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
              detail=f"Content type not accepted: {content_type}",
          )
+
+     # Set default catalog and engine if not explicitly specified in submitted query
+     if not data.get("engine_name") and not data.get("engine_version"):
+         data["engine_name"] = settings.default_engine
+         data["engine_version"] = settings.default_engine_version
+     else:
+         data["engine_name"] = data.get("engine_name") or settings.default_engine
+         data["engine_version"] = data.get("engine_version") or ""
+     data["catalog_name"] = data.get("catalog_name") or settings.default_catalog
+
      create_query = QueryCreate(**data)

-     query_with_results = save_query_and_run(
-         create_query,
-         session,
-         settings,
-         response,
-         background_tasks,
+     query_with_results = await save_query_and_run(
+         create_query=create_query,
+         settings=settings,
+         response=response,
+         background_tasks=background_tasks,
+         postgres_pool=postgres_pool,
+         headers=request.headers,
      )

      return_type = get_best_match(accept, ["application/json", "application/msgpack"])
@@ -116,11 +121,11 @@

      if return_type == "application/msgpack":
          content = msgpack.packb(
-             query_with_results.dict(by_alias=True),
+             asdict(query_with_results),
              default=encode_results,
          )
      else:
-         content = query_with_results.json(by_alias=True)
+         content = json.dumps(asdict(query_with_results), default=str)

      return Response(
          content=content,
@@ -129,30 +134,76 @@
      )


- def save_query_and_run(
+ async def save_query_and_run( # pylint: disable=R0913
      create_query: QueryCreate,
-     session: Session,
      settings: Settings,
      response: Response,
      background_tasks: BackgroundTasks,
+     postgres_pool: AsyncConnectionPool,
+     headers: Optional[Dict[str, str]] = None,
  ) -> QueryResults:
      """
      Store a new query to the DB and run it.
      """
-     query = Query(**create_query.dict(by_alias=True))
+     query = Query(
+         catalog_name=create_query.catalog_name, # type: ignore
+         engine_name=create_query.engine_name, # type: ignore
+         engine_version=create_query.engine_version, # type: ignore
+         submitted_query=create_query.submitted_query,
+         async_=create_query.async_,
+     )
      query.state = QueryState.ACCEPTED

-     session.add(query)
-     session.commit()
-     session.refresh(query)
+     async with postgres_pool.connection() as conn:
+         results = (
+             await DBQuery()
+             .save_query(
+                 query_id=query.id,
+                 catalog_name=query.catalog_name,
+                 engine_name=query.engine_name,
+                 engine_version=query.engine_version,
+                 submitted_query=query.submitted_query,
+                 async_=query.async_,
+                 state=query.state.value,
+             )
+             .execute(conn=conn)
+         )
+         query_save_result = results[0]
+         if not query_save_result: # pragma: no cover
+             raise HTTPException(
+                 status_code=HTTPStatus.NOT_FOUND,
+                 detail="Query failed to save",
+             )

-     if query.async_:
-         background_tasks.add_task(process_query, session, settings, query)
+         if query.async_:
+             background_tasks.add_task(
+                 process_query,
+                 settings,
+                 postgres_pool,
+                 query,
+                 headers,
+             )

-         response.status_code = HTTPStatus.CREATED
-         return QueryResults(results=[], errors=[], **query.dict())
+             response.status_code = HTTPStatus.CREATED
+             return QueryResults(
+                 id=query.id,
+                 catalog_name=query.catalog_name,
+                 engine_name=query.engine_name,
+                 engine_version=query.engine_version,
+                 submitted_query=query.submitted_query,
+                 executed_query=query.executed_query,
+                 state=QueryState.SCHEDULED,
+                 results=[],
+                 errors=[],
+             )

-     return process_query(session, settings, query)
+         query_results = await process_query(
+             settings=settings,
+             postgres_pool=postgres_pool,
+             query=query,
+             headers=headers,
+         )
+         return query_results


  def load_query_results(
@@ -169,7 +220,7 @@
          _logger.info("Reading results from results backend")
          cached = settings.results_backend.get(key)
          query_results = json.loads(cached)
-     else:
+     else: # pragma: no cover
          _logger.warning("No results found")
          query_results = []

@@ -177,11 +228,11 @@


  @router.get("/queries/{query_id}/", response_model=QueryResults)
- def read_query(
+ async def read_query(
      query_id: uuid.UUID,
      *,
-     session: Session = Depends(get_session),
      settings: Settings = Depends(get_settings),
+     postgres_pool: AsyncConnectionPool = Depends(get_postgres_pool),
  ) -> QueryResults:
      """
      Fetch information about a query.
@@ -189,15 +240,26 @@
      For paginated queries we move the data from the results backend to the cache for a
      short period, anticipating additional requests.
      """
-     query = session.get(Query, query_id)
-     if not query:
-         raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Query not found")
+     async with postgres_pool.connection() as conn:
+         dbquery_results = (
+             await DBQuery().get_query(query_id=query_id).execute(conn=conn)
+         )
+         queries = dbquery_results[0]
+         if not queries:
+             raise HTTPException(
+                 status_code=HTTPStatus.NOT_FOUND,
+                 detail="Query not found",
+             )
+         query = queries[0]

      query_results = load_query_results(settings, str(query_id))

      prev = next_ = None
-     results = Results(__root__=query_results)

      return QueryResults(
-         results=results, next=next_, previous=prev, errors=[], **query.dict()
+         results=query_results,
+         next=next_,
+         previous=prev,
+         errors=[],
+         **query,
      )
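
Persistence now goes through the chained DBQuery builder against a pooled psycopg connection instead of a sqlmodel session. Based only on the calls visible above (save_query/get_query followed by execute(conn=...), which returns a list of result sets), a hedged usage sketch; the exact return types are an assumption.

# Hedged sketch based on the DBQuery calls visible in this diff.
import uuid

from psycopg_pool import AsyncConnectionPool

from djqs.db.postgres import DBQuery


async def fetch_query_row(pool: AsyncConnectionPool, query_id: uuid.UUID):
    """Look up a stored query by id, returning the row dict or None."""
    async with pool.connection() as conn:
        result_sets = await DBQuery().get_query(query_id=query_id).execute(conn=conn)
    rows = result_sets[0]
    return rows[0] if rows else None
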
djqs/api/tables.py CHANGED
@@ -1,26 +1,24 @@
  """
  Table related APIs.
  """
+
  from typing import Optional

- from fastapi import APIRouter, Depends
- from sqlmodel import Session
+ from fastapi import APIRouter, Path, Query

- from djqs.api.helpers import get_columns, get_engine
+ from djqs.api.helpers import get_columns
  from djqs.exceptions import DJInvalidTableRef
  from djqs.models.table import TableInfo
- from djqs.utils import get_session, get_settings
+ from djqs.utils import get_settings

  router = APIRouter(tags=["Table Reflection"])


  @router.get("/table/{table}/columns/", response_model=TableInfo)
  def table_columns(
-     table: str,
-     engine: Optional[str] = None,
-     engine_version: Optional[str] = None,
-     *,
-     session: Session = Depends(get_session),
+     table: str = Path(..., example="tpch.sf1.customer"),
+     engine: Optional[str] = Query(None, example="trino"),
+     engine_version: Optional[str] = Query(None, example="451"),
  ) -> TableInfo:
      """
      Get column information for a table
@@ -33,20 +31,19 @@
          f"for `table` must be in the format `<catalog>.<schema>.<table>`",
      )
      settings = get_settings()
-
-     if engine_version == "":
-         version = ""
-     else: # pragma: no cover
-         version = engine_version or settings.default_reflection_engine_version
-
-     engine = get_engine(
-         session=session,
-         name=engine or settings.default_reflection_engine,
-         version=version,
-     )
+     if engine:
+         engine_config = settings.find_engine(
+             engine_name=engine,
+             engine_version=engine_version or "",
+         )
+     else:
+         engine_config = settings.find_engine(
+             engine_name=settings.default_engine,
+             engine_version=engine_version or settings.default_engine_version,
+         )
      external_columns = get_columns(
-         uri=engine.uri,
-         extra_params=engine.extra_params,
+         uri=engine_config.uri,
+         extra_params=engine_config.extra_params,
          catalog=table_parts[0],
          schema=table_parts[1],
          table=table_parts[2],
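
Table reflection no longer looks engines up in the database; it resolves an engine config from settings (settings.find_engine) and reflects columns from that engine's URI. An example request against the endpoint above, where the base URL, port, and engine values are assumptions for illustration only.

# Example call to the reflection endpoint; the base URL and engine values
# below are assumptions, not defaults shipped with the package.
import requests

response = requests.get(
    "http://localhost:8001/table/tpch.sf1.customer/columns/",
    params={"engine": "trino", "engine_version": "451"},
    timeout=30,
)
response.raise_for_status()
print(response.json())  # TableInfo payload for tpch.sf1.customer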