datajunction-query 0.0.1a64__py3-none-any.whl → 0.0.1a66__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of datajunction-query might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datajunction-query
3
- Version: 0.0.1a64
3
+ Version: 0.0.1a66
4
4
  Summary: OSS Implementation of a DataJunction Query Service
5
5
  Project-URL: repository, https://github.com/DataJunction/dj
6
6
  Author-email: DataJunction Authors <roberto@dealmeida.net>
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.9
14
14
  Classifier: Programming Language :: Python :: 3.10
15
15
  Classifier: Programming Language :: Python :: 3.11
16
16
  Classifier: Programming Language :: Python :: 3.12
17
- Requires-Python: <4.0,>=3.8
17
+ Requires-Python: <4.0,>=3.10
18
18
  Requires-Dist: accept-types==0.4.1
19
19
  Requires-Dist: cachelib>=0.4.0
20
20
  Requires-Dist: duckdb-engine
@@ -22,15 +22,16 @@ Requires-Dist: duckdb==0.8.1
22
22
  Requires-Dist: fastapi>=0.79.0
23
23
  Requires-Dist: importlib-metadata
24
24
  Requires-Dist: msgpack>=1.0.3
25
+ Requires-Dist: psycopg[async,pool]>=3.2.1
26
+ Requires-Dist: pytest-asyncio>=0.24.0
27
+ Requires-Dist: pytest-integration>=0.2.3
25
28
  Requires-Dist: python-dotenv==0.19.2
26
29
  Requires-Dist: pyyaml>=6.0.1
27
30
  Requires-Dist: requests<=2.29.0,>=2.28.2
28
31
  Requires-Dist: rich>=10.16.2
29
32
  Requires-Dist: snowflake-connector-python>=3.3.1
30
- Requires-Dist: sqlalchemy-utils>=0.37.7
31
- Requires-Dist: sqlalchemy<2.0.0,>=1.4.41
32
- Requires-Dist: sqlmodel<1.0.0,>=0.0.8
33
- Requires-Dist: sqlparse<1.0.0,>=0.4.3
33
+ Requires-Dist: sqlalchemy>=2.0.34
34
+ Requires-Dist: tenacity>=9.0.0
34
35
  Requires-Dist: toml>=0.10.2
35
36
  Requires-Dist: trino>=0.324.0
36
37
  Provides-Extra: uvicorn
@@ -0,0 +1,24 @@
1
+ djqs/__about__.py,sha256=mKc8a1KL2DOwHh4C4Sfmqi_Cpqvps_-YclpQjIrRYLM,51
2
+ djqs/__init__.py,sha256=nN5-uJoSVEwuc8n-wMygqeF0Xhxi_zqqbCgutZvAt3E,384
3
+ djqs/config.py,sha256=8WLfe3M3maO4WhWb4ukrj8gdIlHQyLC4j9Q7KQ95bOQ,6662
4
+ djqs/constants.py,sha256=WS0uC-cKLQ_ZPhtevcv6tJ8WI6UtxYGZwTqJJtsTep0,485
5
+ djqs/engine.py,sha256=o_WhKhYJ4Ld6RB9KuRb_J7exZVo3yQ9ztId_FbOBzIQ,7340
6
+ djqs/enum.py,sha256=GJVLYDJ2zWjjUBENgyjZZ_94A24BJtvci-AKTmA0zek,590
7
+ djqs/exceptions.py,sha256=7S5hU6i9umAtITIDCq5_NIC8obTnF_f8Z7xTutyQU1I,5872
8
+ djqs/fixes.py,sha256=TcXnh0I1z4vEAupPatzrvnqyToGihndnxmLJtIn-_Z8,33
9
+ djqs/typing.py,sha256=TpZHhrK_lzEYg_ZlT5qVCJz8seQBKwrULiTPO-lMxEU,6220
10
+ djqs/utils.py,sha256=iTctVXY4uSQnGn1O-_CdDXldiOwfufz582Lo8izox9M,906
11
+ djqs/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ djqs/api/helpers.py,sha256=VjMIIz6Se9GUCE8xfAEtNfPqjCWNRburjjX3AWTcAMY,1358
13
+ djqs/api/main.py,sha256=SR0vbX0W1v8orfluyfsALrT1G3bF9emH1KBWoO9lcyw,2237
14
+ djqs/api/queries.py,sha256=xh11sIbFlKcnOHfbOLCf4ZWzAAhJ4m4FVq0oYMSPRO8,7709
15
+ djqs/api/tables.py,sha256=oxjSMDUxtuCzvXP58Oyo0Ji_T0BnNrpGo9wQ1hv9zeQ,1674
16
+ djqs/db/postgres.py,sha256=ggTAVjm52oRbdfgHg1e0QXXb7AkYgFP_bGBys7IP5yE,4862
17
+ djqs/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ djqs/models/query.py,sha256=n02TPSq8cZOuH49FyVieVwhGdrkaZ-4NpqCSuOgL5X4,3324
19
+ djqs/models/table.py,sha256=7HmiXWHBWEthqdwPYLlsOgeKD-w9urCHoC4xaQ1RnM8,238
20
+ datajunction_query-0.0.1a66.dist-info/METADATA,sha256=8MIrqCyR5fSCey6lArtDwud-OWccAsc1r-Vtcz6-tJY,9525
21
+ datajunction_query-0.0.1a66.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
22
+ datajunction_query-0.0.1a66.dist-info/licenses/AUTHORS.rst,sha256=G9YmXPfQ0kAuxqlCwvWVvUnZitP9hAc-rPfZ5q7Pv1U,235
23
+ datajunction_query-0.0.1a66.dist-info/licenses/LICENSE.txt,sha256=KuSxhVgPuUGoYWphJig4POcTAIUNLUj8vOx-cqQFMj8,1081
24
+ datajunction_query-0.0.1a66.dist-info/RECORD,,
djqs/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """
2
2
  Version for Hatch
3
3
  """
4
- __version__ = "0.0.1a64"
4
+ __version__ = "0.0.1a66"
djqs/api/helpers.py CHANGED
@@ -1,48 +1,12 @@
1
1
  """
2
2
  Helper functions for API
3
3
  """
4
- from http import HTTPStatus
5
4
  from typing import Any, Dict, List, Optional
6
5
 
7
- from fastapi import HTTPException
8
- from sqlalchemy import inspect
9
- from sqlalchemy.exc import NoResultFound, NoSuchTableError, OperationalError
10
- from sqlmodel import Session, create_engine, select
6
+ from sqlalchemy import create_engine, inspect
7
+ from sqlalchemy.exc import NoSuchTableError, OperationalError
11
8
 
12
9
  from djqs.exceptions import DJException, DJTableNotFound
13
- from djqs.models.catalog import Catalog
14
- from djqs.models.engine import Engine
15
-
16
-
17
- def get_catalog(session: Session, name: str) -> Catalog:
18
- """
19
- Get a catalog by name
20
- """
21
- statement = select(Catalog).where(Catalog.name == name)
22
- catalog = session.exec(statement).one_or_none()
23
- if not catalog:
24
- raise DJException(
25
- message=f"Catalog with name `{name}` does not exist.",
26
- http_status_code=404,
27
- )
28
- return catalog
29
-
30
-
31
- def get_engine(session: Session, name: str, version: str) -> Engine:
32
- """
33
- Return an Engine instance given an engine name and version
34
- """
35
- statement = (
36
- select(Engine).where(Engine.name == name).where(Engine.version == version)
37
- )
38
- try:
39
- engine = session.exec(statement).one()
40
- except NoResultFound as exc:
41
- raise HTTPException(
42
- status_code=HTTPStatus.NOT_FOUND,
43
- detail=f"Engine not found: `{name}` version `{version}`",
44
- ) from exc
45
- return engine
46
10
 
47
11
 
48
12
  def get_columns(
djqs/api/main.py CHANGED
@@ -12,29 +12,41 @@ from contextlib import asynccontextmanager
12
12
 
13
13
  from fastapi import FastAPI, Request
14
14
  from fastapi.responses import JSONResponse
15
+ from psycopg.rows import dict_row
16
+ from psycopg_pool import AsyncConnectionPool
15
17
 
16
18
  from djqs import __version__
17
- from djqs.api import catalogs, engines, queries, tables
18
- from djqs.config import load_djqs_config
19
+ from djqs.api import queries, tables
19
20
  from djqs.exceptions import DJException
20
- from djqs.utils import get_session, get_settings
21
+ from djqs.utils import get_settings
21
22
 
22
23
  _logger = logging.getLogger(__name__)
23
24
 
24
25
  settings = get_settings()
25
- session = next(get_session())
26
26
 
27
27
 
28
28
  @asynccontextmanager
29
- async def lifespan(app: FastAPI): # pylint: disable=W0621,W0613
29
+ async def lifespan(fastapi_app: FastAPI):
30
30
  """
31
- Load DJQS config on app startup
31
+ Create a postgres connection pool and store it in the app state
32
32
  """
33
+ _logger.info("Starting PostgreSQL connection pool...")
34
+ pool = AsyncConnectionPool(
35
+ settings.index,
36
+ kwargs={"row_factory": dict_row},
37
+ check=AsyncConnectionPool.check_connection,
38
+ min_size=5,
39
+ max_size=20,
40
+ timeout=15,
41
+ )
42
+ fastapi_app.state.pool = pool
33
43
  try:
34
- load_djqs_config(settings=settings, session=session)
35
- except Exception as e: # pylint: disable=W0718,C0103
36
- _logger.warning("Could not load DJQS config: %s", e)
37
- yield
44
+ _logger.info("PostgreSQL connection pool started with DSN: %s", settings.index)
45
+ yield
46
+ finally:
47
+ _logger.info("Closing PostgreSQL connection pool")
48
+ await pool.close()
49
+ _logger.info("PostgreSQL connection pool closed")
38
50
 
39
51
 
40
52
  app = FastAPI(
@@ -47,12 +59,10 @@ app = FastAPI(
47
59
  },
48
60
  lifespan=lifespan,
49
61
  )
50
- app.include_router(catalogs.get_router)
51
- app.include_router(engines.get_router)
52
62
  app.include_router(queries.router)
53
63
  app.include_router(tables.router)
54
- app.include_router(catalogs.post_router) if settings.enable_dynamic_config else None
55
- app.include_router(engines.post_router) if settings.enable_dynamic_config else None
64
+
65
+ app.router.lifespan_context = lifespan
56
66
 
57
67
 
58
68
  @app.exception_handler(DJException)
djqs/api/queries.py CHANGED
@@ -1,9 +1,11 @@
1
1
  """
2
2
  Query related APIs.
3
3
  """
4
+
4
5
  import json
5
6
  import logging
6
7
  import uuid
8
+ from dataclasses import asdict
7
9
  from http import HTTPStatus
8
10
  from typing import Any, Dict, List, Optional
9
11
 
@@ -19,21 +21,21 @@ from fastapi import (
19
21
  Request,
20
22
  Response,
21
23
  )
22
- from sqlmodel import Session
24
+ from psycopg_pool import AsyncConnectionPool
23
25
 
24
26
  from djqs.config import Settings
27
+ from djqs.db.postgres import DBQuery, get_postgres_pool
25
28
  from djqs.engine import process_query
26
29
  from djqs.models.query import (
27
30
  Query,
28
31
  QueryCreate,
29
32
  QueryResults,
30
33
  QueryState,
31
- Results,
32
34
  StatementResults,
33
35
  decode_results,
34
36
  encode_results,
35
37
  )
36
- from djqs.utils import get_session, get_settings
38
+ from djqs.utils import get_settings
37
39
 
38
40
  _logger = logging.getLogger(__name__)
39
41
  router = APIRouter(tags=["SQL Queries"])
@@ -49,32 +51,24 @@ router = APIRouter(tags=["SQL Queries"])
49
51
  "description": "Return results as JSON or msgpack",
50
52
  },
51
53
  },
52
- openapi_extra={
53
- "requestBody": {
54
- "content": {
55
- "application/json": {
56
- "schema": QueryCreate.schema(
57
- ref_template="#/components/schemas/{model}",
58
- ),
59
- },
60
- "application/msgpack": {
61
- "schema": QueryCreate.schema(
62
- ref_template="#/components/schemas/{model}",
63
- ),
64
- },
65
- },
66
- },
67
- },
68
54
  )
69
55
  async def submit_query( # pylint: disable=too-many-arguments
70
56
  accept: Optional[str] = Header(None),
71
57
  *,
72
- session: Session = Depends(get_session),
73
58
  settings: Settings = Depends(get_settings),
74
59
  request: Request,
75
60
  response: Response,
61
+ postgres_pool: AsyncConnectionPool = Depends(get_postgres_pool),
76
62
  background_tasks: BackgroundTasks,
77
- body: Any = Body(...),
63
+ body: Any = Body(
64
+ ...,
65
+ example={
66
+ "catalog_name": "warehouse",
67
+ "engine_name": "trino",
68
+ "engine_version": "451",
69
+ "submitted_query": "select * from tpch.sf1.customer limit 10",
70
+ },
71
+ ),
78
72
  ) -> QueryResults:
79
73
  """
80
74
  Run or schedule a query.
@@ -86,7 +80,7 @@ async def submit_query( # pylint: disable=too-many-arguments
86
80
  if content_type == "application/json":
87
81
  data = body
88
82
  elif content_type == "application/msgpack":
89
- data = msgpack.unpackb(body, ext_hook=decode_results)
83
+ data = json.loads(msgpack.unpackb(body, ext_hook=decode_results))
90
84
  elif content_type is None:
91
85
  raise HTTPException(
92
86
  status_code=HTTPStatus.BAD_REQUEST,
@@ -97,15 +91,23 @@ async def submit_query( # pylint: disable=too-many-arguments
97
91
  status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
98
92
  detail=f"Content type not accepted: {content_type}",
99
93
  )
94
+
95
+ # Set default catalog and engine if not explicitly specified in submitted query
96
+ data["engine_name"] = data.get("engine_name") or settings.default_engine
97
+ data["engine_version"] = (
98
+ data.get("engine_version") or settings.default_engine_version
99
+ )
100
+ data["catalog_name"] = data.get("catalog_name") or settings.default_catalog
101
+
100
102
  create_query = QueryCreate(**data)
101
103
 
102
- query_with_results = save_query_and_run(
103
- create_query,
104
- session,
105
- settings,
106
- response,
107
- background_tasks,
108
- request.headers,
104
+ query_with_results = await save_query_and_run(
105
+ create_query=create_query,
106
+ settings=settings,
107
+ response=response,
108
+ background_tasks=background_tasks,
109
+ postgres_pool=postgres_pool,
110
+ headers=request.headers,
109
111
  )
110
112
 
111
113
  return_type = get_best_match(accept, ["application/json", "application/msgpack"])
@@ -117,11 +119,11 @@ async def submit_query( # pylint: disable=too-many-arguments
117
119
 
118
120
  if return_type == "application/msgpack":
119
121
  content = msgpack.packb(
120
- query_with_results.dict(by_alias=True),
122
+ asdict(query_with_results),
121
123
  default=encode_results,
122
124
  )
123
125
  else:
124
- content = query_with_results.json(by_alias=True)
126
+ content = json.dumps(asdict(query_with_results), default=str)
125
127
 
126
128
  return Response(
127
129
  content=content,
@@ -130,31 +132,76 @@ async def submit_query( # pylint: disable=too-many-arguments
130
132
  )
131
133
 
132
134
 
133
- def save_query_and_run( # pylint: disable=R0913
135
+ async def save_query_and_run( # pylint: disable=R0913
134
136
  create_query: QueryCreate,
135
- session: Session,
136
137
  settings: Settings,
137
138
  response: Response,
138
139
  background_tasks: BackgroundTasks,
140
+ postgres_pool: AsyncConnectionPool,
139
141
  headers: Optional[Dict[str, str]] = None,
140
142
  ) -> QueryResults:
141
143
  """
142
144
  Store a new query to the DB and run it.
143
145
  """
144
- query = Query(**create_query.dict(by_alias=True))
146
+ query = Query(
147
+ catalog_name=create_query.catalog_name, # type: ignore
148
+ engine_name=create_query.engine_name, # type: ignore
149
+ engine_version=create_query.engine_version, # type: ignore
150
+ submitted_query=create_query.submitted_query,
151
+ async_=create_query.async_,
152
+ )
145
153
  query.state = QueryState.ACCEPTED
146
154
 
147
- session.add(query)
148
- session.commit()
149
- session.refresh(query)
155
+ async with postgres_pool.connection() as conn:
156
+ results = (
157
+ await DBQuery()
158
+ .save_query(
159
+ query_id=query.id,
160
+ catalog_name=query.catalog_name,
161
+ engine_name=query.engine_name,
162
+ engine_version=query.engine_version,
163
+ submitted_query=query.submitted_query,
164
+ async_=query.async_,
165
+ state=query.state.value,
166
+ )
167
+ .execute(conn=conn)
168
+ )
169
+ query_save_result = results[0]
170
+ if not query_save_result: # pragma: no cover
171
+ raise HTTPException(
172
+ status_code=HTTPStatus.NOT_FOUND,
173
+ detail="Query failed to save",
174
+ )
150
175
 
151
- if query.async_:
152
- background_tasks.add_task(process_query, session, settings, query, headers)
176
+ if query.async_:
177
+ background_tasks.add_task(
178
+ process_query,
179
+ settings,
180
+ postgres_pool,
181
+ query,
182
+ headers,
183
+ )
153
184
 
154
- response.status_code = HTTPStatus.CREATED
155
- return QueryResults(results=[], errors=[], **query.dict())
185
+ response.status_code = HTTPStatus.CREATED
186
+ return QueryResults(
187
+ id=query.id,
188
+ catalog_name=query.catalog_name,
189
+ engine_name=query.engine_name,
190
+ engine_version=query.engine_version,
191
+ submitted_query=query.submitted_query,
192
+ executed_query=query.executed_query,
193
+ state=QueryState.SCHEDULED,
194
+ results=[],
195
+ errors=[],
196
+ )
156
197
 
157
- return process_query(session, settings, query, headers)
198
+ query_results = await process_query(
199
+ settings=settings,
200
+ postgres_pool=postgres_pool,
201
+ query=query,
202
+ headers=headers,
203
+ )
204
+ return query_results
158
205
 
159
206
 
160
207
  def load_query_results(
@@ -171,7 +218,7 @@ def load_query_results(
171
218
  _logger.info("Reading results from results backend")
172
219
  cached = settings.results_backend.get(key)
173
220
  query_results = json.loads(cached)
174
- else:
221
+ else: # pragma: no cover
175
222
  _logger.warning("No results found")
176
223
  query_results = []
177
224
 
@@ -179,11 +226,11 @@ def load_query_results(
179
226
 
180
227
 
181
228
  @router.get("/queries/{query_id}/", response_model=QueryResults)
182
- def read_query(
229
+ async def read_query(
183
230
  query_id: uuid.UUID,
184
231
  *,
185
- session: Session = Depends(get_session),
186
232
  settings: Settings = Depends(get_settings),
233
+ postgres_pool: AsyncConnectionPool = Depends(get_postgres_pool),
187
234
  ) -> QueryResults:
188
235
  """
189
236
  Fetch information about a query.
@@ -191,15 +238,22 @@ def read_query(
191
238
  For paginated queries we move the data from the results backend to the cache for a
192
239
  short period, anticipating additional requests.
193
240
  """
194
- query = session.get(Query, query_id)
195
- if not query:
196
- raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Query not found")
241
+ async with postgres_pool.connection() as conn:
242
+ dbquery_results = (
243
+ await DBQuery().get_query(query_id=query_id).execute(conn=conn)
244
+ )
245
+ queries = dbquery_results[0]
246
+ if not queries:
247
+ raise HTTPException(
248
+ status_code=HTTPStatus.NOT_FOUND,
249
+ detail="Query not found",
250
+ )
251
+ query = queries[0]
197
252
 
198
253
  query_results = load_query_results(settings, str(query_id))
199
254
 
200
255
  prev = next_ = None
201
- results = Results(__root__=query_results)
202
256
 
203
257
  return QueryResults(
204
- results=results, next=next_, previous=prev, errors=[], **query.dict()
258
+ results=query_results, next=next_, previous=prev, errors=[], **query
205
259
  )
djqs/api/tables.py CHANGED
@@ -3,24 +3,21 @@ Table related APIs.
3
3
  """
4
4
  from typing import Optional
5
5
 
6
- from fastapi import APIRouter, Depends
7
- from sqlmodel import Session
6
+ from fastapi import APIRouter, Path, Query
8
7
 
9
- from djqs.api.helpers import get_columns, get_engine
8
+ from djqs.api.helpers import get_columns
10
9
  from djqs.exceptions import DJInvalidTableRef
11
10
  from djqs.models.table import TableInfo
12
- from djqs.utils import get_session, get_settings
11
+ from djqs.utils import get_settings
13
12
 
14
13
  router = APIRouter(tags=["Table Reflection"])
15
14
 
16
15
 
17
16
  @router.get("/table/{table}/columns/", response_model=TableInfo)
18
17
  def table_columns(
19
- table: str,
20
- engine: Optional[str] = None,
21
- engine_version: Optional[str] = None,
22
- *,
23
- session: Session = Depends(get_session),
18
+ table: str = Path(..., example="tpch.sf1.customer"),
19
+ engine: Optional[str] = Query(None, example="trino"),
20
+ engine_version: Optional[str] = Query(None, example="451"),
24
21
  ) -> TableInfo:
25
22
  """
26
23
  Get column information for a table
@@ -34,19 +31,19 @@ def table_columns(
34
31
  )
35
32
  settings = get_settings()
36
33
 
37
- if engine_version == "":
38
- version = ""
39
- else: # pragma: no cover
40
- version = engine_version or settings.default_reflection_engine_version
41
-
42
- engine = get_engine(
43
- session=session,
44
- name=engine or settings.default_reflection_engine,
45
- version=version,
46
- )
34
+ if engine and engine_version:
35
+ engine_config = settings.find_engine(
36
+ engine_name=engine,
37
+ engine_version=engine_version or settings.default_engine_version,
38
+ )
39
+ else:
40
+ engine_config = settings.find_engine(
41
+ engine_name=settings.default_engine,
42
+ engine_version=engine_version or settings.default_engine_version,
43
+ )
47
44
  external_columns = get_columns(
48
- uri=engine.uri,
49
- extra_params=engine.extra_params,
45
+ uri=engine_config.uri,
46
+ extra_params=engine_config.extra_params,
50
47
  catalog=table_parts[0],
51
48
  schema=table_parts[1],
52
49
  table=table_parts[2],