datajunction-query 0.0.1a65__tar.gz → 0.0.1a66__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of datajunction-query might be problematic.

Files changed (103)
  1. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/.coveragerc +3 -1
  2. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/Dockerfile +4 -0
  3. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/Makefile +2 -2
  4. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/PKG-INFO +7 -6
  5. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/alembic/env.py +13 -12
  6. datajunction_query-0.0.1a66/alembic/versions/2024_09_09_0000-b8f22b3549c7_initial_migration.py +41 -0
  7. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/__about__.py +1 -1
  8. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/api/helpers.py +2 -38
  9. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/api/main.py +24 -14
  10. datajunction_query-0.0.1a66/djqs/api/queries.py +259 -0
  11. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/api/tables.py +18 -21
  12. datajunction_query-0.0.1a66/djqs/config.py +199 -0
  13. datajunction_query-0.0.1a66/djqs/db/postgres.py +138 -0
  14. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/engine.py +60 -37
  15. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/exceptions.py +73 -32
  16. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/models/query.py +30 -46
  17. datajunction_query-0.0.1a66/djqs/utils.py +41 -0
  18. datajunction_query-0.0.1a66/pdm.lock +1807 -0
  19. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/pyproject.toml +38 -21
  20. datajunction_query-0.0.1a66/tests/api/queries_test.py +651 -0
  21. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/api/table_test.py +29 -10
  22. datajunction_query-0.0.1a66/tests/config.djqs.yml +32 -0
  23. datajunction_query-0.0.1a66/tests/conftest.py +85 -0
  24. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/utils_test.py +1 -24
  25. datajunction_query-0.0.1a65/alembic/versions/2023_02_28_0541-a7e11a2438b4_initial_migration.py +0 -86
  26. datajunction_query-0.0.1a65/alembic/versions/2023_10_09_1858-f3407a1ec625_add_type_and_extra_para_ms_field_for_.py +0 -36
  27. datajunction_query-0.0.1a65/djqs/api/catalogs.py +0 -115
  28. datajunction_query-0.0.1a65/djqs/api/engines.py +0 -61
  29. datajunction_query-0.0.1a65/djqs/api/queries.py +0 -205
  30. datajunction_query-0.0.1a65/djqs/config.py +0 -96
  31. datajunction_query-0.0.1a65/djqs/models/catalog.py +0 -75
  32. datajunction_query-0.0.1a65/djqs/models/engine.py +0 -50
  33. datajunction_query-0.0.1a65/djqs/utils.py +0 -90
  34. datajunction_query-0.0.1a65/pdm.lock +0 -2217
  35. datajunction_query-0.0.1a65/tests/api/catalogs_test.py +0 -374
  36. datajunction_query-0.0.1a65/tests/api/engines_test.py +0 -153
  37. datajunction_query-0.0.1a65/tests/api/queries_test.py +0 -732
  38. datajunction_query-0.0.1a65/tests/config.djqs.yml +0 -11
  39. datajunction_query-0.0.1a65/tests/configs/databases/druid.yaml +0 -4
  40. datajunction_query-0.0.1a65/tests/configs/databases/gsheets.yaml +0 -8
  41. datajunction_query-0.0.1a65/tests/configs/databases/postgres.yaml +0 -7
  42. datajunction_query-0.0.1a65/tests/configs/nodes/core/comments.yaml +0 -28
  43. datajunction_query-0.0.1a65/tests/configs/nodes/core/dim_users.yaml +0 -18
  44. datajunction_query-0.0.1a65/tests/configs/nodes/core/num_comments.yaml +0 -6
  45. datajunction_query-0.0.1a65/tests/configs/nodes/core/users.yaml +0 -28
  46. datajunction_query-0.0.1a65/tests/conftest.py +0 -127
  47. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/.flake8 +0 -0
  48. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/.gitignore +0 -0
  49. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/.isort.cfg +0 -0
  50. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/.pre-commit-config.yaml +0 -0
  51. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/.pylintrc +0 -0
  52. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/.readthedocs.yml +0 -0
  53. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/AUTHORS.rst +0 -0
  54. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/CODE_OF_CONDUCT.md +0 -0
  55. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/LICENSE.txt +0 -0
  56. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/README.rst +0 -0
  57. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/alembic/README +0 -0
  58. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/alembic/script.py.mako +0 -0
  59. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/alembic.ini +0 -0
  60. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/config.djqs.yml +0 -0
  61. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/config.jsonschema +0 -0
  62. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/__init__.py +0 -0
  63. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/api/__init__.py +0 -0
  64. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/constants.py +0 -0
  65. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/enum.py +0 -0
  66. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/fixes.py +0 -0
  67. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/models/__init__.py +0 -0
  68. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/models/table.py +0 -0
  69. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/djqs/typing.py +0 -0
  70. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/cockroachdb/cockroachdb_examples_init.sql +0 -0
  71. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/cockroachdb/cockroachdb_metadata_init.sql +0 -0
  72. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/cockroachdb/steam-games.csv +0 -0
  73. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/cockroachdb/steam-hours-played.csv +0 -0
  74. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/default.duckdb +0 -0
  75. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/druid_environment +0 -0
  76. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/druid_init.sh +0 -0
  77. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/druid_spec.json +0 -0
  78. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/duckdb.sql +0 -0
  79. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/duckdb_load.py +0 -0
  80. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/postgres_init.roads.sql +0 -0
  81. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/postgres_init.sql +0 -0
  82. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/spark.roads.sql +0 -0
  83. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/spark_load_roads.py +0 -0
  84. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/docker/wait-for +0 -0
  85. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/openapi.json +0 -0
  86. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/scripts/generate-openapi.py +0 -0
  87. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/setup.cfg +0 -0
  88. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/__init__.py +0 -0
  89. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/api/__init__.py +0 -0
  90. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/exceptions_test.py +0 -0
  91. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/contractors.parquet +0 -0
  92. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/dispatchers.parquet +0 -0
  93. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/hard_hat_state.parquet +0 -0
  94. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/hard_hats.parquet +0 -0
  95. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/municipality.parquet +0 -0
  96. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/municipality_municipality_type.parquet +0 -0
  97. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/municipality_type.parquet +0 -0
  98. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/repair_order_details.parquet +0 -0
  99. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/repair_orders.parquet +0 -0
  100. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/repair_type.parquet +0 -0
  101. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/us_region.parquet +0 -0
  102. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tests/resources/us_states.parquet +0 -0
  103. {datajunction_query-0.0.1a65 → datajunction_query-0.0.1a66}/tox.ini +0 -0
.coveragerc
@@ -2,7 +2,9 @@
 [run]
 branch = True
 source = dj
-# omit = bad_file.py
+omit =
+    djqs/config.py
+    djqs/exceptions.py
 
 [paths]
 source =
Dockerfile
@@ -2,6 +2,10 @@ FROM jupyter/pyspark-notebook
 USER root
 WORKDIR /code
 COPY . /code
+RUN apt-get update && apt-get install -y \
+    libpq-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 RUN pip install -e .[uvicorn]
 CMD ["uvicorn", "djqs.api.main:app", "--host", "0.0.0.0", "--port", "8001", "--reload"]
 EXPOSE 8001
Makefile
@@ -27,10 +27,10 @@ docker-run-with-cockroachdb:
 	docker compose -f docker-compose.yml -f docker-compose.cockroachdb.yml up
 
 test: pyenv
-	pdm run pytest --cov=djqs -vv tests/ --doctest-modules djqs --without-integration --without-slow-integration ${PYTEST_ARGS}
+	pdm run pytest --cov=djqs -vv tests/ --cov-report term-missing --doctest-modules djqs --without-integration --without-slow-integration ${PYTEST_ARGS}
 
 integration: pyenv
-	pdm run pytest --cov=djqs -vv tests/ --doctest-modules djqs --with-integration --with-slow-integration
+	pdm run pytest --cov=djqs -vv tests/ --cov-report term-missing --doctest-modules djqs --with-integration --with-slow-integration
 
 clean:
 	pyenv virtualenv-delete djqs
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datajunction-query
-Version: 0.0.1a65
+Version: 0.0.1a66
 Summary: OSS Implementation of a DataJunction Query Service
 Project-URL: repository, https://github.com/DataJunction/dj
 Author-email: DataJunction Authors <roberto@dealmeida.net>
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Python: <4.0,>=3.8
+Requires-Python: <4.0,>=3.10
 Requires-Dist: accept-types==0.4.1
 Requires-Dist: cachelib>=0.4.0
 Requires-Dist: duckdb-engine
@@ -22,15 +22,16 @@ Requires-Dist: duckdb==0.8.1
 Requires-Dist: fastapi>=0.79.0
 Requires-Dist: importlib-metadata
 Requires-Dist: msgpack>=1.0.3
+Requires-Dist: psycopg[async,pool]>=3.2.1
+Requires-Dist: pytest-asyncio>=0.24.0
+Requires-Dist: pytest-integration>=0.2.3
 Requires-Dist: python-dotenv==0.19.2
 Requires-Dist: pyyaml>=6.0.1
 Requires-Dist: requests<=2.29.0,>=2.28.2
 Requires-Dist: rich>=10.16.2
 Requires-Dist: snowflake-connector-python>=3.3.1
-Requires-Dist: sqlalchemy-utils>=0.37.7
-Requires-Dist: sqlalchemy<2.0.0,>=1.4.41
-Requires-Dist: sqlmodel<1.0.0,>=0.0.8
-Requires-Dist: sqlparse<1.0.0,>=0.4.3
+Requires-Dist: sqlalchemy>=2.0.34
+Requires-Dist: tenacity>=9.0.0
 Requires-Dist: toml>=0.10.2
 Requires-Dist: trino>=0.324.0
 Provides-Extra: uvicorn
alembic/env.py
@@ -2,18 +2,13 @@
 Environment for Alembic migrations.
 """
 # pylint: disable=no-member, unused-import, no-name-in-module, import-error
-
 from logging.config import fileConfig
 
-from sqlmodel import SQLModel, create_engine
+from sqlalchemy import create_engine
 
 from alembic import context
-from djqs.models.catalog import Catalog
-from djqs.models.engine import Engine
-from djqs.models.query import Query
-from djqs.utils import get_settings
 
-settings = get_settings()
+DEFAULT_URI = "postgresql+psycopg://dj:dj@postgres_metadata:5432/dj"
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -28,7 +23,7 @@ if config.config_file_name is not None:
 # for 'autogenerate' support
 # from myapp import mymodel
 # target_metadata = mymodel.Base.metadata
-target_metadata = SQLModel.metadata
+target_metadata = None  # pylint: disable=invalid-name
 
 # other values from the config, defined by the needs of env.py,
 # can be acquired:
@@ -48,12 +43,13 @@ def run_migrations_offline():
     script output.
 
     """
-    url = settings.index
+    x_args = context.get_x_argument(as_dictionary=True)
     context.configure(
-        url=url,
+        url=x_args.get("uri") or DEFAULT_URI,
         target_metadata=target_metadata,
         literal_binds=True,
         dialect_opts={"paramstyle": "named"},
+        version_table="alembic_version_djqs",
     )
 
     with context.begin_transaction():
@@ -67,10 +63,15 @@ def run_migrations_online():
     and associate a connection with the context.
 
     """
-    connectable = create_engine(settings.index)
+    x_args = context.get_x_argument(as_dictionary=True)
+    connectable = create_engine(x_args.get("uri") or DEFAULT_URI)
 
     with connectable.connect() as connection:
-        context.configure(connection=connection, target_metadata=target_metadata)
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata,
+            version_table="alembic_version_djqs",
+        )
 
         with context.begin_transaction():
             context.run_migrations()
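
With this rewrite, env.py no longer imports application models or settings: the target database is chosen at invocation time through Alembic's -x arguments (read via context.get_x_argument), falling back to DEFAULT_URI. For example, alembic -x uri=postgresql+psycopg://user:pass@localhost:5432/dj upgrade head runs the migrations against an arbitrary database (the credentials here are placeholders). The version_table="alembic_version_djqs" option keeps DJQS migration state in its own table, so it can share a database with other Alembic-managed services without clashing.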
alembic/versions/2024_09_09_0000-b8f22b3549c7_initial_migration.py (new file)
@@ -0,0 +1,41 @@
+"""Initial migration
+
+Revision ID: b8f22b3549c7
+Revises:
+Create Date: 2024-09-09 06:00:00.000000+00:00
+
+"""
+# pylint: disable=no-member, invalid-name, missing-function-docstring, unused-import, no-name-in-module
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "b8f22b3549c7"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.execute(
+        """
+        CREATE TABLE query (
+            id UUID PRIMARY KEY,
+            catalog_name VARCHAR NOT NULL,
+            engine_name VARCHAR NOT NULL,
+            engine_version VARCHAR NOT NULL,
+            submitted_query VARCHAR NOT NULL,
+            async_ BOOLEAN NOT NULL,
+            executed_query VARCHAR,
+            scheduled TIMESTAMP,
+            started TIMESTAMP,
+            finished TIMESTAMP,
+            state VARCHAR NOT NULL,
+            progress FLOAT NOT NULL
+        )
+        """,
+    )
+
+
+def downgrade():
+    op.execute("DROP TABLE query")
djqs/__about__.py
@@ -1,4 +1,4 @@
 """
 Version for Hatch
 """
-__version__ = "0.0.1a65"
+__version__ = "0.0.1a66"
djqs/api/helpers.py
@@ -1,48 +1,12 @@
 """
 Helper functions for API
 """
-from http import HTTPStatus
 from typing import Any, Dict, List, Optional
 
-from fastapi import HTTPException
-from sqlalchemy import inspect
-from sqlalchemy.exc import NoResultFound, NoSuchTableError, OperationalError
-from sqlmodel import Session, create_engine, select
+from sqlalchemy import create_engine, inspect
+from sqlalchemy.exc import NoSuchTableError, OperationalError
 
 from djqs.exceptions import DJException, DJTableNotFound
-from djqs.models.catalog import Catalog
-from djqs.models.engine import Engine
-
-
-def get_catalog(session: Session, name: str) -> Catalog:
-    """
-    Get a catalog by name
-    """
-    statement = select(Catalog).where(Catalog.name == name)
-    catalog = session.exec(statement).one_or_none()
-    if not catalog:
-        raise DJException(
-            message=f"Catalog with name `{name}` does not exist.",
-            http_status_code=404,
-        )
-    return catalog
-
-
-def get_engine(session: Session, name: str, version: str) -> Engine:
-    """
-    Return an Engine instance given an engine name and version
-    """
-    statement = (
-        select(Engine).where(Engine.name == name).where(Engine.version == version)
-    )
-    try:
-        engine = session.exec(statement).one()
-    except NoResultFound as exc:
-        raise HTTPException(
-            status_code=HTTPStatus.NOT_FOUND,
-            detail=f"Engine not found: `{name}` version `{version}`",
-        ) from exc
-    return engine
 
 
 def get_columns(
djqs/api/main.py
@@ -12,29 +12,41 @@ from contextlib import asynccontextmanager
 
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
+from psycopg.rows import dict_row
+from psycopg_pool import AsyncConnectionPool
 
 from djqs import __version__
-from djqs.api import catalogs, engines, queries, tables
-from djqs.config import load_djqs_config
+from djqs.api import queries, tables
 from djqs.exceptions import DJException
-from djqs.utils import get_session, get_settings
+from djqs.utils import get_settings
 
 _logger = logging.getLogger(__name__)
 
 settings = get_settings()
-session = next(get_session())
 
 
 @asynccontextmanager
-async def lifespan(app: FastAPI):  # pylint: disable=W0621,W0613
+async def lifespan(fastapi_app: FastAPI):
     """
-    Load DJQS config on app startup
+    Create a postgres connection pool and store it in the app state
     """
+    _logger.info("Starting PostgreSQL connection pool...")
+    pool = AsyncConnectionPool(
+        settings.index,
+        kwargs={"row_factory": dict_row},
+        check=AsyncConnectionPool.check_connection,
+        min_size=5,
+        max_size=20,
+        timeout=15,
+    )
+    fastapi_app.state.pool = pool
     try:
-        load_djqs_config(settings=settings, session=session)
-    except Exception as e:  # pylint: disable=W0718,C0103
-        _logger.warning("Could not load DJQS config: %s", e)
-    yield
+        _logger.info("PostgreSQL connection pool started with DSN: %s", settings.index)
+        yield
+    finally:
+        _logger.info("Closing PostgreSQL connection pool")
+        await pool.close()
+        _logger.info("PostgreSQL connection pool closed")
 
 
 app = FastAPI(
@@ -47,12 +59,10 @@ app = FastAPI(
     },
     lifespan=lifespan,
 )
-app.include_router(catalogs.get_router)
-app.include_router(engines.get_router)
 app.include_router(queries.router)
 app.include_router(tables.router)
-app.include_router(catalogs.post_router) if settings.enable_dynamic_config else None
-app.include_router(engines.post_router) if settings.enable_dynamic_config else None
+
+app.router.lifespan_context = lifespan
 
 
 @app.exception_handler(DJException)
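
The lifespan handler above stores the pool on the application state, and route handlers obtain it through the get_postgres_pool dependency that queries.py imports from djqs.db.postgres. That module's body is not shown in this section; a minimal sketch of such a dependency, assuming the pool is attached to app.state exactly as in the lifespan above, could look like:

from fastapi import Request
from psycopg_pool import AsyncConnectionPool


async def get_postgres_pool(request: Request) -> AsyncConnectionPool:
    """
    Hypothetical sketch: return the AsyncConnectionPool that lifespan()
    stored on the application state.
    """
    return request.app.state.pool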
djqs/api/queries.py (new file)
@@ -0,0 +1,259 @@
+"""
+Query related APIs.
+"""
+
+import json
+import logging
+import uuid
+from dataclasses import asdict
+from http import HTTPStatus
+from typing import Any, Dict, List, Optional
+
+import msgpack
+from accept_types import get_best_match
+from fastapi import (
+    APIRouter,
+    BackgroundTasks,
+    Body,
+    Depends,
+    Header,
+    HTTPException,
+    Request,
+    Response,
+)
+from psycopg_pool import AsyncConnectionPool
+
+from djqs.config import Settings
+from djqs.db.postgres import DBQuery, get_postgres_pool
+from djqs.engine import process_query
+from djqs.models.query import (
+    Query,
+    QueryCreate,
+    QueryResults,
+    QueryState,
+    StatementResults,
+    decode_results,
+    encode_results,
+)
+from djqs.utils import get_settings
+
+_logger = logging.getLogger(__name__)
+router = APIRouter(tags=["SQL Queries"])
+
+
+@router.post(
+    "/queries/",
+    response_model=QueryResults,
+    status_code=HTTPStatus.OK,
+    responses={
+        200: {
+            "content": {"application/msgpack": {}},
+            "description": "Return results as JSON or msgpack",
+        },
+    },
+)
+async def submit_query(  # pylint: disable=too-many-arguments
+    accept: Optional[str] = Header(None),
+    *,
+    settings: Settings = Depends(get_settings),
+    request: Request,
+    response: Response,
+    postgres_pool: AsyncConnectionPool = Depends(get_postgres_pool),
+    background_tasks: BackgroundTasks,
+    body: Any = Body(
+        ...,
+        example={
+            "catalog_name": "warehouse",
+            "engine_name": "trino",
+            "engine_version": "451",
+            "submitted_query": "select * from tpch.sf1.customer limit 10",
+        },
+    ),
+) -> QueryResults:
+    """
+    Run or schedule a query.
+
+    This endpoint is different from others in that it accepts both JSON and msgpack, and
+    can also return JSON or msgpack, depending on HTTP headers.
+    """
+    content_type = request.headers.get("content-type")
+    if content_type == "application/json":
+        data = body
+    elif content_type == "application/msgpack":
+        data = json.loads(msgpack.unpackb(body, ext_hook=decode_results))
+    elif content_type is None:
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail="Content type must be specified",
+        )
+    else:
+        raise HTTPException(
+            status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
+            detail=f"Content type not accepted: {content_type}",
+        )
+
+    # Set default catalog and engine if not explicitly specified in submitted query
+    data["engine_name"] = data.get("engine_name") or settings.default_engine
+    data["engine_version"] = (
+        data.get("engine_version") or settings.default_engine_version
+    )
+    data["catalog_name"] = data.get("catalog_name") or settings.default_catalog
+
+    create_query = QueryCreate(**data)
+
+    query_with_results = await save_query_and_run(
+        create_query=create_query,
+        settings=settings,
+        response=response,
+        background_tasks=background_tasks,
+        postgres_pool=postgres_pool,
+        headers=request.headers,
+    )
+
+    return_type = get_best_match(accept, ["application/json", "application/msgpack"])
+    if not return_type:
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_ACCEPTABLE,
+            detail="Client MUST accept: application/json, application/msgpack",
+        )
+
+    if return_type == "application/msgpack":
+        content = msgpack.packb(
+            asdict(query_with_results),
+            default=encode_results,
+        )
+    else:
+        content = json.dumps(asdict(query_with_results), default=str)
+
+    return Response(
+        content=content,
+        media_type=return_type,
+        status_code=response.status_code or HTTPStatus.OK,
+    )
+
+
+async def save_query_and_run(  # pylint: disable=R0913
+    create_query: QueryCreate,
+    settings: Settings,
+    response: Response,
+    background_tasks: BackgroundTasks,
+    postgres_pool: AsyncConnectionPool,
+    headers: Optional[Dict[str, str]] = None,
+) -> QueryResults:
+    """
+    Store a new query to the DB and run it.
+    """
+    query = Query(
+        catalog_name=create_query.catalog_name,  # type: ignore
+        engine_name=create_query.engine_name,  # type: ignore
+        engine_version=create_query.engine_version,  # type: ignore
+        submitted_query=create_query.submitted_query,
+        async_=create_query.async_,
+    )
+    query.state = QueryState.ACCEPTED
+
+    async with postgres_pool.connection() as conn:
+        results = (
+            await DBQuery()
+            .save_query(
+                query_id=query.id,
+                catalog_name=query.catalog_name,
+                engine_name=query.engine_name,
+                engine_version=query.engine_version,
+                submitted_query=query.submitted_query,
+                async_=query.async_,
+                state=query.state.value,
+            )
+            .execute(conn=conn)
+        )
+        query_save_result = results[0]
+        if not query_save_result:  # pragma: no cover
+            raise HTTPException(
+                status_code=HTTPStatus.NOT_FOUND,
+                detail="Query failed to save",
+            )
+
+    if query.async_:
+        background_tasks.add_task(
+            process_query,
+            settings,
+            postgres_pool,
+            query,
+            headers,
+        )
+
+        response.status_code = HTTPStatus.CREATED
+        return QueryResults(
+            id=query.id,
+            catalog_name=query.catalog_name,
+            engine_name=query.engine_name,
+            engine_version=query.engine_version,
+            submitted_query=query.submitted_query,
+            executed_query=query.executed_query,
+            state=QueryState.SCHEDULED,
+            results=[],
+            errors=[],
+        )
+
+    query_results = await process_query(
+        settings=settings,
+        postgres_pool=postgres_pool,
+        query=query,
+        headers=headers,
+    )
+    return query_results
+
+
+def load_query_results(
+    settings: Settings,
+    key: str,
+) -> List[StatementResults]:
+    """
+    Load results from backend, if available.
+
+    If ``paginate`` is true we also load the results into the cache, anticipating more
+    paginated queries.
+    """
+    if settings.results_backend.has(key):
+        _logger.info("Reading results from results backend")
+        cached = settings.results_backend.get(key)
+        query_results = json.loads(cached)
+    else:  # pragma: no cover
+        _logger.warning("No results found")
+        query_results = []
+
+    return query_results
+
+
+@router.get("/queries/{query_id}/", response_model=QueryResults)
+async def read_query(
+    query_id: uuid.UUID,
+    *,
+    settings: Settings = Depends(get_settings),
+    postgres_pool: AsyncConnectionPool = Depends(get_postgres_pool),
+) -> QueryResults:
+    """
+    Fetch information about a query.
+
+    For paginated queries we move the data from the results backend to the cache for a
+    short period, anticipating additional requests.
+    """
+    async with postgres_pool.connection() as conn:
+        dbquery_results = (
+            await DBQuery().get_query(query_id=query_id).execute(conn=conn)
+        )
+        queries = dbquery_results[0]
+        if not queries:
+            raise HTTPException(
+                status_code=HTTPStatus.NOT_FOUND,
+                detail="Query not found",
+            )
+        query = queries[0]
+
+    query_results = load_query_results(settings, str(query_id))
+
+    prev = next_ = None
+
+    return QueryResults(
+        results=query_results, next=next_, previous=prev, errors=[], **query
+    )
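
As a usage illustration of the content negotiation in submit_query above (the host, port, and payload values are assumptions, not part of this release):

import requests

# Hypothetical client call against a local DJQS instance; the payload mirrors
# the Body(...) example in submit_query above.
payload = {
    "catalog_name": "warehouse",
    "engine_name": "trino",
    "engine_version": "451",
    "submitted_query": "select * from tpch.sf1.customer limit 10",
}

# The Content-Type header selects how the request body is decoded, and the
# Accept header selects the response encoding (application/json or
# application/msgpack).
resp = requests.post(
    "http://localhost:8001/queries/",
    json=payload,
    headers={"Accept": "application/json"},
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["state"])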
djqs/api/tables.py
@@ -3,24 +3,21 @@ Table related APIs.
 """
 from typing import Optional
 
-from fastapi import APIRouter, Depends
-from sqlmodel import Session
+from fastapi import APIRouter, Path, Query
 
-from djqs.api.helpers import get_columns, get_engine
+from djqs.api.helpers import get_columns
 from djqs.exceptions import DJInvalidTableRef
 from djqs.models.table import TableInfo
-from djqs.utils import get_session, get_settings
+from djqs.utils import get_settings
 
 router = APIRouter(tags=["Table Reflection"])
 
 
 @router.get("/table/{table}/columns/", response_model=TableInfo)
 def table_columns(
-    table: str,
-    engine: Optional[str] = None,
-    engine_version: Optional[str] = None,
-    *,
-    session: Session = Depends(get_session),
+    table: str = Path(..., example="tpch.sf1.customer"),
+    engine: Optional[str] = Query(None, example="trino"),
+    engine_version: Optional[str] = Query(None, example="451"),
 ) -> TableInfo:
     """
     Get column information for a table
@@ -34,19 +31,19 @@ def table_columns(
     )
     settings = get_settings()
 
-    if engine_version == "":
-        version = ""
-    else:  # pragma: no cover
-        version = engine_version or settings.default_reflection_engine_version
-
-    engine = get_engine(
-        session=session,
-        name=engine or settings.default_reflection_engine,
-        version=version,
-    )
+    if engine and engine_version:
+        engine_config = settings.find_engine(
+            engine_name=engine,
+            engine_version=engine_version or settings.default_engine_version,
+        )
+    else:
+        engine_config = settings.find_engine(
+            engine_name=settings.default_engine,
+            engine_version=engine_version or settings.default_engine_version,
+        )
     external_columns = get_columns(
-        uri=engine.uri,
-        extra_params=engine.extra_params,
+        uri=engine_config.uri,
+        extra_params=engine_config.extra_params,
         catalog=table_parts[0],
         schema=table_parts[1],
         table=table_parts[2],
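
The reflection endpoint now resolves engines from the static settings (settings.find_engine) rather than a database lookup, and the parameters carry OpenAPI examples, so a typical request looks like GET /table/tpch.sf1.customer/columns/?engine=trino&engine_version=451. If the engine parameters are omitted, settings.default_engine and settings.default_engine_version are used instead.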