datajunction-query 0.0.1a64__tar.gz → 0.0.1a66__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datajunction-query might be problematic.
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/.coveragerc +3 -1
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/Dockerfile +4 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/Makefile +2 -2
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/PKG-INFO +7 -6
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/alembic/env.py +13 -12
- datajunction_query-0.0.1a66/alembic/versions/2024_09_09_0000-b8f22b3549c7_initial_migration.py +41 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/__about__.py +1 -1
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/api/helpers.py +2 -38
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/api/main.py +24 -14
- datajunction_query-0.0.1a66/djqs/api/queries.py +259 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/api/tables.py +18 -21
- datajunction_query-0.0.1a66/djqs/config.py +199 -0
- datajunction_query-0.0.1a66/djqs/db/postgres.py +138 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/engine.py +60 -37
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/exceptions.py +73 -32
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/models/query.py +30 -46
- datajunction_query-0.0.1a66/djqs/utils.py +41 -0
- datajunction_query-0.0.1a66/pdm.lock +1807 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/pyproject.toml +38 -21
- datajunction_query-0.0.1a66/tests/api/queries_test.py +651 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/api/table_test.py +29 -10
- datajunction_query-0.0.1a66/tests/config.djqs.yml +32 -0
- datajunction_query-0.0.1a66/tests/conftest.py +85 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/utils_test.py +1 -24
- datajunction_query-0.0.1a64/alembic/versions/2023_02_28_0541-a7e11a2438b4_initial_migration.py +0 -86
- datajunction_query-0.0.1a64/alembic/versions/2023_10_09_1858-f3407a1ec625_add_type_and_extra_para_ms_field_for_.py +0 -36
- datajunction_query-0.0.1a64/djqs/api/catalogs.py +0 -115
- datajunction_query-0.0.1a64/djqs/api/engines.py +0 -61
- datajunction_query-0.0.1a64/djqs/api/queries.py +0 -205
- datajunction_query-0.0.1a64/djqs/config.py +0 -96
- datajunction_query-0.0.1a64/djqs/models/catalog.py +0 -75
- datajunction_query-0.0.1a64/djqs/models/engine.py +0 -50
- datajunction_query-0.0.1a64/djqs/utils.py +0 -90
- datajunction_query-0.0.1a64/pdm.lock +0 -2217
- datajunction_query-0.0.1a64/tests/api/catalogs_test.py +0 -374
- datajunction_query-0.0.1a64/tests/api/engines_test.py +0 -153
- datajunction_query-0.0.1a64/tests/api/queries_test.py +0 -732
- datajunction_query-0.0.1a64/tests/config.djqs.yml +0 -11
- datajunction_query-0.0.1a64/tests/configs/databases/druid.yaml +0 -4
- datajunction_query-0.0.1a64/tests/configs/databases/gsheets.yaml +0 -8
- datajunction_query-0.0.1a64/tests/configs/databases/postgres.yaml +0 -7
- datajunction_query-0.0.1a64/tests/configs/nodes/core/comments.yaml +0 -28
- datajunction_query-0.0.1a64/tests/configs/nodes/core/dim_users.yaml +0 -18
- datajunction_query-0.0.1a64/tests/configs/nodes/core/num_comments.yaml +0 -6
- datajunction_query-0.0.1a64/tests/configs/nodes/core/users.yaml +0 -28
- datajunction_query-0.0.1a64/tests/conftest.py +0 -127
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/.flake8 +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/.gitignore +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/.isort.cfg +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/.pre-commit-config.yaml +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/.pylintrc +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/.readthedocs.yml +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/AUTHORS.rst +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/CODE_OF_CONDUCT.md +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/LICENSE.txt +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/README.rst +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/alembic/README +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/alembic/script.py.mako +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/alembic.ini +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/config.djqs.yml +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/config.jsonschema +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/__init__.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/api/__init__.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/constants.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/enum.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/fixes.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/models/__init__.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/models/table.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/typing.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/cockroachdb/cockroachdb_examples_init.sql +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/cockroachdb/cockroachdb_metadata_init.sql +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/cockroachdb/steam-games.csv +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/cockroachdb/steam-hours-played.csv +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/default.duckdb +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/druid_environment +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/druid_init.sh +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/druid_spec.json +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/duckdb.sql +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/duckdb_load.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/postgres_init.roads.sql +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/postgres_init.sql +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/spark.roads.sql +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/spark_load_roads.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/docker/wait-for +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/openapi.json +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/scripts/generate-openapi.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/setup.cfg +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/__init__.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/api/__init__.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/exceptions_test.py +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/contractors.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/dispatchers.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/hard_hat_state.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/hard_hats.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/municipality.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/municipality_municipality_type.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/municipality_type.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/repair_order_details.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/repair_orders.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/repair_type.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/us_region.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tests/resources/us_states.parquet +0 -0
- {datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/tox.ini +0 -0
{datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/Dockerfile
@@ -2,6 +2,10 @@ FROM jupyter/pyspark-notebook
 USER root
 WORKDIR /code
 COPY . /code
+RUN apt-get update && apt-get install -y \
+    libpq-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 RUN pip install -e .[uvicorn]
 CMD ["uvicorn", "djqs.api.main:app", "--host", "0.0.0.0", "--port", "8001", "--reload"]
 EXPOSE 8001
{datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/Makefile
@@ -27,10 +27,10 @@ docker-run-with-cockroachdb:
 	docker compose -f docker-compose.yml -f docker-compose.cockroachdb.yml up
 
 test: pyenv
-	pdm run pytest --cov=djqs -vv tests/ --doctest-modules djqs --without-integration --without-slow-integration ${PYTEST_ARGS}
+	pdm run pytest --cov=djqs -vv tests/ --cov-report term-missing --doctest-modules djqs --without-integration --without-slow-integration ${PYTEST_ARGS}
 
 integration: pyenv
-	pdm run pytest --cov=djqs -vv tests/ --doctest-modules djqs --with-integration --with-slow-integration
+	pdm run pytest --cov=djqs -vv tests/ --cov-report term-missing --doctest-modules djqs --with-integration --with-slow-integration
 
 clean:
 	pyenv virtualenv-delete djqs
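The only Makefile change adds `--cov-report term-missing` to both test targets, so coverage output lists the uncovered line numbers. For reference, a sketch of the equivalent invocation through pytest's Python API; the flags mirror the `test` target, with `--without-integration`/`--without-slow-integration` coming from the pytest-integration plugin the package now depends on:

import pytest

# Mirrors `make test` without pdm/Make; assumes pytest-cov and
# pytest-integration are installed, as PKG-INFO below indicates.
pytest.main([
    "--cov=djqs",
    "-vv",
    "tests/",
    "--cov-report", "term-missing",
    "--doctest-modules", "djqs",
    "--without-integration",
    "--without-slow-integration",
])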
{datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datajunction-query
-Version: 0.0.1a64
+Version: 0.0.1a66
 Summary: OSS Implementation of a DataJunction Query Service
 Project-URL: repository, https://github.com/DataJunction/dj
 Author-email: DataJunction Authors <roberto@dealmeida.net>
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Python: <4.0,>=3.…
+Requires-Python: <4.0,>=3.10
 Requires-Dist: accept-types==0.4.1
 Requires-Dist: cachelib>=0.4.0
 Requires-Dist: duckdb-engine
@@ -22,15 +22,16 @@ Requires-Dist: duckdb==0.8.1
 Requires-Dist: fastapi>=0.79.0
 Requires-Dist: importlib-metadata
 Requires-Dist: msgpack>=1.0.3
+Requires-Dist: psycopg[async,pool]>=3.2.1
+Requires-Dist: pytest-asyncio>=0.24.0
+Requires-Dist: pytest-integration>=0.2.3
 Requires-Dist: python-dotenv==0.19.2
 Requires-Dist: pyyaml>=6.0.1
 Requires-Dist: requests<=2.29.0,>=2.28.2
 Requires-Dist: rich>=10.16.2
 Requires-Dist: snowflake-connector-python>=3.3.1
-Requires-Dist: sqlalchemy
-Requires-Dist: …
-Requires-Dist: sqlmodel<1.0.0,>=0.0.8
-Requires-Dist: sqlparse<1.0.0,>=0.4.3
+Requires-Dist: sqlalchemy>=2.0.34
+Requires-Dist: tenacity>=9.0.0
 Requires-Dist: toml>=0.10.2
 Requires-Dist: trino>=0.324.0
 Provides-Extra: uvicorn
{datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/alembic/env.py
@@ -2,18 +2,13 @@
 Environment for Alembic migrations.
 """
 # pylint: disable=no-member, unused-import, no-name-in-module, import-error
-
 from logging.config import fileConfig
 
-from …
+from sqlalchemy import create_engine
 
 from alembic import context
-from djqs.models.catalog import Catalog
-from djqs.models.engine import Engine
-from djqs.models.query import Query
-from djqs.utils import get_settings
 
-
+DEFAULT_URI = "postgresql+psycopg://dj:dj@postgres_metadata:5432/dj"
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -28,7 +23,7 @@ if config.config_file_name is not None:
 # for 'autogenerate' support
 # from myapp import mymodel
 # target_metadata = mymodel.Base.metadata
-target_metadata = …
+target_metadata = None  # pylint: disable=invalid-name
 
 # other values from the config, defined by the needs of env.py,
 # can be acquired:
@@ -48,12 +43,13 @@ def run_migrations_offline():
     script output.
 
     """
-
+    x_args = context.get_x_argument(as_dictionary=True)
     context.configure(
-        url=…
+        url=x_args.get("uri") or DEFAULT_URI,
         target_metadata=target_metadata,
         literal_binds=True,
         dialect_opts={"paramstyle": "named"},
+        version_table="alembic_version_djqs",
    )
 
     with context.begin_transaction():
@@ -67,10 +63,15 @@ def run_migrations_online():
     and associate a connection with the context.
 
     """
-
+    x_args = context.get_x_argument(as_dictionary=True)
+    connectable = create_engine(x_args.get("uri") or DEFAULT_URI)
 
     with connectable.connect() as connection:
-        context.configure(…
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata,
+            version_table="alembic_version_djqs",
+        )
 
         with context.begin_transaction():
             context.run_migrations()
datajunction_query-0.0.1a66/alembic/versions/2024_09_09_0000-b8f22b3549c7_initial_migration.py
ADDED
@@ -0,0 +1,41 @@
+"""Initial migration
+
+Revision ID: b8f22b3549c7
+Revises:
+Create Date: 2024-09-09 06:00:00.000000+00:00
+
+"""
+# pylint: disable=no-member, invalid-name, missing-function-docstring, unused-import, no-name-in-module
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "b8f22b3549c7"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.execute(
+        """
+        CREATE TABLE query (
+            id UUID PRIMARY KEY,
+            catalog_name VARCHAR NOT NULL,
+            engine_name VARCHAR NOT NULL,
+            engine_version VARCHAR NOT NULL,
+            submitted_query VARCHAR NOT NULL,
+            async_ BOOLEAN NOT NULL,
+            executed_query VARCHAR,
+            scheduled TIMESTAMP,
+            started TIMESTAMP,
+            finished TIMESTAMP,
+            state VARCHAR NOT NULL,
+            progress FLOAT NOT NULL
+        )
+        """,
+    )
+
+
+def downgrade():
+    op.execute("DROP TABLE query")
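The new baseline migration creates the `query` table with hand-written DDL rather than autogenerating it from model metadata (`target_metadata` is now `None` in env.py). Purely as an illustration of the resulting schema, a psycopg sketch that writes and reads back a row; the localhost DSN and the `"ACCEPTED"` state string are assumptions drawn from `DEFAULT_URI` and `QueryState` elsewhere in this diff:

import uuid

import psycopg

# Assumed local DSN; the shipped default points at postgres_metadata:5432.
with psycopg.connect("postgresql://dj:dj@localhost:5432/dj") as conn:
    conn.execute(
        "INSERT INTO query (id, catalog_name, engine_name, engine_version,"
        " submitted_query, async_, state, progress)"
        " VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
        (uuid.uuid4(), "warehouse", "trino", "451", "SELECT 1", False, "ACCEPTED", 0.0),
    )
    print(conn.execute("SELECT id, state FROM query").fetchone())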
{datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/api/helpers.py
@@ -1,48 +1,12 @@
 """
 Helper functions for API
 """
-from http import HTTPStatus
 from typing import Any, Dict, List, Optional
 
-from …
-from sqlalchemy import …
-from sqlalchemy.exc import NoResultFound, NoSuchTableError, OperationalError
-from sqlmodel import Session, create_engine, select
+from sqlalchemy import create_engine, inspect
+from sqlalchemy.exc import NoSuchTableError, OperationalError
 
 from djqs.exceptions import DJException, DJTableNotFound
-from djqs.models.catalog import Catalog
-from djqs.models.engine import Engine
-
-
-def get_catalog(session: Session, name: str) -> Catalog:
-    """
-    Get a catalog by name
-    """
-    statement = select(Catalog).where(Catalog.name == name)
-    catalog = session.exec(statement).one_or_none()
-    if not catalog:
-        raise DJException(
-            message=f"Catalog with name `{name}` does not exist.",
-            http_status_code=404,
-        )
-    return catalog
-
-
-def get_engine(session: Session, name: str, version: str) -> Engine:
-    """
-    Return an Engine instance given an engine name and version
-    """
-    statement = (
-        select(Engine).where(Engine.name == name).where(Engine.version == version)
-    )
-    try:
-        engine = session.exec(statement).one()
-    except NoResultFound as exc:
-        raise HTTPException(
-            status_code=HTTPStatus.NOT_FOUND,
-            detail=f"Engine not found: `{name}` version `{version}`",
-        ) from exc
-    return engine
 
 
 def get_columns(
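With catalogs and engines now defined in YAML config rather than database rows, `get_catalog` and `get_engine` disappear and the module keeps only `get_columns`, reflecting tables directly through SQLAlchemy. The body of `get_columns` is not shown in this diff, but the new imports point at the standard inspection API; a minimal sketch of that pattern, where the DuckDB URI, schema, and table name are placeholders:

from sqlalchemy import create_engine, inspect
from sqlalchemy.exc import NoSuchTableError

# Reflect column names/types without any ORM models.
engine = create_engine("duckdb:///docker/default.duckdb")  # placeholder URI
inspector = inspect(engine)
try:
    for column in inspector.get_columns("customer", schema="main"):
        print(column["name"], column["type"])
except NoSuchTableError:
    print("no such table")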
{datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/api/main.py
@@ -12,29 +12,41 @@ from contextlib import asynccontextmanager
 
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
+from psycopg.rows import dict_row
+from psycopg_pool import AsyncConnectionPool
 
 from djqs import __version__
-from djqs.api import …
-from djqs.config import load_djqs_config
+from djqs.api import queries, tables
 from djqs.exceptions import DJException
-from djqs.utils import …
+from djqs.utils import get_settings
 
 _logger = logging.getLogger(__name__)
 
 settings = get_settings()
-session = next(get_session())
 
 
 @asynccontextmanager
-async def lifespan(…
+async def lifespan(fastapi_app: FastAPI):
     """
-    …
+    Create a postgres connection pool and store it in the app state
     """
+    _logger.info("Starting PostgreSQL connection pool...")
+    pool = AsyncConnectionPool(
+        settings.index,
+        kwargs={"row_factory": dict_row},
+        check=AsyncConnectionPool.check_connection,
+        min_size=5,
+        max_size=20,
+        timeout=15,
+    )
+    fastapi_app.state.pool = pool
     try:
-        …
+        _logger.info("PostgreSQL connection pool started with DSN: %s", settings.index)
+        yield
+    finally:
+        _logger.info("Closing PostgreSQL connection pool")
+        await pool.close()
+        _logger.info("PostgreSQL connection pool closed")
 
 
 app = FastAPI(
@@ -47,12 +59,10 @@ app = FastAPI(
     },
     lifespan=lifespan,
 )
-app.include_router(catalogs.get_router)
-app.include_router(engines.get_router)
 app.include_router(queries.router)
 app.include_router(tables.router)
-
-app.…
+
+app.router.lifespan_context = lifespan
 
 
 @app.exception_handler(DJException)
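The module-level SQLModel session is gone; the app now opens an async psycopg connection pool at startup, stashes it on `app.state`, and closes it cleanly at shutdown. Note the lifespan handler ends up wired twice, once through the `FastAPI(lifespan=...)` argument and again via `app.router.lifespan_context`; the constructor argument alone would suffice. Reduced to a self-contained sketch, where the DSN and the `/health/` route are illustrative (in djqs, handlers obtain the pool through the `get_postgres_pool` dependency from `djqs.db.postgres`):

from contextlib import asynccontextmanager

from fastapi import FastAPI, Request
from psycopg.rows import dict_row
from psycopg_pool import AsyncConnectionPool


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Rows come back as dicts, matching how QueryResults(**query) is built later.
    pool = AsyncConnectionPool(
        "postgresql://dj:dj@localhost:5432/dj",  # assumed DSN
        kwargs={"row_factory": dict_row},
        open=False,
    )
    await pool.open()
    app.state.pool = pool
    try:
        yield
    finally:
        await pool.close()


app = FastAPI(lifespan=lifespan)


@app.get("/health/")
async def health(request: Request) -> dict:
    async with request.app.state.pool.connection() as conn:
        cursor = await conn.execute("SELECT 1 AS ok")
        row = await cursor.fetchone()
    return {"ok": row["ok"]}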
datajunction_query-0.0.1a66/djqs/api/queries.py
ADDED
@@ -0,0 +1,259 @@
+"""
+Query related APIs.
+"""
+
+import json
+import logging
+import uuid
+from dataclasses import asdict
+from http import HTTPStatus
+from typing import Any, Dict, List, Optional
+
+import msgpack
+from accept_types import get_best_match
+from fastapi import (
+    APIRouter,
+    BackgroundTasks,
+    Body,
+    Depends,
+    Header,
+    HTTPException,
+    Request,
+    Response,
+)
+from psycopg_pool import AsyncConnectionPool
+
+from djqs.config import Settings
+from djqs.db.postgres import DBQuery, get_postgres_pool
+from djqs.engine import process_query
+from djqs.models.query import (
+    Query,
+    QueryCreate,
+    QueryResults,
+    QueryState,
+    StatementResults,
+    decode_results,
+    encode_results,
+)
+from djqs.utils import get_settings
+
+_logger = logging.getLogger(__name__)
+router = APIRouter(tags=["SQL Queries"])
+
+
+@router.post(
+    "/queries/",
+    response_model=QueryResults,
+    status_code=HTTPStatus.OK,
+    responses={
+        200: {
+            "content": {"application/msgpack": {}},
+            "description": "Return results as JSON or msgpack",
+        },
+    },
+)
+async def submit_query(  # pylint: disable=too-many-arguments
+    accept: Optional[str] = Header(None),
+    *,
+    settings: Settings = Depends(get_settings),
+    request: Request,
+    response: Response,
+    postgres_pool: AsyncConnectionPool = Depends(get_postgres_pool),
+    background_tasks: BackgroundTasks,
+    body: Any = Body(
+        ...,
+        example={
+            "catalog_name": "warehouse",
+            "engine_name": "trino",
+            "engine_version": "451",
+            "submitted_query": "select * from tpch.sf1.customer limit 10",
+        },
+    ),
+) -> QueryResults:
+    """
+    Run or schedule a query.
+
+    This endpoint is different from others in that it accepts both JSON and msgpack, and
+    can also return JSON or msgpack, depending on HTTP headers.
+    """
+    content_type = request.headers.get("content-type")
+    if content_type == "application/json":
+        data = body
+    elif content_type == "application/msgpack":
+        data = json.loads(msgpack.unpackb(body, ext_hook=decode_results))
+    elif content_type is None:
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail="Content type must be specified",
+        )
+    else:
+        raise HTTPException(
+            status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
+            detail=f"Content type not accepted: {content_type}",
+        )
+
+    # Set default catalog and engine if not explicitly specified in submitted query
+    data["engine_name"] = data.get("engine_name") or settings.default_engine
+    data["engine_version"] = (
+        data.get("engine_version") or settings.default_engine_version
+    )
+    data["catalog_name"] = data.get("catalog_name") or settings.default_catalog
+
+    create_query = QueryCreate(**data)
+
+    query_with_results = await save_query_and_run(
+        create_query=create_query,
+        settings=settings,
+        response=response,
+        background_tasks=background_tasks,
+        postgres_pool=postgres_pool,
+        headers=request.headers,
+    )
+
+    return_type = get_best_match(accept, ["application/json", "application/msgpack"])
+    if not return_type:
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_ACCEPTABLE,
+            detail="Client MUST accept: application/json, application/msgpack",
+        )
+
+    if return_type == "application/msgpack":
+        content = msgpack.packb(
+            asdict(query_with_results),
+            default=encode_results,
+        )
+    else:
+        content = json.dumps(asdict(query_with_results), default=str)
+
+    return Response(
+        content=content,
+        media_type=return_type,
+        status_code=response.status_code or HTTPStatus.OK,
+    )
+
+
+async def save_query_and_run(  # pylint: disable=R0913
+    create_query: QueryCreate,
+    settings: Settings,
+    response: Response,
+    background_tasks: BackgroundTasks,
+    postgres_pool: AsyncConnectionPool,
+    headers: Optional[Dict[str, str]] = None,
+) -> QueryResults:
+    """
+    Store a new query to the DB and run it.
+    """
+    query = Query(
+        catalog_name=create_query.catalog_name,  # type: ignore
+        engine_name=create_query.engine_name,  # type: ignore
+        engine_version=create_query.engine_version,  # type: ignore
+        submitted_query=create_query.submitted_query,
+        async_=create_query.async_,
+    )
+    query.state = QueryState.ACCEPTED
+
+    async with postgres_pool.connection() as conn:
+        results = (
+            await DBQuery()
+            .save_query(
+                query_id=query.id,
+                catalog_name=query.catalog_name,
+                engine_name=query.engine_name,
+                engine_version=query.engine_version,
+                submitted_query=query.submitted_query,
+                async_=query.async_,
+                state=query.state.value,
+            )
+            .execute(conn=conn)
+        )
+    query_save_result = results[0]
+    if not query_save_result:  # pragma: no cover
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_FOUND,
+            detail="Query failed to save",
+        )
+
+    if query.async_:
+        background_tasks.add_task(
+            process_query,
+            settings,
+            postgres_pool,
+            query,
+            headers,
+        )
+
+        response.status_code = HTTPStatus.CREATED
+        return QueryResults(
+            id=query.id,
+            catalog_name=query.catalog_name,
+            engine_name=query.engine_name,
+            engine_version=query.engine_version,
+            submitted_query=query.submitted_query,
+            executed_query=query.executed_query,
+            state=QueryState.SCHEDULED,
+            results=[],
+            errors=[],
+        )
+
+    query_results = await process_query(
+        settings=settings,
+        postgres_pool=postgres_pool,
+        query=query,
+        headers=headers,
+    )
+    return query_results
+
+
+def load_query_results(
+    settings: Settings,
+    key: str,
+) -> List[StatementResults]:
+    """
+    Load results from backend, if available.
+
+    If ``paginate`` is true we also load the results into the cache, anticipating more
+    paginated queries.
+    """
+    if settings.results_backend.has(key):
+        _logger.info("Reading results from results backend")
+        cached = settings.results_backend.get(key)
+        query_results = json.loads(cached)
+    else:  # pragma: no cover
+        _logger.warning("No results found")
+        query_results = []
+
+    return query_results
+
+
+@router.get("/queries/{query_id}/", response_model=QueryResults)
+async def read_query(
+    query_id: uuid.UUID,
+    *,
+    settings: Settings = Depends(get_settings),
+    postgres_pool: AsyncConnectionPool = Depends(get_postgres_pool),
+) -> QueryResults:
+    """
+    Fetch information about a query.
+
+    For paginated queries we move the data from the results backend to the cache for a
+    short period, anticipating additional requests.
+    """
+    async with postgres_pool.connection() as conn:
+        dbquery_results = (
+            await DBQuery().get_query(query_id=query_id).execute(conn=conn)
+        )
+    queries = dbquery_results[0]
+    if not queries:
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_FOUND,
+            detail="Query not found",
+        )
+    query = queries[0]
+
+    query_results = load_query_results(settings, str(query_id))
+
+    prev = next_ = None
+
+    return QueryResults(
+        results=query_results, next=next_, previous=prev, errors=[], **query
+    )
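Because /queries/ negotiates both directions (JSON or msgpack in, JSON or msgpack out), a client must set Content-Type and Accept explicitly. An illustrative client using the requests library, with the host and port assumed from the Dockerfile's `EXPOSE 8001`:

import requests

payload = {
    "catalog_name": "warehouse",
    "engine_name": "trino",
    "engine_version": "451",
    "submitted_query": "select * from tpch.sf1.customer limit 10",
}
response = requests.post(
    "http://localhost:8001/queries/",  # assumed service address
    json=payload,  # sets Content-Type: application/json
    headers={"Accept": "application/json"},
    timeout=30,
)
response.raise_for_status()
results = response.json()
print(results["state"], results["results"])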
{datajunction_query-0.0.1a64 → datajunction_query-0.0.1a66}/djqs/api/tables.py
@@ -3,24 +3,21 @@ Table related APIs.
 """
 from typing import Optional
 
-from fastapi import APIRouter, …
-from sqlmodel import Session
+from fastapi import APIRouter, Path, Query
 
-from djqs.api.helpers import get_columns
+from djqs.api.helpers import get_columns
 from djqs.exceptions import DJInvalidTableRef
 from djqs.models.table import TableInfo
-from djqs.utils import …
+from djqs.utils import get_settings
 
 router = APIRouter(tags=["Table Reflection"])
 
 
 @router.get("/table/{table}/columns/", response_model=TableInfo)
 def table_columns(
-    table: str,
-    engine: Optional[str] = None,
-    engine_version: Optional[str] = None,
-    *,
-    session: Session = Depends(get_session),
+    table: str = Path(..., example="tpch.sf1.customer"),
+    engine: Optional[str] = Query(None, example="trino"),
+    engine_version: Optional[str] = Query(None, example="451"),
 ) -> TableInfo:
     """
     Get column information for a table
@@ -34,19 +31,19 @@ def table_columns(
     )
     settings = get_settings()
 
-    if …
-    …
+    if engine and engine_version:
+        engine_config = settings.find_engine(
+            engine_name=engine,
+            engine_version=engine_version or settings.default_engine_version,
+        )
+    else:
+        engine_config = settings.find_engine(
+            engine_name=settings.default_engine,
+            engine_version=engine_version or settings.default_engine_version,
+        )
     external_columns = get_columns(
-        uri=…
-        extra_params=…
+        uri=engine_config.uri,
+        extra_params=engine_config.extra_params,
         catalog=table_parts[0],
         schema=table_parts[1],
         table=table_parts[2],