datajunction-query 0.0.1a1.dev0__tar.gz → 0.0.1a31__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of datajunction-query might be problematic.

Files changed (96)
  1. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/.pre-commit-config.yaml +1 -0
  2. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/Makefile +1 -1
  3. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/PKG-INFO +2 -1
  4. datajunction_query-0.0.1a31/alembic/versions/2023_10_09_1858-f3407a1ec625_add_type_and_extra_para_ms_field_for_.py +36 -0
  5. datajunction_query-0.0.1a31/config.djqs.yml +11 -0
  6. datajunction_query-0.0.1a31/config.jsonschema +83 -0
  7. datajunction_query-0.0.1a31/djqs/__about__.py +4 -0
  8. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/api/catalogs.py +10 -5
  9. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/api/engines.py +5 -4
  10. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/api/helpers.py +0 -9
  11. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/api/main.py +10 -5
  12. datajunction_query-0.0.1a31/djqs/config.py +96 -0
  13. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/engine.py +11 -51
  14. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/models/engine.py +16 -2
  15. datajunction_query-0.0.1a31/pdm.lock +1673 -0
  16. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/pyproject.toml +1 -0
  17. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/api/catalogs_test.py +31 -23
  18. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/api/engines_test.py +28 -6
  19. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/api/queries_test.py +70 -144
  20. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/api/table_test.py +6 -1
  21. datajunction_query-0.0.1a31/tests/config.djqs.yml +11 -0
  22. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/conftest.py +40 -18
  23. datajunction_query-0.0.1a1.dev0/djqs/__about__.py +0 -4
  24. datajunction_query-0.0.1a1.dev0/djqs/config.py +0 -36
  25. datajunction_query-0.0.1a1.dev0/pdm.lock +0 -1775
  26. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/.coveragerc +0 -0
  27. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/.flake8 +0 -0
  28. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/.gitignore +0 -0
  29. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/.isort.cfg +0 -0
  30. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/.pylintrc +0 -0
  31. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/.readthedocs.yml +0 -0
  32. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/AUTHORS.rst +0 -0
  33. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/CODE_OF_CONDUCT.md +0 -0
  34. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/Dockerfile +0 -0
  35. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/LICENSE.txt +0 -0
  36. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/README.rst +0 -0
  37. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/alembic/README +0 -0
  38. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/alembic/env.py +0 -0
  39. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/alembic/script.py.mako +0 -0
  40. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/alembic/versions/2023_02_28_0541-a7e11a2438b4_initial_migration.py +0 -0
  41. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/alembic.ini +0 -0
  42. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/__init__.py +0 -0
  43. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/api/__init__.py +0 -0
  44. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/api/queries.py +0 -0
  45. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/api/tables.py +0 -0
  46. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/constants.py +0 -0
  47. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/exceptions.py +0 -0
  48. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/fixes.py +0 -0
  49. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/models/__init__.py +0 -0
  50. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/models/catalog.py +0 -0
  51. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/models/query.py +0 -0
  52. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/models/table.py +0 -0
  53. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/typing.py +0 -0
  54. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/djqs/utils.py +0 -0
  55. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/cockroachdb/cockroachdb_examples_init.sql +0 -0
  56. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/cockroachdb/cockroachdb_metadata_init.sql +0 -0
  57. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/cockroachdb/steam-games.csv +0 -0
  58. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/cockroachdb/steam-hours-played.csv +0 -0
  59. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/default.duckdb +0 -0
  60. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/druid_environment +0 -0
  61. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/druid_init.sh +0 -0
  62. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/druid_spec.json +0 -0
  63. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/duckdb.sql +0 -0
  64. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/duckdb_load.py +0 -0
  65. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/postgres_init.roads.sql +0 -0
  66. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/postgres_init.sql +0 -0
  67. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/spark.roads.sql +0 -0
  68. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/spark_load_roads.py +0 -0
  69. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/docker/wait-for +0 -0
  70. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/openapi.json +0 -0
  71. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/scripts/generate-openapi.py +0 -0
  72. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/setup.cfg +0 -0
  73. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/__init__.py +0 -0
  74. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/api/__init__.py +0 -0
  75. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/configs/databases/druid.yaml +0 -0
  76. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/configs/databases/gsheets.yaml +0 -0
  77. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/configs/databases/postgres.yaml +0 -0
  78. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/configs/nodes/core/comments.yaml +0 -0
  79. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/configs/nodes/core/dim_users.yaml +0 -0
  80. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/configs/nodes/core/num_comments.yaml +0 -0
  81. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/configs/nodes/core/users.yaml +0 -0
  82. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/exceptions_test.py +0 -0
  83. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/contractors.parquet +0 -0
  84. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/dispatchers.parquet +0 -0
  85. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/hard_hat_state.parquet +0 -0
  86. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/hard_hats.parquet +0 -0
  87. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/municipality.parquet +0 -0
  88. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/municipality_municipality_type.parquet +0 -0
  89. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/municipality_type.parquet +0 -0
  90. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/repair_order_details.parquet +0 -0
  91. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/repair_orders.parquet +0 -0
  92. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/repair_type.parquet +0 -0
  93. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/us_region.parquet +0 -0
  94. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/resources/us_states.parquet +0 -0
  95. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tests/utils_test.py +0 -0
  96. {datajunction_query-0.0.1a1.dev0 → datajunction_query-0.0.1a31}/tox.ini +0 -0
.pre-commit-config.yaml
@@ -72,6 +72,7 @@ repos:
  - types-pkg_resources
  - types-PyYAML
  - types-tabulate
+ - types-toml
  - repo: https://github.com/asottile/add-trailing-comma
  rev: v2.2.1
  hooks:
Makefile
@@ -47,4 +47,4 @@ lint:
  dev-release:
  hatch version dev
  hatch build
- hatch publish
+ hatch publish
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datajunction-query
- Version: 0.0.1a1.dev0
+ Version: 0.0.1a31
  Summary: OSS Implementation of a DataJunction Query Service
  Project-URL: repository, https://github.com/DataJunction/dj
  Author-email: DataJunction Authors <roberto@dealmeida.net>
@@ -30,6 +30,7 @@ Requires-Dist: sqlalchemy-utils>=0.37.7
  Requires-Dist: sqlalchemy<2.0.0,>=1.4.41
  Requires-Dist: sqlmodel<1.0.0,>=0.0.8
  Requires-Dist: sqlparse<1.0.0,>=0.4.3
+ Requires-Dist: toml>=0.10.2
  Provides-Extra: uvicorn
  Requires-Dist: uvicorn[standard]>=0.21.1; extra == 'uvicorn'
  Description-Content-Type: text/x-rst
alembic/versions/2023_10_09_1858-f3407a1ec625_add_type_and_extra_para_ms_field_for_.py (new file)
@@ -0,0 +1,36 @@
+ """Add type and extra_params field for engines
+
+ Revision ID: f3407a1ec625
+ Revises: a7e11a2438b4
+ Create Date: 2023-10-09 18:58:57.462134+00:00
+
+ """
+ # pylint: disable=no-member, invalid-name, missing-function-docstring, unused-import, no-name-in-module
+
+ import sqlalchemy as sa
+ import sqlmodel
+
+ from alembic import op
+
+ # revision identifiers, used by Alembic.
+ revision = "f3407a1ec625"
+ down_revision = "a7e11a2438b4"
+ branch_labels = None
+ depends_on = None
+
+
+ def upgrade():
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.add_column("engine", sa.Column("extra_params", sa.JSON(), nullable=True))
+     op.add_column(
+         "engine",
+         sa.Column("type", sa.Enum("DUCKDB", name="enginetype"), nullable=False),
+     )
+     # ### end Alembic commands ###
+
+
+ def downgrade():
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.drop_column("engine", "type")
+     op.drop_column("engine", "extra_params")
+     # ### end Alembic commands ###
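
As a reference only (not part of this release), the new revision could also be applied programmatically through Alembic's command API rather than the CLI; this is a minimal sketch that assumes the repository's own alembic.ini is available in the working directory:

# Hypothetical sketch: apply revision f3407a1ec625 via Alembic's Python API.
# Equivalent to running `alembic upgrade head` from the repository root.
from alembic import command
from alembic.config import Config

alembic_cfg = Config("alembic.ini")  # assumes the packaged alembic.ini
command.upgrade(alembic_cfg, "head")  # adds engine.type and engine.extra_params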
config.djqs.yml (new file)
@@ -0,0 +1,11 @@
+ engines:
+   - name: duckdb
+     version: 0.7.1
+     type: duckdb
+     uri: duckdb:////code/docker/default.duckdb
+     extra_params:
+       location: /code/docker/default.duckdb
+ catalogs:
+   - name: warehouse
+     engines:
+       - duckdb
config.jsonschema (new file)
@@ -0,0 +1,83 @@
+ {
+   "$schema": "http://json-schema.org/draft-06/schema#",
+   "$ref": "#/definitions/DJQSConfig",
+   "definitions": {
+     "DJQSConfig": {
+       "type": "object",
+       "additionalProperties": false,
+       "properties": {
+         "engines": {
+           "type": "array",
+           "items": {
+             "$ref": "#/definitions/Engine"
+           }
+         },
+         "catalogs": {
+           "type": "array",
+           "items": {
+             "$ref": "#/definitions/Catalog"
+           }
+         }
+       },
+       "required": [
+         "catalogs",
+         "engines"
+       ],
+       "title": "DJQSConfig"
+     },
+     "Catalog": {
+       "type": "object",
+       "additionalProperties": false,
+       "properties": {
+         "name": {
+           "type": "string"
+         },
+         "engines": {
+           "type": "array",
+           "items": {
+             "type": "string"
+           }
+         }
+       },
+       "required": [
+         "engines",
+         "name"
+       ],
+       "title": "Catalog"
+     },
+     "Engine": {
+       "type": "object",
+       "additionalProperties": false,
+       "properties": {
+         "name": {
+           "type": "string"
+         },
+         "version": {
+           "type": "string"
+         },
+         "type": {
+           "type": "string"
+         },
+         "uri": {
+           "type": "string"
+         },
+         "extra_params": {
+           "$ref": "#/definitions/ExtraParams"
+         }
+       },
+       "required": [
+         "extra_params",
+         "name",
+         "type",
+         "uri",
+         "version"
+       ],
+       "title": "Engine"
+     },
+     "ExtraParams": {
+       "type": "object",
+       "additionalProperties": true,
+       "title": "ExtraParams"
+     }
+   }
+ }
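
The new config.jsonschema mirrors the structure of config.djqs.yml, so a config file can be checked against it before the server loads it. A minimal sketch, assuming the third-party jsonschema package (which is not a declared dependency of this release):

# Illustrative validation script, not part of the package.
import json

import yaml
from jsonschema import validate  # external package, assumed installed

with open("config.jsonschema", encoding="utf-8") as schema_file:
    schema = json.load(schema_file)

with open("config.djqs.yml", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

# Raises jsonschema.exceptions.ValidationError if the config does not match.
validate(instance=config, schema=schema)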
djqs/__about__.py (new file)
@@ -0,0 +1,4 @@
+ """
+ Version for Hatch
+ """
+ __version__ = "0.0.1a31"
djqs/api/catalogs.py
@@ -17,10 +17,11 @@ from djqs.models.engine import BaseEngineInfo
  from djqs.utils import get_session

  _logger = logging.getLogger(__name__)
- router = APIRouter(tags=["Catalogs & Engines"])
+ get_router = APIRouter(tags=["Catalogs & Engines"])
+ post_router = APIRouter(tags=["Catalogs & Engines - Dynamic Configuration"])


- @router.get("/catalogs/", response_model=List[CatalogInfo])
+ @get_router.get("/catalogs/", response_model=List[CatalogInfo])
  def list_catalogs(*, session: Session = Depends(get_session)) -> List[CatalogInfo]:
      """
      List all available catalogs
@@ -28,7 +29,7 @@ def list_catalogs(*, session: Session = Depends(get_session)) -> List[CatalogInf
      return list(session.exec(select(Catalog)))


- @router.get("/catalogs/{name}/", response_model=CatalogInfo)
+ @get_router.get("/catalogs/{name}/", response_model=CatalogInfo)
  def read_catalog(name: str, *, session: Session = Depends(get_session)) -> CatalogInfo:
      """
      Return a catalog by name
@@ -36,7 +37,7 @@ def read_catalog(name: str, *, session: Session = Depends(get_session)) -> Catal
      return get_catalog(session, name)


- @router.post("/catalogs/", response_model=CatalogInfo, status_code=201)
+ @post_router.post("/catalogs/", response_model=CatalogInfo, status_code=201)
  def add_catalog(
      data: CatalogInfo,
      *,
@@ -70,7 +71,11 @@ def add_catalog(
      return catalog


- @router.post("/catalogs/{name}/engines/", response_model=CatalogInfo, status_code=201)
+ @post_router.post(
+     "/catalogs/{name}/engines/",
+     response_model=CatalogInfo,
+     status_code=201,
+ )
  def add_engines_to_catalog(
      name: str,
      data: List[BaseEngineInfo],
djqs/api/engines.py
@@ -12,10 +12,11 @@ from djqs.api.helpers import get_engine
  from djqs.models.engine import BaseEngineInfo, Engine, EngineInfo
  from djqs.utils import get_session

- router = APIRouter(tags=["Catalogs & Engines"])
+ get_router = APIRouter(tags=["Catalogs & Engines"])
+ post_router = APIRouter(tags=["Catalogs & Engines - Dynamic Configuration"])


- @router.get("/engines/", response_model=List[EngineInfo])
+ @get_router.get("/engines/", response_model=List[EngineInfo])
  def list_engines(*, session: Session = Depends(get_session)) -> List[EngineInfo]:
      """
      List all available engines
@@ -23,7 +24,7 @@ def list_engines(*, session: Session = Depends(get_session)) -> List[EngineInfo]
      return list(session.exec(select(Engine)))


- @router.get("/engines/{name}/{version}/", response_model=BaseEngineInfo)
+ @get_router.get("/engines/{name}/{version}/", response_model=BaseEngineInfo)
  def list_engine(
      name: str, version: str, *, session: Session = Depends(get_session)
  ) -> BaseEngineInfo:
@@ -33,7 +34,7 @@ def list_engine(
      return get_engine(session, name, version)


- @router.post("/engines/", response_model=BaseEngineInfo, status_code=201)
+ @post_router.post("/engines/", response_model=BaseEngineInfo, status_code=201)
  def add_engine(
      data: EngineInfo,
      *,
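
With enable_dynamic_config left on, engines can still be registered at runtime through the post_router. A hedged sketch of such a call; the payload fields follow the Engine model and sample config in this diff, plus the assumption that EngineInfo also accepts a uri field:

# Hypothetical client call against a locally running DJQS instance.
import requests

response = requests.post(
    "http://localhost:8001/engines/",
    json={
        "name": "duckdb",
        "version": "0.7.1",
        "type": "duckdb",
        "uri": "duckdb:////code/docker/default.duckdb",
        "extra_params": {"location": "/code/docker/default.duckdb"},
    },
)
response.raise_for_status()  # add_engine responds with 201 Created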
djqs/api/helpers.py
@@ -9,7 +9,6 @@ from sqlalchemy import inspect
  from sqlalchemy.exc import NoResultFound, NoSuchTableError, OperationalError
  from sqlmodel import Session, create_engine, select

- from djqs.engine import describe_table_via_spark, get_spark_session
  from djqs.exceptions import DJException
  from djqs.models.catalog import Catalog
  from djqs.models.engine import Engine
@@ -59,14 +58,6 @@ def get_columns(
      if not uri:
          raise DJException("Cannot retrieve columns without a uri")

-     if uri.startswith("spark://"):
-         spark = get_spark_session()
-         return describe_table_via_spark(
-             spark,
-             schema,
-             table,
-         )
-
      engine = create_engine(uri, **extra_params)
      try:
          inspector = inspect(engine)
djqs/api/main.py
@@ -5,7 +5,7 @@ Main DJ query server app.
  # All the models need to be imported here so that SQLModel can define their
  # relationships at runtime without causing circular imports.
  # See https://sqlmodel.tiangolo.com/tutorial/code-structure/#make-circular-imports-work.
- # pylint: disable=unused-import
+ # pylint: disable=unused-import,expression-not-assigned

  import logging

@@ -14,13 +14,16 @@ from fastapi.responses import JSONResponse

  from djqs import __version__
  from djqs.api import catalogs, engines, queries, tables
+ from djqs.config import load_djqs_config
  from djqs.exceptions import DJException
- from djqs.utils import get_settings
+ from djqs.utils import get_session, get_settings

  _logger = logging.getLogger(__name__)

-
  settings = get_settings()
+ session = next(get_session())
+ load_djqs_config(settings=settings, session=session)
+
  app = FastAPI(
      title=settings.name,
      description=settings.description,
@@ -30,10 +33,12 @@ app = FastAPI(
          "url": "https://mit-license.org/",
      },
  )
- app.include_router(catalogs.router)
- app.include_router(engines.router)
+ app.include_router(catalogs.get_router)
+ app.include_router(engines.get_router)
  app.include_router(queries.router)
  app.include_router(tables.router)
+ app.include_router(catalogs.post_router) if settings.enable_dynamic_config else None
+ app.include_router(engines.post_router) if settings.enable_dynamic_config else None


  @app.exception_handler(DJException)
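
The conditional app.include_router(...) if ... else None expressions are what the added expression-not-assigned pylint exemption covers; a plain if block would be an equivalent formulation:

# Equivalent sketch of the conditional router registration above.
if settings.enable_dynamic_config:
    app.include_router(catalogs.post_router)
    app.include_router(engines.post_router)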
djqs/config.py (new file)
@@ -0,0 +1,96 @@
+ """
+ Configuration for the query service
+ """
+
+ import json
+ from datetime import timedelta
+ from typing import Optional
+
+ import toml
+ import yaml
+ from cachelib.base import BaseCache
+ from cachelib.file import FileSystemCache
+ from pydantic import BaseSettings
+ from sqlmodel import Session, delete, select
+
+ from djqs.exceptions import DJException
+ from djqs.models.catalog import Catalog, CatalogEngines
+ from djqs.models.engine import Engine
+
+
+ class Settings(BaseSettings):  # pylint: disable=too-few-public-methods
+     """
+     Configuration for the query service
+     """
+
+     name: str = "DJQS"
+     description: str = "A DataJunction Query Service"
+     url: str = "http://localhost:8001/"
+
+     # SQLAlchemy URI for the metadata database.
+     index: str = "sqlite:///djqs.db?check_same_thread=False"
+
+     # The default engine to use for reflection
+     default_reflection_engine: str = "default"
+
+     # The default engine version to use for reflection
+     default_reflection_engine_version: str = ""
+
+     # Where to store the results from queries.
+     results_backend: BaseCache = FileSystemCache("/tmp/djqs", default_timeout=0)
+
+     paginating_timeout: timedelta = timedelta(minutes=5)
+
+     # How long to wait when pinging databases to find out the fastest online database.
+     do_ping_timeout: timedelta = timedelta(seconds=5)
+
+     # Configuration file for catalogs and engines
+     configuration_file: Optional[str] = None
+
+     # Enable setting catalog and engine config via REST API calls
+     enable_dynamic_config: bool = True
+
+
+ def load_djqs_config(settings: Settings, session: Session) -> None:  # pragma: no cover
+     """
+     Load the DJQS config file into the server metadata database
+     """
+     config_file = settings.configuration_file if settings.configuration_file else None
+     if not config_file:
+         return
+
+     session.exec(delete(Catalog))
+     session.exec(delete(Engine))
+     session.exec(delete(CatalogEngines))
+     session.commit()
+
+     with open(config_file, mode="r", encoding="utf-8") as filestream:
+
+         def unknown_filetype():
+             raise DJException(message=f"Unknown config file type: {config_file}")
+
+         data = (
+             yaml.safe_load(filestream)
+             if any([config_file.endswith(".yml"), config_file.endswith(".yaml")])
+             else toml.load(filestream)
+             if config_file.endswith(".toml")
+             else json.load(filestream)
+             if config_file.endswith(".json")
+             else unknown_filetype()
+         )
+
+     for engine in data["engines"]:
+         session.add(Engine.parse_obj(engine))
+         session.commit()
+
+     for catalog in data["catalogs"]:
+         attached_engines = []
+         catalog_engines = catalog.pop("engines")
+         for name in catalog_engines:
+             attached_engines.append(
+                 session.exec(select(Engine).where(Engine.name == name)).one(),
+             )
+         catalog_entry = Catalog.parse_obj(catalog)
+         catalog_entry.engines = attached_engines
+         session.add(catalog_entry)
+         session.commit()
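
Since Settings is a pydantic BaseSettings, configuration_file can be supplied via the environment. A minimal sketch of calling the loader directly; the engine and session wiring here are illustrative, while the server itself obtains its session through djqs.utils.get_session at startup:

# Hypothetical standalone use of load_djqs_config outside the FastAPI app.
import os

from sqlmodel import Session, create_engine

from djqs.config import Settings, load_djqs_config

os.environ["CONFIGURATION_FILE"] = "config.djqs.yml"  # read by BaseSettings
settings = Settings()

engine = create_engine(settings.index)  # metadata database from settings.index
with Session(engine) as session:
    load_djqs_config(settings=settings, session=session)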
djqs/engine.py
@@ -4,17 +4,16 @@ Query related functions.

  import logging
  from datetime import datetime, timezone
- from typing import List, Optional, Tuple
+ from typing import List, Tuple

  import duckdb
  import sqlparse
- from pyspark.sql import SparkSession  # pylint: disable=import-error
  from sqlalchemy import create_engine, text
  from sqlmodel import Session, select

  from djqs.config import Settings
  from djqs.models.catalog import Catalog
- from djqs.models.engine import Engine
+ from djqs.models.engine import Engine, EngineType
  from djqs.models.query import (
      ColumnMetadata,
      Query,
@@ -85,11 +84,15 @@ def run_query(
          .where(Engine.name == query.engine_name)
          .where(Engine.version == query.engine_version),
      ).one()
-     if engine.uri == "spark://local[*]":
-         spark = get_spark_session()
-         return run_spark_query(query, spark)
-     if engine.uri == "duckdb://local[*]":
-         conn = duckdb.connect(database="/code/docker/default.duckdb", read_only=False)
+     if engine.type == EngineType.DUCKDB:
+         conn = (
+             duckdb.connect()
+             if engine.uri == "duckdb:///:memory:"
+             else duckdb.connect(
+                 database=engine.extra_params["location"],
+                 read_only=True,
+             )
+         )
          return run_duckdb_query(query, conn)
      sqla_engine = create_engine(engine.uri, **catalog.extra_params)
      connection = sqla_engine.connect()
@@ -111,49 +114,6 @@
      return output


- def get_spark_session():
-     """
-     Get a spark session
-     """
-     SparkSession._instantiatedContext = None  # pylint: disable=protected-access
-     spark = (
-         SparkSession.builder.master("local[*]")
-         .appName("djqs")
-         .enableHiveSupport()
-         .getOrCreate()
-     )
-     return spark
-
-
- def run_spark_query(
-     query: Query,
-     spark: SparkSession,
- ) -> List[Tuple[str, List[ColumnMetadata], Stream]]:
-     """
-     Run a spark SQL query against the local warehouse
-     """
-     output: List[Tuple[str, List[ColumnMetadata], Stream]] = []
-     results_df = spark.sql(query.submitted_query)
-     rows = results_df.rdd.map(tuple).collect()
-     columns: List[ColumnMetadata] = []
-     output.append((query.submitted_query, columns, rows))
-     return output
-
-
- def describe_table_via_spark(
-     spark: SparkSession,
-     schema: Optional[str],
-     table: str,
- ):
-     """
-     Gets the column schemas.
-     """
-     schema_ = f"{schema}." if schema else ""
-     schema_df = spark.sql(f"DESCRIBE TABLE {schema_}{table};")
-     rows = schema_df.rdd.map(tuple).collect()
-     return [{"name": row[0], "type": row[1]} for row in rows]
-
-
  def run_duckdb_query(
      query: Query,
      conn: duckdb.DuckDBPyConnection,
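
The rewritten DuckDB branch resolves the database location from the engine record instead of a hard-coded path: an in-memory URI opens an ephemeral connection, and anything else opens the file named in extra_params["location"] read-only. A small sketch of the same dispatch in isolation, with values mirroring config.djqs.yml:

# Minimal sketch of the DuckDB connection dispatch used in run_query.
import duckdb

uri = "duckdb:////code/docker/default.duckdb"
extra_params = {"location": "/code/docker/default.duckdb"}

if uri == "duckdb:///:memory:":
    conn = duckdb.connect()  # ephemeral in-memory database
else:
    # Read-only connection to the file configured on the engine record.
    conn = duckdb.connect(database=extra_params["location"], read_only=True)

print(conn.execute("SELECT 1").fetchall())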
djqs/models/engine.py
@@ -1,10 +1,20 @@
  """
  Models for columns.
  """
+ from enum import Enum
+ from typing import Dict, Optional

- from typing import Optional
+ from sqlalchemy.sql.schema import Column as SqlaColumn
+ from sqlmodel import JSON, Field, SQLModel

- from sqlmodel import Field, SQLModel
+
+ class EngineType(Enum):
+     """
+     Supported engine types
+     """
+
+     DUCKDB = "duckdb"
+     SQLALCHEMY = "sqlalchemy"


  class Engine(SQLModel, table=True):  # type: ignore
@@ -14,8 +24,10 @@ class Engine(SQLModel, table=True):  # type: ignore

      id: Optional[int] = Field(default=None, primary_key=True)
      name: str
+     type: EngineType
      version: str
      uri: Optional[str]
+     extra_params: Dict = Field(default={}, sa_column=SqlaColumn(JSON))


  class BaseEngineInfo(SQLModel):
@@ -25,6 +37,8 @@ class BaseEngineInfo(SQLModel):

      name: str
      version: str
+     type: EngineType
+     extra_params: Dict = {}


  class EngineInfo(BaseEngineInfo):
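
Together with the migration above, each engine row now carries a type and a JSON extra_params payload. An illustrative construction of an Engine record matching the sample config, using only fields shown in this diff:

# Illustrative Engine row matching config.djqs.yml.
from djqs.models.engine import Engine, EngineType

engine = Engine(
    name="duckdb",
    type=EngineType.DUCKDB,
    version="0.7.1",
    uri="duckdb:////code/docker/default.duckdb",
    extra_params={"location": "/code/docker/default.duckdb"},
)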