datajunction-query 0.0.1a61__tar.gz → 0.0.1a62__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datajunction-query might be problematic. Click here for more details.

Files changed (94)
  1. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/.pre-commit-config.yaml +1 -1
  2. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/PKG-INFO +1 -1
  3. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/__about__.py +1 -1
  4. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/api/main.py +1 -1
  5. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/api/queries.py +6 -4
  6. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/constants.py +3 -0
  7. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/engine.py +29 -6
  8. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/api/queries_test.py +65 -0
  9. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/.coveragerc +0 -0
  10. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/.flake8 +0 -0
  11. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/.gitignore +0 -0
  12. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/.isort.cfg +0 -0
  13. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/.pylintrc +0 -0
  14. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/.readthedocs.yml +0 -0
  15. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/AUTHORS.rst +0 -0
  16. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/CODE_OF_CONDUCT.md +0 -0
  17. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/Dockerfile +0 -0
  18. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/LICENSE.txt +0 -0
  19. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/Makefile +0 -0
  20. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/README.rst +0 -0
  21. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/alembic/README +0 -0
  22. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/alembic/env.py +0 -0
  23. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/alembic/script.py.mako +0 -0
  24. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/alembic/versions/2023_02_28_0541-a7e11a2438b4_initial_migration.py +0 -0
  25. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/alembic/versions/2023_10_09_1858-f3407a1ec625_add_type_and_extra_para_ms_field_for_.py +0 -0
  26. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/alembic.ini +0 -0
  27. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/config.djqs.yml +0 -0
  28. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/config.jsonschema +0 -0
  29. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/__init__.py +0 -0
  30. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/api/__init__.py +0 -0
  31. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/api/catalogs.py +0 -0
  32. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/api/engines.py +0 -0
  33. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/api/helpers.py +0 -0
  34. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/api/tables.py +0 -0
  35. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/config.py +0 -0
  36. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/enum.py +0 -0
  37. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/exceptions.py +0 -0
  38. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/fixes.py +0 -0
  39. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/models/__init__.py +0 -0
  40. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/models/catalog.py +0 -0
  41. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/models/engine.py +0 -0
  42. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/models/query.py +0 -0
  43. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/models/table.py +0 -0
  44. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/typing.py +0 -0
  45. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/djqs/utils.py +0 -0
  46. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/cockroachdb/cockroachdb_examples_init.sql +0 -0
  47. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/cockroachdb/cockroachdb_metadata_init.sql +0 -0
  48. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/cockroachdb/steam-games.csv +0 -0
  49. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/cockroachdb/steam-hours-played.csv +0 -0
  50. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/default.duckdb +0 -0
  51. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/druid_environment +0 -0
  52. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/druid_init.sh +0 -0
  53. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/druid_spec.json +0 -0
  54. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/duckdb.sql +0 -0
  55. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/duckdb_load.py +0 -0
  56. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/postgres_init.roads.sql +0 -0
  57. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/postgres_init.sql +0 -0
  58. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/spark.roads.sql +0 -0
  59. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/spark_load_roads.py +0 -0
  60. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/docker/wait-for +0 -0
  61. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/openapi.json +0 -0
  62. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/pdm.lock +0 -0
  63. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/pyproject.toml +0 -0
  64. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/scripts/generate-openapi.py +0 -0
  65. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/setup.cfg +0 -0
  66. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/__init__.py +0 -0
  67. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/api/__init__.py +0 -0
  68. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/api/catalogs_test.py +0 -0
  69. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/api/engines_test.py +0 -0
  70. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/api/table_test.py +0 -0
  71. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/config.djqs.yml +0 -0
  72. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/configs/databases/druid.yaml +0 -0
  73. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/configs/databases/gsheets.yaml +0 -0
  74. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/configs/databases/postgres.yaml +0 -0
  75. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/configs/nodes/core/comments.yaml +0 -0
  76. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/configs/nodes/core/dim_users.yaml +0 -0
  77. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/configs/nodes/core/num_comments.yaml +0 -0
  78. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/configs/nodes/core/users.yaml +0 -0
  79. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/conftest.py +0 -0
  80. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/exceptions_test.py +0 -0
  81. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/contractors.parquet +0 -0
  82. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/dispatchers.parquet +0 -0
  83. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/hard_hat_state.parquet +0 -0
  84. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/hard_hats.parquet +0 -0
  85. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/municipality.parquet +0 -0
  86. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/municipality_municipality_type.parquet +0 -0
  87. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/municipality_type.parquet +0 -0
  88. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/repair_order_details.parquet +0 -0
  89. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/repair_orders.parquet +0 -0
  90. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/repair_type.parquet +0 -0
  91. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/us_region.parquet +0 -0
  92. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/resources/us_states.parquet +0 -0
  93. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tests/utils_test.py +0 -0
  94. {datajunction_query-0.0.1a61 → datajunction_query-0.0.1a62}/tox.ini +0 -0
@@ -69,7 +69,7 @@ repos:
69
69
  - types-requests
70
70
  - types-freezegun
71
71
  - types-python-dateutil
72
- - types-pkg_resources
72
+ - types-setuptools
73
73
  - types-PyYAML
74
74
  - types-tabulate
75
75
  - types-toml
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datajunction-query
3
- Version: 0.0.1a61
3
+ Version: 0.0.1a62
4
4
  Summary: OSS Implementation of a DataJunction Query Service
5
5
  Project-URL: repository, https://github.com/DataJunction/dj
6
6
  Author-email: DataJunction Authors <roberto@dealmeida.net>
@@ -1,4 +1,4 @@
1
1
  """
2
2
  Version for Hatch
3
3
  """
4
- __version__ = "0.0.1a61"
4
+ __version__ = "0.0.1a62"
@@ -32,7 +32,7 @@ async def lifespan(app: FastAPI): # pylint: disable=W0621,W0613
32
32
  """
33
33
  try:
34
34
  load_djqs_config(settings=settings, session=session)
35
- except Exception as e: # pylint: disable=W0718
35
+ except Exception as e: # pylint: disable=W0718,C0103
36
36
  _logger.warning("Could not load DJQS config: %s", e)
37
37
  yield
38
38
 
@@ -5,7 +5,7 @@ import json
5
5
  import logging
6
6
  import uuid
7
7
  from http import HTTPStatus
8
- from typing import Any, List, Optional
8
+ from typing import Any, Dict, List, Optional
9
9
 
10
10
  import msgpack
11
11
  from accept_types import get_best_match
@@ -105,6 +105,7 @@ async def submit_query( # pylint: disable=too-many-arguments
105
105
  settings,
106
106
  response,
107
107
  background_tasks,
108
+ request.headers,
108
109
  )
109
110
 
110
111
  return_type = get_best_match(accept, ["application/json", "application/msgpack"])
@@ -129,12 +130,13 @@ async def submit_query( # pylint: disable=too-many-arguments
129
130
  )
130
131
 
131
132
 
132
- def save_query_and_run(
133
+ def save_query_and_run( # pylint: disable=R0913
133
134
  create_query: QueryCreate,
134
135
  session: Session,
135
136
  settings: Settings,
136
137
  response: Response,
137
138
  background_tasks: BackgroundTasks,
139
+ headers: Optional[Dict[str, str]] = None,
138
140
  ) -> QueryResults:
139
141
  """
140
142
  Store a new query to the DB and run it.
@@ -147,12 +149,12 @@ def save_query_and_run(
147
149
  session.refresh(query)
148
150
 
149
151
  if query.async_:
150
- background_tasks.add_task(process_query, session, settings, query)
152
+ background_tasks.add_task(process_query, session, settings, query, headers)
151
153
 
152
154
  response.status_code = HTTPStatus.CREATED
153
155
  return QueryResults(results=[], errors=[], **query.dict())
154
156
 
155
- return process_query(session, settings, query)
157
+ return process_query(session, settings, query, headers)
156
158
 
157
159
 
158
160
  def load_query_results(
@@ -15,3 +15,6 @@ DEFAULT_DIMENSION_COLUMN = "id"
15
15
  # used by the SQLAlchemy client
16
16
  QUERY_EXECUTE_TIMEOUT = timedelta(seconds=60)
17
17
  GET_COLUMNS_TIMEOUT = timedelta(seconds=60)
18
+
19
+ # Request header configuration params
20
+ SQLALCHEMY_URI = "SQLALCHEMY_URI"
@@ -1,11 +1,10 @@
1
1
  """
2
2
  Query related functions.
3
3
  """
4
-
5
4
  import logging
6
5
  import os
7
6
  from datetime import datetime, timezone
8
- from typing import List, Tuple
7
+ from typing import Dict, List, Optional, Tuple
9
8
 
10
9
  import duckdb
11
10
  import snowflake.connector
@@ -14,6 +13,7 @@ from sqlalchemy import create_engine, text
14
13
  from sqlmodel import Session, select
15
14
 
16
15
  from djqs.config import Settings
16
+ from djqs.constants import SQLALCHEMY_URI
17
17
  from djqs.models.engine import Engine, EngineType
18
18
  from djqs.models.query import (
19
19
  ColumnMetadata,
@@ -66,9 +66,10 @@ def get_columns_from_description(
66
66
  return columns
67
67
 
68
68
 
69
- def run_query(
69
+ def run_query( # pylint: disable=R0914
70
70
  session: Session,
71
71
  query: Query,
72
+ headers: Optional[Dict[str, str]] = None,
72
73
  ) -> List[Tuple[str, List[ColumnMetadata], Stream]]:
73
74
  """
74
75
  Run a query and return its results.
@@ -76,13 +77,25 @@ def run_query(
76
77
  For each statement we return a tuple with the statement SQL, a description of the
77
78
  columns (name and type) and a stream of rows (tuples).
78
79
  """
80
+
79
81
  _logger.info("Running query on catalog %s", query.catalog_name)
82
+
80
83
  engine = session.exec(
81
84
  select(Engine)
82
85
  .where(Engine.name == query.engine_name)
83
86
  .where(Engine.version == query.engine_version),
84
87
  ).one()
85
- if engine.type == EngineType.DUCKDB:
88
+
89
+ query_server = headers.get("SQLALCHEMY_URI") if headers else None
90
+
91
+ if query_server:
92
+ _logger.info(
93
+ "Creating sqlalchemy engine using request header param %s",
94
+ SQLALCHEMY_URI,
95
+ )
96
+ sqla_engine = create_engine(query_server)
97
+ elif engine.type == EngineType.DUCKDB:
98
+ _logger.info("Creating duckdb connection")
86
99
  conn = (
87
100
  duckdb.connect()
88
101
  if engine.uri == "duckdb:///:memory:"
@@ -92,7 +105,8 @@ def run_query(
92
105
  )
93
106
  )
94
107
  return run_duckdb_query(query, conn)
95
- if engine.type == EngineType.SNOWFLAKE:
108
+ elif engine.type == EngineType.SNOWFLAKE:
109
+ _logger.info("Creating snowflake connection")
96
110
  conn = snowflake.connector.connect(
97
111
  **engine.extra_params,
98
112
  password=os.getenv("SNOWSQL_PWD"),
@@ -100,6 +114,10 @@ def run_query(
100
114
  cur = conn.cursor()
101
115
 
102
116
  return run_snowflake_query(query, cur)
117
+
118
+ _logger.info(
119
+ "Creating sqlalchemy engine using engine name and version defined on query",
120
+ )
103
121
  sqla_engine = create_engine(engine.uri, connect_args=engine.extra_params)
104
122
  connection = sqla_engine.connect()
105
123
 
@@ -152,6 +170,7 @@ def process_query(
152
170
  session: Session,
153
171
  settings: Settings,
154
172
  query: Query,
173
+ headers: Optional[Dict[str, str]] = None,
155
174
  ) -> QueryResults:
156
175
  """
157
176
  Process a query.
@@ -164,7 +183,11 @@ def process_query(
164
183
  query.started = datetime.now(timezone.utc)
165
184
  try:
166
185
  root = []
167
- for sql, columns, stream in run_query(session=session, query=query):
186
+ for sql, columns, stream in run_query(
187
+ session=session,
188
+ query=query,
189
+ headers=headers,
190
+ ):
168
191
  rows = list(stream)
169
192
  root.append(
170
193
  StatementResults(
@@ -86,6 +86,71 @@ def test_submit_query(session: Session, client: TestClient) -> None:
86
86
  assert data["errors"] == []
87
87
 
88
88
 
89
+ def test_submit_query_with_sqlalchemy_uri_header(
90
+ session: Session,
91
+ client: TestClient,
92
+ ) -> None:
93
+ """
94
+ Test ``POST /queries/`` with the SQLALCHEMY_URI defined in the header.
95
+ """
96
+ engine = Engine(
97
+ name="test_engine",
98
+ type=EngineType.DUCKDB,
99
+ version="1.0",
100
+ uri="duckdb:///:memory:",
101
+ )
102
+ catalog = Catalog(name="test_catalog", engines=[engine])
103
+ session.add(catalog)
104
+ session.commit()
105
+ session.refresh(catalog)
106
+
107
+ query_create = QueryCreate(
108
+ catalog_name=catalog.name,
109
+ engine_name=engine.name,
110
+ engine_version=engine.version,
111
+ submitted_query="SELECT 1 AS col",
112
+ )
113
+ payload = query_create.json(by_alias=True)
114
+ assert payload == json.dumps(
115
+ {
116
+ "catalog_name": "test_catalog",
117
+ "engine_name": "test_engine",
118
+ "engine_version": "1.0",
119
+ "submitted_query": "SELECT 1 AS col",
120
+ "async_": False,
121
+ },
122
+ )
123
+
124
+ with freeze_time("2021-01-01T00:00:00Z"):
125
+ response = client.post(
126
+ "/queries/",
127
+ data=payload,
128
+ headers={
129
+ "Content-Type": "application/json",
130
+ "Accept": "application/json",
131
+ "SQLALCHEMY_URI": "trino://example@foo.bar/catalog/schema",
132
+ },
133
+ )
134
+ data = response.json()
135
+
136
+ assert response.status_code == 200
137
+ assert data["catalog_name"] == "test_catalog"
138
+ assert data["engine_name"] == "test_engine"
139
+ assert data["engine_version"] == "1.0"
140
+ assert data["submitted_query"] == "SELECT 1 AS col"
141
+ assert data["executed_query"] == "SELECT 1 AS col"
142
+ assert data["scheduled"] == "2021-01-01T00:00:00"
143
+ assert data["started"] == "2021-01-01T00:00:00"
144
+ assert data["finished"] == "2021-01-01T00:00:00"
145
+ assert data["state"] == "FINISHED"
146
+ assert data["progress"] == 1.0
147
+ assert len(data["results"]) == 1
148
+ assert data["results"][0]["sql"] == "SELECT 1 AS col"
149
+ assert data["results"][0]["columns"] == [{"name": "col", "type": "STR"}]
150
+ assert data["results"][0]["rows"] == [[1]]
151
+ assert data["errors"] == []
152
+
153
+
89
154
  def test_submit_query_msgpack(session: Session, client: TestClient) -> None:
90
155
  """
91
156
  Test ``POST /queries/`` using msgpack.