quillsql 2.1.6__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quillsql/__init__.py +2 -1
- quillsql/assets/__init__.py +1 -1
- quillsql/assets/pgtypes.py +696 -2781
- quillsql/core.py +276 -51
- quillsql/db/__init__.py +1 -1
- quillsql/db/bigquery.py +108 -74
- quillsql/db/cached_connection.py +6 -5
- quillsql/db/db_helper.py +36 -17
- quillsql/db/postgres.py +94 -39
- quillsql/error.py +4 -4
- quillsql/utils/__init__.py +2 -1
- quillsql/utils/filters.py +180 -0
- quillsql/utils/run_query_processes.py +17 -16
- quillsql/utils/schema_conversion.py +6 -3
- quillsql/utils/tenants.py +60 -0
- quillsql-2.2.0.dist-info/METADATA +69 -0
- quillsql-2.2.0.dist-info/RECORD +19 -0
- {quillsql-2.1.6.dist-info → quillsql-2.2.0.dist-info}/WHEEL +1 -1
- quillsql-2.1.6.dist-info/METADATA +0 -72
- quillsql-2.1.6.dist-info/RECORD +0 -17
- {quillsql-2.1.6.dist-info → quillsql-2.2.0.dist-info}/top_level.txt +0 -0
quillsql/core.py
CHANGED
@@ -3,15 +3,23 @@ from dotenv import load_dotenv
 
 import requests
 import redis
+from .utils import Filter, convert_custom_filter
+import json
+from enum import Enum
 
 
 from quillsql.db.cached_connection import CachedConnection
-from quillsql.db.db_helper import
+from quillsql.db.db_helper import (
+    get_db_credentials,
+    get_schema_column_info_by_db,
+    get_schema_tables_by_db,
+)
 from quillsql.utils.schema_conversion import convert_type_to_postgres
 from quillsql.utils.run_query_processes import (
     array_to_map,
     remove_fields,
 )
+from quillsql.utils.tenants import extract_tenant_ids
 
 load_dotenv()
 
@@ -20,8 +28,11 @@ DEV_HOST = "http://localhost:8080"
 PROD_HOST = "https://quill-344421.uc.r.appspot.com"
 HOST = DEV_HOST if ENV == "development" else PROD_HOST
 
+SINGLE_TENANT = "QUILL_SINGLE_TENANT"
+ALL_TENANTS = "QUILL_ALL_TENANTS"
+FLAG_TASKS = {'dashboard', 'report', 'item', 'report-info', 'filter-options'}
 
-
+# Quill - Fullstack API Platform for Dashboards and Reporting.
 class Quill:
     def __init__(
         self,
@@ -32,13 +43,27 @@ class Quill:
         metadataServerURL=None,
         cache=None,
     ):
+        if private_key is None:
+            raise ValueError("Private key is required")
+        if database_type is None:
+            raise ValueError("Database type is required")
+        if database_connection_string is None and database_config is None:
+            raise ValueError("You must provide either DatabaseConnectionString or DatabaseConfig")
+
         # Handles both dsn-style connection strings (eg. "dbname=test password=secret" )
         # as well as url-style connection strings (eg. "postgres://foo@db.com")
-        self.baseUrl = metadataServerURL if metadataServerURL
-        if database_connection_string
-
+        self.baseUrl = metadataServerURL if metadataServerURL is not None else HOST
+        if database_connection_string is not None:
+            self.target_connection = CachedConnection(
+                database_type,
+                get_db_credentials(database_type, database_connection_string),
+                cache,
+                True,
+            )
         else:
-
+            self.target_connection = CachedConnection(
+                database_type, database_config, cache, False
+            )
         self.private_key = private_key
 
     def get_cache(self, cache_config):
@@ -52,47 +77,126 @@ class Quill:
         )
         return None
 
-    def query(self,
-
+    def query(self, tenants, metadata, flags = None, filters: list[Filter] = None):
+        if not tenants:
+            raise ValueError("You may not pass an empty tenants array.")
+        if flags and not flags:
+            raise ValueError("You may not pass an empty flags array.")
+
+        responseMetadata = {}
         if not metadata:
-            return {"error": "
+            return {"error": "Missing metadata.", "status": "error", "data": {}}
 
         task = metadata.get("task")
         if not task:
-            return {"error": "
+            return {"error": "Missing task.", "status": "error", "data": {}}
 
         try:
-
-
-
-
-
-
-
-
-
-
-
-
+            # Set tenant IDs in the connection
+            self.target_connection.tenant_ids = extract_tenant_ids(tenants)
+
+            # Handle tenant flags synthesis
+            tenant_flags = None
+            if (task in FLAG_TASKS and
+                tenants[0] != ALL_TENANTS and
+                tenants[0] != SINGLE_TENANT
+            ):
+
+                response = self.post_quill('tenant-mapped-flags', {
+                    'reportId': metadata.get('reportId') or metadata.get('dashboardItemId'),
+                    'clientId': metadata.get('clientId'),
+                    'dashboardName': metadata.get('name'),
+                    'tenants': tenants,
+                    'flags': flags,
+                })
+
+                if response.get('error'):
+                    return {
+                        'status': 'error',
+                        'error': response.get('error'),
+                        'data': response.get('metadata') or {},
+                    }
+
+                flag_query_results = self.run_queries(
+                    response.get('queries'),
+                    self.target_connection.database_type,
+                )
+
+                tenant_flags = [
+                    {
+                        'tenantField': tenant_field,
+                        'flags': list(set(row['quill_flag'] for row in query_result['rows']))
+                    }
+                    for tenant_field, query_result in zip(
+                        response['metadata']['queryOrder'],
+                        flag_query_results['queryResults']
+                    )
+                ]
+            elif tenants[0] == SINGLE_TENANT and flags:
+                if flags and isinstance(flags[0], dict):
+                    tenant_flags = [{'tenantField': SINGLE_TENANT, 'flags': flags}]
+                else:
+                    tenant_flags = flags
+
+            pre_query_results = (
+                self.run_queries(
+                    metadata.get("preQueries"),
+                    self.target_connection.database_type,
+                    metadata.get("databaseType"),
+                    metadata,
+                    metadata.get("runQueryConfig"),
+                )
+                if metadata.get("preQueries")
+                else {}
+            )
+
+            if metadata.get("runQueryConfig") and metadata.get("runQueryConfig").get(
+                "overridePost"
+            ):
+                return {"data": pre_query_results, "status": "success"}
             view_query = None
-            if
+            if metadata.get("preQueries"):
                 view_query = metadata.get("preQueries")[0]
             payload = {
                 **metadata,
-
-                "
-                "
+                **pre_query_results,
+                "tenants": tenants,
+                "flags": tenant_flags,
+                "viewQuery": view_query,
             }
+            if filters is not None:
+                payload["sdkFilters"] = [convert_custom_filter(f) for f in filters]
             quill_results = self.post_quill(metadata.get("task"), payload)
             if quill_results.get("error"):
-
-
+                responseMetadata = quill_results.get("metadata")
+                response = {
+                    "error": quill_results.get("error"),
+                    "status": "error",
+                    "data": {},
+                }
+                if responseMetadata:
+                    response["data"] = responseMetadata
+                return response
+
+            # If there is no metadata in the quill results, create one
            if not quill_results.get("metadata"):
                quill_results["metadata"] = {}
            metadata = quill_results.get("metadata")
+            responseMetadata = metadata
            final_query_results = self.run_queries(
-                quill_results.get("queries"),
+                quill_results.get("queries"),
+                self.target_connection.database_type,
+                metadata.get("databaseType"),
+                metadata,
+                metadata.get("runQueryConfig"),
            )
+
+            if final_query_results.get("mapped_array") and metadata.get("runQueryConfig", {}).get("arrayToMap"):
+                array_to_map = metadata["runQueryConfig"]["arrayToMap"]
+                for array, index in zip(final_query_results["mapped_array"], range(len(final_query_results["mapped_array"]))):
+                    responseMetadata[array_to_map["arrayName"]][index][array_to_map["field"]] = array
+                del final_query_results["mapped_array"]
+
             # Quick hack to make the sdk work with the Frontend
             if len(final_query_results.get("queryResults")) == 1:
                 query_result = final_query_results.get("queryResults")[0]
@@ -105,46 +209,158 @@ class Quill:
             }
 
         except Exception as err:
-
+            if task == "update-view":
+                self.post_quill("set-broken-view", {
+                    "table": metadata.get("name"),
+                    "clientId": metadata.get("clientId"),
+                    "error": str(err),
+                })
+            return {
+                "error": str(err).splitlines()[0],
+                "status": "error",
+                "data": responseMetadata,
+            }
+
+    def apply_limit(self, query, limit):
+        # Simple logic: if query already has a limit, don't add another
+        if getattr(self.target_connection, 'database_type', '').lower() == 'mssql':
+            import re
+            if re.search(r'SELECT TOP \\d+', query, re.IGNORECASE):
+                return query
+            return re.sub(r'select', f'SELECT TOP {limit}', query, flags=re.IGNORECASE)
+        else:
+            if 'limit ' in query.lower():
+                return query
+            return f"{query.rstrip(';')} limit {limit}"
 
-    def run_queries(
+    def run_queries(
+        self, queries, pkDatabaseType, databaseType=None, metadata=None, runQueryConfig=None
+    ):
         results = {}
         if not queries:
             return {"queryResults": []}
         if databaseType and databaseType.lower() != pkDatabaseType.lower():
             return {"dbMismatched": True, "backendDatabaseType": pkDatabaseType}
-        if runQueryConfig and runQueryConfig.get("getColumnsForSchema"):
-            return {"queryResults": []}
         if runQueryConfig and runQueryConfig.get("arrayToMap"):
-            array_to_map(
-                queries,
+            mapped_array = array_to_map(
+                queries,
+                runQueryConfig.get("arrayToMap"),
+                metadata,
+                self.target_connection,
             )
-
+
+            return {"queryResults": [], "mapped_array": mapped_array}
         elif runQueryConfig and runQueryConfig.get("getColumns"):
-            query_results = self.target_connection.query(
-
+            query_results = self.target_connection.query(
+                queries[0].strip().rstrip(";") + " limit 1000"
+            )
+            results["columns"] = [
+                {
+                    "fieldType": convert_type_to_postgres(result["dataTypeID"]),
+                    "name": result["name"],
+                    "displayName": result["name"],
+                    "isVisible": True,
+                    "field": result["name"],
+                }
+                for result in query_results["fields"]
+            ]
+        elif runQueryConfig and runQueryConfig.get("getColumnsForSchema"):
+            query_results = []
+            for table in queries:
+                if not table.get("viewQuery") or (
+                    not table.get("isSelectStar") and not table.get("customFieldInfo")
+                ):
+                    query_results.append(table)
+                    continue
+
+                limit = ""
+                if runQueryConfig.get("limitBy"):
+                    limit = f" limit {runQueryConfig.get('limitBy')}"
+
+                try:
+                    query_result = self.target_connection.query(
+                        f"{table['viewQuery'].strip().rstrip(';')} {limit}"
+                    )
+                    columns = [
+                        {
+                            "fieldType": convert_type_to_postgres(field["dataTypeID"]),
+                            "name": field["name"],
+                            "displayName": field["name"],
+                            "isVisible": True,
+                            "field": field["name"],
+                        }
+                        for field in query_result["fields"]
+                    ]
+                    query_results.append(
+                        {**table, "columns": columns, "rows": query_result["rows"]}
+                    )
+                except Exception as e:
+                    query_results.append(
+                        {**table, "error": f"Error fetching columns {e}"}
+                    )
+
+            results["queryResults"] = query_results
+            if runQueryConfig.get("fieldsToRemove"):
+                results["queryResults"] = [
+                    {
+                        **table,
+                        "columns": [
+                            column
+                            for column in table.get("columns", [])
+                            if column["name"] not in runQueryConfig["fieldsToRemove"]
+                        ],
+                    }
+                    for table in query_results
+                ]
+            return results
         elif runQueryConfig and runQueryConfig.get("getTables"):
             tables = get_schema_tables_by_db(
-
-
-
+                self.target_connection.database_type,
+                self.target_connection.connection,
+                runQueryConfig["schemaNames"],
             )
             schema = get_schema_column_info_by_db(
-
-
-
-
+                self.target_connection.database_type,
+                self.target_connection.connection,
+                runQueryConfig["schemaNames"],
+                tables,
             )
             results["queryResults"] = schema
+        elif runQueryConfig and runQueryConfig.get("runIndividualQueries"):
+            # so that one query doesn't fail the whole thing
+            # the only reason this isn't the default behavior is for backwards compatibility
+            query_results = []
+            for query in queries:
+                try:
+                    run_query = query
+                    if runQueryConfig.get("limitBy"):
+                        run_query = self.apply_limit(query, runQueryConfig["limitBy"])
+                    query_result = self.target_connection.query(run_query)
+                    query_results.append(query_result)
+                except Exception as e:
+                    query_results.append({
+                        "query": query,
+                        "error": str(e),
+                    })
+            results["queryResults"] = query_results
         else:
             if runQueryConfig and runQueryConfig.get("limitThousand"):
-                queries = [
+                queries = [
+                    query.strip().rstrip(";") + " limit 1000" for query in queries
+                ]
             elif runQueryConfig and runQueryConfig.get("limitBy"):
-                queries = [
+                queries = [
+                    query.strip().rstrip(";")
+                    + f" limit {runQueryConfig.get('limitBy')}"
+                    for query in queries
+                ]
             query_results = [self.target_connection.query(query) for query in queries]
             results["queryResults"] = query_results
             if runQueryConfig and runQueryConfig.get("fieldsToRemove"):
-                results["queryResults"] = [
+                results["queryResults"] = [
+                    remove_fields(query_result, runQueryConfig.get("fieldsToRemove"))
+                    for query_result in results["queryResults"]
+                ]
             if runQueryConfig and runQueryConfig.get("convertDatatypes"):
                 for query_result in results["queryResults"]:
                     query_result["fields"] = [
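The apply_limit helper added in the hunk above either rewrites the query with SQL Server's TOP syntax or appends a trailing limit, skipping queries that already carry one. A minimal sketch of the non-MSSQL branch, restated as a standalone function purely for illustration (not the SDK method itself):

# Standalone restatement of apply_limit's non-MSSQL branch, for illustration only.
def append_limit(query: str, limit: int) -> str:
    if "limit " in query.lower():
        return query  # query already carries a LIMIT clause
    return f"{query.rstrip(';')} limit {limit}"

print(append_limit("select * from users;", 100))
# -> select * from users limit 100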
@@ -160,9 +376,18 @@ class Quill:
                     ]
 
         return results
-
+
     def post_quill(self, path, payload):
+        # Custom JSON Encoder to handle Enums
+        class EnumEncoder(json.JSONEncoder):
+            def default(self, obj):
+                if isinstance(obj, Enum):
+                    return obj.value  # Convert enum to its value (string in this case)
+                return super().default(obj)
+
         url = f"{self.baseUrl}/sdk/{path}"
-
-
+        # Set content type to application/json
+        headers = {"Authorization": f"Bearer {self.private_key}", "Content-Type": "application/json"}
+        encoded = json.dumps(payload, cls=EnumEncoder)
+        response = requests.post(url, data=encoded, headers=headers)
         return response.json()
quillsql/db/__init__.py
CHANGED
quillsql/db/bigquery.py
CHANGED
@@ -3,112 +3,146 @@ from google.oauth2 import service_account
 import json
 import re
 
+
 def format_bigquery_config(connection_string):
     # find the start of the json {
-    json_start = connection_string.find(
-    if
-
-
+    json_start = connection_string.find("{")
+    if json_start == -1:
+        raise Exception("Invalid input string. No JSON data found.")
+
     dataset_name = connection_string[0:json_start].strip()
     json_string = connection_string[json_start:]
     try:
-
-
-
-
-
-
-
-
-
-
-
+        service_account = json.loads(json_string)
+        if not service_account.get("project_id") or not service_account.get(
+            "private_key"
+        ):
+            raise Exception(
+                "Invalid service account JSON. Required fields are missing."
+            )
+
+        return {
+            "dataset_id": dataset_name,
+            "project": service_account.get("project_id"),
+            "credentials": service_account,
+        }
+    except (ValueError, TypeError) as e:
+        print("Invalid JSON string: ", e)
     return connection_string
 
+
 def connect_to_bigquery(config, using_connection_string):
     if using_connection_string:
-
+        credentials = service_account.Credentials.from_service_account_info(
+            config["credentials"]
+        )
     else:
-
-
+        credentials = service_account.Credentials.from_service_account_file(
+            config["service_account_file_path"]
+        )
+    return bigquery.Client(project=config["project"], credentials=credentials)
+
 
 def run_query_big_query(query, connection):
     query_job = connection.query(query)
     result = query_job.result()
     rows = [dict(row) for row in result]
-    fields = [
+    fields = [
+        {
+            "name": field.name,
+            "dataTypeID": convert_bigquery_to_postgres(field.field_type),
+        }
+        for field in result.schema
+    ]
     # TODO CONVERT to postgres types
 
     return {"rows": rows, "fields": fields}
 
+
 def get_tables_by_schema_big_query(connection, schema_names):
     all_table = []
     for schema_name in schema_names:
-
-
-
-
-
-
-
+        dataset_ref = connection.dataset(schema_name)
+        tables = connection.list_tables(dataset_ref)
+        for table in tables:
+            cur_table = {}
+            cur_table["table_name"] = table.table_id
+            cur_table["schema_name"] = schema_name
+            all_table.append(cur_table)
     return all_table
 
+
 def get_schema_column_info_big_query(connection, schema_name, table_names):
     all_columns = []
     for table_name in table_names:
-        table_ref = connection.dataset(table_name[
+        table_ref = connection.dataset(table_name["schema_name"]).table(
+            table_name["table_name"]
+        )
         table = connection.get_table(table_ref)
         columns = []
         for field in table.schema:
-            columns.append(
-
-
-
-
-
-
-
-
-
-
+            columns.append(
+                {
+                    "columnName": field.name,
+                    "displayName": field.name,
+                    "dataTypeId": convert_bigquery_to_postgres(field.field_type),
+                    "fieldType": field.field_type,
+                }
+            )
+        all_columns.append(
+            {
+                "tableName": table_name["schema_name"] + "." + table_name["table_name"],
+                "displayName": table_name["schema_name"]
+                + "."
+                + table_name["table_name"],
+                "columns": columns,
+            }
+        )
     return all_columns
 
+
 def infer_schema_big_query(elem):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # compare elem with regex
+    if isinstance(elem, list):
+        return 23
+    if isinstance(elem, object):
+        if re.match(r"/^\d{4}-\d{2}-\d{2}$/", elem.get("value")):
+            return 1082
+        elif re.match(r"/^\d{2}\/\d{2}\/\d{2,4}$/", elem.get("value")):
+            return 1082
+        elif re.match(
+            r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$/", elem.get("value")
+        ):
+            return 1184
+        elif re.match(
+            r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$/", elem.get("value")
+        ):
+            return 1114
+        elif re.match(r"/^\d{2}:\d{2}:\d{2}$/", elem.get("value")):
+            return 1083
+    if isinstance(elem, str):
+        if re.match(r"/^\d{4}-\d{2}-\d{2}$/", elem):
+            return 1082
+        elif re.match(r"/^\d{2}\/\d{2}\/\d{2,4}$/", elem):
+            return 1082
+        elif re.match(r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$/", elem):
+            return 1184
+        elif re.match(r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$/", elem):
+            return 1114
+        elif re.match(r"/^\d{2}:\d{2}:\d{2}$/", elem):
+            return 1083
+        else:
+            return 1043
+    return 1043
+
 
 def convert_bigquery_to_postgres(value):
-
-
-
-
-
-
-
-
+    type_to_oid = {
+        "VARCHAR": 1043,
+        "INTEGER": 23,
+        "FLOAT": 700,
+        "TIMESTAMP": 1114,
+        "DATE": 1082,
+        "BOOL": 16,
+    }
+    return type_to_oid.get(value.upper()) or 1043