quillsql 2.1.6__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
quillsql/db/bigquery.py CHANGED
@@ -3,112 +3,146 @@ from google.oauth2 import service_account
 import json
 import re
 
+
 def format_bigquery_config(connection_string):
     # find the start of the json {
-    json_start = connection_string.find('{')
-    if (json_start == -1) :
-        raise Exception("Invalid input string. No JSON data found.");
-
+    json_start = connection_string.find("{")
+    if json_start == -1:
+        raise Exception("Invalid input string. No JSON data found.")
+
     dataset_name = connection_string[0:json_start].strip()
     json_string = connection_string[json_start:]
     try:
-        service_account = json.loads(json_string)
-        if (not service_account.get('project_id') or not service_account.get('private_key')) :
-            raise Exception( "Invalid service account JSON. Required fields are missing.")
-
-        return {
-            "dataset_id": dataset_name,
-            "project": service_account.get('project_id'),
-            "credentials": service_account,
-        }
-    except:
-        print('Invalid JSON string')
+        service_account = json.loads(json_string)
+        if not service_account.get("project_id") or not service_account.get(
+            "private_key"
+        ):
+            raise Exception(
+                "Invalid service account JSON. Required fields are missing."
+            )
+
+        return {
+            "dataset_id": dataset_name,
+            "project": service_account.get("project_id"),
+            "credentials": service_account,
+        }
+    except (ValueError, TypeError) as e:
+        print("Invalid JSON string: ", e)
     return connection_string
 
+
 def connect_to_bigquery(config, using_connection_string):
     if using_connection_string:
-        credentials = service_account.Credentials.from_service_account_info(config['credentials'])
+        credentials = service_account.Credentials.from_service_account_info(
+            config["credentials"]
+        )
     else:
-        credentials = service_account.Credentials.from_service_account_file(config['service_account_file_path'])
-    return bigquery.Client(project=config['project'], credentials=credentials)
+        credentials = service_account.Credentials.from_service_account_file(
+            config["service_account_file_path"]
+        )
+    return bigquery.Client(project=config["project"], credentials=credentials)
+
 
 def run_query_big_query(query, connection):
     query_job = connection.query(query)
     result = query_job.result()
     rows = [dict(row) for row in result]
-    fields = [{"name": field.name, "dataTypeID": convert_bigquery_to_postgres(field.field_type)} for field in result.schema]
+    fields = [
+        {
+            "name": field.name,
+            "dataTypeID": convert_bigquery_to_postgres(field.field_type),
+        }
+        for field in result.schema
+    ]
     # TODO CONVERT to postgres types
 
     return {"rows": rows, "fields": fields}
 
+
 def get_tables_by_schema_big_query(connection, schema_names):
     all_table = []
     for schema_name in schema_names:
-        dataset_ref = connection.dataset(schema_name)
-        tables = connection.list_tables(dataset_ref)
-        for table in tables:
-            cur_table = {}
-            cur_table['table_name'] = table.table_id
-            cur_table['schema_name'] = schema_name
-            all_table.append(cur_table)
+        dataset_ref = connection.dataset(schema_name)
+        tables = connection.list_tables(dataset_ref)
+        for table in tables:
+            cur_table = {}
+            cur_table["table_name"] = table.table_id
+            cur_table["schema_name"] = schema_name
+            all_table.append(cur_table)
     return all_table
 
+
 def get_schema_column_info_big_query(connection, schema_name, table_names):
     all_columns = []
     for table_name in table_names:
-        table_ref = connection.dataset(table_name['schema_name']).table(table_name['table_name'])
+        table_ref = connection.dataset(table_name["schema_name"]).table(
+            table_name["table_name"]
+        )
         table = connection.get_table(table_ref)
         columns = []
         for field in table.schema:
-            columns.append({
-                'columnName': field.name,
-                'displayName': field.name,
-                'dataTypeId': convert_bigquery_to_postgres(field.field_type),
-                'fieldType': field.field_type
-            })
-        all_columns.append({
-            'tableName': table_name['schema_name']+'.'+table_name['table_name'],
-            'displayName': table_name['schema_name']+'.'+table_name['table_name'],
-            'columns': columns
-        })
+            columns.append(
+                {
+                    "columnName": field.name,
+                    "displayName": field.name,
+                    "dataTypeId": convert_bigquery_to_postgres(field.field_type),
+                    "fieldType": field.field_type,
+                }
+            )
+        all_columns.append(
+            {
+                "tableName": table_name["schema_name"] + "." + table_name["table_name"],
+                "displayName": table_name["schema_name"]
+                + "."
+                + table_name["table_name"],
+                "columns": columns,
+            }
+        )
     return all_columns
 
+
 def infer_schema_big_query(elem):
-    # compare elem with regex
-    if isinstance(elem, list):
-        return 23
-    if isinstance(elem, object):
-        if re.match(r"/^\d{4}-\d{2}-\d{2}$/", elem.get('value')):
-            return 1082
-        elif re.match(r"/^\d{2}\/\d{2}\/\d{2,4}$/", elem.get('value')):
-            return 1082
-        elif re.match(r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$/", elem.get('value')):
-            return 1184
-        elif re.match(r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$/", elem.get('value')):
-            return 1114
-        elif re.match(r"/^\d{2}:\d{2}:\d{2}$/", elem.get('value')):
-            return 1083
-    if isinstance(elem, str):
-        if re.match(r"/^\d{4}-\d{2}-\d{2}$/", elem):
-            return 1082
-        elif re.match(r"/^\d{2}\/\d{2}\/\d{2,4}$/", elem):
-            return 1082
-        elif re.match(r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$/", elem):
-            return 1184
-        elif re.match(r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$/", elem):
-            return 1114
-        elif re.match(r"/^\d{2}:\d{2}:\d{2}$/", elem):
-            return 1083
-        else:
-            return 1043
-    return 1043
+    # compare elem with regex
+    if isinstance(elem, list):
+        return 23
+    if isinstance(elem, object):
+        if re.match(r"/^\d{4}-\d{2}-\d{2}$/", elem.get("value")):
+            return 1082
+        elif re.match(r"/^\d{2}\/\d{2}\/\d{2,4}$/", elem.get("value")):
+            return 1082
+        elif re.match(
+            r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$/", elem.get("value")
+        ):
+            return 1184
+        elif re.match(
+            r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$/", elem.get("value")
+        ):
+            return 1114
+        elif re.match(r"/^\d{2}:\d{2}:\d{2}$/", elem.get("value")):
+            return 1083
+    if isinstance(elem, str):
+        if re.match(r"/^\d{4}-\d{2}-\d{2}$/", elem):
+            return 1082
+        elif re.match(r"/^\d{2}\/\d{2}\/\d{2,4}$/", elem):
+            return 1082
+        elif re.match(r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$/", elem):
+            return 1184
+        elif re.match(r"/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$/", elem):
+            return 1114
+        elif re.match(r"/^\d{2}:\d{2}:\d{2}$/", elem):
+            return 1083
+        else:
+            return 1043
+    return 1043
+
 
 def convert_bigquery_to_postgres(value):
-    type_to_oid = {
-        "VARCHAR": 1043,
-        "INTEGER": 23,
-        "FLOAT": 700,
-        "TIMESTAMP": 1114,
-        "DATE": 1082,
-    }
-    return type_to_oid.get(value.upper()) or 1043
+    type_to_oid = {
+        "VARCHAR": 1043,
+        "INTEGER": 23,
+        "FLOAT": 700,
+        "TIMESTAMP": 1114,
+        "DATE": 1082,
+        "BOOL": 16,
+    }
+    return type_to_oid.get(value.upper()) or 1043
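Note (not part of the diff): a minimal sketch of the parsing and type-mapping behavior shown above. The connection string and service-account JSON are dummy values; only project_id and private_key are required by the validation in format_bigquery_config.

from quillsql.db.bigquery import format_bigquery_config, convert_bigquery_to_postgres

conn_str = 'my_dataset {"project_id": "demo-project", "private_key": "-----BEGIN PRIVATE KEY-----..."}'
config = format_bigquery_config(conn_str)
print(config["dataset_id"])  # "my_dataset"
print(config["project"])     # "demo-project"

# 2.2.1 adds a BOOL mapping; unknown types still fall back to VARCHAR (1043).
print(convert_bigquery_to_postgres("BOOL"))    # 16
print(convert_bigquery_to_postgres("STRUCT"))  # 1043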
@@ -1,11 +1,10 @@
 import psycopg2
-from psycopg2.extras import RealDictCursor
 import json
 import redis
 from quillsql.error import PgQueryError
 from quillsql.db.db_helper import connect_to_db, run_query_by_db
 
-## The TTL for new cache entries (default: 1h)
+# The TTL for new cache entries (default: 1h)
 DEFAULT_CACHE_TTL = 24 * 60 * 60
 
 
@@ -33,7 +32,9 @@ class CachedConnection:
     def exec_with_reconnect(self, sql):
         reconnect_count = 0
         while True:
-            self.connection = connect_to_db(self.database_type, self.config, self.using_connection_string)
+            self.connection = connect_to_db(
+                self.database_type, self.config, self.using_connection_string
+            )
             try:
                 return run_query_by_db(self.database_type, sql, self.connection)
             except psycopg2.Error as err:
@@ -57,9 +58,9 @@ class CachedConnection:
         cached_result = self.cache.get(key)
         if cached_result:
             cached = json.loads(cached_result)
-            return {"rows": cached, "fields": cached['fields']}
+            return {"rows": cached, "fields": cached["fields"]}
         else:
             new_result = self.exec(sql)
             new_result_string = json.dumps(new_result)
             self.cache.set(key, new_result_string, "EX", DEFAULT_CACHE_TTL)
-            return {"rows": new_result, "fields": new_result['fields']}
+            return {"rows": new_result, "fields": new_result["fields"]}
quillsql/db/db_helper.py CHANGED
@@ -1,42 +1,61 @@
-from quillsql.db.postgres import format_postgres, connect_to_postgres, get_schema_column_info_postgres, get_tables_by_schema_postgres, run_query_postgres, disconnect_from_postgres
-from quillsql.db.bigquery import format_bigquery_config, connect_to_bigquery, get_schema_column_info_big_query, get_tables_by_schema_big_query, run_query_big_query
+from quillsql.db.postgres import (
+    format_postgres,
+    connect_to_postgres,
+    get_schema_column_info_postgres,
+    get_tables_by_schema_postgres,
+    run_query_postgres,
+    disconnect_from_postgres,
+)
+from quillsql.db.bigquery import (
+    format_bigquery_config,
+    connect_to_bigquery,
+    get_schema_column_info_big_query,
+    get_tables_by_schema_big_query,
+    run_query_big_query,
+)
+
 
 def get_db_credentials(database_type, connection_string):
-    if(database_type.lower() == 'postgresql'):
-        return format_postgres(connection_string)
-    elif(database_type.lower() == 'bigquery'):
-        return format_bigquery_config(connection_string)
-    return {}
+    if database_type.lower() == "postgresql":
+        return format_postgres(connection_string)
+    elif database_type.lower() == "bigquery":
+        return format_bigquery_config(connection_string)
+    return {}
+
 
 def connect_to_db(database_type, config, using_connection_string):
-    if(database_type.lower() == 'postgresql'):
+    if database_type.lower() == "postgresql":
         return connect_to_postgres(config, using_connection_string)
-    elif(database_type.lower() == 'bigquery'):
+    elif database_type.lower() == "bigquery":
         return connect_to_bigquery(config, using_connection_string)
     return None
 
+
 def run_query_by_db(database_type, query, connection):
-    if (database_type.lower() == 'postgresql'):
+    if database_type.lower() == "postgresql":
         return run_query_postgres(query, connection)
-    elif (database_type.lower() == 'bigquery'):
+    elif database_type.lower() == "bigquery":
         return run_query_big_query(query, connection)
     return None
 
+
 def disconnect_from_db(database_type, connection):
-    if (database_type.lower() == 'postgresql'):
+    if database_type.lower() == "postgresql":
         return disconnect_from_postgres(connection)
     return None
 
+
 def get_schema_tables_by_db(database_type, connection, schema_name):
-    if (database_type.lower() == 'postgresql'):
+    if database_type.lower() == "postgresql":
         return get_tables_by_schema_postgres(connection, schema_name)
-    elif (database_type.lower() == 'bigquery'):
+    elif database_type.lower() == "bigquery":
         return get_tables_by_schema_big_query(connection, schema_name)
     return None
 
+
 def get_schema_column_info_by_db(database_type, connection, schema_name, table_names):
-    if (database_type.lower() == 'postgresql'):
+    if database_type.lower() == "postgresql":
         return get_schema_column_info_postgres(connection, schema_name, table_names)
-    elif (database_type.lower() == 'bigquery'):
+    elif database_type.lower() == "bigquery":
         return get_schema_column_info_big_query(connection, schema_name, table_names)
-    return None
+    return None
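Note (not part of the diff): these helpers dispatch on a lower-cased database_type, so casing of the argument does not matter. An illustrative call sequence against the functions above (the DSN is a placeholder):

from quillsql.db.db_helper import (
    get_db_credentials,
    connect_to_db,
    run_query_by_db,
    disconnect_from_db,
)

dsn = get_db_credentials("POSTGRESQL", "postgres://user:pass@localhost:5432/mydb")
conn = connect_to_db("PostgreSQL", dsn, True)
result = run_query_by_db("postgresql", "SELECT 1 AS one", conn)
print(result["fields"])  # [{'name': 'one', 'dataTypeID': 23}]
disconnect_from_db("postgresql", conn)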
quillsql/db/postgres.py CHANGED
@@ -1,72 +1,127 @@
+import os
 import psycopg2
 from psycopg2.extensions import make_dsn
 from quillsql.assets.pgtypes import PG_TYPES
 
-def format_postgres(connection_string ):
-    to_dsn = lambda conn: make_dsn(conn) if "://" in conn else conn
+
+def format_postgres(connection_string):
+    def to_dsn(conn):
+        return make_dsn(conn) if "://" in conn else conn
+
     return to_dsn(connection_string)
 
+
 def connect_to_postgres(config, usingConnectionString):
+    os.environ['PGGSSENCMODE'] = 'disable'  # https://github.com/psycopg/psycopg2/issues/1084
     if usingConnectionString:
-        return psycopg2.connect(config)
+        return psycopg2.connect(config)
     else:
-        return psycopg2.connect(
-            database=config['dbname'],
-            user=config['user'],
-            password=config['password'],
-            host=config['host'],
-            port=config['port']
-        )
+        return psycopg2.connect(
+            database=config["dbname"],
+            user=config["user"],
+            password=config["password"],
+            host=config["host"],
+            port=config["port"],
+        )
+
 
 def run_query_postgres(query, connection):
     cursor = connection.cursor()
     cursor.execute(query)
     result = cursor.fetchall()
-    fields = [
-        {"name": desc[0], "dataTypeID": desc[1]} for desc in cursor.description
-    ]
+    fields = [{"name": desc[0], "dataTypeID": desc[1]} for desc in cursor.description]
     cursor.close()
-    rows_dict = [dict(zip([field['name'] for field in fields], row)) for row in result]
+    rows_dict = [dict(zip([field["name"] for field in fields], row)) for row in result]
     return {"rows": rows_dict, "fields": fields}
 
+
 def disconnect_from_postgres(connection):
     connection.close()
     return
 
+
 # getTablesBySchemaPostgres
+
+
 def get_tables_by_schema_postgres(connection, schema_names):
     all_tables = []
     for schema_name in schema_names:
-        query = f"SELECT table_name, table_schema FROM information_schema.tables WHERE table_schema = '{schema_name}'"
-        results = run_query_postgres(query, connection)
-        for row in results['rows']:
-            cur_table = {}
-            cur_table['table_name'] = row['table_name']
-            cur_table['schema_name'] = row['table_schema']
-            all_tables.append(cur_table)
+        query = f"""
+            SELECT table_name, table_schema
+            FROM information_schema.tables
+            WHERE table_schema = '{schema_name}'
+
+            UNION
+
+            SELECT c.relname as table_name, n.nspname as table_schema
+            FROM pg_class c
+            JOIN pg_namespace n ON c.relnamespace = n.oid
+            WHERE n.nspname = '{schema_name}'
+            AND c.relkind = 'm';
+        """
+        results = run_query_postgres(query, connection)
+        for row in results["rows"]:
+            cur_table = {}
+            cur_table["table_name"] = row["table_name"]
+            cur_table["schema_name"] = row["table_schema"]
+            all_tables.append(cur_table)
     return all_tables
 
-# getSchemaColumnInfoPostgress
+
+# getSchemaColumnInfoPostgres
+
+
 def get_schema_column_info_postgres(connection, schema_name, table_names):
     all_columns = []
     for table_name in table_names:
-        query = f"SELECT column_name, udt_name FROM information_schema.columns WHERE table_schema = '{table_name['schema_name']}' AND table_name = '{table_name['table_name']}' ORDER BY ordinal_position"
+        query = f"""
+            SELECT column_name as "column_name", udt_name as "field_type", ordinal_position as "sort_number"
+            FROM information_schema.columns
+            WHERE table_schema = '{table_name['schema_name']}'
+            AND table_name = '{table_name['table_name']}'
+
+            UNION
+
+            SELECT a.attname as "column_name", t.typname as "field_type", a.attnum as "sort_number"
+            FROM pg_attribute a
+            JOIN pg_class c ON a.attrelid = c.oid
+            JOIN pg_namespace n ON c.relnamespace = n.oid
+            JOIN pg_type t ON a.atttypid = t.oid
+            WHERE n.nspname = '{table_name['schema_name']}'
+            AND c.relname = '{table_name['table_name']}'
+            AND c.relkind = 'm'
+            AND a.attnum > 0
+            AND NOT a.attisdropped
+            ORDER BY "sort_number"
+        """
         results = run_query_postgres(query, connection)
         columns = []
-        for row in results['rows']:
-            # Convert row['udt_name'] to postgresql oid
-            pg_type = next((pg_type for pg_type in PG_TYPES if pg_type['typname'] == row['udt_name']), None)
-            if pg_type == None:
+        for row in results["rows"]:
+            pg_type = next(
+                (
+                    pg_type
+                    for pg_type in PG_TYPES
+                    if pg_type["typname"] == row["field_type"]
+                ),
+                None,
+            )
+            if pg_type is None:
                 pg_type = 1043
-            columns.append({
-                'columnName': row['column_name'],
-                'displayName': row['column_name'],
-                'dataTypeID': pg_type['oid'],
-                'fieldType': row['udt_name'],
-            })
-        all_columns.append({
-            'tableName': table_name['schema_name']+'.'+table_name['table_name'],
-            'displayName': table_name['schema_name']+'.'+table_name['table_name'],
-            'columns': columns
-        })
-    return all_columns
+            columns.append(
+                {
+                    "columnName": row["column_name"],
+                    "displayName": row["column_name"],
+                    "dataTypeID": pg_type["oid"],
+                    "fieldType": row["field_type"],
+                }
+            )
+        all_columns.append(
+            {
+                "tableName": table_name["schema_name"] + "." + table_name["table_name"],
+                "displayName": table_name["schema_name"]
+                + "."
+                + table_name["table_name"],
+                "columns": columns,
+            }
+        )
+    return all_columns
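Note (not part of the diff): the UNION branches added in 2.2.1 exist because information_schema does not report materialized views; they only surface as pg_class rows with relkind = 'm'. An illustrative check of the new behavior (connection details and view name are placeholders):

from quillsql.db.postgres import connect_to_postgres, get_tables_by_schema_postgres

conn = connect_to_postgres("postgres://user:pass@localhost:5432/mydb", True)
cur = conn.cursor()
cur.execute(
    "CREATE MATERIALIZED VIEW IF NOT EXISTS public.daily_totals AS SELECT 1 AS n"
)
conn.commit()
cur.close()

# 2.1.6 returned only information_schema tables/views here; in 2.2.1 the
# materialized view is listed as well.
tables = get_tables_by_schema_postgres(conn, ["public"])
print([t["table_name"] for t in tables])  # includes 'daily_totals'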
quillsql/error.py CHANGED
@@ -1,5 +1,5 @@
 class PgQueryError(Exception):
-    def __init__(self, message, query, position):
-        super().__init__(message)
-        self.query = query
-        self.position = position
+    def __init__(self, message, query, position):
+        super().__init__(message)
+        self.query = query
+        self.position = position
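Note (not part of the diff): this hunk is a whitespace-only change; the class carries a failed query and error position alongside the message. An illustrative raise/catch (the query and position are example values):

from quillsql.error import PgQueryError

try:
    raise PgQueryError('syntax error at or near "FORM"', "SELECT * FORM users", 10)
except PgQueryError as err:
    print(err)           # syntax error at or near "FORM"
    print(err.query)     # SELECT * FORM users
    print(err.position)  # 10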
@@ -1,3 +1,4 @@
 # __init__.py
 
-from .run_query_processes import remove_fields, array_to_map
+from .run_query_processes import remove_fields, array_to_map
+from .filters import Filter, FilterType, FieldType, StringOperator, NumberOperator, NullOperator, DateOperator, convert_custom_filter