quillsql 2.1.6__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- quillsql/__init__.py +2 -1
- quillsql/assets/__init__.py +1 -1
- quillsql/assets/pgtypes.py +696 -2781
- quillsql/core.py +427 -54
- quillsql/db/__init__.py +1 -1
- quillsql/db/bigquery.py +108 -74
- quillsql/db/cached_connection.py +6 -5
- quillsql/db/db_helper.py +36 -17
- quillsql/db/postgres.py +94 -39
- quillsql/error.py +4 -4
- quillsql/utils/__init__.py +2 -1
- quillsql/utils/filters.py +180 -0
- quillsql/utils/pivot_template.py +485 -0
- quillsql/utils/run_query_processes.py +17 -16
- quillsql/utils/schema_conversion.py +6 -3
- quillsql/utils/tenants.py +60 -0
- quillsql-2.2.1.dist-info/METADATA +69 -0
- quillsql-2.2.1.dist-info/RECORD +20 -0
- {quillsql-2.1.6.dist-info → quillsql-2.2.1.dist-info}/WHEEL +1 -1
- quillsql-2.1.6.dist-info/METADATA +0 -72
- quillsql-2.1.6.dist-info/RECORD +0 -17
- {quillsql-2.1.6.dist-info → quillsql-2.2.1.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/quillsql/utils/filters.py
@@ -0,0 +1,180 @@
+from enum import Enum
+from typing import Union, Any, Optional
+from dataclasses import dataclass, asdict
+
+# Constants
+IS_EXACTLY = 'is exactly'
+IS_NOT_EXACTLY = 'is not exactly'
+CONTAINS = 'contains'
+IS = 'is'
+IS_NOT = 'is not'
+IS_NOT_NULL = 'is not null'
+IS_NULL = 'is null'
+
+IN_THE_LAST = 'in the last'
+IN_THE_PREVIOUS = 'in the previous'
+IN_THE_CURRENT = 'in the current'
+
+EQUAL_TO = 'equal to'
+NOT_EQUAL_TO = 'not equal to'
+GREATER_THAN = 'greater than'
+LESS_THAN = 'less than'
+GREATER_THAN_OR_EQUAL_TO = 'greater than or equal to'
+LESS_THAN_OR_EQUAL_TO = 'less than or equal to'
+
+YEAR = 'year'
+QUARTER = 'quarter'
+MONTH = 'month'
+WEEK = 'week'
+DAY = 'day'
+HOUR = 'hour'
+
+NUMBER = 'number'
+STRING = 'string'
+DATE = 'date'
+NULL = 'null'
+CUSTOM = 'custom'
+BOOLEAN = 'boolean'
+
+# Enums
+class StringOperator(Enum):
+    IS_EXACTLY = IS_EXACTLY
+    IS_NOT_EXACTLY = IS_NOT_EXACTLY
+    CONTAINS = CONTAINS
+    IS = IS
+    IS_NOT = IS_NOT
+
+class DateOperator(Enum):
+    CUSTOM = CUSTOM
+    IN_THE_LAST = IN_THE_LAST
+    IN_THE_PREVIOUS = IN_THE_PREVIOUS
+    IN_THE_CURRENT = IN_THE_CURRENT
+    EQUAL_TO = EQUAL_TO
+    NOT_EQUAL_TO = NOT_EQUAL_TO
+    GREATER_THAN = GREATER_THAN
+    LESS_THAN = LESS_THAN
+    GREATER_THAN_OR_EQUAL_TO = GREATER_THAN_OR_EQUAL_TO
+    LESS_THAN_OR_EQUAL_TO = LESS_THAN_OR_EQUAL_TO
+
+class NumberOperator(Enum):
+    EQUAL_TO = EQUAL_TO
+    NOT_EQUAL_TO = NOT_EQUAL_TO
+    GREATER_THAN = GREATER_THAN
+    LESS_THAN = LESS_THAN
+    GREATER_THAN_OR_EQUAL_TO = GREATER_THAN_OR_EQUAL_TO
+    LESS_THAN_OR_EQUAL_TO = LESS_THAN_OR_EQUAL_TO
+
+class NullOperator(Enum):
+    IS_NOT_NULL = IS_NOT_NULL
+    IS_NULL = IS_NULL
+
+class BoolOperator(Enum):
+    EQUAL_TO = EQUAL_TO
+    NOT_EQUAL_TO = NOT_EQUAL_TO
+
+class TimeUnit(Enum):
+    YEAR = YEAR
+    QUARTER = QUARTER
+    MONTH = MONTH
+    WEEK = WEEK
+    DAY = DAY
+    HOUR = HOUR
+
+class FieldType(Enum):
+    STRING = STRING
+    NUMBER = NUMBER
+    DATE = DATE
+    NULL = NULL
+    BOOLEAN = BOOLEAN
+
+class FilterType(Enum):
+    STRING_FILTER = 'string-filter'
+    DATE_FILTER = 'date-filter'
+    DATE_CUSTOM_FILTER = 'date-custom-filter'
+    DATE_COMPARISON_FILTER = 'date-comparison-filter'
+    NUMERIC_FILTER = 'numeric-filter'
+    NULL_FILTER = 'null-filter'
+    STRING_IN_FILTER = 'string-in-filter'
+    BOOLEAN_FILTER = 'boolean-filter'
+
+# Types
+Operator = Union[StringOperator, DateOperator, NumberOperator, NullOperator, BoolOperator]
+
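A note on the `operator not in <EnumClass>` checks used by convert_custom_filter further down: Enum.__contains__ matches members, not raw string values, so an operator from one enum class never validates against another even when their underlying values collide. A minimal standalone sketch (hypothetical NumOp/BoolOp names, not part of the diff):

from enum import Enum

class NumOp(Enum):
    EQUAL_TO = 'equal to'

class BoolOp(Enum):
    EQUAL_TO = 'equal to'

print(NumOp.EQUAL_TO in NumOp)    # True
print(BoolOp.EQUAL_TO in NumOp)   # False: same value, different enum class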
+# Base Filter Interface
+@dataclass
+class DateRange:
+    startDate: str
+    endDate: str
+
+@dataclass
+class DateValue:
+    value: int
+    unit: TimeUnit
+@dataclass
+class BaseFilter:
+    filterType: FilterType
+    fieldType: FieldType
+    operator: Operator
+    field: str
+    value: Union[bool, int, str, list[str], DateRange, DateValue, None]
+    table: Optional[str] = None
+
+@dataclass
+class Filter:
+    filter_type: FilterType
+    operator: Operator
+    value: Union[bool, int, str, list[str], DateRange, DateValue, None]
+    field: str
+    table: str
+
+def convert_custom_filter(filter: Filter) -> dict:
+    if filter.filter_type == FilterType.STRING_FILTER:
+        if not isinstance(filter.value, str):
+            raise ValueError('Invalid value for StringFilter, expected string')
+        if filter.operator not in StringOperator:
+            raise ValueError('Invalid operator for StringFilter, expected StringOperator')
+        return asdict(BaseFilter(filter.filter_type, FieldType.STRING, filter.operator, filter.field, filter.value, filter.table))
+    elif filter.filter_type == FilterType.STRING_IN_FILTER:
+        if not isinstance(filter.value, list):
+            raise ValueError('Invalid value for StringInFilter, expected list')
+        if filter.operator not in StringOperator:
+            raise ValueError('Invalid operator for StringInFilter, expected StringOperator')
+        return asdict(BaseFilter(filter.filter_type, FieldType.STRING, filter.operator, filter.field, filter.value, filter.table))
+    elif filter.filter_type == FilterType.NUMERIC_FILTER:
+        if not isinstance(filter.value, int):
+            raise ValueError('Invalid value for NumericFilter, expected int')
+        if filter.operator not in NumberOperator:
+            raise ValueError('Invalid operator for NumericFilter, expected NumberOperator')
+        return asdict(BaseFilter(filter.filter_type, FieldType.NUMBER, filter.operator, filter.field, filter.value, filter.table))
+    elif filter.filter_type == FilterType.DATE_FILTER:
+        if not isinstance(filter.value, DateValue) or filter.value is None:
+            raise ValueError('Invalid value for DateFilter, expected DateValue')
+        if filter.operator not in DateOperator:
+            raise ValueError('Invalid operator for DateFilter, expected DateOperator')
+        return asdict(BaseFilter(filter.filter_type, FieldType.DATE, filter.operator, filter.field, filter.value, filter.table))
+    elif filter.filter_type == FilterType.DATE_CUSTOM_FILTER:
+        if not isinstance(filter.value, DateRange) or filter.value is None:
+            raise ValueError('Invalid value for DateCustomFilter, expected DateRange')
+        if filter.operator not in DateOperator:
+            raise ValueError('Invalid operator for DateCustomFilter, expected DateOperator')
+        return asdict(BaseFilter(filter.filter_type, FieldType.DATE, filter.operator, filter.field, filter.value, filter.table))
+    elif filter.filter_type == FilterType.DATE_COMPARISON_FILTER:
+        if not isinstance(filter.value, str):
+            raise ValueError('Invalid value for DateComparisonFilter, expected str')
+        if filter.operator not in DateOperator:
+            raise ValueError('Invalid operator for DateComparisonFilter, expected DateOperator')
+        return asdict(BaseFilter(filter.filter_type, FieldType.DATE, filter.operator, filter.field, filter.value, filter.table))
+    elif filter.filter_type == FilterType.NULL_FILTER:
+        if filter.value is not None:
+            raise ValueError('Invalid value for NullFilter, expected None')
+        if filter.operator not in NullOperator:
+            raise ValueError('Invalid operator for NullFilter, expected NullOperator')
+        return asdict(BaseFilter(filter.filter_type, FieldType.NULL, filter.operator, filter.field, filter.value, filter.table))
+    elif filter.filter_type == FilterType.BOOLEAN_FILTER:
+        if not isinstance(filter.value, bool):
+            raise ValueError('Invalid value for BooleanFilter, expected bool')
+        if filter.operator not in BoolOperator:
+            raise ValueError('Invalid operator for BooleanFilter, expected BoolOperator')
+        return asdict(BaseFilter(filter.filter_type, FieldType.BOOLEAN, filter.operator, filter.field, filter.value, filter.table))
+    else:
+        raise ValueError(f'Unknown filter type: {filter.filter_type}')
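Usage sketch for the new filters module (the import path follows the file list above; field, table, and value are illustrative):

from quillsql.utils.filters import Filter, FilterType, StringOperator, convert_custom_filter

payload = convert_custom_filter(Filter(
    filter_type=FilterType.STRING_FILTER,
    operator=StringOperator.IS_EXACTLY,
    value="active",
    field="status",
    table="customers",
))
# Returns the BaseFilter shape as a plain dict via dataclasses.asdict, with
# fieldType filled in (FieldType.STRING here); a mismatched value or operator
# raises ValueError instead.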
--- /dev/null
+++ b/quillsql/utils/pivot_template.py
@@ -0,0 +1,485 @@
+"""
+Pivot Template System - SDK SIDE
+
+This file contains all the logic needed on the SDK to:
+1. Hydrate pivot query templates with actual distinct values
+2. Parse distinct values from different database result formats
+3. Validate templates before hydration
+
+This runs on the customer's Python SDK where customer data is accessible.
+Takes templates from server and populates them with actual data.
+"""
+
+import json
+import re
+from typing import List, Dict, Any, Optional, TypedDict
+
+# Constants
+MAX_PIVOT_UNIQUE_VALUES = 250
+PIVOT_COLUMN_MARKER = "{{QUILL_PIVOT_COLUMNS}}"
+PIVOT_COLUMN_ALIAS_MARKER = "{{QUILL_PIVOT_COLUMN_ALIASES}}"
+
+
+# Types
+class PivotAggregation(TypedDict, total=False):
+    aggregationType: str
+    valueField: Optional[str]
+    valueFieldType: Optional[str]
+    valueField2: Optional[str]
+    valueField2Type: Optional[str]
+
+
+class PivotConfig(TypedDict, total=False):
+    requiresDistinctValues: bool
+    columnField: Optional[str]
+    rowField: Optional[str]
+    rowFieldType: Optional[str]
+    aggregations: List[PivotAggregation]
+    databaseType: str
+    dateBucket: Optional[str]
+    pivotType: str
+    sort: Optional[bool]
+    sortField: Optional[str]
+    sortDirection: Optional[str]
+    rowLimit: Optional[int]
+
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+
+def process_single_quotes(value: str, database_type: str) -> str:
+    """Process single quotes based on database type."""
+    if database_type.lower() in ["postgresql", "snowflake", "clickhouse"]:
+        return value.replace("'", "''")
+    return value.replace("'", "\\'")
+
+
+def process_agg_type(agg_type: str, has_column_field: bool = False) -> str:
+    """Process aggregation type."""
+    if agg_type == "count" and has_column_field:
+        return "SUM"
+    return "AVG" if agg_type and agg_type.lower() == "average" else (agg_type.lower() if agg_type else "")
+
+
+def replace_bigquery_special_characters(column: str) -> str:
+    """Replace BigQuery special characters."""
+    return column.replace("/", "quill_forward_slash")
+
+
+def process_column_reference(
+    column: str,
+    database_type: str,
+    fallback_on_null: Optional[str] = None,
+    is_column_field_alias: bool = False,
+    is_value_field_alias: bool = False
+) -> str:
+    """Process column reference based on database type."""
+    db = database_type.lower()
+
+    if db in ["postgresql", "clickhouse"]:
+        if column == "":
+            return f'"{fallback_on_null}"' if fallback_on_null else '"_"'
+        if is_column_field_alias:
+            return f'"{column.replace(chr(34), "")}"'
+        column_parts = column.split(".")
+        if len(column_parts) > 1:
+            return '"' + '","'.join([part.replace('"', '') for part in column_parts]) + '"'
+        return f'"{column.replace(chr(34), "")}"'
+
+    elif db == "mysql":
+        if column == "":
+            return fallback_on_null if fallback_on_null else "_"
+        if is_column_field_alias:
+            return f"`{column.replace('`', '').replace(chr(34), '')}`"
+        column_parts = column.split(".")
+        if len(column_parts) > 1:
+            return "`" + "`.`".join([part.replace("`", "") for part in column_parts]) + "`"
+        return f"`{column.replace('`', '')}`"
+
+    elif db == "snowflake":
+        if column == "":
+            return fallback_on_null if fallback_on_null else "_"
+        if is_column_field_alias:
+            return f'"{column.replace(chr(34), "")}"'
+        if is_value_field_alias:
+            cleaned_column = column.replace(")", "").replace("(", "_")
+            return cleaned_column
+        return column
+
+    elif db == "bigquery":
+        if column == "":
+            return f"`{fallback_on_null}`" if fallback_on_null else "`_`"
+        if is_column_field_alias:
+            return f"`{replace_bigquery_special_characters(column)}`"
+        column_parts = column.split(".")
+        if len(column_parts) > 1:
+            return "`" + "`.`".join([part for part in column_parts]) + "`"
+        return f"`{column}`"
+
+    elif db == "mssql":
+        if column == "":
+            return f"[{fallback_on_null}]" if fallback_on_null else "[_]"
+        if is_column_field_alias:
+            return f"[{column}]"
+        column_parts = column.split(".")
+        if len(column_parts) > 1:
+            return "[" + "].[".join([part for part in column_parts]) + "]"
+        return f"[{column}]"
+
+    elif db == "databricks":
+        if column == "":
+            return f"`{fallback_on_null}`" if fallback_on_null else "`_`"
+        if is_column_field_alias:
+            return f"`{column}`"
+        column_parts = column.split(".")
+        if len(column_parts) > 1:
+            return "`" + "`.`".join([part for part in column_parts]) + "`"
+        return f"`{column}`"
+
+    else:
+        return column
+
+
+def process_value_field(agg_type: str, database_type: str, value_field: str) -> str:
+    """Process value field based on aggregation type."""
+    if agg_type in ["min", "max"] or (agg_type and agg_type.lower() == "average"):
+        return f"{process_column_reference(value_field, database_type)} ELSE null"
+    if agg_type == "count":
+        return "1 ELSE 0"
+    return f"{process_column_reference(value_field, database_type)} ELSE 0" if value_field else "1 ELSE 0"
+
+
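A quick illustration (not part of the diff) of how process_column_reference quotes a dotted identifier per backend; note that the postgresql/clickhouse branch joins the parts with '","' rather than '"."':

from quillsql.utils.pivot_template import process_column_reference

print(process_column_reference("analytics.orders", "postgresql"))  # "analytics","orders"
print(process_column_reference("analytics.orders", "mysql"))       # `analytics`.`orders`
print(process_column_reference("analytics.orders", "mssql"))       # [analytics].[orders]
print(process_column_reference("", "bigquery", "region"))          # `region` (fallback)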
+# ============================================================================
+# DISTINCT VALUES PARSING
+# ============================================================================
+
+
+def parse_distinct_values(query_result: Dict[str, Any], database_type: str) -> List[str]:
+    """
+    Parses distinct values from database query results.
+    Different databases return different formats.
+    """
+    if not query_result or not query_result.get("rows") or len(query_result["rows"]) == 0:
+        return []
+
+    row = query_result["rows"][0]
+    distinct_values = []
+
+    db = database_type.lower()
+
+    if db in ["postgresql", "bigquery", "snowflake", "databricks", "clickhouse"]:
+        # These return arrays in string_values field
+        if "string_values" in row:
+            if isinstance(row["string_values"], list):
+                distinct_values = row["string_values"]
+            elif isinstance(row["string_values"], str):
+                # Handle JSON string arrays
+                try:
+                    distinct_values = json.loads(row["string_values"])
+                except:
+                    distinct_values = []
+
+    elif db == "mysql":
+        # MySQL returns JSON_ARRAYAGG which should be an array
+        if "string_values" in row:
+            if isinstance(row["string_values"], list):
+                distinct_values = row["string_values"]
+            elif isinstance(row["string_values"], str):
+                try:
+                    distinct_values = json.loads(row["string_values"])
+                except:
+                    distinct_values = []
+
+    elif db == "mssql":
+        # MS SQL returns comma-separated string
+        if "string_values" in row and isinstance(row["string_values"], str):
+            distinct_values = [v.strip() for v in row["string_values"].split(",")]
+
+    else:
+        print(f"Warning: Unknown database type: {database_type}")
+        distinct_values = []
+
+    # Filter out null/undefined/empty values
+    return [value for value in distinct_values if value is not None and value != ""]
+
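Illustrative inputs for parse_distinct_values (the row shapes are assumptions based on the branches above):

from quillsql.utils.pivot_template import parse_distinct_values

pg = {"rows": [{"string_values": ["US", "CA", None, ""]}]}
print(parse_distinct_values(pg, "postgresql"))        # ['US', 'CA']

mssql = {"rows": [{"string_values": "US, CA, MX"}]}
print(parse_distinct_values(mssql, "mssql"))          # ['US', 'CA', 'MX']

mysql = {"rows": [{"string_values": '["US", "CA"]'}]}
print(parse_distinct_values(mysql, "mysql"))          # ['US', 'CA']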
+
+# ============================================================================
+# MATCH CASING FUNCTION
+# ============================================================================
+
+
+def match_casing(text: Optional[str], template: Optional[str]) -> str:
+    """Matches the casing of text to template."""
+    if not text or not template:
+        return text or ""
+
+    # Detect patterns
+    def is_title_case(s: str) -> bool:
+        return bool(re.match(r'^[A-Z][a-z]*([A-Z][a-z]*)*$', s))
+
+    def is_camel_case(s: str) -> bool:
+        return bool(re.match(r'^[a-z]+([A-Z][a-z]*)*$', s))
+
+    def is_snake_case(s: str) -> bool:
+        return bool(re.match(r'^[a-z0-9]+(_[a-z0-9]+)*$', s))
+
+    def is_all_lower_case(s: str) -> bool:
+        return bool(re.match(r'^[a-z]+$', s))
+
+    def is_all_upper_case(s: str) -> bool:
+        return bool(re.match(r'^[A-Z]+$', s))
+
+    def is_capitalized(s: str) -> bool:
+        return bool(re.match(r'^[A-Z][a-z]*$', s))
+
+    def is_screaming_snake_case(s: str) -> bool:
+        return bool(re.match(r'^[A-Z][A-Z0-9]*(_[A-Z0-9]+)*$', s))
+
+    # Convert functions
+    def to_title_case(s: str) -> str:
+        return ''.join(word.capitalize() for word in re.split(r'[_\s]+', s.lower()))
+
+    def to_camel_case(s: str) -> str:
+        return re.sub(r'_(.)', lambda m: m.group(1).upper(), s.lower())
+
+    def to_snake_case(s: str) -> str:
+        return re.sub(r'[A-Z]', lambda m: f'_{m.group(0).lower()}', s)
+
+    def to_screaming_snake_case(s: str) -> str:
+        result = re.sub(r'([A-Z])', r'_\1', s)
+        result = result.lstrip('_')
+        return result.upper()
+
+    # Match casing
+    if is_title_case(template):
+        return to_title_case(text)
+    elif is_camel_case(template):
+        return to_camel_case(text)
+    elif is_snake_case(template):
+        return to_snake_case(text)
+    elif is_all_lower_case(template):
+        return text.lower()
+    elif is_all_upper_case(template):
+        return text.upper()
+    elif is_capitalized(template):
+        return text.capitalize()
+    elif is_screaming_snake_case(template):
+        return to_screaming_snake_case(text)
+    else:
+        return text  # Default case if no specific pattern is detected
+
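match_casing in action (illustrative values): it detects the casing convention of template and restyles text to match, which hydrate_pivot_template below uses when building disambiguated column aliases:

from quillsql.utils.pivot_template import match_casing

print(match_casing("sum", "TOTAL_REVENUE"))  # 'SUM'  (screaming snake case)
print(match_casing("sum", "totalRevenue"))   # 'sum'  (camel case)
print(match_casing("sum", "Revenue"))        # 'Sum'  (title case)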
+
+# ============================================================================
+# TEMPLATE HYDRATION
+# ============================================================================
+
+
+def hydrate_pivot_template(
+    template: str,
+    distinct_values: List[str],
+    config: PivotConfig
+) -> str:
+    """
+    Hydrates a pivot query template with actual distinct values.
+    This function should be called in the Python SDK after fetching distinct values.
+
+    Args:
+        template: The SQL template string containing markers
+        distinct_values: Array of distinct values fetched from the database
+        config: config about the pivot configuration
+
+    Returns:
+        Hydrated SQL query string ready to execute
+    """
+    column_field = config.get("columnField")
+    row_field = config.get("rowField")
+    aggregations = config.get("aggregations", [])
+    database_type = config.get("databaseType", "postgresql")
+
+    # If this pivot doesn't require distinct values, return as-is
+    if not config.get("requiresDistinctValues") or not column_field or not row_field:
+        return template
+
+    # Filter and limit distinct values
+    filtered_values = [
+        value for value in distinct_values
+        if value is not None and value != ""
+    ][:MAX_PIVOT_UNIQUE_VALUES]
+
+    # Get properly quoted column references
+    column_field_alias = process_column_reference(
+        column_field,
+        database_type,
+        None,
+        False,
+        True
+    )
+
+    row_field_alias = process_column_reference(
+        row_field,
+        database_type,
+        None,
+        False,
+        True
+    )
+
+    # Generate column aliases for SELECT in quill_alias CTE
+    column_aliases = []
+    column_aliases.append(
+        f"{process_column_reference(row_field, database_type, None, True)} AS {row_field_alias}"
+    )
+
+    # Generate CASE WHEN columns for each aggregation
+    case_when_columns = []
+    seen_aggs: Dict[str, Dict[str, int]] = {}
+
+    for current_agg in aggregations:
+        agg_type = current_agg.get("aggregationType", "")
+        value_field = current_agg.get("valueField", "")
+
+        # Track duplicate aggregation combos for disambiguation
+        if agg_type in seen_aggs and value_field in seen_aggs[agg_type]:
+            seen_aggs[agg_type][value_field] += 1
+        else:
+            if agg_type not in seen_aggs:
+                seen_aggs[agg_type] = {}
+            seen_aggs[agg_type][value_field] = 1
+
+        disambiguation_index = str(seen_aggs[agg_type][value_field])
+        if disambiguation_index == "1":
+            disambiguation_index = ""
+
+        value_field_alias = process_column_reference(
+            current_agg.get("valueField") or row_field or "count",
+            database_type,
+            None,
+            False,
+            True
+        )
+
+        value_alias_substring = ""
+        if current_agg.get("valueField"):
+            value_alias_substring = f"{process_column_reference(current_agg['valueField'], database_type, None, True)} AS {value_field_alias}"
+
+        # Handle disambiguation for multiple aggregations
+        total_seen = sum(seen_aggs[agg_type].values())
+        disambiguation_field = ""
+        if total_seen > 1:
+            disambiguation_field = f"_{current_agg.get('valueField', '')}{disambiguation_index}"
+
+        disambiguation = ""
+        if len(aggregations) > 1:
+            if disambiguation_field:
+                disambiguation = f"{disambiguation_field}_{match_casing(agg_type, current_agg.get('valueField'))}"
+            else:
+                disambiguation = f"_{agg_type}"
+
+        # Wrap boolean fields in CASE WHEN
+        value_expr = ""
+        if current_agg.get("valueFieldType") == "bool":
+            value_expr = f"CASE WHEN {value_field_alias} THEN 1 ELSE 0 END"
+        else:
+            value_expr = process_value_field(
+                agg_type,
+                database_type,
+                value_field_alias
+            )
+
+        # Handle percentage aggregations specially
+        if agg_type == "percentage":
+            value_field2 = current_agg.get("valueField2") or current_agg.get("valueField") or "count"
+            value_field2_alias = process_column_reference(
+                value_field2,
+                database_type,
+                None,
+                False,
+                True
+            )
+
+            value_field2_type = current_agg.get("valueField2Type") or current_agg.get("valueFieldType")
+            value2_expr = ""
+            if value_field2_type == "bool":
+                value2_expr = f"CASE WHEN {value_field2_alias} THEN 1 ELSE 0 END"
+            else:
+                value2_expr = value_field2_alias
+
+            value2_alias_substring = ""
+            if current_agg.get("valueField2") and current_agg.get("valueField") != current_agg.get("valueField2"):
+                value2_alias_substring = f"{process_column_reference(current_agg['valueField2'], database_type, None, True)} AS {value_field2_alias}"
+
+            # Percentage with same field for numerator and denominator
+            if current_agg.get("valueField") == current_agg.get("valueField2") or not current_agg.get("valueField2"):
+                for column in filtered_values:
+                    case_when_columns.append(
+                        f"CAST(sum(CASE WHEN {column_field_alias} = '{process_single_quotes(column, database_type)}' THEN {value_expr} END) AS FLOAT) / GREATEST(sum({value2_expr}), 1) AS {process_column_reference(column + disambiguation, database_type, '_', True)}"
+                    )
+            else:
+                # Percentage with different fields
+                for column in filtered_values:
+                    case_when_columns.append(
+                        f"CAST(sum(CASE WHEN {column_field_alias} = '{process_single_quotes(column, database_type)}' THEN {value_expr} END) AS FLOAT) / GREATEST(sum(CASE WHEN {column_field_alias} = '{process_single_quotes(column, database_type)}' THEN {value2_expr} END), 1) AS {process_column_reference(column + disambiguation, database_type, '_', True)}"
                    )
+            if value2_alias_substring:
+                column_aliases.append(value2_alias_substring)
+        else:
+            # Standard aggregations (sum, count, avg, min, max)
+            for column in filtered_values:
+                case_when_columns.append(
+                    f"{process_agg_type(agg_type, True)}(CASE WHEN {column_field_alias} = '{process_single_quotes(column, database_type)}' THEN {value_expr} END) AS {process_column_reference(column + disambiguation, database_type, '_', True)}"
+                )
+
+        if value_alias_substring:
+            column_aliases.append(value_alias_substring)
+
+    # Add the column field to the aliases
+    column_aliases.append(
+        f"{process_column_reference(column_field, database_type, None, True)} AS {column_field_alias}"
+    )
+
+    # Remove duplicates
+    unique_column_aliases = list(dict.fromkeys(column_aliases))
+
+    # Replace markers with actual SQL
+    hydrated_template = template.replace(
+        PIVOT_COLUMN_ALIAS_MARKER,
+        ", ".join(unique_column_aliases)
+    ).replace(
+        PIVOT_COLUMN_MARKER,
+        ", ".join(case_when_columns)
+    )
+
+    return hydrated_template
+
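End-to-end usage sketch (the template, table, and field names are invented for illustration; only the two {{QUILL_...}} markers are meaningful to the function):

from quillsql.utils.pivot_template import hydrate_pivot_template

template = (
    'WITH quill_alias AS (SELECT {{QUILL_PIVOT_COLUMN_ALIASES}} FROM orders) '
    'SELECT "status", {{QUILL_PIVOT_COLUMNS}} FROM quill_alias GROUP BY "status"'
)
config = {
    "requiresDistinctValues": True,
    "columnField": "country",
    "rowField": "status",
    "aggregations": [{"aggregationType": "count"}],
    "databaseType": "postgresql",
    "pivotType": "table",
}
sql = hydrate_pivot_template(template, ["US", "CA"], config)
# Each distinct value becomes a column such as:
#   SUM(CASE WHEN "country" = 'US' THEN 1 ELSE 0 END) AS "US"
# and the alias marker expands to '"status" AS "status", "country" AS "country"'.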
+
+# ============================================================================
+# VALIDATION
+# ============================================================================
+
+
+def validate_template(template: str, config: PivotConfig) -> Dict[str, Any]:
+    """Validates that a template can be hydrated with the given config."""
+    errors = []
+
+    if not template:
+        errors.append("Template is empty")
+
+    if config.get("requiresDistinctValues"):
+        if PIVOT_COLUMN_MARKER not in template:
+            errors.append(f"Template is missing {PIVOT_COLUMN_MARKER} marker")
+        if PIVOT_COLUMN_ALIAS_MARKER not in template:
+            errors.append(f"Template is missing {PIVOT_COLUMN_ALIAS_MARKER} marker")
+        if not config.get("columnField"):
+            errors.append("config is missing columnField")
+        if not config.get("rowField"):
+            errors.append("config is missing rowField")
+
+    if not config.get("aggregations") or len(config.get("aggregations", [])) == 0:
+        errors.append("config is missing aggregations")
+
+    return {
+        "valid": len(errors) == 0,
+        "errors": errors
+    }
+
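And a pre-flight check with validate_template (illustrative inputs):

from quillsql.utils.pivot_template import validate_template

result = validate_template("SELECT 1", {"requiresDistinctValues": True, "aggregations": []})
print(result["valid"])    # False
print(result["errors"])   # missing markers, columnField, rowField, and aggregations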