databricks-sqlalchemy 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. CHANGELOG.md +2 -2
  2. databricks/sqlalchemy/__init__.py +4 -1
  3. databricks/sqlalchemy/_ddl.py +100 -0
  4. databricks/sqlalchemy/_parse.py +385 -0
  5. databricks/sqlalchemy/_types.py +323 -0
  6. databricks/sqlalchemy/base.py +436 -0
  7. databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
  8. databricks/sqlalchemy/py.typed +0 -0
  9. databricks/sqlalchemy/pytest.ini +4 -0
  10. databricks/sqlalchemy/requirements.py +249 -0
  11. databricks/sqlalchemy/setup.cfg +4 -0
  12. databricks/sqlalchemy/test/_extra.py +70 -0
  13. databricks/sqlalchemy/test/_future.py +331 -0
  14. databricks/sqlalchemy/test/_regression.py +311 -0
  15. databricks/sqlalchemy/test/_unsupported.py +450 -0
  16. databricks/sqlalchemy/test/conftest.py +13 -0
  17. databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
  18. databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
  19. databricks/sqlalchemy/test/test_suite.py +13 -0
  20. databricks/sqlalchemy/test_local/__init__.py +5 -0
  21. databricks/sqlalchemy/test_local/conftest.py +44 -0
  22. databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  23. databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
  24. databricks/sqlalchemy/test_local/test_ddl.py +96 -0
  25. databricks/sqlalchemy/test_local/test_parsing.py +160 -0
  26. databricks/sqlalchemy/test_local/test_types.py +161 -0
  27. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/METADATA +60 -39
  28. databricks_sqlalchemy-2.0.0.dist-info/RECORD +31 -0
  29. databricks/sqlalchemy/dialect/__init__.py +0 -340
  30. databricks/sqlalchemy/dialect/base.py +0 -17
  31. databricks/sqlalchemy/dialect/compiler.py +0 -38
  32. databricks_sqlalchemy-1.0.1.dist-info/RECORD +0 -10
  33. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/LICENSE +0 -0
  34. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/WHEEL +0 -0
  35. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/entry_points.txt +0 -0
databricks/sqlalchemy/_types.py
@@ -0,0 +1,323 @@
+ from datetime import datetime, time, timezone
+ from itertools import product
+ from typing import Any, Union, Optional
+
+ import sqlalchemy
+ from sqlalchemy.engine.interfaces import Dialect
+ from sqlalchemy.ext.compiler import compiles
+
+ from databricks.sql.utils import ParamEscaper
+
+
+ def process_literal_param_hack(value: Any):
+     """This method is supposed to accept a Python type and return a string representation of
+     that type. But due to a quirk in the way SQLAlchemy's literal rendering works, we have to
+     return the value itself: by the time it reaches our custom type code, it has already been
+     converted into a string.
+
+     This dynamic only seems to affect the literal rendering of datetime and time objects.
+     Without this hack in place, the following tests all fail, though it's not clear why:
+
+         TimeTest
+         DateTimeTest
+         DateTimeTZTest
+     """
+     return value
+
+
+ @compiles(sqlalchemy.types.Enum, "databricks")
+ @compiles(sqlalchemy.types.String, "databricks")
+ @compiles(sqlalchemy.types.Text, "databricks")
+ @compiles(sqlalchemy.types.Time, "databricks")
+ @compiles(sqlalchemy.types.Unicode, "databricks")
+ @compiles(sqlalchemy.types.UnicodeText, "databricks")
+ @compiles(sqlalchemy.types.Uuid, "databricks")
+ def compile_string_databricks(type_, compiler, **kw):
+     """
+     We override the default compilation of these string-like types because SQLAlchemy
+     defaults to incompatible or abnormal compiled names:
+
+         Enum        -> VARCHAR
+         String      -> VARCHAR[LENGTH]
+         Text        -> VARCHAR[LENGTH]
+         Time        -> TIME
+         Unicode     -> VARCHAR[LENGTH]
+         UnicodeText -> TEXT
+         Uuid        -> CHAR[32]
+
+     All of these types are instead compiled to STRING in Databricks SQL.
+     """
+     return "STRING"
+
+
+ @compiles(sqlalchemy.types.Integer, "databricks")
+ def compile_integer_databricks(type_, compiler, **kw):
+     """
+     We need to override the default Integer compilation rendering because Databricks uses "INT" instead of "INTEGER"
+     """
+     return "INT"
+
+
+ @compiles(sqlalchemy.types.LargeBinary, "databricks")
+ def compile_binary_databricks(type_, compiler, **kw):
+     """
+     We need to override the default LargeBinary compilation rendering because Databricks uses "BINARY" instead of "BLOB"
+     """
+     return "BINARY"
+
+
+ @compiles(sqlalchemy.types.Numeric, "databricks")
+ def compile_numeric_databricks(type_, compiler, **kw):
+     """
+     We need to override the default Numeric compilation rendering because Databricks uses "DECIMAL" instead of "NUMERIC"
+
+     The built-in visit_DECIMAL behaviour captures the precision and scale, so here we simply
+     map calls to compile Numeric onto SQLAlchemy's DECIMAL implementation.
+     """
+     return compiler.visit_DECIMAL(type_, **kw)
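+
+ # For example, Numeric(10, 2) compiles to DECIMAL(10, 2) via visit_DECIMAL.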
+
+
+ @compiles(sqlalchemy.types.DateTime, "databricks")
+ def compile_datetime_databricks(type_, compiler, **kw):
+     """
+     We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP_NTZ" instead of "DATETIME"
+     """
+     return "TIMESTAMP_NTZ"
+
+
+ @compiles(sqlalchemy.types.ARRAY, "databricks")
+ def compile_array_databricks(type_, compiler, **kw):
+     """
+     SQLAlchemy's default ARRAY won't compile because it is only implemented for PostgreSQL.
+     The PostgreSQL implementation works for Databricks SQL, so we duplicate it here.
+
+     :type_:
+         This is an instance of sqlalchemy.types.ARRAY, which always includes an item_type
+         attribute that is itself an instance of TypeEngine
+
+     https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY
+     """
+
+     inner = compiler.process(type_.item_type, **kw)
+
+     return f"ARRAY<{inner}>"
+
+
+ class TIMESTAMP_NTZ(sqlalchemy.types.TypeDecorator):
+     """Represents values comprising values of fields year, month, day, hour, minute, and second.
+     All operations are performed without taking any time zone into account.
+
+     Our dialect maps sqlalchemy.types.DateTime() to this type, which means that all DateTime()
+     objects are stored without tzinfo. To read and write timezone-aware datetimes use
+     the TIMESTAMP type defined below instead.
+
+     https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html
+     """
+
+     impl = sqlalchemy.types.DateTime
+
+     cache_ok = True
+
+     def process_result_value(self, value: Union[None, datetime], dialect):
+         if value is None:
+             return None
+         return value.replace(tzinfo=None)
+
+
+ class TIMESTAMP(sqlalchemy.types.TypeDecorator):
+     """Represents values comprising values of fields year, month, day, hour, minute, and second,
+     with the session local time zone.
+
+     Our dialect maps sqlalchemy.types.DateTime() to TIMESTAMP_NTZ, which means that all DateTime()
+     objects are stored without tzinfo. To read and write timezone-aware datetimes use
+     this type instead.
+
+     ```python
+     # This won't work
+     Column(sqlalchemy.DateTime(timezone=True))
+
+     # But this does
+     Column(TIMESTAMP)
+     ```
+
+     https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html
+     """
+
+     impl = sqlalchemy.types.DateTime
+
+     cache_ok = True
+
+     def process_result_value(self, value: Union[None, datetime], dialect):
+         if value is None:
+             return None
+
+         if not value.tzinfo:
+             return value.replace(tzinfo=timezone.utc)
+         return value
+
+     def process_bind_param(
+         self, value: Union[datetime, None], dialect
+     ) -> Optional[datetime]:
+         """pysql can pass datetime.datetime() objects directly to DBR"""
+         return value
+
+     def process_literal_param(
+         self, value: Union[datetime, None], dialect: Dialect
+     ) -> str:
+         """Delegate to the module-level hack; see process_literal_param_hack()."""
+         return process_literal_param_hack(value)
+
+
+ @compiles(TIMESTAMP, "databricks")
+ def compile_timestamp_databricks(type_, compiler, **kw):
+     """
+     Render the timezone-aware TIMESTAMP type above as Databricks "TIMESTAMP",
+     in contrast to DateTime(), which compiles to "TIMESTAMP_NTZ"
+     """
+     return "TIMESTAMP"
+
+
+ class DatabricksTimeType(sqlalchemy.types.TypeDecorator):
+     """Databricks has no native TIME type, so we store it as a string."""
+
+     impl = sqlalchemy.types.Time
+     cache_ok = True
+
+     BASE_FMT = "%H:%M:%S"
+     MICROSEC_PART = ".%f"
+     TIMEZONE_PART = "%z"
+
+     def _generate_fmt_string(self, ms: bool, tz: bool) -> str:
+         """Return a format string for datetime.strptime() that includes or excludes microseconds and timezone."""
+         _ = lambda part, include: part if include else ""
+         return f"{self.BASE_FMT}{_(self.MICROSEC_PART, ms)}{_(self.TIMEZONE_PART, tz)}"
+
+     @property
+     def allowed_fmt_strings(self):
+         """Time strings can be read with or without microseconds and with or without a timezone."""
+
+         if not hasattr(self, "_allowed_fmt_strings"):
+             ms_switch = tz_switch = [True, False]
+             self._allowed_fmt_strings = [
+                 self._generate_fmt_string(x, y)
+                 for x, y in product(ms_switch, tz_switch)
+             ]
+
+         return self._allowed_fmt_strings
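+
+     # The four formats generated above are:
+     #   "%H:%M:%S.%f%z", "%H:%M:%S.%f", "%H:%M:%S%z", "%H:%M:%S"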
+
+     def _parse_result_string(self, value: str) -> time:
+         """Parse a string into a time object, trying all allowed formats until one works."""
+         for fmt in self.allowed_fmt_strings:
+             try:
+                 # We use timetz() here because we want to preserve the timezone information.
+                 # Calling .time() would strip the timezone information.
+                 return datetime.strptime(value, fmt).timetz()
+             except ValueError:
+                 pass
+
+         raise ValueError(f"Could not parse time string {value}")
+
+     def _determine_fmt_string(self, value: time) -> str:
+         """Determine which format string to use to render a time object as a string."""
+         ms_bool = value.microsecond > 0
+         tz_bool = value.tzinfo is not None
+         return self._generate_fmt_string(ms_bool, tz_bool)
+
+     def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, str]:
+         """Values sent to the database are converted to %H:%M:%S strings."""
+         if value is None:
+             return None
+         fmt_string = self._determine_fmt_string(value)
+         return value.strftime(fmt_string)
+
+     # mypy doesn't like this workaround because TypeEngine wants process_literal_param to return a string
+     def process_literal_param(self, value, dialect) -> time:  # type: ignore
+         """Delegate to the module-level hack; see process_literal_param_hack()."""
+         return process_literal_param_hack(value)
+
+     def process_result_value(
+         self, value: Union[None, str], dialect
+     ) -> Union[time, None]:
+         """Values received from the database are parsed into datetime.time() objects"""
+         if value is None:
+             return None
+
+         return self._parse_result_string(value)
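+
+     # Round-trip example: time(12, 30, 45) binds as "12:30:45", while
+     # time(12, 30, 45, 500, tzinfo=timezone.utc) binds as "12:30:45.000500+0000"
+     # and parses back to an equivalent timezone-aware time.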
+
+
+ class DatabricksStringType(sqlalchemy.types.TypeDecorator):
+     """We have to implement our own String() type because SQLAlchemy's default implementation
+     wants to escape single-quotes with a doubled single-quote. Databricks uses a backslash to
+     escape literal strings, so SQLAlchemy's default escaping breaks Databricks SQL.
+     """
+
+     impl = sqlalchemy.types.String
+     cache_ok = True
+     pe = ParamEscaper()
+
+     def process_literal_param(self, value, dialect) -> str:
+         """SQLAlchemy's default string escaping of backslashes doesn't work for Databricks.
+         The logic here mirrors our legacy inline escaping behaviour.
+         """
+
+         return self.pe.escape_string(value)
+
+     def literal_processor(self, dialect):
+         """We manually override this method to prevent further processing of the string literal beyond
+         what happens in the process_literal_param() method.
+
+         The SQLAlchemy docs _specifically_ say not to override this method.
+
+         It appears that any processing that happens in TypeEngine.process_literal_param happens _before_
+         and _in addition to_ whatever the class's impl.literal_processor() method does. The String.literal_processor()
+         method performs a string replacement that doubles any single-quote in the contained string. This raises a syntax
+         error in Databricks. And it's not necessary because ParamEscaper() already implements all the escaping we need.
+
+         We should consider opening an issue on the SQLAlchemy project to see if we're using it wrong.
+
+         See type_api.py::TypeEngine.literal_processor:
+
+         ```python
+         def process(value: Any) -> str:
+             return fixed_impl_processor(
+                 fixed_process_literal_param(value, dialect)
+             )
+         ```
+
+         That call to fixed_impl_processor wraps the result of fixed_process_literal_param (which is the
+         process_literal_param defined in our Databricks dialect).
+
+         https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor
+         """
+
+         def process(value):
+             """This is a copy of the default String.literal_processor() method, but stripped of
+             its double-escaping behaviour for single-quotes.
+             """
+
+             _step1 = self.process_literal_param(value, dialect="databricks")
+             if dialect.identifier_preparer._double_percents:
+                 _step2 = _step1.replace("%", "%%")
+             else:
+                 _step2 = _step1
+
+             return "%s" % _step2
+
+         return process
+
+
+ class TINYINT(sqlalchemy.types.TypeDecorator):
+     """Represents 1-byte signed integers
+
+     Acts like a sqlalchemy SmallInteger() in Python but writes to a TINYINT field in Databricks
+
+     https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html
+     """
+
+     impl = sqlalchemy.types.SmallInteger
+     cache_ok = True
+
+
+ @compiles(TINYINT, "databricks")
+ def compile_tinyint(type_, compiler, **kw):
+     return "TINYINT"