pydiverse-common 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ from .dtypes import (
4
+ Bool,
5
+ Date,
6
+ Datetime,
7
+ Decimal,
8
+ Dtype,
9
+ Duration,
10
+ Float,
11
+ Float32,
12
+ Float64,
13
+ Int,
14
+ Int8,
15
+ Int16,
16
+ Int32,
17
+ Int64,
18
+ List,
19
+ NullType,
20
+ PandasBackend,
21
+ String,
22
+ Time,
23
+ Uint8,
24
+ Uint16,
25
+ Uint32,
26
+ Uint64,
27
+ )
28
+
29
# Names re-exported as the public API of the package (order kept as published).
__all__ = [
    "Dtype", "Bool", "Date", "Datetime", "Decimal", "Duration",
    "Float", "Float32", "Float64",
    "Int", "Int8", "Int16", "Int32", "Int64",
    "NullType", "String", "Time",
    "Uint8", "Uint16", "Uint32", "Uint64",
    "List", "PandasBackend",
]
@@ -0,0 +1,389 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+
5
+
6
class PandasBackend(str, Enum):
    """Choice of pandas dtype family used by ``Dtype.to_pandas``.

    Members are also ``str`` instances, so they compare equal to their
    plain string values (``"numpy"`` / ``"arrow"``).
    """

    NUMPY = "numpy"
    ARROW = "arrow"
9
+
10
+
11
class Dtype:
    """Base class for all data types.

    Instances carry no state of their own: two dtypes are equal when they
    are instances of exactly the same class, and they hash by class. This is
    what allows dtype instances to be used as keys in the conversion tables
    of the ``to_*`` methods below.
    """

    def __eq__(self, rhs):
        """Return True if ``rhs`` is a Dtype of exactly the same class."""
        return isinstance(rhs, Dtype) and type(self) is type(rhs)

    def __hash__(self):
        """Hash by class so that equal dtypes hash equally."""
        return hash(type(self))

    def __repr__(self):
        """Return the bare class name, e.g. ``Int64``."""
        return self.__class__.__name__

    @classmethod
    def is_int(cls):
        """Return whether this dtype is an integer type (overridden by ``Int``)."""
        return False

    @classmethod
    def is_float(cls):
        """Return whether this dtype is a float type (overridden by ``Float``)."""
        return False

    @classmethod
    def is_subtype(cls, rhs):
        """Return whether ``cls`` is acceptable where the dtype ``rhs`` is expected.

        True when ``rhs`` is an instance of exactly ``cls``, or when ``rhs``
        is the generic ``Int`` / ``Float`` dtype and ``cls`` is an integer /
        float type respectively.
        """
        rhs_cls = type(rhs)
        return (
            (cls is rhs_cls)
            or (rhs_cls is Int and cls.is_int())
            or (rhs_cls is Float and cls.is_float())
        )

    @staticmethod
    def from_sql(sql_type) -> Dtype:
        """Convert a SQLAlchemy type instance into the matching Dtype.

        Raises:
            TypeError: if ``sql_type`` matches none of the supported types.
        """
        import sqlalchemy as sqa

        # The more specific integer checks come before the generic Integer
        # check, and Float comes before Numeric, so that the narrower match
        # wins when one SQLAlchemy type derives from another.
        if isinstance(sql_type, sqa.SmallInteger):
            return Int16()
        if isinstance(sql_type, sqa.BigInteger):
            return Int64()
        if isinstance(sql_type, sqa.Integer):
            return Int32()
        if isinstance(sql_type, sqa.Float):
            # A missing precision is treated as double precision (53 bits).
            precision = sql_type.precision or 53
            if precision <= 24:
                return Float32()
            return Float64()
        # A tuple (rather than ``A | B``) keeps this working on Python 3.9,
        # where isinstance() does not accept PEP 604 unions.
        if isinstance(sql_type, (sqa.Numeric, sqa.DECIMAL)):
            # Fixed-point numbers are always mapped to Float64: dataframe
            # libraries usually work with double precision floats instead.
            return Float64()
        if isinstance(sql_type, sqa.String):
            return String()
        if isinstance(sql_type, sqa.Boolean):
            return Bool()
        if isinstance(sql_type, sqa.Date):
            return Date()
        if isinstance(sql_type, sqa.Time):
            return Time()
        if isinstance(sql_type, sqa.DateTime):
            return Datetime()
        if isinstance(sql_type, sqa.ARRAY):
            # Recurse on the element type itself.
            return List(Dtype.from_sql(sql_type.item_type))
        # ``to_sql`` emits ``sqa.types.NullType()`` for NullType, so accept it
        # here for a working round trip; ``sqa.Null`` is kept for callers
        # that relied on the previous check.
        if isinstance(sql_type, (sqa.types.NullType, sqa.Null)):
            return NullType()

        raise TypeError(f"unsupported SQL type: {sql_type!r}")

    @staticmethod
    def from_pandas(pandas_type) -> Dtype:
        """Convert a pandas / numpy dtype into the matching Dtype.

        ``pd.ArrowDtype`` values are delegated to ``from_arrow``.

        Raises:
            TypeError: if ``pandas_type`` is not supported.
        """
        import numpy as np
        import pandas as pd

        if isinstance(pandas_type, pd.ArrowDtype):
            return Dtype.from_arrow(pandas_type.pyarrow_dtype)

        def is_np_dtype(type_, np_dtype):
            # NOTE(review): relies on private pandas helpers
            # (pd.core.dtypes.common._is_dtype_type / classes).
            return pd.core.dtypes.common._is_dtype_type(
                type_, pd.core.dtypes.common.classes(np_dtype)
            )

        if pd.api.types.is_signed_integer_dtype(pandas_type):
            if is_np_dtype(pandas_type, np.int64):
                return Int64()
            elif is_np_dtype(pandas_type, np.int32):
                return Int32()
            elif is_np_dtype(pandas_type, np.int16):
                return Int16()
            elif is_np_dtype(pandas_type, np.int8):
                return Int8()
            raise TypeError(f"unsupported signed integer dtype: {pandas_type!r}")
        if pd.api.types.is_unsigned_integer_dtype(pandas_type):
            if is_np_dtype(pandas_type, np.uint64):
                return Uint64()
            elif is_np_dtype(pandas_type, np.uint32):
                return Uint32()
            elif is_np_dtype(pandas_type, np.uint16):
                return Uint16()
            elif is_np_dtype(pandas_type, np.uint8):
                return Uint8()
            raise TypeError(f"unsupported unsigned integer dtype: {pandas_type!r}")
        if pd.api.types.is_float_dtype(pandas_type):
            if is_np_dtype(pandas_type, np.float64):
                return Float64()
            elif is_np_dtype(pandas_type, np.float32):
                return Float32()
            raise TypeError(f"unsupported float dtype: {pandas_type!r}")
        if pd.api.types.is_string_dtype(pandas_type):
            # We reserve the use of the object column for string.
            return String()
        if pd.api.types.is_bool_dtype(pandas_type):
            return Bool()
        if pd.api.types.is_datetime64_any_dtype(pandas_type):
            return Datetime()

        raise TypeError(f"unsupported pandas dtype: {pandas_type!r}")

    @staticmethod
    def from_arrow(arrow_type) -> Dtype:
        """Convert a pyarrow data type into the matching Dtype.

        float16 is widened to Float32 because there is no 16-bit float Dtype.

        Raises:
            TypeError: if ``arrow_type`` is not supported.
        """
        import pyarrow as pa

        if pa.types.is_signed_integer(arrow_type):
            if pa.types.is_int64(arrow_type):
                return Int64()
            if pa.types.is_int32(arrow_type):
                return Int32()
            if pa.types.is_int16(arrow_type):
                return Int16()
            if pa.types.is_int8(arrow_type):
                return Int8()
            raise TypeError(f"unsupported signed integer type: {arrow_type!r}")
        if pa.types.is_unsigned_integer(arrow_type):
            if pa.types.is_uint64(arrow_type):
                return Uint64()
            if pa.types.is_uint32(arrow_type):
                return Uint32()
            if pa.types.is_uint16(arrow_type):
                return Uint16()
            if pa.types.is_uint8(arrow_type):
                return Uint8()
            raise TypeError(f"unsupported unsigned integer type: {arrow_type!r}")
        if pa.types.is_floating(arrow_type):
            if pa.types.is_float64(arrow_type):
                return Float64()
            if pa.types.is_float32(arrow_type):
                return Float32()
            if pa.types.is_float16(arrow_type):
                return Float32()
            raise TypeError(f"unsupported floating point type: {arrow_type!r}")
        if pa.types.is_string(arrow_type):
            return String()
        if pa.types.is_boolean(arrow_type):
            return Bool()
        if pa.types.is_timestamp(arrow_type):
            return Datetime()
        if pa.types.is_date(arrow_type):
            return Date()
        if pa.types.is_time(arrow_type):
            return Time()
        raise TypeError(f"unsupported arrow type: {arrow_type!r}")

    @staticmethod
    def from_polars(polars_type) -> Dtype:
        """Convert a polars data type into the matching Dtype.

        ``pl.List`` instances are converted recursively; ``pl.Enum`` is
        mapped to String.

        Raises:
            KeyError: if the base type of ``polars_type`` has no entry in
                the lookup table (note: not TypeError as in the other
                ``from_*`` converters).
        """
        import polars as pl

        if isinstance(polars_type, pl.List):
            return List(Dtype.from_polars(polars_type.inner))

        return {
            pl.Int64: Int64(),
            pl.Int32: Int32(),
            pl.Int16: Int16(),
            pl.Int8: Int8(),
            pl.UInt64: Uint64(),
            pl.UInt32: Uint32(),
            pl.UInt16: Uint16(),
            pl.UInt8: Uint8(),
            pl.Float64: Float64(),
            pl.Float32: Float32(),
            pl.Utf8: String(),
            pl.Boolean: Bool(),
            pl.Datetime: Datetime(),
            pl.Time: Time(),
            pl.Date: Date(),
            pl.Null: NullType(),
            pl.Duration: Duration(),
            pl.Enum: String(),
        }[polars_type.base_type()]

    def to_sql(self):
        """Return the SQLAlchemy type instance for this dtype.

        Int8 and Uint8 share SmallInteger; Uint16 and Uint32 are widened to
        the next larger signed integer type and Uint64 maps to BigInteger.

        Raises:
            KeyError: if this dtype has no entry in the table.
        """
        import sqlalchemy as sqa

        return {
            Int8(): sqa.SmallInteger(),
            Int16(): sqa.SmallInteger(),
            Int32(): sqa.Integer(),
            Int64(): sqa.BigInteger(),
            Uint8(): sqa.SmallInteger(),
            Uint16(): sqa.Integer(),
            Uint32(): sqa.BigInteger(),
            Uint64(): sqa.BigInteger(),
            Float32(): sqa.Float(24),
            Float64(): sqa.Float(53),
            String(): sqa.String(),
            Bool(): sqa.Boolean(),
            Date(): sqa.Date(),
            Time(): sqa.Time(),
            Datetime(): sqa.DateTime(),
            Decimal(): sqa.DECIMAL(),
            NullType(): sqa.types.NullType(),
        }[self]

    def to_pandas(self, backend: PandasBackend = PandasBackend.ARROW):
        """Return the pandas dtype for this dtype under the given backend.

        With the NUMPY backend this defers to ``to_pandas_nullable``; with
        the ARROW backend String becomes a pyarrow-backed ``StringDtype``
        and everything else a ``pd.ArrowDtype``. A ``backend`` that equals
        neither member falls through and returns None.
        """
        import pandas as pd

        if backend == PandasBackend.NUMPY:
            return self.to_pandas_nullable(backend)
        if backend == PandasBackend.ARROW:
            if self == String():
                return pd.StringDtype(storage="pyarrow")
            return pd.ArrowDtype(self.to_arrow())

    def to_pandas_nullable(self, backend: PandasBackend = PandasBackend.ARROW):
        """Return the pandas nullable extension dtype for this dtype.

        Date and Datetime are returned as ``datetime64`` dtype strings.

        Raises:
            TypeError: for Time unless ``backend`` is ARROW.
            KeyError: if this dtype has no entry in the table.
        """
        import pandas as pd

        if self == Time():
            # Time only exists as an arrow-backed dtype in pandas.
            if backend == PandasBackend.ARROW:
                return pd.ArrowDtype(self.to_arrow())
            raise TypeError("pandas doesn't have a native time dtype")

        return {
            Int8(): pd.Int8Dtype(),
            Int16(): pd.Int16Dtype(),
            Int32(): pd.Int32Dtype(),
            Int64(): pd.Int64Dtype(),
            Uint8(): pd.UInt8Dtype(),
            Uint16(): pd.UInt16Dtype(),
            Uint32(): pd.UInt32Dtype(),
            Uint64(): pd.UInt64Dtype(),
            Float32(): pd.Float32Dtype(),
            Float64(): pd.Float64Dtype(),
            String(): pd.StringDtype(),
            Bool(): pd.BooleanDtype(),
            Date(): "datetime64[s]",
            # Time() not supported
            Datetime(): "datetime64[us]",
        }[self]

    def to_arrow(self):
        """Return the pyarrow data type for this dtype.

        Raises:
            KeyError: if this dtype has no entry in the table.
        """
        import pyarrow as pa

        return {
            Int8(): pa.int8(),
            Int16(): pa.int16(),
            Int32(): pa.int32(),
            Int64(): pa.int64(),
            Uint8(): pa.uint8(),
            Uint16(): pa.uint16(),
            Uint32(): pa.uint32(),
            Uint64(): pa.uint64(),
            Float32(): pa.float32(),
            Float64(): pa.float64(),
            String(): pa.string(),
            Bool(): pa.bool_(),
            Date(): pa.date32(),
            Time(): pa.time64("us"),
            Datetime(): pa.timestamp("us"),
        }[self]

    def to_polars(self: Dtype):
        """Return the polars data type for this dtype.

        Raises:
            KeyError: if this dtype has no entry in the table.
        """
        import polars as pl

        return {
            Int64(): pl.Int64,
            Int32(): pl.Int32,
            Int16(): pl.Int16,
            Int8(): pl.Int8,
            Uint64(): pl.UInt64,
            Uint32(): pl.UInt32,
            Uint16(): pl.UInt16,
            Uint8(): pl.UInt8,
            Float64(): pl.Float64,
            Float32(): pl.Float32,
            String(): pl.Utf8,
            Bool(): pl.Boolean,
            Datetime(): pl.Datetime("us"),
            Duration(): pl.Duration,
            Time(): pl.Time,
            Date(): pl.Date,
            NullType(): pl.Null,
        }[self]
300
+
301
+
302
class Float(Dtype):
    """Generic floating point dtype and base class of the sized float dtypes."""

    @classmethod
    def is_float(cls):
        """Report this class and its subclasses as floating point dtypes."""
        return True
306
+
307
+
308
class Float64(Float):
    """64-bit floating point dtype."""
309
+
310
+
311
class Float32(Float):
    """32-bit floating point dtype."""
312
+
313
+
314
class Decimal(Dtype):
    """Decimal dtype; among the converters only ``to_sql`` has a mapping for it."""
315
+
316
+
317
class Int(Dtype):
    """Generic integer dtype and base class of the sized (un)signed integer dtypes."""

    @classmethod
    def is_int(cls):
        """Report this class and its subclasses as integer dtypes."""
        return True
321
+
322
+
323
class Int64(Int):
    """Signed 64-bit integer dtype."""
324
+
325
+
326
class Int32(Int):
    """Signed 32-bit integer dtype."""
327
+
328
+
329
class Int16(Int):
    """Signed 16-bit integer dtype."""
330
+
331
+
332
class Int8(Int):
    """Signed 8-bit integer dtype."""
333
+
334
+
335
class Uint64(Int):
    """Unsigned 64-bit integer dtype."""
336
+
337
+
338
class Uint32(Int):
    """Unsigned 32-bit integer dtype."""
339
+
340
+
341
class Uint16(Int):
    """Unsigned 16-bit integer dtype."""
342
+
343
+
344
class Uint8(Int):
    """Unsigned 8-bit integer dtype."""
345
+
346
+
347
class String(Dtype):
    """String dtype."""
348
+
349
+
350
class Bool(Dtype):
    """Boolean dtype."""
351
+
352
+
353
class Datetime(Dtype):
    """Date-and-time (timestamp) dtype."""
354
+
355
+
356
class Date(Dtype):
    """Calendar date dtype."""
357
+
358
+
359
class Time(Dtype):
    """Time-of-day dtype."""
360
+
361
+
362
class Duration(Dtype):
    """Duration dtype; among the converters only the polars ones map it."""
363
+
364
+
365
class NullType(Dtype):
    """Dtype of a column whose type is null / unknown."""
366
+
367
+
368
class List(Dtype):
    """List dtype, parameterized by the dtype of its elements (``inner``)."""

    def __init__(self, inner: Dtype):
        self.inner = inner

    def __eq__(self, rhs):
        """Two list dtypes are equal when their element dtypes are equal."""
        if not isinstance(rhs, List):
            return False
        return self.inner == rhs.inner

    def __hash__(self):
        """Hash derived from the element dtype, consistent with ``__eq__``."""
        inner_hash = hash(self.inner)
        return hash((0, inner_hash))

    def __repr__(self):
        """Render as ``List[<inner repr>]``."""
        return f"List[{self.inner!r}]"

    def to_sql(self):
        """Return a SQLAlchemy ARRAY of the element's SQL type."""
        import sqlalchemy as sqa

        element_type = self.inner.to_sql()
        return sqa.ARRAY(element_type)

    def to_polars(self):
        """Return a polars List of the element's polars type."""
        import polars as pl

        element_type = self.inner.to_polars()
        return pl.List(element_type)
@@ -0,0 +1,66 @@
1
+ Metadata-Version: 2.4
2
+ Name: pydiverse-common
3
+ Version: 0.1.0
4
+ Summary: Common functionality shared between pydiverse libraries
5
+ Author: QuantCo, Inc.
6
+ Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
7
+ License: BSD 3-Clause License
8
+
9
+ Copyright (c) 2022, pydiverse
10
+ All rights reserved.
11
+
12
+ Redistribution and use in source and binary forms, with or without
13
+ modification, are permitted provided that the following conditions are met:
14
+
15
+ 1. Redistributions of source code must retain the above copyright notice, this
16
+ list of conditions and the following disclaimer.
17
+
18
+ 2. Redistributions in binary form must reproduce the above copyright notice,
19
+ this list of conditions and the following disclaimer in the documentation
20
+ and/or other materials provided with the distribution.
21
+
22
+ 3. Neither the name of the copyright holder nor the names of its
23
+ contributors may be used to endorse or promote products derived from
24
+ this software without specific prior written permission.
25
+
26
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
30
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
+ License-File: LICENSE
37
+ Classifier: Development Status :: 4 - Beta
38
+ Classifier: Intended Audience :: Developers
39
+ Classifier: Intended Audience :: Science/Research
40
+ Classifier: License :: OSI Approved :: BSD License
41
+ Classifier: Programming Language :: Python :: 3
42
+ Classifier: Programming Language :: Python :: 3.9
43
+ Classifier: Programming Language :: Python :: 3.10
44
+ Classifier: Programming Language :: Python :: 3.11
45
+ Classifier: Programming Language :: Python :: 3.12
46
+ Classifier: Programming Language :: Python :: 3.13
47
+ Classifier: Programming Language :: SQL
48
+ Classifier: Topic :: Database
49
+ Classifier: Topic :: Scientific/Engineering
50
+ Classifier: Topic :: Software Development
51
+ Requires-Python: >=3.9
52
+ Description-Content-Type: text/markdown
53
+
54
+ # pydiverse.common
55
+
56
+ [![CI](https://github.com/pydiverse/pydiverse.common/actions/workflows/tests.yml/badge.svg)](https://github.com/pydiverse/pydiverse.common/actions/workflows/tests.yml)
57
+
58
+ A base package for different libraries in the pydiverse library collection.
59
+ This includes functionality like a type-system for tabular data (SQL and DataFrame).
60
+ This type-system is used for ensuring reliable operation of the pydiverse library
61
+ with various execution backends like Pandas, Polars, and various SQL dialects.
62
+
63
+ ## Usage
64
+
65
+ pydiverse.common can be installed either from PyPI with `pip install pydiverse-common` or via
66
+ conda-forge with `conda install pydiverse-common -c conda-forge`.
@@ -0,0 +1,6 @@
1
+ pydiverse/common/__init__.py,sha256=PUZC9Un8H77EnlsjE2_bQC4y94zOfsvK-6LPg5uIQvE,667
2
+ pydiverse/common/dtypes.py,sha256=75ToXN2YilFJ9T-BuYZKcdXcpE1XRgig0_3Jo070ToQ,10868
3
+ pydiverse_common-0.1.0.dist-info/METADATA,sha256=UcmvyvuqejgxkluJim30pXOV7GTBl5HnNfD117TvzE8,3406
4
+ pydiverse_common-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
+ pydiverse_common-0.1.0.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
6
+ pydiverse_common-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2022, pydiverse
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.