pydiverse-common 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -201,6 +201,11 @@ class Dtype:
201
201
  return NullType()
202
202
  if pa.types.is_list(arrow_type):
203
203
  return List(Dtype.from_arrow(arrow_type.value_type))
204
+ if pa.types.is_dictionary(arrow_type):
205
+ raise RuntimeError(
206
+ "Most likely this is an Enum type. But metadata about categories is "
207
+ "only in the pyarrow field and not in the pyarrow dtype"
208
+ )
204
209
  raise TypeError
205
210
 
206
211
  @staticmethod
@@ -268,7 +273,7 @@ class Dtype:
268
273
  if backend == PandasBackend.NUMPY:
269
274
  return self.to_pandas_nullable(backend)
270
275
  if backend == PandasBackend.ARROW:
271
- if self == String():
276
+ if self == String() or isinstance(self, Enum):
272
277
  return pd.StringDtype(storage="pyarrow")
273
278
  return pd.ArrowDtype(self.to_arrow())
274
279
 
@@ -355,6 +360,12 @@ class Dtype:
355
360
  NullType(): pa.null(),
356
361
  }[self]
357
362
 
363
+ def to_arrow_field(self, name: str, nullable: bool = True):
364
+ """Convert this Dtype to a PyArrow Field."""
365
+ import pyarrow as pa
366
+
367
+ return pa.field(name, self.to_arrow(), nullable=nullable)
368
+
358
369
  def to_polars(self: "Dtype"):
359
370
  """Convert this Dtype to a Polars type."""
360
371
  import polars as pl
@@ -506,6 +517,19 @@ class Enum(String):
506
517
  def to_arrow(self):
507
518
  import pyarrow as pa
508
519
 
509
- # There is also pa.dictionary(), which seems to be kind of similar to an enum.
510
- # Maybe it is better to convert to this.
520
+ # enum categories can only be maintained in pyarrow field (see to_arrow_field)
511
521
  return pa.string()
522
+
523
+ def to_arrow_field(self, name: str, nullable: bool = True):
524
+ """Convert this Dtype to a PyArrow Field."""
525
+ import pyarrow as pa
526
+
527
+ # try to mimic what polars does
528
+ return pa.field(
529
+ name,
530
+ pa.dictionary(pa.uint32(), pa.large_string()),
531
+ nullable=nullable,
532
+ metadata={
533
+ "_PL_ENUM_VALUES": "".join([f"{len(c)};{c}" for c in self.categories])
534
+ },
535
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydiverse-common
3
- Version: 0.3.8
3
+ Version: 0.3.9
4
4
  Summary: Common functionality shared between pydiverse libraries
5
5
  Author: QuantCo, Inc.
6
6
  Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
@@ -1,5 +1,5 @@
1
1
  pydiverse/common/__init__.py,sha256=J7b4iStFyaEMYre_jdlZ4l_8dLyrMWCIpQdsMQcB8aI,806
2
- pydiverse/common/dtypes.py,sha256=LYZKaKYq_4uI4kUhoaCTTo5j1SRurswIOfN11Bkz25A,15986
2
+ pydiverse/common/dtypes.py,sha256=TN3UjI-4uBT_CB_3mLowOYq5GPcyvSbI3N6zHpxxV3c,16885
3
3
  pydiverse/common/testing.py,sha256=FcivI5wn0X3gzJhwnysKvCOgjSTTXaN6FtSFJ72jfSg,341
4
4
  pydiverse/common/version.py,sha256=1IU_m4r76_Qq0u-Tyo2_bERZFOkh0ZFueVzDqcCfLO0,336
5
5
  pydiverse/common/errors/__init__.py,sha256=FNeEfVbUa23b9sHkFsmxHYhY6sRgjaZysPQmlovpJrI,262
@@ -11,7 +11,7 @@ pydiverse/common/util/disposable.py,sha256=4XoGz70YRWA9TAqnUBvRCTAdsOGBviFN0gzxU
11
11
  pydiverse/common/util/hashing.py,sha256=8Z1NybJ_zd3ONpn5annHGjowwArWkd2ZkCtlb3dtz_Q,4576
12
12
  pydiverse/common/util/import_.py,sha256=K7dSgz4YyrqEvqhoOzbwgD7D8HScMoO5XoSWtjbaoUs,4056
13
13
  pydiverse/common/util/structlog.py,sha256=xxhauxMuyxcKXTVg1MiPTkuvPBj8Zcr4o_v8Bq59Nig,3778
14
- pydiverse_common-0.3.8.dist-info/METADATA,sha256=ptAGp299BY9NSaM-XEaojLzhL_KVc0SEY-MFqqqAwL0,3399
15
- pydiverse_common-0.3.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- pydiverse_common-0.3.8.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
17
- pydiverse_common-0.3.8.dist-info/RECORD,,
14
+ pydiverse_common-0.3.9.dist-info/METADATA,sha256=_P0mMant1LZMCa4p5tFBnpr0cvLIYXZXfu5xmYuih9Q,3399
15
+ pydiverse_common-0.3.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ pydiverse_common-0.3.9.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
17
+ pydiverse_common-0.3.9.dist-info/RECORD,,