deltacat-1.1.27-py3-none-any.whl → deltacat-1.1.28-py3-none-any.whl

deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
 deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-__version__ = "1.1.27"
+__version__ = "1.1.28"
 
 
 __all__ = [
deltacat/tests/utils/test_pyarrow.py CHANGED
@@ -7,7 +7,9 @@ from deltacat.utils.pyarrow import (
     s3_file_to_table,
     ReadKwargsProviderPyArrowSchemaOverride,
     RAISE_ON_EMPTY_CSV_KWARG,
+    RAISE_ON_DECIMAL_OVERFLOW,
 )
+import decimal
 from deltacat.types.media import ContentEncoding, ContentType
 from deltacat.types.partial_download import PartialParquetParameters
 from pyarrow.parquet import ParquetFile
@@ -16,6 +18,12 @@ import pyarrow as pa
 PARQUET_FILE_PATH = "deltacat/tests/utils/data/test_file.parquet"
 EMPTY_UTSV_PATH = "deltacat/tests/utils/data/empty.csv"
 NON_EMPTY_VALID_UTSV_PATH = "deltacat/tests/utils/data/non_empty_valid.csv"
+OVERFLOWING_DECIMAL_PRECISION_UTSV_PATH = (
+    "deltacat/tests/utils/data/overflowing_decimal_precision.csv"
+)
+OVERFLOWING_DECIMAL_SCALE_UTSV_PATH = (
+    "deltacat/tests/utils/data/overflowing_decimal_scale.csv"
+)
 GZIP_COMPRESSED_FILE_UTSV_PATH = "deltacat/tests/utils/data/non_empty_compressed.gz"
 BZ2_COMPRESSED_FILE_UTSV_PATH = "deltacat/tests/utils/data/non_empty_compressed.bz2"
 
@@ -407,6 +415,253 @@ class TestReadCSV(TestCase):
             ),
         )
 
+    def test_read_csv_when_decimal_precision_overflows_and_raise_kwarg_specified(self):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal128(4, 2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+        self.assertRaises(
+            pa.lib.ArrowInvalid,
+            lambda: pyarrow_read_csv(
+                OVERFLOWING_DECIMAL_PRECISION_UTSV_PATH,
+                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True}
+            ),
+        )
+
+    def test_read_csv_when_decimal_precision_overflows_sanity(self):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal128(4, 2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        self.assertRaises(
+            pa.lib.ArrowInvalid,
+            lambda: pyarrow_read_csv(OVERFLOWING_DECIMAL_PRECISION_UTSV_PATH, **kwargs),
+        )
+
+    def test_read_csv_when_decimal_scale_overflows_and_raise_kwarg_specified(self):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal128(20, 2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        self.assertRaises(
+            pa.lib.ArrowInvalid,
+            lambda: pyarrow_read_csv(
+                OVERFLOWING_DECIMAL_SCALE_UTSV_PATH,
+                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True}
+            ),
+        )
+
+    def test_read_csv_when_decimal_scale_overflows_sanity(self):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal128(20, 2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        result = pyarrow_read_csv(OVERFLOWING_DECIMAL_SCALE_UTSV_PATH, **kwargs)
+
+        self.assertEqual(len(result), 3)
+        self.assertEqual(
+            result[1][0].as_py(), decimal.Decimal("322236.66")
+        )  # rounding decimal
+        self.assertEqual(result[1][1].as_py(), decimal.Decimal("32.33"))  # not rounded
+        self.assertEqual(len(result.column_names), 2)
+        result_schema = result.schema
+        self.assertEqual(result_schema.field(0).type, "string")
+        self.assertEqual(result_schema.field(1).type, pa.decimal128(20, 2))
+
+    def test_read_csv_when_decimal_scale_overflows_and_negative_scale(self):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal128(20, -2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        result = pyarrow_read_csv(OVERFLOWING_DECIMAL_SCALE_UTSV_PATH, **kwargs)
+
+        self.assertEqual(len(result), 3)
+        self.assertEqual(
+            result[1][0].as_py(),
+            decimal.Decimal("322200"),  # consequence of negative scale
+        )  # rounding decimal
+        self.assertEqual(result[1][1].as_py(), decimal.Decimal("00"))
+        self.assertEqual(len(result.column_names), 2)
+        result_schema = result.schema
+        self.assertEqual(result_schema.field(0).type, "string")
+        self.assertEqual(result_schema.field(1).type, pa.decimal128(20, -2))
+
+    def test_read_csv_when_decimal_scale_overflows_with_decimal256(self):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal256(20, 2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        result = pyarrow_read_csv(OVERFLOWING_DECIMAL_SCALE_UTSV_PATH, **kwargs)
+
+        self.assertEqual(len(result), 3)
+        self.assertEqual(
+            result[1][0].as_py(), decimal.Decimal("322236.66")
+        )  # rounding decimal
+        self.assertEqual(result[1][1].as_py(), decimal.Decimal("32.33"))  # not rounded
+        self.assertEqual(len(result.column_names), 2)
+        result_schema = result.schema
+        self.assertEqual(result_schema.field(0).type, "string")
+        self.assertEqual(result_schema.field(1).type, pa.decimal256(20, 2))
+
+    def test_read_csv_when_decimal_scale_overflows_with_decimal256_and_raise_on_overflow(
+        self,
+    ):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal256(20, 2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        self.assertRaises(
+            pa.lib.ArrowNotImplementedError,
+            lambda: pyarrow_read_csv(
+                OVERFLOWING_DECIMAL_SCALE_UTSV_PATH,
+                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True}
+            ),
+        )
+
+    def test_read_csv_when_decimal_scale_overflows_without_any_schema_then_infers(self):
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=None)
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        result = pyarrow_read_csv(OVERFLOWING_DECIMAL_SCALE_UTSV_PATH, **kwargs)
+
+        # The default behavior of pyarrow is to skip invalid rows
+        self.assertEqual(len(result), 2)
+        self.assertEqual(result[1][0].as_py(), 32.33)  # rounding decimal
+        self.assertEqual(result[1][1].as_py(), 0.4)  # not rounded
+        self.assertEqual(len(result.column_names), 2)
+        result_schema = result.schema
+        self.assertEqual(result_schema.field(0).type, "string")
+        self.assertEqual(result_schema.field(1).type, pa.float64())
+
+    def test_read_csv_when_decimal_scale_and_precision_overflow_and_raise_on_overflow(
+        self,
+    ):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal128(5, 2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        self.assertRaises(
+            pa.lib.ArrowInvalid,
+            lambda: pyarrow_read_csv(OVERFLOWING_DECIMAL_SCALE_UTSV_PATH, **kwargs),
+        )
+
+    def test_read_csv_when_decimal_scale_overflow_and_file_like_obj_passed(self):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("decimal_value", pa.decimal128(15, 2))]
+        )
+        kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
+        _add_column_kwargs(
+            ContentType.UNESCAPED_TSV.value,
+            ["is_active", "decimal_value"],
+            ["is_active", "decimal_value"],
+            kwargs,
+        )
+
+        read_kwargs_provider = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)
+
+        kwargs = read_kwargs_provider(ContentType.UNESCAPED_TSV.value, kwargs)
+
+        with open(OVERFLOWING_DECIMAL_SCALE_UTSV_PATH, "rb") as file:
+            result = pyarrow_read_csv(file, **kwargs)
+
+        self.assertEqual(len(result), 3)
+        self.assertEqual(
+            result[1][0].as_py(), decimal.Decimal("322236.66")
+        )  # rounding decimal
+        self.assertEqual(
+            result[1][1].as_py(), decimal.Decimal("32.33")
+        )  # not rounded
+        self.assertEqual(len(result.column_names), 2)
+        result_schema = result.schema
+        self.assertEqual(result_schema.field(0).type, "string")
+        self.assertEqual(result_schema.field(1).type, pa.decimal128(15, 2))
+
 
 class TestS3FileToTable(TestCase):
     def test_s3_file_to_table_identity_sanity(self):
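Note: every test above follows the same plumbing: build reader kwargs for the content type, layer on column selection, apply a schema-override provider, then call pyarrow_read_csv. A minimal sketch of that flow outside the test harness, assuming content_type_to_reader_kwargs and ReadKwargsProviderPyArrowSchemaOverride are importable from deltacat.utils.pyarrow as in the test imports, and using a hypothetical data.tsv:

import pyarrow as pa
from deltacat.types.media import ContentType
from deltacat.utils.pyarrow import (
    ReadKwargsProviderPyArrowSchemaOverride,
    content_type_to_reader_kwargs,
    pyarrow_read_csv,
)

# Explicit schema: values overflowing decimal128(4, 2) are rounded by default.
schema = pa.schema(
    [("is_active", pa.string()), ("decimal_value", pa.decimal128(4, 2))]
)
kwargs = content_type_to_reader_kwargs(ContentType.UNESCAPED_TSV.value)
kwargs = ReadKwargsProviderPyArrowSchemaOverride(schema=schema)(
    ContentType.UNESCAPED_TSV.value, kwargs
)
table = pyarrow_read_csv("data.tsv", **kwargs)  # hypothetical fixture path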
deltacat/utils/pyarrow.py CHANGED
@@ -1,6 +1,7 @@
 # Allow classes to use self-referencing Type hints in Python 3.7.
 from __future__ import annotations
 
+import copy
 import bz2
 import gzip
 import io
@@ -47,6 +48,17 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 RAISE_ON_EMPTY_CSV_KWARG = "raise_on_empty_csv"
 READER_TYPE_KWARG = "reader_type"
 
+"""
+By default, decimal values are rounded with the half_to_even mode whenever
+rescaling them to the scale and precision given in the schema would cause
+data loss. Setting this argument to any non-null value raises an error
+instead.
+"""
+RAISE_ON_DECIMAL_OVERFLOW = "raise_on_decimal_overflow"
+# Max precision per https://arrow.apache.org/docs/python/generated/pyarrow.Decimal256Type.html#pyarrow.Decimal256Type; scale 38 is a chosen default
+DECIMAL256_DEFAULT_SCALE = 38
+DECIMAL256_MAX_PRECISION = 76
+
 
 def _filter_schema_for_columns(schema: pa.Schema, columns: List[str]) -> pa.Schema:
 
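A usage sketch for the new kwarg (not part of the diff; the file path and convert options are illustrative, and pyarrow_read_csv is the deltacat wrapper defined further below):

import pyarrow as pa
import pyarrow.csv as pacsv
from deltacat.utils.pyarrow import RAISE_ON_DECIMAL_OVERFLOW, pyarrow_read_csv

convert_options = pacsv.ConvertOptions(
    column_types={"decimal_value": pa.decimal128(4, 2)}
)
# Default: values that overflow decimal128(4, 2) are rounded half_to_even.
table = pyarrow_read_csv("data.csv", convert_options=convert_options)
# Strict mode: the same overflow raises pa.lib.ArrowInvalid instead.
table = pyarrow_read_csv(
    "data.csv",
    convert_options=convert_options,
    **{RAISE_ON_DECIMAL_OVERFLOW: True},
)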
@@ -64,45 +76,162 @@ def _filter_schema_for_columns(schema: pa.Schema, columns: List[str]) -> pa.Sche
     return target_schema
 
 
-def pyarrow_read_csv(*args, **kwargs) -> pa.Table:
-    try:
-        new_kwargs = sanitize_kwargs_by_supported_kwargs(
-            ["read_options", "parse_options", "convert_options", "memory_pool"], kwargs
+def _extract_arrow_schema_from_read_csv_kwargs(kwargs: Dict[str, Any]) -> pa.Schema:
+    schema = None
+    if (
+        "convert_options" in kwargs
+        and kwargs["convert_options"].column_types is not None
+    ):
+        schema = kwargs["convert_options"].column_types
+        if not isinstance(schema, pa.Schema):
+            schema = pa.schema(schema)
+        if kwargs["convert_options"].include_columns:
+            schema = _filter_schema_for_columns(
+                schema, kwargs["convert_options"].include_columns
+            )
+        elif (
+            kwargs.get("read_options") is not None
+            and kwargs["read_options"].column_names
+        ):
+            schema = _filter_schema_for_columns(
+                schema, kwargs["read_options"].column_names
+            )
+    else:
+        logger.debug(
+            "Schema not specified in the kwargs."
+            " Hence, schema could not be inferred from the empty CSV."
         )
+
+    return schema
+
+
+def _new_schema_with_replaced_fields(
+    schema: pa.Schema, field_to_replace: Callable[[pa.Field], Optional[pa.Field]]
+) -> pa.Schema:
+    if schema is None:
+        return None
+
+    new_schema_fields = []
+    for field in schema:
+        new_field = field_to_replace(field)
+        if new_field is not None:
+            new_schema_fields.append(new_field)
+        else:
+            new_schema_fields.append(field)
+
+    return pa.schema(new_schema_fields, metadata=schema.metadata)
+
+
+def _read_csv_rounding_decimal_columns_to_fit_scale(
+    schema: pa.Schema, reader_args: List[Any], reader_kwargs: Dict[str, Any]
+) -> pa.Table:
+    # Note: We read decimals as strings first because CSV
+    # conversion to decimal256 isn't implemented as of pyarrow==12.0.1
+    new_schema = _new_schema_with_replaced_fields(
+        schema,
+        lambda fld: pa.field(fld.name, pa.string(), metadata=fld.metadata)
+        if pa.types.is_decimal128(fld.type) or pa.types.is_decimal256(fld.type)
+        else None,
+    )
+    new_kwargs = sanitize_kwargs_by_supported_kwargs(
+        ["read_options", "parse_options", "convert_options", "memory_pool"],
+        reader_kwargs,
+    )
+    # Creating a shallow copy for efficiency
+    new_convert_options = copy.copy(new_kwargs["convert_options"])
+    new_convert_options.column_types = new_schema
+    new_reader_kwargs = {**new_kwargs, "convert_options": new_convert_options}
+    arrow_table = pacsv.read_csv(*reader_args, **new_reader_kwargs)
+
+    for column_index, field in enumerate(schema):
+        if pa.types.is_decimal128(field.type) or pa.types.is_decimal256(field.type):
+            column_array = arrow_table[field.name]
+            # We always cast to decimal256 to accommodate the fixed scale of 38
+            cast_to_type = pa.decimal256(
+                DECIMAL256_MAX_PRECISION, DECIMAL256_DEFAULT_SCALE
+            )
+            casted_decimal_array = pc.cast(column_array, cast_to_type)
+            # Note that scale can be negative
+            rounded_column_array = pc.round(
+                casted_decimal_array, ndigits=field.type.scale
+            )
+            final_decimal_array = pc.cast(rounded_column_array, field.type)
+            arrow_table = arrow_table.set_column(
+                column_index,
+                field,
+                final_decimal_array,
+            )
+            logger.debug(
+                f"Rounded decimal column: {field.name} to {field.type.scale} scale and"
+                f" {field.type.precision} precision"
+            )
+
+    return arrow_table
+
+
+def pyarrow_read_csv_default(*args, **kwargs):
+    new_kwargs = sanitize_kwargs_by_supported_kwargs(
+        ["read_options", "parse_options", "convert_options", "memory_pool"], kwargs
+    )
+
+    try:
         return pacsv.read_csv(*args, **new_kwargs)
     except pa.lib.ArrowInvalid as e:
-        if e.__str__() == "Empty CSV file" and not kwargs.get(RAISE_ON_EMPTY_CSV_KWARG):
-            schema = None
-            if (
-                "convert_options" in kwargs
-                and kwargs["convert_options"].column_types is not None
-            ):
-                schema = kwargs["convert_options"].column_types
-                if not isinstance(schema, pa.Schema):
-                    schema = pa.schema(schema)
-                if kwargs["convert_options"].include_columns:
-                    schema = _filter_schema_for_columns(
-                        schema, kwargs["convert_options"].include_columns
-                    )
-                elif (
-                    kwargs.get("read_options") is not None
-                    and kwargs["read_options"].column_names
+        error_str = e.__str__()
+        schema = _extract_arrow_schema_from_read_csv_kwargs(kwargs)
+
+        if error_str == "Empty CSV file" and not kwargs.get(RAISE_ON_EMPTY_CSV_KWARG):
+            logger.debug(f"Read CSV empty schema being used: {schema}")
+            return pa.Table.from_pylist([], schema=schema)
+        if not kwargs.get(RAISE_ON_DECIMAL_OVERFLOW):
+            # Note: this logic requires expensive casting, so to avoid degrading
+            # performance on happy-path reads, we only handle this case in
+            # response to an error.
+            logger.warning(
+                "Rescaling Decimal to the given scale in the schema. "
+                f"Original error: {error_str}"
+            )
+
+            if schema is not None and "convert_options" in kwargs:
+                if (
+                    "Rescaling Decimal" in error_str
+                    and "value would cause data loss" in error_str
                 ):
-                    schema = _filter_schema_for_columns(
-                        schema, kwargs["read_options"].column_names
+                    logger.debug(f"Checking if the file: {args[0]}...")
+                    # Since we are re-reading the file, we have to seek to the beginning
+                    if isinstance(args[0], io.IOBase) and args[0].seekable():
+                        logger.debug(f"Seeking to the beginning of the file {args[0]}")
+                        args[0].seek(0)
+                    return _read_csv_rounding_decimal_columns_to_fit_scale(
+                        schema=schema, reader_args=args, reader_kwargs=kwargs
                     )
-
             else:
                 logger.debug(
-                    "Schema not specified in the kwargs."
-                    " Hence, schema could not be inferred from the empty CSV."
+                    "Schema is None when trying to adjust decimal values. "
+                    "Hence, bubbling up exception..."
                 )
 
-        logger.debug(f"Read CSV empty schema being used: {schema}")
-        return pa.Table.from_pylist([], schema=schema)
         raise e
 
 
+def pyarrow_read_csv(*args, **kwargs) -> pa.Table:
+    schema = _extract_arrow_schema_from_read_csv_kwargs(kwargs)
+
+    # CSV conversion to decimal256 isn't supported as of pyarrow==12.0.1.
+    # The check below ensures decimal256 columns are cast properly.
+    schema_includes_decimal256 = (
+        (True if any([pa.types.is_decimal256(x.type) for x in schema]) else False)
+        if schema is not None
+        else None
+    )
+    if schema_includes_decimal256 and not kwargs.get(RAISE_ON_DECIMAL_OVERFLOW):
+        # fall back to the expensive method of reading the CSV
+        return _read_csv_rounding_decimal_columns_to_fit_scale(
+            schema, reader_args=args, reader_kwargs=kwargs
+        )
+    else:
+        return pyarrow_read_csv_default(*args, **kwargs)
+
+
 CONTENT_TYPE_TO_PA_READ_FUNC: Dict[str, Callable] = {
     ContentType.UNESCAPED_TSV.value: pyarrow_read_csv,
     ContentType.TSV.value: pyarrow_read_csv,
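The fallback above can be reproduced in isolation: read the decimal column as strings, widen to decimal256 at maximum precision, round to the target scale (pc.round uses half_to_even by default), then cast down to the declared type. A self-contained sketch with illustrative values:

import pyarrow as pa
import pyarrow.compute as pc

target_type = pa.decimal128(4, 2)  # hypothetical declared column type
raw = pa.array(["32.33444", "0.4"], type=pa.string())  # as read from CSV

# Widen to DECIMAL256_MAX_PRECISION / DECIMAL256_DEFAULT_SCALE, then round and fit.
widened = pc.cast(raw, pa.decimal256(76, 38))
rounded = pc.round(widened, ndigits=target_type.scale)
fitted = pc.cast(rounded, target_type)
print(fitted.to_pylist())  # [Decimal('32.33'), Decimal('0.40')]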
{deltacat-1.1.27.dist-info → deltacat-1.1.28.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 1.1.27
+Version: 1.1.28
 Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
{deltacat-1.1.27.dist-info → deltacat-1.1.28.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
-deltacat/__init__.py,sha256=NNgt1N6a4dwztCKl6C7klF3mQEn-S-sBHNZPKPqRHko,1778
+deltacat/__init__.py,sha256=GPlTQc6AW4ig_nZJ7kMVe-kbZxYfrSVGFN1YEqY8dXU,1778
 deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
 deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
 deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -179,7 +179,7 @@ deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iy
 deltacat/tests/utils/test_daft.py,sha256=kY8lkXoQvyWunok8UvOsh1An297rb3jcnstTuIAyAlc,8232
 deltacat/tests/utils/test_metrics.py,sha256=Ym9nOz1EtB180pLmvugihj1sDTNDMb5opIjjr5Nmcls,16339
 deltacat/tests/utils/test_placement.py,sha256=g61wVOMkHe4YJeR9Oxg_BOVQ6bhHHbC3IBYv8YhUu94,597
-deltacat/tests/utils/test_pyarrow.py,sha256=AWx0DTsBA1PkQag48w_HeQdz7tlBzJsm9v7Nd6-dhEY,19607
+deltacat/tests/utils/test_pyarrow.py,sha256=YDuyFYNjy6thzfA6Z2a0dOytUugsExu1uMUOhmP_aXc,29977
 deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
 deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
 deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -200,7 +200,7 @@ deltacat/utils/numpy.py,sha256=SpHKKvC-K8NINTWGVfTZ5-gBFTGYqaXjjgKFhsdUjwg,2049
 deltacat/utils/pandas.py,sha256=q99mlRB7tymICMcNbfGLfLqFu_C-feyPZKZm2CWJJVc,9574
 deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
 deltacat/utils/placement.py,sha256=Lj20fb-eq8rgMdm_M2MBMfDLwhDM1sS1nJj2DvIK56s,12060
-deltacat/utils/pyarrow.py,sha256=nW_eD6fWAlbyHUzPj1rOOfnUbpP3RnAgNSuuVNyvhZ4,29174
+deltacat/utils/pyarrow.py,sha256=xEZRzbTBU6uj9K4DtvngIPtQkTA8haVgQ4Y4vjwHvtM,34311
 deltacat/utils/resources.py,sha256=Ax1OgLLbZI4oYpp4Ki27OLaST-7I-AJgZwU87FVfY8g,8253
 deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
 deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
@@ -210,8 +210,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
-deltacat-1.1.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deltacat-1.1.27.dist-info/METADATA,sha256=VL7sWG3lO3cV3tzwTiCTgpm7h0K5Dh3GtKiqojgSgHI,1733
-deltacat-1.1.27.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-deltacat-1.1.27.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
-deltacat-1.1.27.dist-info/RECORD,,
+deltacat-1.1.28.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-1.1.28.dist-info/METADATA,sha256=-ZnMp9C26vVxi015Il3UtBMm9pHqA8aZmTnNTgr8Tb8,1733
+deltacat-1.1.28.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+deltacat-1.1.28.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-1.1.28.dist-info/RECORD,,
{deltacat-1.1.27.dist-info → deltacat-1.1.28.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.44.0)
+Generator: bdist_wheel (0.45.1)
 Root-Is-Purelib: true
 Tag: py3-none-any
 