opteryx-catalog 0.4.11__py3-none-any.whl → 0.4.26__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (32)
  1. opteryx_catalog/catalog/compaction.py +15 -8
  2. opteryx_catalog/catalog/dataset.py +449 -111
  3. opteryx_catalog/catalog/manifest.py +390 -330
  4. opteryx_catalog/catalog/metadata.py +3 -0
  5. opteryx_catalog/iops/fileio.py +13 -0
  6. opteryx_catalog/maki_nage/__init__.py +8 -0
  7. opteryx_catalog/maki_nage/distogram.py +558 -0
  8. opteryx_catalog/maki_nage/tests/_test_histogram.py +52 -0
  9. opteryx_catalog/maki_nage/tests/test_bounds.py +24 -0
  10. opteryx_catalog/maki_nage/tests/test_count.py +19 -0
  11. opteryx_catalog/maki_nage/tests/test_count_at.py +89 -0
  12. opteryx_catalog/maki_nage/tests/test_quantile.py +81 -0
  13. opteryx_catalog/maki_nage/tests/test_stats.py +25 -0
  14. opteryx_catalog/maki_nage/tests/test_update.py +44 -0
  15. opteryx_catalog/opteryx_catalog.py +82 -54
  16. opteryx_catalog/webhooks/__init__.py +230 -0
  17. opteryx_catalog/webhooks/events.py +177 -0
  18. {opteryx_catalog-0.4.11.dist-info → opteryx_catalog-0.4.26.dist-info}/METADATA +15 -18
  19. opteryx_catalog-0.4.26.dist-info/RECORD +45 -0
  20. {opteryx_catalog-0.4.11.dist-info → opteryx_catalog-0.4.26.dist-info}/WHEEL +1 -1
  21. scripts/collect_byte_counts.py +42 -0
  22. scripts/emit_full_single_file.py +81 -0
  23. scripts/inspect_manifest_dryrun.py +322 -0
  24. scripts/inspect_single_file.py +147 -0
  25. scripts/inspect_single_file_gcs.py +124 -0
  26. tests/test_collections.py +37 -0
  27. tests/test_describe_uncompressed.py +127 -0
  28. tests/test_refresh_manifest.py +275 -0
  29. tests/test_webhooks.py +177 -0
  30. opteryx_catalog-0.4.11.dist-info/RECORD +0 -25
  31. {opteryx_catalog-0.4.11.dist-info → opteryx_catalog-0.4.26.dist-info}/licenses/LICENSE +0 -0
  32. {opteryx_catalog-0.4.11.dist-info → opteryx_catalog-0.4.26.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,7 @@ from typing import Optional
14
14
  import pyarrow as pa
15
15
  import pyarrow.parquet as pq
16
16
 
17
- from .manifest import build_parquet_manifest_entry
17
+ from .manifest import build_parquet_manifest_entry_from_bytes
18
18
  from .metadata import Snapshot
19
19
 
20
20
  # Constants
@@ -398,18 +398,25 @@ class DatasetCompactor:
398
398
  file_name = f"data-{snapshot_id}-{idx:04d}.parquet"
399
399
  file_path = os.path.join(self.dataset.metadata.location, file_name)
400
400
 
401
- # Write parquet file
401
+ # Write parquet file to buffer and upload (so we can reuse bytes)
402
402
  try:
403
+ buf = pa.BufferOutputStream()
404
+ from ..iops.fileio import WRITE_PARQUET_OPTIONS
405
+
406
+ pq.write_table(table, buf, **WRITE_PARQUET_OPTIONS)
407
+ pdata = buf.getvalue().to_pybytes()
403
408
  io = self.dataset.io
404
- out = io.new_output(file_path)
405
- with out.create() as f:
406
- pq.write_table(table, f)
409
+ out = io.new_output(file_path).create()
410
+ out.write(pdata)
411
+ out.close()
407
412
  except Exception:
408
- # Failed to write, abort
413
+ # Failed to write or upload, abort
409
414
  return None
410
415
 
411
- # Build manifest entry with full statistics
412
- entry_dict = build_parquet_manifest_entry(table, file_path)
416
+ # Build manifest entry with full statistics using the bytes-based builder
417
+ entry_dict = build_parquet_manifest_entry_from_bytes(
418
+ pdata, file_path, len(pdata), orig_table=table
419
+ )
413
420
  new_entries.append(entry_dict)
414
421
 
415
422
  # Create new manifest with updated entries