daplapath 2.0.3__tar.gz → 2.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {daplapath-2.0.3 → daplapath-2.0.6}/PKG-INFO +1 -1
- {daplapath-2.0.3 → daplapath-2.0.6}/daplapath/__init__.py +1 -0
- {daplapath-2.0.3 → daplapath-2.0.6}/daplapath/path.py +183 -74
- {daplapath-2.0.3 → daplapath-2.0.6}/pyproject.toml +1 -1
- {daplapath-2.0.3 → daplapath-2.0.6}/LICENSE.md +0 -0
- {daplapath-2.0.3 → daplapath-2.0.6}/README.md +0 -0
{daplapath-2.0.3 → daplapath-2.0.6}/daplapath/path.py

@@ -14,8 +14,8 @@ import shutil
 from typing import Callable, Any
 import inspect
 import itertools
-import warnings
 
+from fsspec.spec import AbstractFileSystem
 import datetime
 import numpy as np
 import pandas as pd
@@ -23,6 +23,8 @@ import pandas.io.formats.format as fmt
 from pandas.api.types import is_dict_like
 import pyarrow
 import pyarrow.parquet as pq
+import pyarrow.dataset as ds
+
 
 try:
     import gcsfs
@@ -47,15 +49,15 @@ class Config:
     file_system: Callable
 
 
-class LocalFileSystem:
+class LocalFileSystem(AbstractFileSystem):
     """Mimicks GCS's FileSystem but using standard library (os, glob, shutil)."""
 
     @staticmethod
     def glob(
         path: str,
-        recursive: bool = True,
         detail: bool = False,
-
+        recursive: bool = True,
+        include_hidden: bool = True,
         **kwargs,
     ) -> list[dict] | list[str]:
         relevant_paths = glob.iglob(
@@ -67,14 +69,16 @@ class LocalFileSystem:
         with ThreadPoolExecutor() as executor:
             return list(executor.map(get_file_info, relevant_paths))
 
+    @classmethod
+    def ls(cls, path: str, detail: bool = False, **kwargs):
+        return cls().glob(
+            str(pathlib.Path(path) / "**"), detail=detail, recursive=False, **kwargs
+        )
+
     @staticmethod
     def info(path) -> dict[str, Any]:
         return get_file_info(path)
 
-    @staticmethod
-    def isdir(path: str) -> bool:
-        return os.path.isdir(path)
-
     @staticmethod
     def open(path: str, *args, **kwargs) -> io.TextIOWrapper:
         return open(path, *args, **kwargs)
@@ -87,8 +91,12 @@ class LocalFileSystem:
     def mv(source: str, destination, **kwargs) -> str:
         return shutil.move(source, destination, **kwargs)
 
+    @classmethod
+    def cp(cls, source: str, destination, **kwargs) -> str:
+        return cls.cp_file(source, destination, **kwargs)
+
     @staticmethod
-    def
+    def cp_file(self, path1, path2, **kwargs):
         os.makedirs(pathlib.Path(destination).parent, exist_ok=True)
         return shutil.copy2(source, destination, **kwargs)
 
@@ -96,6 +104,14 @@ class LocalFileSystem:
     def rm_file(path: str, *args, **kwargs) -> None:
         return os.remove(path, *args, **kwargs)
 
+    @staticmethod
+    def rmdir(path: str, *args, **kwargs) -> None:
+        return shutil.rmtree(path, *args, **kwargs)
+
+    @staticmethod
+    def makedirs(path: str, exist_ok: bool = False) -> None:
+        return os.makedirs(path, exist_ok=exist_ok)
+
 
 class GCSFileSystem(gcsfs.GCSFileSystem):
     def isdir(self, path: str) -> bool:
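Note: the LocalFileSystem changes above add ls, cp/cp_file, rmdir and makedirs so the class tracks more of the fsspec AbstractFileSystem surface it now inherits from. A minimal usage sketch, assuming the class is importable from daplapath.path and using a throwaway temp directory (all names below are illustrative only):

import os
import tempfile

from daplapath.path import LocalFileSystem  # assumed import location, based on the file list above

fs = LocalFileSystem()
tmp = tempfile.mkdtemp()

# makedirs/rmdir wrap os.makedirs and shutil.rmtree
fs.makedirs(os.path.join(tmp, "a", "b"), exist_ok=True)

# ls delegates to glob with recursive=False, so it lists only the direct children
print(fs.ls(tmp))               # paths only
print(fs.ls(tmp, detail=True))  # per-path info dicts from get_file_info

fs.rmdir(os.path.join(tmp, "a"))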
@@ -110,9 +126,6 @@ else:
     _config = Config(LocalFileSystem)
 
 
-gcsfs.GCSFileSystem.isdir
-
-
 class Tree:
     """Stores text to be printed/displayed in directory tree format.
 
@@ -166,6 +179,17 @@ class _PathBase:
 class Path(str, _PathBase):
     """Path object that works like a string, with methods for working with the GCS file system."""
 
+    _file_system_attrs: set[str] = {
+        "info",
+        "isdir",
+        "open",
+        "exists",
+        "mv",
+        "cp",
+        "rm_file",
+        "rmdir",
+    }
+
     @property
     def _iterable_type(self) -> type | Callable:
         """Can be overridden in subclass."""
@@ -182,14 +206,26 @@ class Path(str, _PathBase):
             .rstrip("/")
         )
 
-    def __new__(cls, gcs_path: str | PurePath | None = None):
+    def __new__(cls, gcs_path: str | PurePath | None = None, file_system=None):
         """Construct Path with '/' as delimiter."""
         gcs_path = cls._standardize_path(gcs_path or "")
         obj = super().__new__(cls, gcs_path)
         obj._path = PurePosixPath(obj)
-        obj._file_system =
+        obj._file_system = file_system
         return obj
 
+    def buckets_path(self) -> "Path":
+        if self.startswith("/buckets"):
+            return self
+
+        root = self.parts[0]
+        bucket = root.split("-data-")[-1].split("-prod")[0]
+
+        try:
+            return self._new(f"/buckets/{bucket}/{'/'.join(self.parts[1:])}")
+        except IndexError:
+            return self._new(f"/buckets/{bucket}")
+
     def tree(
         self,
         max_rows: int | None = 3,
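Note: the new buckets_path rewrites a bucket-rooted path to the /buckets mount by taking the first path part, keeping what sits between "-data-" and "-prod", and reusing the rest of the path. A hedged illustration; the bucket name below is made up and the naming scheme is inferred from the split logic, not stated in the diff:

from daplapath.path import Path  # assumed import location

p = Path("ssb-myteam-data-produkt-prod/inndata/data_v1.parquet")  # hypothetical bucket-style root

# parts[0] -> "ssb-myteam-data-produkt-prod"
# .split("-data-")[-1] -> "produkt-prod"; .split("-prod")[0] -> "produkt"
print(p.buckets_path())  # expected: '/buckets/produkt/inndata/data_v1.parquet'

# Paths already under /buckets are returned unchanged
print(Path("/buckets/produkt/inndata").buckets_path())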
@@ -365,7 +401,7 @@ class Path(str, _PathBase):
         'file_v201.parquet'
         """
         version_text = f"{self._version_prefix}{version}" if version is not None else ""
-        return self.
+        return self._new(
             f"{self.parent}/{self.versionless_stem}{version_text}{self.suffix}"
         )
 
@@ -468,7 +504,7 @@ class Path(str, _PathBase):
 
         parent = f"{self.parent}/" if self.parent != "." else ""
 
-        return self.
+        return self._new(
             f"{parent}{stem}{period_string}{version_string}{self.suffix}".replace(
                 "".join(self.periods), period_string.strip(self._period_prefix)
             )
@@ -509,12 +545,17 @@ class Path(str, _PathBase):
     @property
     def versionless_stem(self) -> str:
         """Return the file stem before the version pattern."""
-        return self.
+        return self._new(re.split(self._version_pattern, self._path.name)[0]).stem
 
     @property
     def parent(self) -> "Path":
         """Parent path."""
-        return self.
+        return self._new(self._path.parent)
+
+    @property
+    def parents(self) -> "list[Path]":
+        """Parent path."""
+        return [self._new(parent) for parent in self._path.parents]
 
     @property
     def name(self) -> str:
@@ -542,52 +583,48 @@ class Path(str, _PathBase):
 
     @property
     def index_column_names(self) -> list[str]:
-
-        try:
-            schema = pq.read_schema(file)
-            return _get_index_cols(schema)
-        except KeyError:
-            return read_nrows(file, 1).index.names
+        return _get_index_cols(self.schema)
 
     @property
     def columns(self) -> pd.Index:
         """Columns of the file."""
-
-
-
-
-
-
-
+        schema = self.schema
+        try:
+            names = [
+                x["field_name"]
+                for x in json.loads(schema.metadata[b"pandas"].decode())["columns"]
+            ]
+        except (KeyError, TypeError):
+            names = schema.names
+        index_cols = _get_index_cols(schema)
+        return pd.Index(names).difference(index_cols)
 
     @property
     def schema(self) -> pyarrow.Schema:
         """Date types of the file's columns."""
-
-
+        try:
+            with self.open("rb") as file:
+                return get_schema(file)
+        except (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
+            return get_schema(self)
 
     @property
     def dtypes(self) -> pd.Series:
         """Date types of the file's columns."""
-
-
-
-
-
-            lambda x: ~x.index.isin(index_cols)
-        ]
-        except KeyError:
-            return read_nrows(file, 1).dtypes
+        schema = self.schema
+        index_cols = _get_index_cols(schema)
+        return pd.Series(schema.types, index=schema.names).loc[
+            lambda x: ~x.index.isin(index_cols)
+        ]
 
     @property
     def shape(self) -> tuple[int, int]:
         """Number of rows and columns."""
-
-
-
-
-
-        return read_nrows(file, 1).shape
+        try:
+            with self.open("rb") as file:
+                return get_shape(file)
+        except (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
+            return get_shape(self)
 
     @property
     def nrow(self) -> int:
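Note: index_column_names, columns, dtypes and shape now all derive from the parquet schema via _get_index_cols, which is not shown in this diff. It presumably reads the index columns out of the 'pandas' metadata that pandas/pyarrow write into parquet files; the sketch below is my own reconstruction of that idea, not the package's implementation:

import json

import pyarrow


def index_cols_from_schema(schema: pyarrow.Schema) -> list[str]:
    """Pull index column names from the 'pandas' key of a parquet schema's metadata."""
    metadata = schema.metadata or {}
    try:
        pandas_meta = json.loads(metadata[b"pandas"].decode())
    except KeyError:
        return []
    # index_columns may contain a RangeIndex descriptor (a dict) instead of a column name
    return [col for col in pandas_meta.get("index_columns", []) if isinstance(col, str)]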
@@ -643,25 +680,27 @@ class Path(str, _PathBase):
 
     @property
     def partition_root(self) -> "Path":
+        if ".parquet" not in self:
+            return self
         return self.split(".parquet")[0] + ".parquet"
 
-    def
-
-            return self.file_system.isdir(self)
-        except AttributeError:
-            return self.file_system.is_dir(self)
+    def isfile(self) -> bool:
+        return not self.isdir()
 
     def is_file(self) -> bool:
-        return
+        return self.isfile()
+
+    def is_dir(self) -> bool:
+        return self.isdir()
 
     def with_suffix(self, suffix: str):
-        return self.
+        return self._new(self._path.with_suffix(suffix))
 
     def with_name(self, new_name: str):
-        return self.
+        return self._new(self._path.with_name(new_name))
 
     def with_stem(self, new_with_stem: str):
-        return self.
+        return self._new(self._path.with_stem(new_with_stem))
 
     @property
     def file_system(self):
@@ -689,7 +728,7 @@ class Path(str, _PathBase):
                 "unsupported operand type(s) for /: "
                 f"{self.__class__.__name__} and {other.__class__.__name__}"
             )
-        return self.
+        return self._new(f"{self}/{as_str(other)}")
 
     def __getattribute__(self, name):
         """stackoverflow hack to ensure we return Path when using string methods.
@@ -721,21 +760,15 @@ class Path(str, _PathBase):
         error_message = f"{self.__class__.__name__} has no attribute '{attr}'"
         if attr.startswith("_"):
             raise AttributeError(error_message)
-
-
-
-            raise AttributeError(error_message) from e
+        if attr not in self._file_system_attrs:
+            raise AttributeError(error_message)
+        return functools.partial(getattr(self.file_system, attr), self)
 
     def __fspath__(self) -> str:
        return str(self)
 
     def __dir__(self) -> list[str]:
-        return list(
-            sorted(
-                {x for x in dir(Path)}
-                | {x for x in dir(self._file_system) if not x.startswith("_")}
-            )
-        )
+        return list(sorted({x for x in dir(Path)} | self._file_system_attrs))
 
     def _iterable_constructor(self, info: list[dict], **kwargs) -> "PathSeries":
         series: pd.Series = _get_paths_and_index(info).apply(self.__class__)
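Note: the attribute fallback above exposes only the whitelisted _file_system_attrs, binding the path itself as the first argument with functools.partial, so p.exists() becomes file_system.exists(p). A toy sketch of the same pattern with a stand-in file system (not the package's classes):

import functools
import os


class _LocalFS:
    """Stand-in file system with a couple of methods (hypothetical)."""

    @staticmethod
    def exists(path: str) -> bool:
        return os.path.exists(path)

    @staticmethod
    def isdir(path: str) -> bool:
        return os.path.isdir(path)


class DelegatingPath(str):
    """Toy illustration of the whitelist-and-partial delegation; not the real Path class."""

    _file_system_attrs = {"exists", "isdir"}
    file_system = _LocalFS()

    def __getattr__(self, attr):
        if attr.startswith("_") or attr not in self._file_system_attrs:
            raise AttributeError(f"{type(self).__name__} has no attribute {attr!r}")
        # p.exists() ends up calling file_system.exists(p)
        return functools.partial(getattr(self.file_system, attr), self)


p = DelegatingPath("/tmp")
print(p.exists(), p.isdir())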
@@ -743,6 +776,9 @@ class Path(str, _PathBase):
             path._file_system = self._file_system
         return self._iterable_type(series, **kwargs)
 
+    def _new(self, new_path: str | Path) -> "Path":
+        return self.__class__(new_path, self.file_system)
+
 
 class PathSeries(pd.Series, _PathBase):
     """A pandas Series for working with GCS (Google Cloud Storage) paths.
@@ -863,6 +899,12 @@ class PathSeries(pd.Series, _PathBase):
     def partition_root(self) -> "PathSeries":
         return self.files.apply(lambda x: x.partition_root).drop_duplicates()
 
+    @property
+    def partitioned_files(self) -> "PathSeries":
+        return self.files.loc[
+            lambda x: x.str.count(r"\.parquet") == 2
+        ].partition_root.drop_duplicates()
+
     @property
     def dirs(self) -> "PathSeries":
         """Select only the directories in the Series."""
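Note: partitioned_files keeps only file paths where ".parquet" occurs exactly twice, i.e. part files sitting inside a directory that itself has a .parquet suffix, and then reduces them to their deduplicated partition roots. A small hedged illustration of the selection logic on a plain pandas Series with made-up paths:

import pandas as pd

paths = pd.Series(
    [
        "bucket/data/table.parquet/part-0.parquet",  # partitioned: '.parquet' twice
        "bucket/data/table.parquet/part-1.parquet",
        "bucket/data/single_file.parquet",           # plain file: '.parquet' once
    ]
)

part_files = paths.loc[paths.str.count(r"\.parquet") == 2]
partition_roots = part_files.str.split(".parquet").str[0] + ".parquet"
print(partition_roots.drop_duplicates().tolist())
# ['bucket/data/table.parquet']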
@@ -1218,18 +1260,18 @@ def split_path_and_make_copyable_html(
     split: str | None = "/",
     display_prefix: str | None = ".../",
 ) -> str:
-    """Get
+    """Get HTML text that displays the last part, but makes the full path copyable to clipboard.
 
-    Splits the path on a delimiter and creates an
+    Splits the path on a delimiter and creates an HTML string that displays only the
     last part, but adds a hyperlink which copies the full path to clipboard when clicked.
 
     Parameters
     ----------
     path: File or directory path
-    max_parts: Maximum number of path
+    max_parts: Maximum number of path parts to display. Defaults to 2,
         meaning the two last parts. Set to None to show full paths.
     split: Text pattern to split the path on. Defaults to "/".
-    display_prefix: The text to display instead of the parent directory. Defaults to ".../"
+    display_prefix: The text to display instead of the parent directory. Defaults to ".../".
 
     Returns
     -------
@@ -1237,7 +1279,8 @@ def split_path_and_make_copyable_html(
     """
 
     copy_to_clipboard_js = f"""<script>
-function copyToClipboard(text) {{
+function copyToClipboard(text, event) {{
+    event.preventDefault();
     navigator.clipboard.writeText(text)
         .then(() => {{
             const alertBox = document.createElement('div');
@@ -1271,7 +1314,7 @@ function copyToClipboard(text) {{
     else:
         displayed_text = path
 
-    return f'{copy_to_clipboard_js}<a href="
+    return f'{copy_to_clipboard_js}<a href="#" title="{path}" onclick="copyToClipboard(\'{path}\', event)">{displayed_text}</a>'
 
 
 def _get_default_multi_index() -> pd.MultiIndex:
@@ -1458,6 +1501,72 @@ def get_arguments(func: Callable | object) -> list[str]:
     )
 
 
+def get_schema(file) -> pyarrow.Schema:
+    try:
+        return pq.read_schema(file)
+    except (
+        PermissionError,
+        pyarrow.ArrowInvalid,
+        FileNotFoundError,
+        IsADirectoryError,
+        OSError,
+    ):
+        # try:
+        #     return ds.dataset(file).schema
+        # except (TypeError, FileNotFoundError) as e:
+        if not hasattr(file, "file_system"):
+            raise e
+
+        file_system = file.file_system
+
+        def _get_schema(path):
+            try:
+                return pq.read_schema(path)
+            except FileNotFoundError:
+                with file_system.open(path, "rb") as f:
+                    return pq.read_schema(f)
+
+        with ThreadPoolExecutor() as executor:
+            return pyarrow.unify_schemas(
+                list(
+                    executor.map(_get_schema, file_system.glob(file + "/**/*.parquet"))
+                ),
+                promote_options="permissive",
+            )
+
+
+def get_num_rows(file):
+    try:
+        return pq.read_metadata(file).num_rows
+    except (
+        PermissionError,
+        pyarrow.ArrowInvalid,
+        FileNotFoundError,
+        TypeError,
+        OSError,
+    ) as e:
+        try:
+            return ds.dataset(file).count_rows()
+        except Exception as e2:
+            if not hasattr(file, "glob"):
+                raise e2 from 2
+
+            def _get_num_rows(path):
+                with path.open("rb") as file:
+                    return pq.read_metadata(file).num_rows
+
+            with ThreadPoolExecutor() as executor:
+                return sum(executor.map(_get_num_rows, file.glob("**").files))
+
+
+def get_shape(file) -> tuple[int, int]:
+    schema = get_schema(file)
+    index_cols = _get_index_cols(schema)
+    ncol: int = sum(name not in index_cols for name in schema.names)
+    nrow: int = get_num_rows(file)
+    return nrow, ncol
+
+
 def read_nrows(file, nrow: int) -> pd.DataFrame:
     """Read first n rows of a parquet file."""
     rows = next(pq.ParquetFile(file).iter_batches(nrow))
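Note: the new get_schema, get_num_rows and get_shape helpers read parquet metadata without loading the data, falling back to pyarrow.dataset or per-file reads for partitioned datasets and remote file systems. A hedged usage sketch on a locally written file, assuming the helpers are importable as module-level names from daplapath.path:

import pandas as pd

from daplapath.path import get_num_rows, get_schema, get_shape  # assumed public module-level names

pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}).to_parquet("example.parquet")

print(get_schema("example.parquet").names)  # column names taken from the parquet schema
print(get_num_rows("example.parquet"))      # 3, read from the file's metadata
print(get_shape("example.parquet"))         # (3, 2): rows from metadata, columns from the schema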