daplapath 2.0.7__py3-none-any.whl → 2.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- daplapath/path.py +49 -15
- {daplapath-2.0.7.dist-info → daplapath-2.0.8.dist-info}/METADATA +14 -17
- daplapath-2.0.8.dist-info/RECORD +6 -0
- daplapath-2.0.7.dist-info/RECORD +0 -6
- {daplapath-2.0.7.dist-info → daplapath-2.0.8.dist-info}/LICENSE.md +0 -0
- {daplapath-2.0.7.dist-info → daplapath-2.0.8.dist-info}/WHEEL +0 -0
daplapath/path.py
CHANGED
|
@@ -96,7 +96,7 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
96
96
|
return cls.cp_file(source, destination, **kwargs)
|
|
97
97
|
|
|
98
98
|
@staticmethod
|
|
99
|
-
def cp_file(
|
|
99
|
+
def cp_file(source, destination, **kwargs):
|
|
100
100
|
os.makedirs(pathlib.Path(destination).parent, exist_ok=True)
|
|
101
101
|
return shutil.copy2(source, destination, **kwargs)
|
|
102
102
|
|
|
@@ -252,11 +252,16 @@ class Path(str, _PathBase):
|
|
|
252
252
|
def rglob(self, pattern: str, **kwargs) -> "PathSeries":
|
|
253
253
|
return self.glob(pattern, recursive=True, **kwargs)
|
|
254
254
|
|
|
255
|
-
def glob(
|
|
255
|
+
def glob(
|
|
256
|
+
self, pattern: str | None = None, recursive: bool = True, **kwargs
|
|
257
|
+
) -> "PathSeries":
|
|
256
258
|
"""Create PathSeries of files/directories that match the pattern."""
|
|
257
259
|
recursive = kwargs.get("recurse_symlinks", recursive)
|
|
258
260
|
|
|
259
|
-
|
|
261
|
+
if pattern:
|
|
262
|
+
pattern = str(self / pattern)
|
|
263
|
+
else:
|
|
264
|
+
pattern = str(self)
|
|
260
265
|
|
|
261
266
|
# pop kwargs going into PathSeries initialiser.
|
|
262
267
|
iterable_init_args = get_arguments(self._iterable_type)
|
|
@@ -300,6 +305,17 @@ class Path(str, _PathBase):
|
|
|
300
305
|
"""
|
|
301
306
|
return self.glob("**", recursive=recursive, **kwargs)
|
|
302
307
|
|
|
308
|
+
def rmdir(self) -> None:
|
|
309
|
+
files = self.glob("**").files
|
|
310
|
+
with ThreadPoolExecutor() as executor:
|
|
311
|
+
list(executor.map(self.file_system.rm_file, files))
|
|
312
|
+
|
|
313
|
+
def cp(self, destination: "Path | str") -> "Path":
|
|
314
|
+
return self._cp_or_mv(destination, "cp")
|
|
315
|
+
|
|
316
|
+
def mv(self, destination: "Path | str") -> "Path":
|
|
317
|
+
return self._cp_or_mv(destination, "mv")
|
|
318
|
+
|
|
303
319
|
def versions(self, include_versionless: bool = False) -> "PathSeries":
|
|
304
320
|
"""Returns a PathSeries of all versions of the file."""
|
|
305
321
|
files_in_folder: Iterable[Path] = self.parent.glob("**", recursive=False)
|
|
@@ -722,10 +738,10 @@ class Path(str, _PathBase):
|
|
|
722
738
|
|
|
723
739
|
Example
|
|
724
740
|
-------
|
|
725
|
-
>>> folder = 'ssb-
|
|
741
|
+
>>> folder = 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023'
|
|
726
742
|
>>> file_path = folder / "ABAS_kommune_flate_p2023_v1.parquet"
|
|
727
743
|
>>> file_path
|
|
728
|
-
'ssb-
|
|
744
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
|
|
729
745
|
"""
|
|
730
746
|
if not isinstance(other, (str, PurePath, os.PathLike)):
|
|
731
747
|
raise TypeError(
|
|
@@ -783,6 +799,27 @@ class Path(str, _PathBase):
|
|
|
783
799
|
def _new(self, new_path: str | Path) -> "Path":
|
|
784
800
|
return self.__class__(new_path, self.file_system)
|
|
785
801
|
|
|
802
|
+
def _cp_or_mv(self, destination: "Path | str", attr: str) -> "Path":
|
|
803
|
+
func: Callable = getattr(self.file_system, attr)
|
|
804
|
+
try:
|
|
805
|
+
func(self, destination)
|
|
806
|
+
except FileNotFoundError:
|
|
807
|
+
destination = self.__class__(destination)
|
|
808
|
+
sources = list(self.glob("**").files)
|
|
809
|
+
destinations = [path.replace(self, destination) for path in sources]
|
|
810
|
+
with ThreadPoolExecutor() as executor:
|
|
811
|
+
list(executor.map(func, sources, destinations))
|
|
812
|
+
self._new(destination)
|
|
813
|
+
|
|
814
|
+
def keep_newest_partitions(self) -> "Path":
|
|
815
|
+
def _keep_newest(path):
|
|
816
|
+
while True:
|
|
817
|
+
if path.isfile():
|
|
818
|
+
pass
|
|
819
|
+
|
|
820
|
+
with ThreadPoolExecutor() as executor:
|
|
821
|
+
list(executor.map(_keep_newest, self.ls()))
|
|
822
|
+
|
|
786
823
|
|
|
787
824
|
class PathSeries(pd.Series, _PathBase):
|
|
788
825
|
"""A pandas Series for working with GCS (Google Cloud Storage) paths.
|
|
@@ -1526,9 +1563,12 @@ def get_schema(file) -> pyarrow.Schema:
|
|
|
1526
1563
|
def _get_schema(path):
|
|
1527
1564
|
try:
|
|
1528
1565
|
return pq.read_schema(path)
|
|
1529
|
-
except FileNotFoundError:
|
|
1530
|
-
|
|
1531
|
-
|
|
1566
|
+
except FileNotFoundError as e:
|
|
1567
|
+
try:
|
|
1568
|
+
with file_system.open(path, "rb") as f:
|
|
1569
|
+
return pq.read_schema(f)
|
|
1570
|
+
except Exception as e2:
|
|
1571
|
+
raise e2.__class__(f"{e2}. {path}") from e
|
|
1532
1572
|
|
|
1533
1573
|
with ThreadPoolExecutor() as executor:
|
|
1534
1574
|
return pyarrow.unify_schemas(
|
|
@@ -1542,13 +1582,7 @@ def get_schema(file) -> pyarrow.Schema:
|
|
|
1542
1582
|
def get_num_rows(file):
|
|
1543
1583
|
try:
|
|
1544
1584
|
return pq.read_metadata(file).num_rows
|
|
1545
|
-
except (
|
|
1546
|
-
PermissionError,
|
|
1547
|
-
pyarrow.ArrowInvalid,
|
|
1548
|
-
FileNotFoundError,
|
|
1549
|
-
TypeError,
|
|
1550
|
-
OSError,
|
|
1551
|
-
) as e:
|
|
1585
|
+
except Exception as e:
|
|
1552
1586
|
try:
|
|
1553
1587
|
return ds.dataset(file).count_rows()
|
|
1554
1588
|
except Exception as e2:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: daplapath
|
|
3
|
-
Version: 2.0.7
|
|
3
|
+
Version: 2.0.8
|
|
4
4
|
Summary: A pathlib.Path class for dapla
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: ort
|
|
@@ -35,14 +35,14 @@ from daplapath.path import Path
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
```python
|
|
38
|
-
folder = Path('ssb-
|
|
38
|
+
folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
|
|
39
39
|
folder
|
|
40
40
|
```
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
'ssb-
|
|
45
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
|
|
@@ -107,7 +107,7 @@ file
|
|
|
107
107
|
|
|
108
108
|
|
|
109
109
|
|
|
110
|
-
'ssb-
|
|
110
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
111
111
|
|
|
112
112
|
|
|
113
113
|
|
|
@@ -119,7 +119,7 @@ file.parent
|
|
|
119
119
|
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
'ssb-
|
|
122
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
123
123
|
|
|
124
124
|
|
|
125
125
|
|
|
@@ -135,8 +135,7 @@ file.columns
|
|
|
135
135
|
|
|
136
136
|
|
|
137
137
|
|
|
138
|
-
Index(['
|
|
139
|
-
'SHAPE_Area', 'geometry'],
|
|
138
|
+
Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
|
|
140
139
|
dtype='object')
|
|
141
140
|
|
|
142
141
|
|
|
@@ -149,13 +148,11 @@ file.dtypes
|
|
|
149
148
|
|
|
150
149
|
|
|
151
150
|
|
|
152
|
-
|
|
153
|
-
|
|
151
|
+
objtype string
|
|
152
|
+
navn string
|
|
154
153
|
komm_nr string
|
|
155
154
|
fylke_nr string
|
|
156
|
-
|
|
157
|
-
SHAPE_Length double
|
|
158
|
-
SHAPE_Area double
|
|
155
|
+
areal_gdb double
|
|
159
156
|
geometry binary
|
|
160
157
|
dtype: object
|
|
161
158
|
|
|
@@ -206,7 +203,7 @@ file.latest_version()
|
|
|
206
203
|
|
|
207
204
|
|
|
208
205
|
|
|
209
|
-
'ssb-
|
|
206
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
210
207
|
|
|
211
208
|
|
|
212
209
|
|
|
@@ -218,7 +215,7 @@ file.highest_numbered_version()
|
|
|
218
215
|
|
|
219
216
|
|
|
220
217
|
|
|
221
|
-
'ssb-
|
|
218
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
222
219
|
|
|
223
220
|
|
|
224
221
|
|
|
@@ -231,7 +228,7 @@ file.new_version()
|
|
|
231
228
|
|
|
232
229
|
|
|
233
230
|
|
|
234
|
-
'ssb-
|
|
231
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
|
|
235
232
|
|
|
236
233
|
|
|
237
234
|
|
|
@@ -268,11 +265,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
|
|
|
268
265
|
|
|
269
266
|
```python
|
|
270
267
|
print(
|
|
271
|
-
Path("ssb-
|
|
268
|
+
Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
|
|
272
269
|
)
|
|
273
270
|
```
|
|
274
271
|
|
|
275
|
-
ssb-
|
|
272
|
+
ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
|
|
276
273
|
└──2000 /
|
|
277
274
|
└──SSB_tettsted_flate_p2000.parquet
|
|
278
275
|
└──SSB_tettsted_flate_p2000_v1.parquet
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
|
|
2
|
+
daplapath/path.py,sha256=f4N8JeJ-RqkS0jIoAk69FlVTGR8kyj64FhJR59CZX1w,53928
|
|
3
|
+
daplapath-2.0.8.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
|
|
4
|
+
daplapath-2.0.8.dist-info/METADATA,sha256=qc6AKIcFCJTun5HaeakY80u03SBsNv0Okzi2kJORvAg,14697
|
|
5
|
+
daplapath-2.0.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
6
|
+
daplapath-2.0.8.dist-info/RECORD,,
|
daplapath-2.0.7.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
|
|
2
|
-
daplapath/path.py,sha256=_6qbMdkEis1dNsoXwCdl1IT2hedTORvR5vlDNbxPNak,52602
|
|
3
|
-
daplapath-2.0.7.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
|
|
4
|
-
daplapath-2.0.7.dist-info/METADATA,sha256=mYwZhDxhkjptdi8K2s8oHLSMIzGfSVbPX7JB1mcs-cA,14774
|
|
5
|
-
daplapath-2.0.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
6
|
-
daplapath-2.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|