daplapath 2.0.6__tar.gz → 2.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {daplapath-2.0.6 → daplapath-2.0.8}/PKG-INFO +14 -17
- {daplapath-2.0.6 → daplapath-2.0.8}/README.md +13 -16
- {daplapath-2.0.6 → daplapath-2.0.8}/daplapath/path.py +56 -18
- {daplapath-2.0.6 → daplapath-2.0.8}/pyproject.toml +1 -1
- {daplapath-2.0.6 → daplapath-2.0.8}/LICENSE.md +0 -0
- {daplapath-2.0.6 → daplapath-2.0.8}/daplapath/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: daplapath
|
|
3
|
-
Version: 2.0.6
|
|
3
|
+
Version: 2.0.8
|
|
4
4
|
Summary: A pathlib.Path class for dapla
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: ort
|
|
@@ -35,14 +35,14 @@ from daplapath.path import Path
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
```python
|
|
38
|
-
folder = Path('ssb-
|
|
38
|
+
folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
|
|
39
39
|
folder
|
|
40
40
|
```
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
'ssb-
|
|
45
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
|
|
@@ -107,7 +107,7 @@ file
|
|
|
107
107
|
|
|
108
108
|
|
|
109
109
|
|
|
110
|
-
'ssb-
|
|
110
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
111
111
|
|
|
112
112
|
|
|
113
113
|
|
|
@@ -119,7 +119,7 @@ file.parent
|
|
|
119
119
|
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
'ssb-
|
|
122
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
123
123
|
|
|
124
124
|
|
|
125
125
|
|
|
@@ -135,8 +135,7 @@ file.columns
|
|
|
135
135
|
|
|
136
136
|
|
|
137
137
|
|
|
138
|
-
Index(['
|
|
139
|
-
'SHAPE_Area', 'geometry'],
|
|
138
|
+
Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
|
|
140
139
|
dtype='object')
|
|
141
140
|
|
|
142
141
|
|
|
@@ -149,13 +148,11 @@ file.dtypes
|
|
|
149
148
|
|
|
150
149
|
|
|
151
150
|
|
|
152
|
-
|
|
153
|
-
|
|
151
|
+
objtype string
|
|
152
|
+
navn string
|
|
154
153
|
komm_nr string
|
|
155
154
|
fylke_nr string
|
|
156
|
-
|
|
157
|
-
SHAPE_Length double
|
|
158
|
-
SHAPE_Area double
|
|
155
|
+
areal_gdb double
|
|
159
156
|
geometry binary
|
|
160
157
|
dtype: object
|
|
161
158
|
|
|
@@ -206,7 +203,7 @@ file.latest_version()
|
|
|
206
203
|
|
|
207
204
|
|
|
208
205
|
|
|
209
|
-
'ssb-
|
|
206
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
210
207
|
|
|
211
208
|
|
|
212
209
|
|
|
@@ -218,7 +215,7 @@ file.highest_numbered_version()
|
|
|
218
215
|
|
|
219
216
|
|
|
220
217
|
|
|
221
|
-
'ssb-
|
|
218
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
222
219
|
|
|
223
220
|
|
|
224
221
|
|
|
@@ -231,7 +228,7 @@ file.new_version()
|
|
|
231
228
|
|
|
232
229
|
|
|
233
230
|
|
|
234
|
-
'ssb-
|
|
231
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
|
|
235
232
|
|
|
236
233
|
|
|
237
234
|
|
|
@@ -268,11 +265,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
|
|
|
268
265
|
|
|
269
266
|
```python
|
|
270
267
|
print(
|
|
271
|
-
Path("ssb-
|
|
268
|
+
Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
|
|
272
269
|
)
|
|
273
270
|
```
|
|
274
271
|
|
|
275
|
-
ssb-
|
|
272
|
+
ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
|
|
276
273
|
└──2000 /
|
|
277
274
|
└──SSB_tettsted_flate_p2000.parquet
|
|
278
275
|
└──SSB_tettsted_flate_p2000_v1.parquet
|
|
@@ -19,14 +19,14 @@ from daplapath.path import Path
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
```python
|
|
22
|
-
folder = Path('ssb-
|
|
22
|
+
folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
|
|
23
23
|
folder
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
'ssb-
|
|
29
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
|
|
@@ -91,7 +91,7 @@ file
|
|
|
91
91
|
|
|
92
92
|
|
|
93
93
|
|
|
94
|
-
'ssb-
|
|
94
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
95
95
|
|
|
96
96
|
|
|
97
97
|
|
|
@@ -103,7 +103,7 @@ file.parent
|
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
|
|
106
|
-
'ssb-
|
|
106
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
107
107
|
|
|
108
108
|
|
|
109
109
|
|
|
@@ -119,8 +119,7 @@ file.columns
|
|
|
119
119
|
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
Index(['
|
|
123
|
-
'SHAPE_Area', 'geometry'],
|
|
122
|
+
Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
|
|
124
123
|
dtype='object')
|
|
125
124
|
|
|
126
125
|
|
|
@@ -133,13 +132,11 @@ file.dtypes
|
|
|
133
132
|
|
|
134
133
|
|
|
135
134
|
|
|
136
|
-
|
|
137
|
-
|
|
135
|
+
objtype string
|
|
136
|
+
navn string
|
|
138
137
|
komm_nr string
|
|
139
138
|
fylke_nr string
|
|
140
|
-
|
|
141
|
-
SHAPE_Length double
|
|
142
|
-
SHAPE_Area double
|
|
139
|
+
areal_gdb double
|
|
143
140
|
geometry binary
|
|
144
141
|
dtype: object
|
|
145
142
|
|
|
@@ -190,7 +187,7 @@ file.latest_version()
|
|
|
190
187
|
|
|
191
188
|
|
|
192
189
|
|
|
193
|
-
'ssb-
|
|
190
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
194
191
|
|
|
195
192
|
|
|
196
193
|
|
|
@@ -202,7 +199,7 @@ file.highest_numbered_version()
|
|
|
202
199
|
|
|
203
200
|
|
|
204
201
|
|
|
205
|
-
'ssb-
|
|
202
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
206
203
|
|
|
207
204
|
|
|
208
205
|
|
|
@@ -215,7 +212,7 @@ file.new_version()
|
|
|
215
212
|
|
|
216
213
|
|
|
217
214
|
|
|
218
|
-
'ssb-
|
|
215
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
|
|
219
216
|
|
|
220
217
|
|
|
221
218
|
|
|
@@ -252,11 +249,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
|
|
|
252
249
|
|
|
253
250
|
```python
|
|
254
251
|
print(
|
|
255
|
-
Path("ssb-
|
|
252
|
+
Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
|
|
256
253
|
)
|
|
257
254
|
```
|
|
258
255
|
|
|
259
|
-
ssb-
|
|
256
|
+
ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
|
|
260
257
|
└──2000 /
|
|
261
258
|
└──SSB_tettsted_flate_p2000.parquet
|
|
262
259
|
└──SSB_tettsted_flate_p2000_v1.parquet
|
|
@@ -96,7 +96,7 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
96
96
|
return cls.cp_file(source, destination, **kwargs)
|
|
97
97
|
|
|
98
98
|
@staticmethod
|
|
99
|
-
def cp_file(
|
|
99
|
+
def cp_file(source, destination, **kwargs):
|
|
100
100
|
os.makedirs(pathlib.Path(destination).parent, exist_ok=True)
|
|
101
101
|
return shutil.copy2(source, destination, **kwargs)
|
|
102
102
|
|
|
@@ -252,11 +252,16 @@ class Path(str, _PathBase):
|
|
|
252
252
|
def rglob(self, pattern: str, **kwargs) -> "PathSeries":
|
|
253
253
|
return self.glob(pattern, recursive=True, **kwargs)
|
|
254
254
|
|
|
255
|
-
def glob(
|
|
255
|
+
def glob(
|
|
256
|
+
self, pattern: str | None = None, recursive: bool = True, **kwargs
|
|
257
|
+
) -> "PathSeries":
|
|
256
258
|
"""Create PathSeries of files/directories that match the pattern."""
|
|
257
259
|
recursive = kwargs.get("recurse_symlinks", recursive)
|
|
258
260
|
|
|
259
|
-
|
|
261
|
+
if pattern:
|
|
262
|
+
pattern = str(self / pattern)
|
|
263
|
+
else:
|
|
264
|
+
pattern = str(self)
|
|
260
265
|
|
|
261
266
|
# pop kwargs going into PathSeries initialiser.
|
|
262
267
|
iterable_init_args = get_arguments(self._iterable_type)
|
|
@@ -300,6 +305,17 @@ class Path(str, _PathBase):
|
|
|
300
305
|
"""
|
|
301
306
|
return self.glob("**", recursive=recursive, **kwargs)
|
|
302
307
|
|
|
308
|
+
def rmdir(self) -> None:
|
|
309
|
+
files = self.glob("**").files
|
|
310
|
+
with ThreadPoolExecutor() as executor:
|
|
311
|
+
list(executor.map(self.file_system.rm_file, files))
|
|
312
|
+
|
|
313
|
+
def cp(self, destination: "Path | str") -> "Path":
|
|
314
|
+
return self._cp_or_mv(destination, "cp")
|
|
315
|
+
|
|
316
|
+
def mv(self, destination: "Path | str") -> "Path":
|
|
317
|
+
return self._cp_or_mv(destination, "mv")
|
|
318
|
+
|
|
303
319
|
def versions(self, include_versionless: bool = False) -> "PathSeries":
|
|
304
320
|
"""Returns a PathSeries of all versions of the file."""
|
|
305
321
|
files_in_folder: Iterable[Path] = self.parent.glob("**", recursive=False)
|
|
@@ -605,7 +621,9 @@ class Path(str, _PathBase):
|
|
|
605
621
|
try:
|
|
606
622
|
with self.open("rb") as file:
|
|
607
623
|
return get_schema(file)
|
|
608
|
-
except (
|
|
624
|
+
except (
|
|
625
|
+
Exception
|
|
626
|
+
): # (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
|
|
609
627
|
return get_schema(self)
|
|
610
628
|
|
|
611
629
|
@property
|
|
@@ -623,7 +641,9 @@ class Path(str, _PathBase):
|
|
|
623
641
|
try:
|
|
624
642
|
with self.open("rb") as file:
|
|
625
643
|
return get_shape(file)
|
|
626
|
-
except (
|
|
644
|
+
except (
|
|
645
|
+
Exception
|
|
646
|
+
): # (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
|
|
627
647
|
return get_shape(self)
|
|
628
648
|
|
|
629
649
|
@property
|
|
@@ -718,10 +738,10 @@ class Path(str, _PathBase):
|
|
|
718
738
|
|
|
719
739
|
Example
|
|
720
740
|
-------
|
|
721
|
-
>>> folder = 'ssb-
|
|
741
|
+
>>> folder = 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023'
|
|
722
742
|
>>> file_path = folder / "ABAS_kommune_flate_p2023_v1.parquet"
|
|
723
743
|
>>> file_path
|
|
724
|
-
'ssb-
|
|
744
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
|
|
725
745
|
"""
|
|
726
746
|
if not isinstance(other, (str, PurePath, os.PathLike)):
|
|
727
747
|
raise TypeError(
|
|
@@ -779,6 +799,27 @@ class Path(str, _PathBase):
|
|
|
779
799
|
def _new(self, new_path: str | Path) -> "Path":
|
|
780
800
|
return self.__class__(new_path, self.file_system)
|
|
781
801
|
|
|
802
|
+
def _cp_or_mv(self, destination: "Path | str", attr: str) -> "Path":
|
|
803
|
+
func: Callable = getattr(self.file_system, attr)
|
|
804
|
+
try:
|
|
805
|
+
func(self, destination)
|
|
806
|
+
except FileNotFoundError:
|
|
807
|
+
destination = self.__class__(destination)
|
|
808
|
+
sources = list(self.glob("**").files)
|
|
809
|
+
destinations = [path.replace(self, destination) for path in sources]
|
|
810
|
+
with ThreadPoolExecutor() as executor:
|
|
811
|
+
list(executor.map(func, sources, destinations))
|
|
812
|
+
self._new(destination)
|
|
813
|
+
|
|
814
|
+
def keep_newest_partitions(self) -> "Path":
|
|
815
|
+
def _keep_newest(path):
|
|
816
|
+
while True:
|
|
817
|
+
if path.isfile():
|
|
818
|
+
pass
|
|
819
|
+
|
|
820
|
+
with ThreadPoolExecutor() as executor:
|
|
821
|
+
list(executor.map(_keep_newest, self.ls()))
|
|
822
|
+
|
|
782
823
|
|
|
783
824
|
class PathSeries(pd.Series, _PathBase):
|
|
784
825
|
"""A pandas Series for working with GCS (Google Cloud Storage) paths.
|
|
@@ -1510,7 +1551,7 @@ def get_schema(file) -> pyarrow.Schema:
|
|
|
1510
1551
|
FileNotFoundError,
|
|
1511
1552
|
IsADirectoryError,
|
|
1512
1553
|
OSError,
|
|
1513
|
-
):
|
|
1554
|
+
) as e:
|
|
1514
1555
|
# try:
|
|
1515
1556
|
# return ds.dataset(file).schema
|
|
1516
1557
|
# except (TypeError, FileNotFoundError) as e:
|
|
@@ -1522,9 +1563,12 @@ def get_schema(file) -> pyarrow.Schema:
|
|
|
1522
1563
|
def _get_schema(path):
|
|
1523
1564
|
try:
|
|
1524
1565
|
return pq.read_schema(path)
|
|
1525
|
-
except FileNotFoundError:
|
|
1526
|
-
|
|
1527
|
-
|
|
1566
|
+
except FileNotFoundError as e:
|
|
1567
|
+
try:
|
|
1568
|
+
with file_system.open(path, "rb") as f:
|
|
1569
|
+
return pq.read_schema(f)
|
|
1570
|
+
except Exception as e2:
|
|
1571
|
+
raise e2.__class__(f"{e2}. {path}") from e
|
|
1528
1572
|
|
|
1529
1573
|
with ThreadPoolExecutor() as executor:
|
|
1530
1574
|
return pyarrow.unify_schemas(
|
|
@@ -1538,13 +1582,7 @@ def get_schema(file) -> pyarrow.Schema:
|
|
|
1538
1582
|
def get_num_rows(file):
|
|
1539
1583
|
try:
|
|
1540
1584
|
return pq.read_metadata(file).num_rows
|
|
1541
|
-
except (
|
|
1542
|
-
PermissionError,
|
|
1543
|
-
pyarrow.ArrowInvalid,
|
|
1544
|
-
FileNotFoundError,
|
|
1545
|
-
TypeError,
|
|
1546
|
-
OSError,
|
|
1547
|
-
) as e:
|
|
1585
|
+
except Exception as e:
|
|
1548
1586
|
try:
|
|
1549
1587
|
return ds.dataset(file).count_rows()
|
|
1550
1588
|
except Exception as e2:
|
|
File without changes
|
|
File without changes
|