daplapath 2.0.7__tar.gz → 2.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {daplapath-2.0.7 → daplapath-2.0.8}/PKG-INFO +14 -17
- {daplapath-2.0.7 → daplapath-2.0.8}/README.md +13 -16
- {daplapath-2.0.7 → daplapath-2.0.8}/daplapath/path.py +49 -15
- {daplapath-2.0.7 → daplapath-2.0.8}/pyproject.toml +1 -1
- {daplapath-2.0.7 → daplapath-2.0.8}/LICENSE.md +0 -0
- {daplapath-2.0.7 → daplapath-2.0.8}/daplapath/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: daplapath
|
|
3
|
-
Version: 2.0.7
|
|
3
|
+
Version: 2.0.8
|
|
4
4
|
Summary: A pathlib.Path class for dapla
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: ort
|
|
@@ -35,14 +35,14 @@ from daplapath.path import Path
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
```python
|
|
38
|
-
folder = Path('ssb-
|
|
38
|
+
folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
|
|
39
39
|
folder
|
|
40
40
|
```
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
'ssb-
|
|
45
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
|
|
@@ -107,7 +107,7 @@ file
|
|
|
107
107
|
|
|
108
108
|
|
|
109
109
|
|
|
110
|
-
'ssb-
|
|
110
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
111
111
|
|
|
112
112
|
|
|
113
113
|
|
|
@@ -119,7 +119,7 @@ file.parent
|
|
|
119
119
|
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
'ssb-
|
|
122
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
123
123
|
|
|
124
124
|
|
|
125
125
|
|
|
@@ -135,8 +135,7 @@ file.columns
|
|
|
135
135
|
|
|
136
136
|
|
|
137
137
|
|
|
138
|
-
Index(['
|
|
139
|
-
'SHAPE_Area', 'geometry'],
|
|
138
|
+
Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
|
|
140
139
|
dtype='object')
|
|
141
140
|
|
|
142
141
|
|
|
@@ -149,13 +148,11 @@ file.dtypes
|
|
|
149
148
|
|
|
150
149
|
|
|
151
150
|
|
|
152
|
-
|
|
153
|
-
|
|
151
|
+
objtype string
|
|
152
|
+
navn string
|
|
154
153
|
komm_nr string
|
|
155
154
|
fylke_nr string
|
|
156
|
-
|
|
157
|
-
SHAPE_Length double
|
|
158
|
-
SHAPE_Area double
|
|
155
|
+
areal_gdb double
|
|
159
156
|
geometry binary
|
|
160
157
|
dtype: object
|
|
161
158
|
|
|
@@ -206,7 +203,7 @@ file.latest_version()
|
|
|
206
203
|
|
|
207
204
|
|
|
208
205
|
|
|
209
|
-
'ssb-
|
|
206
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
210
207
|
|
|
211
208
|
|
|
212
209
|
|
|
@@ -218,7 +215,7 @@ file.highest_numbered_version()
|
|
|
218
215
|
|
|
219
216
|
|
|
220
217
|
|
|
221
|
-
'ssb-
|
|
218
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
222
219
|
|
|
223
220
|
|
|
224
221
|
|
|
@@ -231,7 +228,7 @@ file.new_version()
|
|
|
231
228
|
|
|
232
229
|
|
|
233
230
|
|
|
234
|
-
'ssb-
|
|
231
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
|
|
235
232
|
|
|
236
233
|
|
|
237
234
|
|
|
@@ -268,11 +265,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
|
|
|
268
265
|
|
|
269
266
|
```python
|
|
270
267
|
print(
|
|
271
|
-
Path("ssb-
|
|
268
|
+
Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
|
|
272
269
|
)
|
|
273
270
|
```
|
|
274
271
|
|
|
275
|
-
ssb-
|
|
272
|
+
ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
|
|
276
273
|
└──2000 /
|
|
277
274
|
└──SSB_tettsted_flate_p2000.parquet
|
|
278
275
|
└──SSB_tettsted_flate_p2000_v1.parquet
|
|
@@ -19,14 +19,14 @@ from daplapath.path import Path
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
```python
|
|
22
|
-
folder = Path('ssb-
|
|
22
|
+
folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
|
|
23
23
|
folder
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
'ssb-
|
|
29
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
|
|
@@ -91,7 +91,7 @@ file
|
|
|
91
91
|
|
|
92
92
|
|
|
93
93
|
|
|
94
|
-
'ssb-
|
|
94
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
95
95
|
|
|
96
96
|
|
|
97
97
|
|
|
@@ -103,7 +103,7 @@ file.parent
|
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
|
|
106
|
-
'ssb-
|
|
106
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
|
|
107
107
|
|
|
108
108
|
|
|
109
109
|
|
|
@@ -119,8 +119,7 @@ file.columns
|
|
|
119
119
|
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
Index(['
|
|
123
|
-
'SHAPE_Area', 'geometry'],
|
|
122
|
+
Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
|
|
124
123
|
dtype='object')
|
|
125
124
|
|
|
126
125
|
|
|
@@ -133,13 +132,11 @@ file.dtypes
|
|
|
133
132
|
|
|
134
133
|
|
|
135
134
|
|
|
136
|
-
|
|
137
|
-
|
|
135
|
+
objtype string
|
|
136
|
+
navn string
|
|
138
137
|
komm_nr string
|
|
139
138
|
fylke_nr string
|
|
140
|
-
|
|
141
|
-
SHAPE_Length double
|
|
142
|
-
SHAPE_Area double
|
|
139
|
+
areal_gdb double
|
|
143
140
|
geometry binary
|
|
144
141
|
dtype: object
|
|
145
142
|
|
|
@@ -190,7 +187,7 @@ file.latest_version()
|
|
|
190
187
|
|
|
191
188
|
|
|
192
189
|
|
|
193
|
-
'ssb-
|
|
190
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
194
191
|
|
|
195
192
|
|
|
196
193
|
|
|
@@ -202,7 +199,7 @@ file.highest_numbered_version()
|
|
|
202
199
|
|
|
203
200
|
|
|
204
201
|
|
|
205
|
-
'ssb-
|
|
202
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
|
|
206
203
|
|
|
207
204
|
|
|
208
205
|
|
|
@@ -215,7 +212,7 @@ file.new_version()
|
|
|
215
212
|
|
|
216
213
|
|
|
217
214
|
|
|
218
|
-
'ssb-
|
|
215
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
|
|
219
216
|
|
|
220
217
|
|
|
221
218
|
|
|
@@ -252,11 +249,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
|
|
|
252
249
|
|
|
253
250
|
```python
|
|
254
251
|
print(
|
|
255
|
-
Path("ssb-
|
|
252
|
+
Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
|
|
256
253
|
)
|
|
257
254
|
```
|
|
258
255
|
|
|
259
|
-
ssb-
|
|
256
|
+
ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
|
|
260
257
|
└──2000 /
|
|
261
258
|
└──SSB_tettsted_flate_p2000.parquet
|
|
262
259
|
└──SSB_tettsted_flate_p2000_v1.parquet
|
|
@@ -96,7 +96,7 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
96
96
|
return cls.cp_file(source, destination, **kwargs)
|
|
97
97
|
|
|
98
98
|
@staticmethod
|
|
99
|
-
def cp_file(
|
|
99
|
+
def cp_file(source, destination, **kwargs):
|
|
100
100
|
os.makedirs(pathlib.Path(destination).parent, exist_ok=True)
|
|
101
101
|
return shutil.copy2(source, destination, **kwargs)
|
|
102
102
|
|
|
@@ -252,11 +252,16 @@ class Path(str, _PathBase):
|
|
|
252
252
|
def rglob(self, pattern: str, **kwargs) -> "PathSeries":
|
|
253
253
|
return self.glob(pattern, recursive=True, **kwargs)
|
|
254
254
|
|
|
255
|
-
def glob(
|
|
255
|
+
def glob(
|
|
256
|
+
self, pattern: str | None = None, recursive: bool = True, **kwargs
|
|
257
|
+
) -> "PathSeries":
|
|
256
258
|
"""Create PathSeries of files/directories that match the pattern."""
|
|
257
259
|
recursive = kwargs.get("recurse_symlinks", recursive)
|
|
258
260
|
|
|
259
|
-
|
|
261
|
+
if pattern:
|
|
262
|
+
pattern = str(self / pattern)
|
|
263
|
+
else:
|
|
264
|
+
pattern = str(self)
|
|
260
265
|
|
|
261
266
|
# pop kwargs going into PathSeries initialiser.
|
|
262
267
|
iterable_init_args = get_arguments(self._iterable_type)
|
|
@@ -300,6 +305,17 @@ class Path(str, _PathBase):
|
|
|
300
305
|
"""
|
|
301
306
|
return self.glob("**", recursive=recursive, **kwargs)
|
|
302
307
|
|
|
308
|
+
def rmdir(self) -> None:
|
|
309
|
+
files = self.glob("**").files
|
|
310
|
+
with ThreadPoolExecutor() as executor:
|
|
311
|
+
list(executor.map(self.file_system.rm_file, files))
|
|
312
|
+
|
|
313
|
+
def cp(self, destination: "Path | str") -> "Path":
|
|
314
|
+
return self._cp_or_mv(destination, "cp")
|
|
315
|
+
|
|
316
|
+
def mv(self, destination: "Path | str") -> "Path":
|
|
317
|
+
return self._cp_or_mv(destination, "mv")
|
|
318
|
+
|
|
303
319
|
def versions(self, include_versionless: bool = False) -> "PathSeries":
|
|
304
320
|
"""Returns a PathSeries of all versions of the file."""
|
|
305
321
|
files_in_folder: Iterable[Path] = self.parent.glob("**", recursive=False)
|
|
@@ -722,10 +738,10 @@ class Path(str, _PathBase):
|
|
|
722
738
|
|
|
723
739
|
Example
|
|
724
740
|
-------
|
|
725
|
-
>>> folder = 'ssb-
|
|
741
|
+
>>> folder = 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023'
|
|
726
742
|
>>> file_path = folder / "ABAS_kommune_flate_p2023_v1.parquet"
|
|
727
743
|
>>> file_path
|
|
728
|
-
'ssb-
|
|
744
|
+
'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
|
|
729
745
|
"""
|
|
730
746
|
if not isinstance(other, (str, PurePath, os.PathLike)):
|
|
731
747
|
raise TypeError(
|
|
@@ -783,6 +799,27 @@ class Path(str, _PathBase):
|
|
|
783
799
|
def _new(self, new_path: str | Path) -> "Path":
|
|
784
800
|
return self.__class__(new_path, self.file_system)
|
|
785
801
|
|
|
802
|
+
def _cp_or_mv(self, destination: "Path | str", attr: str) -> "Path":
|
|
803
|
+
func: Callable = getattr(self.file_system, attr)
|
|
804
|
+
try:
|
|
805
|
+
func(self, destination)
|
|
806
|
+
except FileNotFoundError:
|
|
807
|
+
destination = self.__class__(destination)
|
|
808
|
+
sources = list(self.glob("**").files)
|
|
809
|
+
destinations = [path.replace(self, destination) for path in sources]
|
|
810
|
+
with ThreadPoolExecutor() as executor:
|
|
811
|
+
list(executor.map(func, sources, destinations))
|
|
812
|
+
self._new(destination)
|
|
813
|
+
|
|
814
|
+
def keep_newest_partitions(self) -> "Path":
|
|
815
|
+
def _keep_newest(path):
|
|
816
|
+
while True:
|
|
817
|
+
if path.isfile():
|
|
818
|
+
pass
|
|
819
|
+
|
|
820
|
+
with ThreadPoolExecutor() as executor:
|
|
821
|
+
list(executor.map(_keep_newest, self.ls()))
|
|
822
|
+
|
|
786
823
|
|
|
787
824
|
class PathSeries(pd.Series, _PathBase):
|
|
788
825
|
"""A pandas Series for working with GCS (Google Cloud Storage) paths.
|
|
@@ -1526,9 +1563,12 @@ def get_schema(file) -> pyarrow.Schema:
|
|
|
1526
1563
|
def _get_schema(path):
|
|
1527
1564
|
try:
|
|
1528
1565
|
return pq.read_schema(path)
|
|
1529
|
-
except FileNotFoundError:
|
|
1530
|
-
|
|
1531
|
-
|
|
1566
|
+
except FileNotFoundError as e:
|
|
1567
|
+
try:
|
|
1568
|
+
with file_system.open(path, "rb") as f:
|
|
1569
|
+
return pq.read_schema(f)
|
|
1570
|
+
except Exception as e2:
|
|
1571
|
+
raise e2.__class__(f"{e2}. {path}") from e
|
|
1532
1572
|
|
|
1533
1573
|
with ThreadPoolExecutor() as executor:
|
|
1534
1574
|
return pyarrow.unify_schemas(
|
|
@@ -1542,13 +1582,7 @@ def get_schema(file) -> pyarrow.Schema:
|
|
|
1542
1582
|
def get_num_rows(file):
|
|
1543
1583
|
try:
|
|
1544
1584
|
return pq.read_metadata(file).num_rows
|
|
1545
|
-
except
|
|
1546
|
-
PermissionError,
|
|
1547
|
-
pyarrow.ArrowInvalid,
|
|
1548
|
-
FileNotFoundError,
|
|
1549
|
-
TypeError,
|
|
1550
|
-
OSError,
|
|
1551
|
-
) as e:
|
|
1585
|
+
except Exception as e:
|
|
1552
1586
|
try:
|
|
1553
1587
|
return ds.dataset(file).count_rows()
|
|
1554
1588
|
except Exception as e2:
|
|
File without changes
|
|
File without changes
|