daplapath 2.0.6__tar.gz → 2.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: daplapath
3
- Version: 2.0.6
3
+ Version: 2.0.8
4
4
  Summary: A pathlib.Path class for dapla
5
5
  License: MIT
6
6
  Author: ort
@@ -35,14 +35,14 @@ from daplapath.path import Path
35
35
 
36
36
 
37
37
  ```python
38
- folder = Path('ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024')
38
+ folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
39
39
  folder
40
40
  ```
41
41
 
42
42
 
43
43
 
44
44
 
45
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
45
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
46
46
 
47
47
 
48
48
 
@@ -107,7 +107,7 @@ file
107
107
 
108
108
 
109
109
 
110
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
110
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
111
111
 
112
112
 
113
113
 
@@ -119,7 +119,7 @@ file.parent
119
119
 
120
120
 
121
121
 
122
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
122
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
123
123
 
124
124
 
125
125
 
@@ -135,8 +135,7 @@ file.columns
135
135
 
136
136
 
137
137
 
138
- Index(['OBJTYPE', 'NAVN', "komm_nr", "fylke_nr", 'AREAL_GDB', 'SHAPE_Length',
139
- 'SHAPE_Area', 'geometry'],
138
+ Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
140
139
  dtype='object')
141
140
 
142
141
 
@@ -149,13 +148,11 @@ file.dtypes
149
148
 
150
149
 
151
150
 
152
- OBJTYPE string
153
- NAVN string
151
+ objtype string
152
+ navn string
154
153
  komm_nr string
155
154
  fylke_nr string
156
- AREAL_GDB double
157
- SHAPE_Length double
158
- SHAPE_Area double
155
+ areal_gdb double
159
156
  geometry binary
160
157
  dtype: object
161
158
 
@@ -206,7 +203,7 @@ file.latest_version()
206
203
 
207
204
 
208
205
 
209
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
206
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
210
207
 
211
208
 
212
209
 
@@ -218,7 +215,7 @@ file.highest_numbered_version()
218
215
 
219
216
 
220
217
 
221
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
218
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
222
219
 
223
220
 
224
221
 
@@ -231,7 +228,7 @@ file.new_version()
231
228
 
232
229
 
233
230
 
234
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
231
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
235
232
 
236
233
 
237
234
 
@@ -268,11 +265,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
268
265
 
269
266
  ```python
270
267
  print(
271
- Path("ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data").tree()
268
+ Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
272
269
  )
273
270
  ```
274
271
 
275
- ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data /
272
+ ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
276
273
  └──2000 /
277
274
  └──SSB_tettsted_flate_p2000.parquet
278
275
  └──SSB_tettsted_flate_p2000_v1.parquet
@@ -19,14 +19,14 @@ from daplapath.path import Path
19
19
 
20
20
 
21
21
  ```python
22
- folder = Path('ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024')
22
+ folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
23
23
  folder
24
24
  ```
25
25
 
26
26
 
27
27
 
28
28
 
29
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
29
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
30
30
 
31
31
 
32
32
 
@@ -91,7 +91,7 @@ file
91
91
 
92
92
 
93
93
 
94
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
94
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
95
95
 
96
96
 
97
97
 
@@ -103,7 +103,7 @@ file.parent
103
103
 
104
104
 
105
105
 
106
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
106
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
107
107
 
108
108
 
109
109
 
@@ -119,8 +119,7 @@ file.columns
119
119
 
120
120
 
121
121
 
122
- Index(['OBJTYPE', 'NAVN', "komm_nr", "fylke_nr", 'AREAL_GDB', 'SHAPE_Length',
123
- 'SHAPE_Area', 'geometry'],
122
+ Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
124
123
  dtype='object')
125
124
 
126
125
 
@@ -133,13 +132,11 @@ file.dtypes
133
132
 
134
133
 
135
134
 
136
- OBJTYPE string
137
- NAVN string
135
+ objtype string
136
+ navn string
138
137
  komm_nr string
139
138
  fylke_nr string
140
- AREAL_GDB double
141
- SHAPE_Length double
142
- SHAPE_Area double
139
+ areal_gdb double
143
140
  geometry binary
144
141
  dtype: object
145
142
 
@@ -190,7 +187,7 @@ file.latest_version()
190
187
 
191
188
 
192
189
 
193
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
190
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
194
191
 
195
192
 
196
193
 
@@ -202,7 +199,7 @@ file.highest_numbered_version()
202
199
 
203
200
 
204
201
 
205
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
202
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
206
203
 
207
204
 
208
205
 
@@ -215,7 +212,7 @@ file.new_version()
215
212
 
216
213
 
217
214
 
218
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
215
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
219
216
 
220
217
 
221
218
 
@@ -252,11 +249,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
252
249
 
253
250
  ```python
254
251
  print(
255
- Path("ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data").tree()
252
+ Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
256
253
  )
257
254
  ```
258
255
 
259
- ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data /
256
+ ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
260
257
  └──2000 /
261
258
  └──SSB_tettsted_flate_p2000.parquet
262
259
  └──SSB_tettsted_flate_p2000_v1.parquet
@@ -96,7 +96,7 @@ class LocalFileSystem(AbstractFileSystem):
96
96
  return cls.cp_file(source, destination, **kwargs)
97
97
 
98
98
  @staticmethod
99
- def cp_file(self, path1, path2, **kwargs):
99
+ def cp_file(source, destination, **kwargs):
100
100
  os.makedirs(pathlib.Path(destination).parent, exist_ok=True)
101
101
  return shutil.copy2(source, destination, **kwargs)
102
102
 
@@ -252,11 +252,16 @@ class Path(str, _PathBase):
252
252
  def rglob(self, pattern: str, **kwargs) -> "PathSeries":
253
253
  return self.glob(pattern, recursive=True, **kwargs)
254
254
 
255
- def glob(self, pattern: str, recursive: bool = True, **kwargs) -> "PathSeries":
255
+ def glob(
256
+ self, pattern: str | None = None, recursive: bool = True, **kwargs
257
+ ) -> "PathSeries":
256
258
  """Create PathSeries of files/directories that match the pattern."""
257
259
  recursive = kwargs.get("recurse_symlinks", recursive)
258
260
 
259
- pattern = str(self / pattern)
261
+ if pattern:
262
+ pattern = str(self / pattern)
263
+ else:
264
+ pattern = str(self)
260
265
 
261
266
  # pop kwargs going into PathSeries initialiser.
262
267
  iterable_init_args = get_arguments(self._iterable_type)
@@ -300,6 +305,17 @@ class Path(str, _PathBase):
300
305
  """
301
306
  return self.glob("**", recursive=recursive, **kwargs)
302
307
 
308
+ def rmdir(self) -> None:
309
+ files = self.glob("**").files
310
+ with ThreadPoolExecutor() as executor:
311
+ list(executor.map(self.file_system.rm_file, files))
312
+
313
+ def cp(self, destination: "Path | str") -> "Path":
314
+ return self._cp_or_mv(destination, "cp")
315
+
316
+ def mv(self, destination: "Path | str") -> "Path":
317
+ return self._cp_or_mv(destination, "mv")
318
+
303
319
  def versions(self, include_versionless: bool = False) -> "PathSeries":
304
320
  """Returns a PathSeries of all versions of the file."""
305
321
  files_in_folder: Iterable[Path] = self.parent.glob("**", recursive=False)
@@ -605,7 +621,9 @@ class Path(str, _PathBase):
605
621
  try:
606
622
  with self.open("rb") as file:
607
623
  return get_schema(file)
608
- except (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
624
+ except (
625
+ Exception
626
+ ): # (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
609
627
  return get_schema(self)
610
628
 
611
629
  @property
@@ -623,7 +641,9 @@ class Path(str, _PathBase):
623
641
  try:
624
642
  with self.open("rb") as file:
625
643
  return get_shape(file)
626
- except (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
644
+ except (
645
+ Exception
646
+ ): # (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
627
647
  return get_shape(self)
628
648
 
629
649
  @property
@@ -718,10 +738,10 @@ class Path(str, _PathBase):
718
738
 
719
739
  Example
720
740
  -------
721
- >>> folder = 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2023'
741
+ >>> folder = 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023'
722
742
  >>> file_path = folder / "ABAS_kommune_flate_p2023_v1.parquet"
723
743
  >>> file_path
724
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
744
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
725
745
  """
726
746
  if not isinstance(other, (str, PurePath, os.PathLike)):
727
747
  raise TypeError(
@@ -779,6 +799,27 @@ class Path(str, _PathBase):
779
799
  def _new(self, new_path: str | Path) -> "Path":
780
800
  return self.__class__(new_path, self.file_system)
781
801
 
802
+ def _cp_or_mv(self, destination: "Path | str", attr: str) -> "Path":
803
+ func: Callable = getattr(self.file_system, attr)
804
+ try:
805
+ func(self, destination)
806
+ except FileNotFoundError:
807
+ destination = self.__class__(destination)
808
+ sources = list(self.glob("**").files)
809
+ destinations = [path.replace(self, destination) for path in sources]
810
+ with ThreadPoolExecutor() as executor:
811
+ list(executor.map(func, sources, destinations))
812
+ self._new(destination)
813
+
814
+ def keep_newest_partitions(self) -> "Path":
815
+ def _keep_newest(path):
816
+ while True:
817
+ if path.isfile():
818
+ pass
819
+
820
+ with ThreadPoolExecutor() as executor:
821
+ list(executor.map(_keep_newest, self.ls()))
822
+
782
823
 
783
824
  class PathSeries(pd.Series, _PathBase):
784
825
  """A pandas Series for working with GCS (Google Cloud Storage) paths.
@@ -1510,7 +1551,7 @@ def get_schema(file) -> pyarrow.Schema:
1510
1551
  FileNotFoundError,
1511
1552
  IsADirectoryError,
1512
1553
  OSError,
1513
- ):
1554
+ ) as e:
1514
1555
  # try:
1515
1556
  # return ds.dataset(file).schema
1516
1557
  # except (TypeError, FileNotFoundError) as e:
@@ -1522,9 +1563,12 @@ def get_schema(file) -> pyarrow.Schema:
1522
1563
  def _get_schema(path):
1523
1564
  try:
1524
1565
  return pq.read_schema(path)
1525
- except FileNotFoundError:
1526
- with file_system.open(path, "rb") as f:
1527
- return pq.read_schema(f)
1566
+ except FileNotFoundError as e:
1567
+ try:
1568
+ with file_system.open(path, "rb") as f:
1569
+ return pq.read_schema(f)
1570
+ except Exception as e2:
1571
+ raise e2.__class__(f"{e2}. {path}") from e
1528
1572
 
1529
1573
  with ThreadPoolExecutor() as executor:
1530
1574
  return pyarrow.unify_schemas(
@@ -1538,13 +1582,7 @@ def get_schema(file) -> pyarrow.Schema:
1538
1582
  def get_num_rows(file):
1539
1583
  try:
1540
1584
  return pq.read_metadata(file).num_rows
1541
- except (
1542
- PermissionError,
1543
- pyarrow.ArrowInvalid,
1544
- FileNotFoundError,
1545
- TypeError,
1546
- OSError,
1547
- ) as e:
1585
+ except Exception as e:
1548
1586
  try:
1549
1587
  return ds.dataset(file).count_rows()
1550
1588
  except Exception as e2:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "daplapath"
3
- version = "2.0.6"
3
+ version = "2.0.8"
4
4
  description = "A pathlib.Path class for dapla"
5
5
  authors = ["ort <ort@ssb.no>"]
6
6
  license = "MIT"
File without changes