daplapath 2.0.7__py3-none-any.whl → 2.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
daplapath/path.py CHANGED
@@ -96,7 +96,7 @@ class LocalFileSystem(AbstractFileSystem):
96
96
  return cls.cp_file(source, destination, **kwargs)
97
97
 
98
98
  @staticmethod
99
- def cp_file(self, path1, path2, **kwargs):
99
+ def cp_file(source, destination, **kwargs):
100
100
  os.makedirs(pathlib.Path(destination).parent, exist_ok=True)
101
101
  return shutil.copy2(source, destination, **kwargs)
102
102
 
@@ -252,11 +252,16 @@ class Path(str, _PathBase):
252
252
  def rglob(self, pattern: str, **kwargs) -> "PathSeries":
253
253
  return self.glob(pattern, recursive=True, **kwargs)
254
254
 
255
- def glob(self, pattern: str, recursive: bool = True, **kwargs) -> "PathSeries":
255
+ def glob(
256
+ self, pattern: str | None = None, recursive: bool = True, **kwargs
257
+ ) -> "PathSeries":
256
258
  """Create PathSeries of files/directories that match the pattern."""
257
259
  recursive = kwargs.get("recurse_symlinks", recursive)
258
260
 
259
- pattern = str(self / pattern)
261
+ if pattern:
262
+ pattern = str(self / pattern)
263
+ else:
264
+ pattern = str(self)
260
265
 
261
266
  # pop kwargs going into PathSeries initialiser.
262
267
  iterable_init_args = get_arguments(self._iterable_type)
@@ -300,6 +305,17 @@ class Path(str, _PathBase):
300
305
  """
301
306
  return self.glob("**", recursive=recursive, **kwargs)
302
307
 
308
+ def rmdir(self) -> None:
309
+ files = self.glob("**").files
310
+ with ThreadPoolExecutor() as executor:
311
+ list(executor.map(self.file_system.rm_file, files))
312
+
313
+ def cp(self, destination: "Path | str") -> "Path":
314
+ return self._cp_or_mv(destination, "cp")
315
+
316
+ def mv(self, destination: "Path | str") -> "Path":
317
+ return self._cp_or_mv(destination, "mv")
318
+
303
319
  def versions(self, include_versionless: bool = False) -> "PathSeries":
304
320
  """Returns a PathSeries of all versions of the file."""
305
321
  files_in_folder: Iterable[Path] = self.parent.glob("**", recursive=False)
@@ -722,10 +738,10 @@ class Path(str, _PathBase):
722
738
 
723
739
  Example
724
740
  -------
725
- >>> folder = 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2023'
741
+ >>> folder = 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023'
726
742
  >>> file_path = folder / "ABAS_kommune_flate_p2023_v1.parquet"
727
743
  >>> file_path
728
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
744
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
729
745
  """
730
746
  if not isinstance(other, (str, PurePath, os.PathLike)):
731
747
  raise TypeError(
@@ -783,6 +799,27 @@ class Path(str, _PathBase):
783
799
  def _new(self, new_path: str | Path) -> "Path":
784
800
  return self.__class__(new_path, self.file_system)
785
801
 
802
+ def _cp_or_mv(self, destination: "Path | str", attr: str) -> "Path":
803
+ func: Callable = getattr(self.file_system, attr)
804
+ try:
805
+ func(self, destination)
806
+ except FileNotFoundError:
807
+ destination = self.__class__(destination)
808
+ sources = list(self.glob("**").files)
809
+ destinations = [path.replace(self, destination) for path in sources]
810
+ with ThreadPoolExecutor() as executor:
811
+ list(executor.map(func, sources, destinations))
812
+ self._new(destination)
813
+
814
+ def keep_newest_partitions(self) -> "Path":
815
+ def _keep_newest(path):
816
+ while True:
817
+ if path.isfile():
818
+ pass
819
+
820
+ with ThreadPoolExecutor() as executor:
821
+ list(executor.map(_keep_newest, self.ls()))
822
+
786
823
 
787
824
  class PathSeries(pd.Series, _PathBase):
788
825
  """A pandas Series for working with GCS (Google Cloud Storage) paths.
@@ -1526,9 +1563,12 @@ def get_schema(file) -> pyarrow.Schema:
1526
1563
  def _get_schema(path):
1527
1564
  try:
1528
1565
  return pq.read_schema(path)
1529
- except FileNotFoundError:
1530
- with file_system.open(path, "rb") as f:
1531
- return pq.read_schema(f)
1566
+ except FileNotFoundError as e:
1567
+ try:
1568
+ with file_system.open(path, "rb") as f:
1569
+ return pq.read_schema(f)
1570
+ except Exception as e2:
1571
+ raise e2.__class__(f"{e2}. {path}") from e
1532
1572
 
1533
1573
  with ThreadPoolExecutor() as executor:
1534
1574
  return pyarrow.unify_schemas(
@@ -1542,13 +1582,7 @@ def get_schema(file) -> pyarrow.Schema:
1542
1582
  def get_num_rows(file):
1543
1583
  try:
1544
1584
  return pq.read_metadata(file).num_rows
1545
- except (
1546
- PermissionError,
1547
- pyarrow.ArrowInvalid,
1548
- FileNotFoundError,
1549
- TypeError,
1550
- OSError,
1551
- ) as e:
1585
+ except Exception as e:
1552
1586
  try:
1553
1587
  return ds.dataset(file).count_rows()
1554
1588
  except Exception as e2:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: daplapath
3
- Version: 2.0.7
3
+ Version: 2.0.8
4
4
  Summary: A pathlib.Path class for dapla
5
5
  License: MIT
6
6
  Author: ort
@@ -35,14 +35,14 @@ from daplapath.path import Path
35
35
 
36
36
 
37
37
  ```python
38
- folder = Path('ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024')
38
+ folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
39
39
  folder
40
40
  ```
41
41
 
42
42
 
43
43
 
44
44
 
45
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
45
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
46
46
 
47
47
 
48
48
 
@@ -107,7 +107,7 @@ file
107
107
 
108
108
 
109
109
 
110
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
110
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
111
111
 
112
112
 
113
113
 
@@ -119,7 +119,7 @@ file.parent
119
119
 
120
120
 
121
121
 
122
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
122
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
123
123
 
124
124
 
125
125
 
@@ -135,8 +135,7 @@ file.columns
135
135
 
136
136
 
137
137
 
138
- Index(['OBJTYPE', 'NAVN', "komm_nr", "fylke_nr", 'AREAL_GDB', 'SHAPE_Length',
139
- 'SHAPE_Area', 'geometry'],
138
+ Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
140
139
  dtype='object')
141
140
 
142
141
 
@@ -149,13 +148,11 @@ file.dtypes
149
148
 
150
149
 
151
150
 
152
- OBJTYPE string
153
- NAVN string
151
+ objtype string
152
+ navn string
154
153
  komm_nr string
155
154
  fylke_nr string
156
- AREAL_GDB double
157
- SHAPE_Length double
158
- SHAPE_Area double
155
+ areal_gdb double
159
156
  geometry binary
160
157
  dtype: object
161
158
 
@@ -206,7 +203,7 @@ file.latest_version()
206
203
 
207
204
 
208
205
 
209
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
206
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
210
207
 
211
208
 
212
209
 
@@ -218,7 +215,7 @@ file.highest_numbered_version()
218
215
 
219
216
 
220
217
 
221
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
218
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
222
219
 
223
220
 
224
221
 
@@ -231,7 +228,7 @@ file.new_version()
231
228
 
232
229
 
233
230
 
234
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
231
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
235
232
 
236
233
 
237
234
 
@@ -268,11 +265,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
268
265
 
269
266
  ```python
270
267
  print(
271
- Path("ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data").tree()
268
+ Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
272
269
  )
273
270
  ```
274
271
 
275
- ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data /
272
+ ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
276
273
  └──2000 /
277
274
  └──SSB_tettsted_flate_p2000.parquet
278
275
  └──SSB_tettsted_flate_p2000_v1.parquet
@@ -0,0 +1,6 @@
1
+ daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
2
+ daplapath/path.py,sha256=f4N8JeJ-RqkS0jIoAk69FlVTGR8kyj64FhJR59CZX1w,53928
3
+ daplapath-2.0.8.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
4
+ daplapath-2.0.8.dist-info/METADATA,sha256=qc6AKIcFCJTun5HaeakY80u03SBsNv0Okzi2kJORvAg,14697
5
+ daplapath-2.0.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
6
+ daplapath-2.0.8.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
2
- daplapath/path.py,sha256=_6qbMdkEis1dNsoXwCdl1IT2hedTORvR5vlDNbxPNak,52602
3
- daplapath-2.0.7.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
4
- daplapath-2.0.7.dist-info/METADATA,sha256=mYwZhDxhkjptdi8K2s8oHLSMIzGfSVbPX7JB1mcs-cA,14774
5
- daplapath-2.0.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
6
- daplapath-2.0.7.dist-info/RECORD,,