daplapath 2.0.7__tar.gz → 2.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: daplapath
3
- Version: 2.0.7
3
+ Version: 2.0.8
4
4
  Summary: A pathlib.Path class for dapla
5
5
  License: MIT
6
6
  Author: ort
@@ -35,14 +35,14 @@ from daplapath.path import Path
35
35
 
36
36
 
37
37
  ```python
38
- folder = Path('ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024')
38
+ folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
39
39
  folder
40
40
  ```
41
41
 
42
42
 
43
43
 
44
44
 
45
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
45
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
46
46
 
47
47
 
48
48
 
@@ -107,7 +107,7 @@ file
107
107
 
108
108
 
109
109
 
110
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
110
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
111
111
 
112
112
 
113
113
 
@@ -119,7 +119,7 @@ file.parent
119
119
 
120
120
 
121
121
 
122
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
122
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
123
123
 
124
124
 
125
125
 
@@ -135,8 +135,7 @@ file.columns
135
135
 
136
136
 
137
137
 
138
- Index(['OBJTYPE', 'NAVN', "komm_nr", "fylke_nr", 'AREAL_GDB', 'SHAPE_Length',
139
- 'SHAPE_Area', 'geometry'],
138
+ Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
140
139
  dtype='object')
141
140
 
142
141
 
@@ -149,13 +148,11 @@ file.dtypes
149
148
 
150
149
 
151
150
 
152
- OBJTYPE string
153
- NAVN string
151
+ objtype string
152
+ navn string
154
153
  komm_nr string
155
154
  fylke_nr string
156
- AREAL_GDB double
157
- SHAPE_Length double
158
- SHAPE_Area double
155
+ areal_gdb double
159
156
  geometry binary
160
157
  dtype: object
161
158
 
@@ -206,7 +203,7 @@ file.latest_version()
206
203
 
207
204
 
208
205
 
209
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
206
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
210
207
 
211
208
 
212
209
 
@@ -218,7 +215,7 @@ file.highest_numbered_version()
218
215
 
219
216
 
220
217
 
221
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
218
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
222
219
 
223
220
 
224
221
 
@@ -231,7 +228,7 @@ file.new_version()
231
228
 
232
229
 
233
230
 
234
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
231
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
235
232
 
236
233
 
237
234
 
@@ -268,11 +265,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
268
265
 
269
266
  ```python
270
267
  print(
271
- Path("ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data").tree()
268
+ Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
272
269
  )
273
270
  ```
274
271
 
275
- ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data /
272
+ ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
276
273
  └──2000 /
277
274
  └──SSB_tettsted_flate_p2000.parquet
278
275
  └──SSB_tettsted_flate_p2000_v1.parquet
@@ -19,14 +19,14 @@ from daplapath.path import Path
19
19
 
20
20
 
21
21
  ```python
22
- folder = Path('ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024')
22
+ folder = Path('ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024')
23
23
  folder
24
24
  ```
25
25
 
26
26
 
27
27
 
28
28
 
29
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
29
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
30
30
 
31
31
 
32
32
 
@@ -91,7 +91,7 @@ file
91
91
 
92
92
 
93
93
 
94
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
94
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
95
95
 
96
96
 
97
97
 
@@ -103,7 +103,7 @@ file.parent
103
103
 
104
104
 
105
105
 
106
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024'
106
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024'
107
107
 
108
108
 
109
109
 
@@ -119,8 +119,7 @@ file.columns
119
119
 
120
120
 
121
121
 
122
- Index(['OBJTYPE', 'NAVN', "komm_nr", "fylke_nr", 'AREAL_GDB', 'SHAPE_Length',
123
- 'SHAPE_Area', 'geometry'],
122
+ Index(['objtype', 'navn', "komm_nr", "fylke_nr", 'areal_gdb', 'geometry'],
124
123
  dtype='object')
125
124
 
126
125
 
@@ -133,13 +132,11 @@ file.dtypes
133
132
 
134
133
 
135
134
 
136
- OBJTYPE string
137
- NAVN string
135
+ objtype string
136
+ navn string
138
137
  komm_nr string
139
138
  fylke_nr string
140
- AREAL_GDB double
141
- SHAPE_Length double
142
- SHAPE_Area double
139
+ areal_gdb double
143
140
  geometry binary
144
141
  dtype: object
145
142
 
@@ -190,7 +187,7 @@ file.latest_version()
190
187
 
191
188
 
192
189
 
193
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
190
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
194
191
 
195
192
 
196
193
 
@@ -202,7 +199,7 @@ file.highest_numbered_version()
202
199
 
203
200
 
204
201
 
205
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
202
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v1.parquet'
206
203
 
207
204
 
208
205
 
@@ -215,7 +212,7 @@ file.new_version()
215
212
 
216
213
 
217
214
 
218
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
215
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2024/ABAS_kommune_utenhav_p2024_v2.parquet'
219
216
 
220
217
 
221
218
 
@@ -252,11 +249,11 @@ Filtre med hyperlenke. Gjør at man kopierer stien når man klikker på den.
252
249
 
253
250
  ```python
254
251
  print(
255
- Path("ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data").tree()
252
+ Path("ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data").tree()
256
253
  )
257
254
  ```
258
255
 
259
- ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data /
256
+ ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data /
260
257
  └──2000 /
261
258
  └──SSB_tettsted_flate_p2000.parquet
262
259
  └──SSB_tettsted_flate_p2000_v1.parquet
@@ -96,7 +96,7 @@ class LocalFileSystem(AbstractFileSystem):
96
96
  return cls.cp_file(source, destination, **kwargs)
97
97
 
98
98
  @staticmethod
99
- def cp_file(self, path1, path2, **kwargs):
99
+ def cp_file(source, destination, **kwargs):
100
100
  os.makedirs(pathlib.Path(destination).parent, exist_ok=True)
101
101
  return shutil.copy2(source, destination, **kwargs)
102
102
 
@@ -252,11 +252,16 @@ class Path(str, _PathBase):
252
252
  def rglob(self, pattern: str, **kwargs) -> "PathSeries":
253
253
  return self.glob(pattern, recursive=True, **kwargs)
254
254
 
255
- def glob(self, pattern: str, recursive: bool = True, **kwargs) -> "PathSeries":
255
+ def glob(
256
+ self, pattern: str | None = None, recursive: bool = True, **kwargs
257
+ ) -> "PathSeries":
256
258
  """Create PathSeries of files/directories that match the pattern."""
257
259
  recursive = kwargs.get("recurse_symlinks", recursive)
258
260
 
259
- pattern = str(self / pattern)
261
+ if pattern:
262
+ pattern = str(self / pattern)
263
+ else:
264
+ pattern = str(self)
260
265
 
261
266
  # pop kwargs going into PathSeries initialiser.
262
267
  iterable_init_args = get_arguments(self._iterable_type)
@@ -300,6 +305,17 @@ class Path(str, _PathBase):
300
305
  """
301
306
  return self.glob("**", recursive=recursive, **kwargs)
302
307
 
308
+ def rmdir(self) -> None:
309
+ files = self.glob("**").files
310
+ with ThreadPoolExecutor() as executor:
311
+ list(executor.map(self.file_system.rm_file, files))
312
+
313
+ def cp(self, destination: "Path | str") -> "Path":
314
+ return self._cp_or_mv(destination, "cp")
315
+
316
+ def mv(self, destination: "Path | str") -> "Path":
317
+ return self._cp_or_mv(destination, "mv")
318
+
303
319
  def versions(self, include_versionless: bool = False) -> "PathSeries":
304
320
  """Returns a PathSeries of all versions of the file."""
305
321
  files_in_folder: Iterable[Path] = self.parent.glob("**", recursive=False)
@@ -722,10 +738,10 @@ class Path(str, _PathBase):
722
738
 
723
739
  Example
724
740
  -------
725
- >>> folder = 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2023'
741
+ >>> folder = 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023'
726
742
  >>> file_path = folder / "ABAS_kommune_flate_p2023_v1.parquet"
727
743
  >>> file_path
728
- 'ssb-kart-data-delt-geo-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
744
+ 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023/ABAS_kommune_flate_p2023_v1.parquet'
729
745
  """
730
746
  if not isinstance(other, (str, PurePath, os.PathLike)):
731
747
  raise TypeError(
@@ -783,6 +799,27 @@ class Path(str, _PathBase):
783
799
  def _new(self, new_path: str | Path) -> "Path":
784
800
  return self.__class__(new_path, self.file_system)
785
801
 
802
+ def _cp_or_mv(self, destination: "Path | str", attr: str) -> "Path":
803
+ func: Callable = getattr(self.file_system, attr)
804
+ try:
805
+ func(self, destination)
806
+ except FileNotFoundError:
807
+ destination = self.__class__(destination)
808
+ sources = list(self.glob("**").files)
809
+ destinations = [path.replace(self, destination) for path in sources]
810
+ with ThreadPoolExecutor() as executor:
811
+ list(executor.map(func, sources, destinations))
812
+ self._new(destination)
813
+
814
+ def keep_newest_partitions(self) -> "Path":
815
+ def _keep_newest(path):
816
+ while True:
817
+ if path.isfile():
818
+ pass
819
+
820
+ with ThreadPoolExecutor() as executor:
821
+ list(executor.map(_keep_newest, self.ls()))
822
+
786
823
 
787
824
  class PathSeries(pd.Series, _PathBase):
788
825
  """A pandas Series for working with GCS (Google Cloud Storage) paths.
@@ -1526,9 +1563,12 @@ def get_schema(file) -> pyarrow.Schema:
1526
1563
  def _get_schema(path):
1527
1564
  try:
1528
1565
  return pq.read_schema(path)
1529
- except FileNotFoundError:
1530
- with file_system.open(path, "rb") as f:
1531
- return pq.read_schema(f)
1566
+ except FileNotFoundError as e:
1567
+ try:
1568
+ with file_system.open(path, "rb") as f:
1569
+ return pq.read_schema(f)
1570
+ except Exception as e2:
1571
+ raise e2.__class__(f"{e2}. {path}") from e
1532
1572
 
1533
1573
  with ThreadPoolExecutor() as executor:
1534
1574
  return pyarrow.unify_schemas(
@@ -1542,13 +1582,7 @@ def get_schema(file) -> pyarrow.Schema:
1542
1582
  def get_num_rows(file):
1543
1583
  try:
1544
1584
  return pq.read_metadata(file).num_rows
1545
- except (
1546
- PermissionError,
1547
- pyarrow.ArrowInvalid,
1548
- FileNotFoundError,
1549
- TypeError,
1550
- OSError,
1551
- ) as e:
1585
+ except Exception as e:
1552
1586
  try:
1553
1587
  return ds.dataset(file).count_rows()
1554
1588
  except Exception as e2:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "daplapath"
3
- version = "2.0.7"
3
+ version = "2.0.8"
4
4
  description = "A pathlib.Path class for dapla"
5
5
  authors = ["ort <ort@ssb.no>"]
6
6
  license = "MIT"
File without changes