sertit 1.44.1.dev0__tar.gz → 1.44.1.dev1__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {sertit-1.44.1.dev0/sertit.egg-info → sertit-1.44.1.dev1}/PKG-INFO +1 -1
  2. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/__meta__.py +1 -1
  3. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/files.py +24 -12
  4. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/path.py +14 -3
  5. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/vectors.py +5 -2
  6. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1/sertit.egg-info}/PKG-INFO +1 -1
  7. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/LICENSE +0 -0
  8. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/MANIFEST.in +0 -0
  9. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/NOTICE +0 -0
  10. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/README.md +0 -0
  11. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/Makefile +0 -0
  12. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/_static/favicon.png +0 -0
  13. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/_static/sertit_utils.png +0 -0
  14. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/_templates/custom-base-template.rst +0 -0
  15. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/_templates/custom-class-template.rst +0 -0
  16. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/_templates/custom-module-template.rst +0 -0
  17. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/conf.py +0 -0
  18. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/history.md +0 -0
  19. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/docs/index.md +0 -0
  20. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/requirements.txt +0 -0
  21. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/__init__.py +0 -0
  22. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/arcpy.py +0 -0
  23. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/ci.py +0 -0
  24. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/dask.py +0 -0
  25. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/display.py +0 -0
  26. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/geometry.py +0 -0
  27. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/logs.py +0 -0
  28. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/misc.py +0 -0
  29. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/network.py +0 -0
  30. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/rasters.py +0 -0
  31. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/rasters_rio.py +0 -0
  32. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/s3.py +0 -0
  33. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/snap.py +0 -0
  34. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/strings.py +0 -0
  35. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/types.py +0 -0
  36. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/unistra.py +0 -0
  37. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit/xml.py +0 -0
  38. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit.egg-info/SOURCES.txt +0 -0
  39. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit.egg-info/dependency_links.txt +0 -0
  40. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit.egg-info/requires.txt +0 -0
  41. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/sertit.egg-info/top_level.txt +0 -0
  42. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/setup.cfg +0 -0
  43. {sertit-1.44.1.dev0 → sertit-1.44.1.dev1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sertit
3
- Version: 1.44.1.dev0
3
+ Version: 1.44.1.dev1
4
4
  Summary: ('SERTIT python library for generic tools',)
5
5
  Home-page: UNKNOWN
6
6
  Author: ICube-SERTIT
@@ -17,7 +17,7 @@
17
17
  """
18
18
  Sertit Utils library
19
19
  """
20
- __version__ = "1.44.1.dev0"
20
+ __version__ = "1.44.1.dev1"
21
21
  __title__ = "sertit"
22
22
  __description__ = ("SERTIT python library for generic tools",)
23
23
  __author__ = "ICube-SERTIT"
@@ -377,7 +377,9 @@ def get_archived_rio_path(
377
377
  return path.get_archived_rio_path(archive_path, file_regex, as_list)
378
378
 
379
379
 
380
- def read_archived_file(archive_path: AnyPathStrType, regex: str) -> bytes:
380
+ def read_archived_file(
381
+ archive_path: AnyPathStrType, regex: str, file_list: list = None
382
+ ) -> bytes:
381
383
  """
382
384
  Read archived file (in bytes) from :code:`zip` or :code:`tar` archives.
383
385
 
@@ -386,6 +388,7 @@ def read_archived_file(archive_path: AnyPathStrType, regex: str) -> bytes:
386
388
  Args:
387
389
  archive_path (AnyPathStrType): Archive path
388
390
  regex (str): Regex (used by re) as it can be found in the getmembers() list
391
+ file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
389
392
 
390
393
  Returns:
391
394
  bytes: Archived file in bytes
@@ -399,16 +402,19 @@ def read_archived_file(archive_path: AnyPathStrType, regex: str) -> bytes:
399
402
  try:
400
403
  if archive_path.suffix == ".tar":
401
404
  with tarfile.open(archive_path) as tar_ds:
402
- tar_mb = tar_ds.getmembers()
403
- name_list = [mb.name for mb in tar_mb]
404
- band_name = list(filter(regex.match, name_list))[0]
405
- tarinfo = [mb for mb in tar_mb if mb.name == band_name][0]
405
+ # file_list is not very useful for TAR files...
406
+ if file_list is None:
407
+ tar_mb = tar_ds.getmembers()
408
+ file_list = [mb.name for mb in tar_mb]
409
+ name = list(filter(regex.match, file_list))[0]
410
+ tarinfo = tar_ds.getmember(name)
406
411
  file_str = tar_ds.extractfile(tarinfo).read()
407
412
  elif archive_path.suffix == ".zip":
408
413
  with zipfile.ZipFile(archive_path) as zip_ds:
409
- name_list = [f.filename for f in zip_ds.filelist]
410
- band_name = list(filter(regex.match, name_list))[0]
411
- file_str = zip_ds.read(band_name)
414
+ if file_list is None:
415
+ file_list = [f.filename for f in zip_ds.filelist]
416
+ name = list(filter(regex.match, file_list))[0]
417
+ file_str = zip_ds.read(name)
412
418
 
413
419
  elif archive_path.suffix == ".tar.gz":
414
420
  raise TypeError(
@@ -426,7 +432,9 @@ def read_archived_file(archive_path: AnyPathStrType, regex: str) -> bytes:
426
432
  return file_str
427
433
 
428
434
 
429
- def read_archived_xml(archive_path: AnyPathStrType, xml_regex: str) -> etree._Element:
435
+ def read_archived_xml(
436
+ archive_path: AnyPathStrType, xml_regex: str, file_list: list = None
437
+ ) -> etree._Element:
430
438
  """
431
439
  Read archived XML from :code:`zip` or :code:`tar` archives.
432
440
 
@@ -435,6 +443,7 @@ def read_archived_xml(archive_path: AnyPathStrType, xml_regex: str) -> etree._El
435
443
  Args:
436
444
  archive_path (AnyPathStrType): Archive path
437
445
  xml_regex (str): XML regex (used by re) as it can be found in the getmembers() list
446
+ file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
438
447
 
439
448
  Returns:
440
449
  etree._Element: XML file
@@ -445,12 +454,14 @@ def read_archived_xml(archive_path: AnyPathStrType, xml_regex: str) -> etree._El
445
454
  >>> read_archived_xml(arch_path, file_regex)
446
455
  <Element LANDSAT_METADATA_FILE at 0x1c90007f8c8>
447
456
  """
448
- xml_bytes = read_archived_file(archive_path, xml_regex)
457
+ xml_bytes = read_archived_file(archive_path, xml_regex, file_list=file_list)
449
458
 
450
459
  return etree.fromstring(xml_bytes)
451
460
 
452
461
 
453
- def read_archived_html(archive_path: AnyPathStrType, regex: str) -> html.HtmlElement:
462
+ def read_archived_html(
463
+ archive_path: AnyPathStrType, regex: str, file_list: list = None
464
+ ) -> html.HtmlElement:
454
465
  """
455
466
  Read archived HTML from :code:`zip` or :code:`tar` archives.
456
467
 
@@ -459,6 +470,7 @@ def read_archived_html(archive_path: AnyPathStrType, regex: str) -> html.HtmlEle
459
470
  Args:
460
471
  archive_path (AnyPathStrType): Archive path
461
472
  regex (str): HTML regex (used by re) as it can be found in the getmembers() list
473
+ file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
462
474
 
463
475
  Returns:
464
476
  html._Element: HTML file
@@ -469,7 +481,7 @@ def read_archived_html(archive_path: AnyPathStrType, regex: str) -> html.HtmlEle
469
481
  >>> read_archived_html(arch_path, file_regex)
470
482
  <Element html at 0x1c90007f8c8>
471
483
  """
472
- html_bytes = read_archived_file(archive_path, regex)
484
+ html_bytes = read_archived_file(archive_path, regex, file_list=file_list)
473
485
 
474
486
  return html.fromstring(html_bytes)
475
487
 
@@ -188,6 +188,7 @@ def get_archived_path(
188
188
  file_regex: str,
189
189
  as_list: bool = False,
190
190
  case_sensitive: bool = False,
191
+ file_list: list = None,
191
192
  ) -> Union[list, AnyPathType]:
192
193
  """
193
194
  Get archived file path from inside the archive.
@@ -202,6 +203,7 @@ def get_archived_path(
202
203
  file_regex (str): File regex (used by re) as it can be found in the getmembers() list
203
204
  as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
204
205
  case_sensitive (bool): If true, the regex is case-sensitive.
206
+ file_list (list): List of files to get archived from. Optional, if not given it will be re-computed.
205
207
 
206
208
  Returns:
207
209
  Union[list, str]: Path from inside the zipfile
@@ -214,7 +216,10 @@ def get_archived_path(
214
216
  """
215
217
  # Get file list
216
218
  archive_path = AnyPath(archive_path)
217
- file_list = get_archived_file_list(archive_path)
219
+
220
+ # Offer the ability to give the file list directly, as this operation is expensive when done with large archives stored on the cloud
221
+ if file_list is None:
222
+ file_list = get_archived_file_list(archive_path)
218
223
 
219
224
  # Search for file
220
225
  regex = (
@@ -236,7 +241,10 @@ def get_archived_path(
236
241
 
237
242
 
238
243
  def get_archived_rio_path(
239
- archive_path: AnyPathStrType, file_regex: str, as_list: bool = False
244
+ archive_path: AnyPathStrType,
245
+ file_regex: str,
246
+ as_list: bool = False,
247
+ file_list: list = None,
240
248
  ) -> Union[list, AnyPathType]:
241
249
  """
242
250
  Get archived file path from inside the archive, to be read with rasterio:
@@ -260,6 +268,7 @@ def get_archived_rio_path(
260
268
  archive_path (AnyPathStrType): Archive path
261
269
  file_regex (str): File regex (used by re) as it can be found in the getmembers() list
262
270
  as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
271
+ file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
263
272
 
264
273
  Returns:
265
274
  Union[list, str]: Band path that can be read by rasterio
@@ -283,7 +292,9 @@ def get_archived_rio_path(
283
292
  raise TypeError("Only .zip and .tar files can be read from inside its archive.")
284
293
 
285
294
  # Search for file
286
- archived_band_paths = get_archived_path(archive_path, file_regex, as_list=True)
295
+ archived_band_paths = get_archived_path(
296
+ archive_path, file_regex, as_list=True, file_list=file_list
297
+ )
287
298
 
288
299
  # Convert to rio path
289
300
  if is_cloud_path(archive_path):
@@ -422,7 +422,8 @@ def read(
422
422
  archive_regex (str): [Archive only] Regex for the wanted vector inside the archive
423
423
  window (Any): Anything that can be returned as a bbox (i.e. path, gpd.GeoPandas, Iterable, ...).
424
424
  In case of an iterable, assumption is made it corresponds to geographic bounds. Mimics :code:`rasters.read(..., window=)`. If given, :code:`bbox` is ignored.
425
- **kwargs: Additional arguments used in gpd.read_file
425
+ **kwargs: Additional arguments used in gpd.read_file.
426
+ You can also give :code:`file_list`, the list of files of the archive to get the vector from, as this operation is expensive when done with large archives stored on the cloud.
426
427
 
427
428
  Returns:
428
429
  gpd.GeoDataFrame: Read vector as a GeoDataFrame
@@ -477,7 +478,9 @@ def read(
477
478
  # Manage archive case
478
479
  if vector_path.suffix in [".tar", ".zip"]:
479
480
  prefix = vector_path.suffix[-3:]
480
- file_list = path.get_archived_file_list(vector_path)
481
+ file_list = kwargs.pop(
482
+ "file_list", path.get_archived_file_list(vector_path)
483
+ )
481
484
 
482
485
  try:
483
486
  regex = re.compile(archive_regex)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sertit
3
- Version: 1.44.1.dev0
3
+ Version: 1.44.1.dev1
4
4
  Summary: ('SERTIT python library for generic tools',)
5
5
  Home-page: UNKNOWN
6
6
  Author: ICube-SERTIT
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes