datamarket 0.7.92__tar.gz → 0.7.94__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (34) hide show
  1. {datamarket-0.7.92 → datamarket-0.7.94}/PKG-INFO +2 -3
  2. {datamarket-0.7.92 → datamarket-0.7.94}/pyproject.toml +2 -3
  3. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/aws.py +13 -0
  4. {datamarket-0.7.92 → datamarket-0.7.94}/LICENSE +0 -0
  5. {datamarket-0.7.92 → datamarket-0.7.94}/README.md +0 -0
  6. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/__init__.py +0 -0
  7. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/exceptions/__init__.py +0 -0
  8. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/exceptions/main.py +0 -0
  9. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/__init__.py +0 -0
  10. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/alchemy.py +0 -0
  11. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/azure.py +0 -0
  12. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/drive.py +0 -0
  13. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/ftp.py +0 -0
  14. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/nominatim.py +0 -0
  15. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/peerdb.py +0 -0
  16. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/proxy.py +0 -0
  17. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/interfaces/tinybird.py +0 -0
  18. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/params/__init__.py +0 -0
  19. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/params/nominatim.py +0 -0
  20. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/__init__.py +0 -0
  21. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/airflow.py +0 -0
  22. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/alchemy.py +0 -0
  23. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/main.py +0 -0
  24. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/nominatim.py +0 -0
  25. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/playwright/__init__.py +0 -0
  26. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/playwright/async_api.py +0 -0
  27. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/playwright/sync_api.py +0 -0
  28. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/selenium.py +0 -0
  29. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/soda.py +0 -0
  30. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/strings/__init__.py +0 -0
  31. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/strings/normalization.py +0 -0
  32. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/strings/obfuscation.py +0 -0
  33. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/typer.py +0 -0
  34. {datamarket-0.7.92 → datamarket-0.7.94}/src/datamarket/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.7.92
3
+ Version: 0.7.94
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -52,7 +52,6 @@ Provides-Extra: pyrate-limiter
52
52
  Provides-Extra: pysocks
53
53
  Provides-Extra: pyspark
54
54
  Provides-Extra: pytest
55
- Provides-Extra: rapidfuzz
56
55
  Provides-Extra: retry
57
56
  Provides-Extra: rnet
58
57
  Provides-Extra: shapely
@@ -116,7 +115,7 @@ Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
116
115
  Requires-Dist: pyspark (>=3.0.0,<4.0.0) ; extra == "pyspark"
117
116
  Requires-Dist: pytest (>=8.0.0,<9.0.0) ; extra == "pytest"
118
117
  Requires-Dist: python-string-utils (>=1.0.0,<2.0.0)
119
- Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
118
+ Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0)
120
119
  Requires-Dist: requests (>=2.0.0,<3.0.0)
121
120
  Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
122
121
  Requires-Dist: rnet (>=2.0.0,<3.0.0) ; extra == "rnet"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.7.92"
3
+ version = "0.7.94"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -36,6 +36,7 @@ geopy = "^2.0.0"
36
36
  jellyfish = "^1.0.0"
37
37
  stem = "^1.0.0"
38
38
  babel = "^2.0.0"
39
+ rapidfuzz = "^3.0.0"
39
40
 
40
41
  boto3 = { version = "~1.35.0", optional = true }
41
42
  lxml = { extras = ["html-clean"], version = "^5.0.0", optional = true }
@@ -51,7 +52,6 @@ fake-useragent = { version = "^2.0.0", optional = true }
51
52
  pydrive2 = { version = "^1.0.0", optional = true }
52
53
  clickhouse-driver = { version = "~0.2.0", optional = true }
53
54
  click = { version = "^8.0.0", optional = true }
54
- rapidfuzz = { version = "^3.0.0", optional = true }
55
55
  demjson3 = { version = "^3.0.0", optional = true }
56
56
  nodriver = { version = "~0.44", optional = true }
57
57
  retry = { version = "~0.9.0", optional = true }
@@ -106,7 +106,6 @@ fake-useragent = ["fake-useragent"]
106
106
  pydrive2 = ["pydrive2"]
107
107
  clickhouse-driver = ["clickhouse-driver"]
108
108
  click = ["click"]
109
- rapidfuzz = ["rapidfuzz"]
110
109
  demjson3 = ["demjson3"]
111
110
  nodriver = ["nodriver"]
112
111
  undetected-chromedriver = ["undetected-chromedriver"]
@@ -100,6 +100,19 @@ class AWSInterface:
100
100
  logger.info(f"{s3_path} does not exist")
101
101
  return None
102
102
 
103
def file_exists(self, s3_path: str) -> bool:
    """Return whether an object exists at ``s3_path`` in the active bucket.

    Args:
        s3_path: Key of the object to look up inside ``self.bucket``.

    Returns:
        ``True`` if the ``head_object`` call succeeds. ``False`` if no
        bucket is selected or the service reports the key as missing.

    Raises:
        Exception: Any error other than "not found" (for example access
            denied or a connection failure) is logged and re-raised.
    """
    if not self.bucket:
        logger.warning("No active bucket selected")
        return False
    try:
        self.s3_client.head_object(Bucket=self.bucket, Key=s3_path)
    except self.s3_client.exceptions.ClientError as e:
        # A HEAD response has no body, so a missing key is reported as a
        # generic ClientError with code "404" rather than the modelled
        # NoSuchKey exception. Check the code instead of the exception
        # type; NoSuchKey subclasses ClientError and is covered here too.
        error_code = str(e.response.get("Error", {}).get("Code", ""))
        if error_code in {"404", "NoSuchKey", "NotFound"}:
            return False
        logger.error(f"Error checking existence of {s3_path}: {e}")
        raise
    except Exception as e:
        logger.error(f"Error checking existence of {s3_path}: {e}")
        raise
    return True
115
+
103
116
  def read_file_as_bytes(self, s3_path: str) -> Optional[io.BytesIO]:
104
117
  obj = self.get_file(s3_path)
105
118
  if not obj:
File without changes
File without changes