datamarket 0.9.17__tar.gz → 0.9.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- {datamarket-0.9.17 → datamarket-0.9.19}/PKG-INFO +3 -1
- {datamarket-0.9.17 → datamarket-0.9.19}/pyproject.toml +3 -1
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/peerdb.py +28 -3
- {datamarket-0.9.17 → datamarket-0.9.19}/LICENSE +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/README.md +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/__init__.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/__init__.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/alchemy.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/aws.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/drive.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/ftp.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/nominatim.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/proxy.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/interfaces/tinybird.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/params/__init__.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/params/nominatim.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/utils/__init__.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/utils/airflow.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/utils/alchemy.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/utils/main.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/utils/selenium.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/utils/soda.py +0 -0
- {datamarket-0.9.17 → datamarket-0.9.19}/src/datamarket/utils/typer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.9.17
|
|
3
|
+
Version: 0.9.19
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
5
|
License: GPL-3.0-or-later
|
|
6
6
|
Author: DataMarket
|
|
@@ -16,6 +16,7 @@ Provides-Extra: alchemy
|
|
|
16
16
|
Provides-Extra: aws
|
|
17
17
|
Provides-Extra: azure-storage-blob
|
|
18
18
|
Provides-Extra: boto3
|
|
19
|
+
Provides-Extra: camoufox
|
|
19
20
|
Provides-Extra: chompjs
|
|
20
21
|
Provides-Extra: click
|
|
21
22
|
Provides-Extra: clickhouse-driver
|
|
@@ -64,6 +65,7 @@ Requires-Dist: SQLAlchemy (>=2.0.0,<3.0.0) ; extra == "alchemy"
|
|
|
64
65
|
Requires-Dist: azure-storage-blob (>=12.0.0,<13.0.0) ; extra == "azure-storage-blob"
|
|
65
66
|
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
66
67
|
Requires-Dist: boto3 (>=1.35.0,<1.36.0) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
|
|
68
|
+
Requires-Dist: camoufox[geoip] (>=0.4.11,<0.5.0) ; extra == "camoufox"
|
|
67
69
|
Requires-Dist: chompjs (>=1.0.0,<2.0.0) ; extra == "chompjs"
|
|
68
70
|
Requires-Dist: click (>=8.0.0,<9.0.0) ; extra == "click"
|
|
69
71
|
Requires-Dist: clickhouse-driver (>=0.2.0,<0.3.0) ; extra == "clickhouse-driver" or extra == "peerdb"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "datamarket"
|
|
3
|
-
version = "0.9.17"
|
|
3
|
+
version = "0.9.19"
|
|
4
4
|
description = "Utilities that integrate advanced scraping knowledge into just one library."
|
|
5
5
|
authors = ["DataMarket <techsupport@datamarket.es>"]
|
|
6
6
|
license = "GPL-3.0-or-later"
|
|
@@ -70,6 +70,7 @@ dnspython = { version = "^2.0.0", optional = true }
|
|
|
70
70
|
openpyxl = { version = "^3.0.0", optional = true }
|
|
71
71
|
httpx = { extras = ["http2"], version = "~0.28.0", optional = true }
|
|
72
72
|
SQLAlchemy = { version = "^2.0.0", optional = true }
|
|
73
|
+
camoufox = { extras = ["geoip"], version = "~0.4.11", optional = true }
|
|
73
74
|
|
|
74
75
|
[tool.poetry.extras]
|
|
75
76
|
boto3 = ["boto3"]
|
|
@@ -115,6 +116,7 @@ google-auth-oauthlib = ["google-auth-oauthlib"]
|
|
|
115
116
|
dnspython = ["dnspython"]
|
|
116
117
|
openpyxl = ["openpyxl"]
|
|
117
118
|
httpx = ["httpx"]
|
|
119
|
+
camoufox = ["camoufox"]
|
|
118
120
|
|
|
119
121
|
# Interface groups
|
|
120
122
|
aws = ["boto3"]
|
|
@@ -84,8 +84,28 @@ class PostgresPeer:
|
|
|
84
84
|
table_list = []
|
|
85
85
|
for table in table_names:
|
|
86
86
|
full_table_name = f'"{schema_name}"."{table}"'
|
|
87
|
-
|
|
88
|
-
|
|
87
|
+
|
|
88
|
+
# Check current replica identity
|
|
89
|
+
query = text("""
|
|
90
|
+
SELECT CASE c.relreplident
|
|
91
|
+
WHEN 'd' THEN 'DEFAULT'
|
|
92
|
+
WHEN 'n' THEN 'NOTHING'
|
|
93
|
+
WHEN 'f' THEN 'FULL'
|
|
94
|
+
WHEN 'i' THEN 'INDEX'
|
|
95
|
+
END AS replica_identity
|
|
96
|
+
FROM pg_class c
|
|
97
|
+
JOIN pg_namespace n ON c.relnamespace = n.oid
|
|
98
|
+
WHERE c.relname = :table_name
|
|
99
|
+
AND n.nspname = :schema_name;
|
|
100
|
+
""")
|
|
101
|
+
result = conn.execute(query, {"table_name": table, "schema_name": schema_name}).scalar_one_or_none()
|
|
102
|
+
|
|
103
|
+
if result != "FULL":
|
|
104
|
+
logger.info(f"Setting REPLICA IDENTITY FULL for table: {full_table_name}")
|
|
105
|
+
conn.execute(text(f"ALTER TABLE {full_table_name} REPLICA IDENTITY FULL;"))
|
|
106
|
+
else:
|
|
107
|
+
logger.info(f"REPLICA IDENTITY for table {full_table_name} is already FULL. Skipping ALTER TABLE.")
|
|
108
|
+
|
|
89
109
|
table_list.append(full_table_name)
|
|
90
110
|
|
|
91
111
|
table_list_str = ", ".join(table_list)
|
|
@@ -312,12 +332,17 @@ class PeerDBInterface:
|
|
|
312
332
|
logger.debug(f"Making API request to PeerDB endpoint: {endpoint}")
|
|
313
333
|
try:
|
|
314
334
|
r = requests.post(url, headers=headers, json=payload, timeout=30)
|
|
335
|
+
response = r.json()
|
|
315
336
|
r.raise_for_status()
|
|
316
337
|
logger.debug(f"API request to {endpoint} completed successfully")
|
|
317
|
-
return
|
|
338
|
+
return response
|
|
318
339
|
except HTTPError as e:
|
|
319
340
|
logger.error(f"HTTP error occurred: {e}")
|
|
320
341
|
logger.error(f"Response JSON: {r.json() if 'r' in locals() else 'N/A'}")
|
|
342
|
+
|
|
343
|
+
if "no rows in result set" in response.get("message", ""):
|
|
344
|
+
return {"currentFlowState": "STATUS_UNKNOWN"}
|
|
345
|
+
|
|
321
346
|
raise
|
|
322
347
|
|
|
323
348
|
def _resolve_host_mapping(self, host):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|