datamarket 0.9.16__tar.gz → 0.9.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {datamarket-0.9.16 → datamarket-0.9.18}/PKG-INFO +1 -1
  2. {datamarket-0.9.16 → datamarket-0.9.18}/pyproject.toml +1 -1
  3. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/peerdb.py +40 -5
  4. {datamarket-0.9.16 → datamarket-0.9.18}/LICENSE +0 -0
  5. {datamarket-0.9.16 → datamarket-0.9.18}/README.md +0 -0
  6. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/__init__.py +0 -0
  7. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/__init__.py +0 -0
  8. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/alchemy.py +0 -0
  9. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/aws.py +0 -0
  10. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/drive.py +0 -0
  11. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/ftp.py +0 -0
  12. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/nominatim.py +0 -0
  13. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/proxy.py +0 -0
  14. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/interfaces/tinybird.py +0 -0
  15. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/params/__init__.py +0 -0
  16. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/params/nominatim.py +0 -0
  17. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/utils/__init__.py +0 -0
  18. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/utils/airflow.py +0 -0
  19. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/utils/alchemy.py +0 -0
  20. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/utils/main.py +0 -0
  21. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/utils/selenium.py +0 -0
  22. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/utils/soda.py +0 -0
  23. {datamarket-0.9.16 → datamarket-0.9.18}/src/datamarket/utils/typer.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: datamarket
- Version: 0.9.16
+ Version: 0.9.18
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
  License: GPL-3.0-or-later
  Author: DataMarket
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "datamarket"
- version = "0.9.16"
+ version = "0.9.18"
  description = "Utilities that integrate advanced scraping knowledge into just one library."
  authors = ["DataMarket <techsupport@datamarket.es>"]
  license = "GPL-3.0-or-later"
@@ -18,10 +18,13 @@ from .alchemy import AlchemyInterface
  ########################################################################################################################
  # EXCEPTIONS

+
  class DatabaseNotConnectedError(Exception):
      """Custom error for when database is not connected."""
+
      pass

+
  ########################################################################################################################
  # CLASSES

@@ -78,10 +81,37 @@ class PostgresPeer:
  with self.engine.connect() as conn:
      conn.execute(text(f"DROP PUBLICATION IF EXISTS {schema_name}_peerdb"))

-     table_list = ", ".join([f"{schema_name}.{table}" for table in table_names])
+     table_list = []
+     for table in table_names:
+         full_table_name = f'"{schema_name}"."{table}"'
+
+         # Check current replica identity
+         query = text("""
+             SELECT CASE c.relreplident
+                 WHEN 'd' THEN 'DEFAULT'
+                 WHEN 'n' THEN 'NOTHING'
+                 WHEN 'f' THEN 'FULL'
+                 WHEN 'i' THEN 'INDEX'
+             END AS replica_identity
+             FROM pg_class c
+             JOIN pg_namespace n ON c.relnamespace = n.oid
+             WHERE c.relname = :table_name
+             AND n.nspname = :schema_name;
+         """)
+         result = conn.execute(query, {"table_name": table, "schema_name": schema_name}).scalar_one_or_none()
+
+         if result != "FULL":
+             logger.info(f"Setting REPLICA IDENTITY FULL for table: {full_table_name}")
+             conn.execute(text(f"ALTER TABLE {full_table_name} REPLICA IDENTITY FULL;"))
+         else:
+             logger.info(f"REPLICA IDENTITY for table {full_table_name} is already FULL. Skipping ALTER TABLE.")
+
+         table_list.append(full_table_name)
+
+     table_list_str = ", ".join(table_list)
      conn.execute(
          text(f"""
-             CREATE PUBLICATION {schema_name}_peerdb FOR TABLE {table_list};
+             CREATE PUBLICATION {schema_name}_peerdb FOR TABLE {table_list_str};
          """)
      )
      conn.commit()
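
The substantive change in this hunk is that the publication setup now quotes each table as "schema"."table", checks pg_class.relreplident first, and only issues ALTER TABLE ... REPLICA IDENTITY FULL when the table is not already FULL; FULL replica identity is what lets logical replication (and therefore PeerDB CDC) emit complete old row images for UPDATE and DELETE. A minimal standalone sketch of the same check is shown below; the connection URL, schema and table names are placeholders, not values taken from this package:

    from sqlalchemy import create_engine, text

    # Placeholder connection URL, schema and table; adjust to your own database.
    engine = create_engine("postgresql+psycopg2://user:pass@localhost:5432/mydb")
    SCHEMA, TABLE = "public", "events"

    with engine.connect() as conn:
        # pg_class.relreplident is 'f' when REPLICA IDENTITY FULL is already set.
        replident = conn.execute(
            text(
                "SELECT c.relreplident FROM pg_class c "
                "JOIN pg_namespace n ON c.relnamespace = n.oid "
                "WHERE c.relname = :t AND n.nspname = :s"
            ),
            {"t": TABLE, "s": SCHEMA},
        ).scalar_one_or_none()

        if replident != "f":
            conn.execute(text(f'ALTER TABLE "{SCHEMA}"."{TABLE}" REPLICA IDENTITY FULL'))
            conn.commit()
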
@@ -302,12 +332,17 @@ class PeerDBInterface:
      logger.debug(f"Making API request to PeerDB endpoint: {endpoint}")
      try:
          r = requests.post(url, headers=headers, json=payload, timeout=30)
+         response = r.json()
          r.raise_for_status()
          logger.debug(f"API request to {endpoint} completed successfully")
-         return r.json()
+         return response
      except HTTPError as e:
          logger.error(f"HTTP error occurred: {e}")
          logger.error(f"Response JSON: {r.json() if 'r' in locals() else 'N/A'}")
+
+         if "no rows in result set" in response.get("message", ""):
+             return {"currentFlowState": "STATUS_UNKNOWN"}
+
          raise

  def _resolve_host_mapping(self, host):
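
The API-request change parses the response body before raising for status and, when an HTTPError carries PeerDB's "no rows in result set" message (typically returned when the queried mirror does not exist yet), returns {"currentFlowState": "STATUS_UNKNOWN"} instead of propagating the error. A self-contained sketch of that pattern follows, using a hypothetical helper name, URL and payload rather than the package's actual endpoint handling:

    import requests
    from requests.exceptions import HTTPError

    def get_mirror_status(url, payload, headers=None):
        """Hypothetical helper mirroring the diff's error-handling pattern."""
        r = requests.post(url, headers=headers, json=payload, timeout=30)
        try:
            response = r.json()  # PeerDB returns a JSON body even for error responses
        except ValueError:
            response = {}
        try:
            r.raise_for_status()
            return response
        except HTTPError:
            # A missing mirror surfaces as an HTTP error whose JSON message
            # mentions "no rows in result set"; map it to an unknown flow state.
            if "no rows in result set" in response.get("message", ""):
                return {"currentFlowState": "STATUS_UNKNOWN"}
            raise
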
@@ -319,11 +354,11 @@ class PeerDBInterface:
  if not self.docker_host_mapping or not host:
      return host

- if host in ['localhost', '127.0.0.1']:
+ if host in ["localhost", "127.0.0.1"]:
      logger.debug(f"Mapping host {host} to {self.docker_host_mapping} for Docker environment")
      return self.docker_host_mapping

- url_pattern = r'(localhost|127\.0\.0\.1)'
+ url_pattern = r"(localhost|127\.0\.0\.1)"
  match = re.search(url_pattern, host)
  if match:
      original_host = match.group(1)
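
The final hunk only switches _resolve_host_mapping to double-quoted string and raw-string literals; the behaviour of remapping localhost addresses to a Docker-reachable host is unchanged. The hunk is cut off before the substitution step, so the sketch below fills that in with an assumed re.sub call and an assumed default of host.docker.internal; both are illustrations, not the package's actual values:

    import re

    def resolve_host_mapping(host, docker_host_mapping="host.docker.internal"):
        """Rewrite localhost-style hosts so a containerised PeerDB can reach them."""
        if not docker_host_mapping or not host:
            return host

        # Exact localhost values map straight to the configured Docker host.
        if host in ("localhost", "127.0.0.1"):
            return docker_host_mapping

        # For longer strings (e.g. "localhost:5432"), replace the embedded
        # localhost part while keeping the rest of the value intact.
        return re.sub(r"(localhost|127\.0\.0\.1)", docker_host_mapping, host)
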