datamarket 0.9.27__tar.gz → 0.9.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- {datamarket-0.9.27 → datamarket-0.9.29}/PKG-INFO +4 -3
- {datamarket-0.9.27 → datamarket-0.9.29}/pyproject.toml +4 -3
- datamarket-0.9.29/src/datamarket/interfaces/nominatim.py +294 -0
- datamarket-0.9.27/src/datamarket/interfaces/nominatim.py +0 -110
- {datamarket-0.9.27 → datamarket-0.9.29}/LICENSE +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/README.md +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/__init__.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/interfaces/__init__.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/interfaces/alchemy.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/interfaces/aws.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/interfaces/drive.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/interfaces/ftp.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/interfaces/peerdb.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/interfaces/proxy.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/interfaces/tinybird.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/params/__init__.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/params/nominatim.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/__init__.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/airflow.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/alchemy.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/main.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/selenium.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/soda.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/strings.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/typer.py +0 -0
- {datamarket-0.9.27 → datamarket-0.9.29}/src/datamarket/utils/types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.9.27
|
|
3
|
+
Version: 0.9.29
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
5
|
License: GPL-3.0-or-later
|
|
6
6
|
Author: DataMarket
|
|
@@ -28,7 +28,6 @@ Provides-Extra: duckduckgo-search
|
|
|
28
28
|
Provides-Extra: fake-useragent
|
|
29
29
|
Provides-Extra: geoalchemy2
|
|
30
30
|
Provides-Extra: geopandas
|
|
31
|
-
Provides-Extra: geopy
|
|
32
31
|
Provides-Extra: google-api-python-client
|
|
33
32
|
Provides-Extra: google-auth-httplib2
|
|
34
33
|
Provides-Extra: google-auth-oauthlib
|
|
@@ -77,13 +76,14 @@ Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
|
|
|
77
76
|
Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
|
|
78
77
|
Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
|
|
79
78
|
Requires-Dist: geopandas (>=1.0.0,<2.0.0) ; extra == "geopandas"
|
|
80
|
-
Requires-Dist: geopy (>=2.0.0,<3.0.0) ; extra == "geopy"
|
|
79
|
+
Requires-Dist: geopy (>=2.0.0,<3.0.0)
|
|
81
80
|
Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0) ; extra == "google-api-python-client"
|
|
82
81
|
Requires-Dist: google-auth-httplib2 (>=0.2.0,<0.3.0) ; extra == "google-auth-httplib2"
|
|
83
82
|
Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0) ; extra == "google-auth-oauthlib"
|
|
84
83
|
Requires-Dist: html2text (>=2024.0.0,<2025.0.0) ; extra == "html2text"
|
|
85
84
|
Requires-Dist: httpx[http2] (>=0.28.0,<0.29.0) ; extra == "httpx"
|
|
86
85
|
Requires-Dist: inflection (>=0.5.0,<0.6.0)
|
|
86
|
+
Requires-Dist: jellyfish (>=1.0.0,<2.0.0)
|
|
87
87
|
Requires-Dist: jinja2 (>=3.0.0,<4.0.0)
|
|
88
88
|
Requires-Dist: json5 (>=0.10.0,<0.11.0) ; extra == "json5"
|
|
89
89
|
Requires-Dist: lxml[html-clean] (>=5.0.0,<6.0.0) ; extra == "lxml"
|
|
@@ -98,6 +98,7 @@ Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
|
98
98
|
Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
|
|
99
99
|
Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0)
|
|
100
100
|
Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "pyarrow"
|
|
101
|
+
Requires-Dist: pycountry (>=24.0.0,<25.0.0)
|
|
101
102
|
Requires-Dist: pydrive2 (>=1.0.0,<2.0.0) ; extra == "pydrive2" or extra == "drive"
|
|
102
103
|
Requires-Dist: pymupdf (>=1.0.0,<2.0.0) ; extra == "pymupdf"
|
|
103
104
|
Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "datamarket"
|
|
3
|
-
version = "0.9.27"
|
|
3
|
+
version = "0.9.29"
|
|
4
4
|
description = "Utilities that integrate advanced scraping knowledge into just one library."
|
|
5
5
|
authors = ["DataMarket <techsupport@datamarket.es>"]
|
|
6
6
|
license = "GPL-3.0-or-later"
|
|
@@ -30,6 +30,9 @@ inflection = "~0.5.0"
|
|
|
30
30
|
python-string-utils = "^1.0.0"
|
|
31
31
|
unidecode = "^1.0.0"
|
|
32
32
|
numpy = "^2.0.0"
|
|
33
|
+
pycountry = "^24.0.0"
|
|
34
|
+
geopy = "^2.0.0"
|
|
35
|
+
jellyfish = "^1.0.0"
|
|
33
36
|
|
|
34
37
|
boto3 = { version = "~1.35.0", optional = true }
|
|
35
38
|
lxml = { extras = ["html-clean"], version = "^5.0.0", optional = true }
|
|
@@ -48,7 +51,6 @@ stem = { version = "^1.0.0", optional = true }
|
|
|
48
51
|
click = { version = "^8.0.0", optional = true }
|
|
49
52
|
rapidfuzz = { version = "^3.0.0", optional = true }
|
|
50
53
|
demjson3 = { version = "^3.0.0", optional = true }
|
|
51
|
-
geopy = { version = "^2.0.0", optional = true }
|
|
52
54
|
nodriver = { version = "~0.44", optional = true }
|
|
53
55
|
retry = { version = "~0.9.0", optional = true }
|
|
54
56
|
shapely = { version = "^2.0.0", optional = true }
|
|
@@ -93,7 +95,6 @@ stem = ["stem"]
|
|
|
93
95
|
click = ["click"]
|
|
94
96
|
rapidfuzz = ["rapidfuzz"]
|
|
95
97
|
demjson3 = ["demjson3"]
|
|
96
|
-
geopy = ["geopy"]
|
|
97
98
|
nodriver = ["nodriver"]
|
|
98
99
|
undetected-chromedriver = ["undetected-chromedriver"]
|
|
99
100
|
retry = ["retry"]
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
########################################################################################################################
|
|
2
|
+
# IMPORTS
|
|
3
|
+
|
|
4
|
+
import gettext
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
7
|
+
|
|
8
|
+
import pycountry
|
|
9
|
+
import requests
|
|
10
|
+
from geopy.distance import geodesic
|
|
11
|
+
from jellyfish import jaro_winkler_similarity
|
|
12
|
+
|
|
13
|
+
from ..params.nominatim import POSTCODES
|
|
14
|
+
from ..utils.strings import normalize
|
|
15
|
+
|
|
16
|
+
########################################################################################################################
|
|
17
|
+
# PARAMETERS
|
|
18
|
+
|
|
19
|
+
JARO_WINKLER_THRESHOLD = 0.85
|
|
20
|
+
|
|
21
|
+
########################################################################################################################
|
|
22
|
+
# CLASSES
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)

# Spanish catalogue for the ISO 3166-1 country names bundled with pycountry.
# fallback=True returns a NullTranslations object when the "es" catalogue is
# not available, so importing this module no longer fails with
# FileNotFoundError; country names are then returned untranslated.
spanish = gettext.translation("iso3166-1", pycountry.LOCALES_DIR, languages=["es"], fallback=True)
# NOTE(review): install() binds ``_`` in builtins for the whole process. The code
# in this module calls ``spanish.gettext`` directly, so this may be unnecessary;
# kept because other modules could rely on the side effect.
spanish.install()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class GeoNames:
    """Client for the reverse-lookup HTTP service at ``endpoint``, plus postcode helpers.

    The postcode helpers use the module-level ``POSTCODES`` mapping, which is
    keyed by the first two characters of a five-character postcode.
    """

    def __init__(self, endpoint: str) -> None:
        """Store the base URL to which ``reverse`` appends ``/reverse``."""
        self.endpoint = endpoint

    @staticmethod
    def validate_postcode(postcode: Union[int, str]) -> Optional[str]:
        """Return *postcode* as a five-character string, or ``None`` if it is not valid.

        Integers are converted to strings. A four-character value is prefixed
        with ``"0"`` before validation (presumably to restore a leading zero
        lost in a numeric conversion). A value is valid when it is five
        characters long and its first two characters are a key of ``POSTCODES``.
        Anything that is neither ``int`` nor ``str`` yields ``None`` instead of
        raising ``TypeError`` from ``len()``.
        """
        if isinstance(postcode, int):
            postcode = str(postcode)
        if not isinstance(postcode, str):
            return None

        if len(postcode) == 4:
            postcode = f"0{postcode}"

        if len(postcode) == 5 and postcode[:2] in POSTCODES:
            return postcode
        return None

    @staticmethod
    def get_province_from_postcode(postcode: Optional[str]) -> Optional[str]:
        """Return the ``POSTCODES`` entry for the postcode's two-character prefix.

        Returns ``None`` for an empty/``None`` postcode and for a prefix that is
        not in ``POSTCODES`` (previously the latter raised ``KeyError``).
        """
        if not postcode:
            return None
        return POSTCODES.get(postcode[:2])

    def reverse(self, lat: Union[float, str], lon: Union[float, str]) -> Dict[str, Any]:
        """GET ``{endpoint}/reverse`` for the given coordinates and return the decoded JSON body."""
        # Let requests build and encode the query string rather than interpolating values.
        response = requests.get(
            f"{self.endpoint}/reverse",
            params={"lat": lat, "lon": lon},
            timeout=30,
        )
        return response.json()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Nominatim:
    """Geocoder that reconciles a Nominatim server with a GeoNames helper service.

    ``reverse_parsed`` queries both services for the same coordinates, keeps the
    answer whose reported position is geodesically closest to the input, and
    selects the postcode whose derived province agrees with the province name
    found in Nominatim's raw address.
    """

    def __init__(self, nominatim_endpoint: str, geonames_endpoint: str) -> None:
        """Store the Nominatim base URL and create the ``GeoNames`` helper client."""
        self.endpoint = nominatim_endpoint
        self.geonames = GeoNames(geonames_endpoint)

    @staticmethod
    def _get_attribute(raw_json: Dict[str, Any], keys: List[str]) -> Any:
        """Return the value of the first key of *keys* present in *raw_json*, else ``None``."""
        for key in keys:
            if key in raw_json:
                return raw_json[key]
        return None

    @staticmethod
    def _province_matches(candidate: Optional[str], reference: Optional[str]) -> bool:
        """Return True when both names are non-empty and their normalised forms are similar.

        Similarity is the Jaro-Winkler score of the two strings after the
        project's ``normalize`` helper; it must exceed ``JARO_WINKLER_THRESHOLD``.
        """
        norm_candidate = normalize(candidate) if candidate else ""
        norm_reference = normalize(reference) if reference else ""
        if not norm_candidate or not norm_reference:
            return False
        return jaro_winkler_similarity(norm_candidate, norm_reference) > JARO_WINKLER_THRESHOLD

    def _calculate_distance(
        self, lat_str: Optional[str], lon_str: Optional[str], input_coords: Tuple[float, float]
    ) -> float:
        """Return the geodesic distance in km from *input_coords* to (*lat_str*, *lon_str*).

        Returns ``float("inf")`` when either coordinate is missing or cannot be
        converted to ``float`` (a warning is logged in the latter case).
        """
        dist = float("inf")
        if lat_str and lon_str:
            try:
                coords = (float(lat_str), float(lon_str))
                dist = geodesic(input_coords, coords).km
            except (ValueError, TypeError):
                logger.warning("Invalid coordinates for distance calculation.")
        return dist

    def _parse_nominatim_result(self, nominatim_raw_json: Dict[str, Any]) -> Dict[str, Optional[str]]:
        """Flatten the ``address`` object of a Nominatim reverse response into the common result dict."""
        raw_address = nominatim_raw_json.get("address", {})

        postcode = self.geonames.validate_postcode(str(raw_address.get("postcode", "")))
        city = self._get_attribute(raw_address, ["city", "town", "village"])
        district, quarter = self._get_district_quarter(raw_address)

        return {
            "country": raw_address.get("country"),
            "country_code": (raw_address.get("country_code") or "").lower(),
            "state": raw_address.get("state"),
            "province": raw_address.get("province"),
            "city": city,
            "postcode": postcode,
            "district": district,
            "quarter": quarter,
            "street": raw_address.get("road"),
            "number": raw_address.get("house_number"),
        }

    def _parse_geonames_result(self, geonames_raw_json: Dict[str, Any]) -> Dict[str, Optional[str]]:
        """Map a GeoNames reverse response onto the common result dict.

        The country name is looked up in pycountry by alpha-2 code and passed
        through the module's Spanish translation. District, quarter, street and
        number are always ``None`` because this method reads no such fields.
        """
        geonames_country_code_str = geonames_raw_json.get("country_code")
        country_name = None
        if geonames_country_code_str:
            try:
                country_obj = pycountry.countries.get(alpha_2=geonames_country_code_str.upper())
                if country_obj:
                    country_name = spanish.gettext(country_obj.name)
            except LookupError:
                logger.warning(f"Country name not found for code: {geonames_country_code_str} using pycountry.")

        postcode = self.geonames.validate_postcode(str(geonames_raw_json.get("postal_code", "")))
        # get_province_from_postcode already returns None for a missing postcode.
        province = self.geonames.get_province_from_postcode(postcode)

        return {
            "country": country_name,
            "country_code": (geonames_country_code_str or "").lower(),
            "state": geonames_raw_json.get("community"),
            "province": province,
            "city": geonames_raw_json.get("place_name"),
            "postcode": postcode,
            "district": None,
            "quarter": None,
            "street": None,
            "number": None,
        }

    def _get_empty_address_result(self) -> Dict[str, None]:
        """Return a result dict with every address field set to ``None``."""
        return {
            "country": None,
            "country_code": None,
            "state": None,
            "province": None,
            "city": None,
            "postcode": None,
            "district": None,
            "quarter": None,
            "street": None,
            "number": None,
        }

    def _select_postcode_and_derived_province(
        self,
        parsed_nominatim_result: Dict[str, Optional[str]],
        parsed_geonames_result: Dict[str, Optional[str]],
        nominatim_address_province_raw: Optional[str],
    ) -> Tuple[Optional[str], Optional[str]]:
        """Choose the postcode whose derived province agrees with Nominatim's raw province.

        Each source's postcode is mapped to a province via
        ``GeoNames.get_province_from_postcode`` and compared with
        *nominatim_address_province_raw*. Nominatim's postcode wins when its
        province matches; otherwise GeoNames' postcode is used when its province
        matches; otherwise ``(None, None)`` is returned.
        """
        nominatim_postcode = parsed_nominatim_result.get("postcode")
        geonames_postcode = parsed_geonames_result.get("postcode")

        province_from_nominatim_postcode = self.geonames.get_province_from_postcode(nominatim_postcode)
        province_from_geonames_postcode = self.geonames.get_province_from_postcode(geonames_postcode)

        if self._province_matches(province_from_nominatim_postcode, nominatim_address_province_raw):
            return nominatim_postcode, province_from_nominatim_postcode

        # Fall back to GeoNames only when Nominatim's own postcode did not agree.
        if self._province_matches(province_from_geonames_postcode, nominatim_address_province_raw):
            return geonames_postcode, province_from_geonames_postcode

        return None, None

    def _select_final_result(
        self,
        parsed_nominatim_result: Dict[str, Optional[str]],
        parsed_geonames_result: Dict[str, Optional[str]],
        dist_nominatim: float,
        dist_geonames: float,
        authoritative_postcode: Optional[str],
        authoritative_province_from_postcode: Optional[str],
        nominatim_address_province_raw: Optional[str],
    ) -> Dict[str, Optional[str]]:
        """Pick the closer source's result and overlay the authoritative postcode and province.

        Ties go to Nominatim. With Nominatim the province is the raw province
        from its address; with GeoNames it is the province derived from the
        authoritative postcode. When neither distance is finite the empty
        result is returned. The input dicts are not modified.
        """
        if dist_nominatim <= dist_geonames and dist_nominatim != float("inf"):
            return {
                **parsed_nominatim_result,
                "postcode": authoritative_postcode,
                "province": nominatim_address_province_raw,
            }
        if dist_geonames < dist_nominatim and dist_geonames != float("inf"):
            return {
                **parsed_geonames_result,
                "postcode": authoritative_postcode,
                "province": authoritative_province_from_postcode,
            }
        return self._get_empty_address_result()

    def _get_district_quarter(self, raw_json: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(district, quarter)`` from a Nominatim address object.

        When no district is present but a quarter is, the quarter is promoted
        to district and the quarter becomes ``None``.
        """
        district = self._get_attribute(raw_json, ["city_district", "suburb", "borough"])
        quarter = self._get_attribute(raw_json, ["quarter", "neighbourhood"])

        if not district and quarter:
            district = quarter
            quarter = None

        return district, quarter

    def geocode(self, address: str) -> List[Dict[str, Any]]:
        """GET ``{endpoint}/search`` for a free-text *address* and return the decoded JSON body."""
        # Passing the address via ``params`` URL-encodes it; interpolating it into the
        # URL let characters such as "&", "#" or "+" alter or truncate the query.
        response = requests.get(
            f"{self.endpoint}/search",
            params={"q": address, "format": "json"},
            timeout=30,
        )
        return response.json()

    def geocode_parsed(self, address: str) -> Optional[Dict[str, Optional[str]]]:
        """Geocode *address* and reverse-parse its first hit; ``None`` when there are no hits."""
        results = self.geocode(address)

        if results:
            return self.reverse_parsed(results[0]["lat"], results[0]["lon"])
        return None

    def reverse(self, lat: Union[float, str], lon: Union[float, str]) -> Dict[str, Any]:
        """GET ``{endpoint}/reverse`` for the given coordinates and return the decoded JSON body."""
        response = requests.get(
            f"{self.endpoint}/reverse",
            params={"lat": lat, "lon": lon, "format": "json"},
            timeout=30,
        )
        return response.json()

    def reverse_parsed(self, lat: Union[float, str], lon: Union[float, str]) -> Dict[str, Optional[str]]:
        """Reverse-geocode with both services and return one reconciled address dict.

        Returns the empty result (all fields ``None``) when *lat*/*lon* cannot
        be converted to ``float`` or when neither service reports usable
        coordinates.
        """
        # Validate the input before any network call: the distance comparison
        # below cannot work without numeric coordinates.
        try:
            input_coords = (float(lat), float(lon))
        except (ValueError, TypeError):
            logger.error(f"Invalid input coordinates for distance calculation: lat={lat}, lon={lon}")
            return self._get_empty_address_result()

        nominatim_response = self.reverse(lat, lon)
        geonames_response = self.geonames.reverse(lat, lon)

        # Initial parsing
        parsed_nominatim_result = self._parse_nominatim_result(nominatim_response)
        parsed_geonames_result = self._parse_geonames_result(geonames_response)

        # Determine authoritative postcode
        raw_nominatim_province = nominatim_response.get("address", {}).get("province")
        selected_postcode, selected_province_from_postcode = self._select_postcode_and_derived_province(
            parsed_nominatim_result, parsed_geonames_result, raw_nominatim_province
        )

        # Distance from the requested point to the position each service reports
        dist_nominatim = self._calculate_distance(
            nominatim_response.get("lat"), nominatim_response.get("lon"), input_coords
        )
        dist_geonames = self._calculate_distance(
            geonames_response.get("lat"), geonames_response.get("lon"), input_coords
        )

        # Select final result
        return self._select_final_result(
            parsed_nominatim_result,
            parsed_geonames_result,
            dist_nominatim,
            dist_geonames,
            selected_postcode,
            selected_province_from_postcode,
            raw_nominatim_province,
        )
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class NominatimInterface(Nominatim):
    """``Nominatim`` client whose endpoints come from the ``osm`` section of a config mapping."""

    def __init__(self, config: Dict[str, Any]) -> None:
        """Read both endpoints from ``config["osm"]`` and initialise the parent client.

        When *config* has no ``"osm"`` key only a warning is logged: the parent
        initialiser is not called and no attributes are set on the instance.
        """
        if "osm" not in config:
            logger.warning("no osm section in config")
            return

        osm_section = config["osm"]
        self.config = osm_section
        self.nominatim_endpoint = osm_section["nominatim_endpoint"]
        self.geonames_endpoint = osm_section["geonames_endpoint"]

        super().__init__(self.nominatim_endpoint, self.geonames_endpoint)
|
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
########################################################################################################################
|
|
2
|
-
# IMPORTS
|
|
3
|
-
|
|
4
|
-
import logging
|
|
5
|
-
|
|
6
|
-
import requests
|
|
7
|
-
|
|
8
|
-
from ..params.nominatim import POSTCODES
|
|
9
|
-
|
|
10
|
-
########################################################################################################################
|
|
11
|
-
# CLASSES
|
|
12
|
-
|
|
13
|
-
logger = logging.getLogger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class GeoNames:
    """Client for the reverse-lookup HTTP service at ``endpoint``, plus postcode helpers.

    The postcode helpers use the module-level ``POSTCODES`` mapping, which is
    keyed by the first two characters of a five-character postcode.
    """

    def __init__(self, endpoint):
        """Store the base URL to which ``reverse`` appends ``/reverse``."""
        self.endpoint = endpoint

    @staticmethod
    def validate_postcode(postcode):
        """Return *postcode* as a five-character string, or ``None`` if it is not valid.

        Integers are converted to strings, and a four-character value is
        prefixed with ``"0"`` before validation. A value is valid when it is
        five characters long and its first two characters are a key of
        ``POSTCODES``. Input that is neither ``int`` nor ``str`` yields ``None``
        instead of raising ``TypeError`` from ``len()``.
        """
        if isinstance(postcode, int):
            postcode = str(postcode)
        if not isinstance(postcode, str):
            return None

        if len(postcode) == 4:
            postcode = f"0{postcode}"

        if len(postcode) == 5 and postcode[:2] in POSTCODES:
            return postcode
        return None

    @staticmethod
    def get_province_from_postcode(postcode):
        """Return the ``POSTCODES`` entry for the postcode's two-character prefix.

        Returns ``None`` for an empty/``None`` postcode and for a prefix that is
        not in ``POSTCODES`` (previously the latter raised ``KeyError``).
        """
        if not postcode:
            return None
        return POSTCODES.get(postcode[:2])

    def reverse(self, lat, lon):
        """GET ``{endpoint}/reverse`` for the given coordinates and return the decoded JSON body."""
        # A timeout is required: without one, requests waits indefinitely on an
        # unresponsive server. ``params`` lets requests encode the query string.
        response = requests.get(
            f"{self.endpoint}/reverse",
            params={"lat": lat, "lon": lon},
            timeout=30,
        )
        return response.json()
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class Nominatim:
    """Geocoder that merges a Nominatim reverse lookup with a GeoNames helper service."""

    # Seconds to wait for either HTTP service before giving up.
    _TIMEOUT = 30

    def __init__(self, nominatim_endpoint, geonames_endpoint):
        """Store the Nominatim base URL and create the ``GeoNames`` helper client."""
        self.endpoint = nominatim_endpoint
        self.geonames = GeoNames(geonames_endpoint)

    @staticmethod
    def get_attribute(raw_json, keys):
        """Return the value of the first key of *keys* present in *raw_json*, else ``None``."""
        for key in keys:
            if key in raw_json:
                return raw_json[key]
        return None

    def geocode(self, address):
        """GET ``{endpoint}/search`` for a free-text *address* and return the decoded JSON body."""
        # ``params`` URL-encodes the address; interpolating it into the URL let
        # characters such as "&", "#" or "+" alter or truncate the query.
        # The timeout prevents an unresponsive server from blocking forever.
        response = requests.get(
            f"{self.endpoint}/search",
            params={"q": address, "format": "json"},
            timeout=self._TIMEOUT,
        )
        return response.json()

    def geocode_parsed(self, address):
        """Geocode *address* and reverse-parse its first hit; ``None`` when there are no hits."""
        results = self.geocode(address)

        if results:
            return self.reverse_parsed(results[0]["lat"], results[0]["lon"])
        return None

    def reverse(self, lat, lon):
        """GET ``{endpoint}/reverse`` for the given coordinates and return the decoded JSON body."""
        response = requests.get(
            f"{self.endpoint}/reverse",
            params={"lat": lat, "lon": lon, "format": "json"},
            timeout=self._TIMEOUT,
        )
        return response.json()

    def reverse_parsed(self, lat, lon):
        """Reverse-geocode with both services and return one merged address dict.

        The GeoNames postcode is preferred over Nominatim's. The province is
        derived from the chosen postcode. Nominatim supplies the remaining
        fields, with GeoNames as fallback for city, country code and state.
        """
        raw_json = self.reverse(lat, lon).get("address", {})
        geoname = self.geonames.reverse(lat, lon)

        postcode = self.geonames.validate_postcode(
            str(geoname.get("postal_code", ""))
        ) or self.geonames.validate_postcode(str(raw_json.get("postcode")))

        city = self.get_attribute(raw_json, ["city", "town", "village"]) or geoname.get("place_name")

        district, quarter = self.get_district_quarter(raw_json)
        return {
            "country": raw_json.get("country"),
            "country_code": (raw_json.get("country_code") or geoname.get("country_code") or "").lower(),
            "state": raw_json.get("state") or geoname.get("community"),
            "province": self.geonames.get_province_from_postcode(postcode),
            "city": city,
            "postcode": postcode,
            "district": district,
            "quarter": quarter,
            "street": raw_json.get("road"),
            "number": raw_json.get("house_number"),
        }

    def get_district_quarter(self, raw_json):
        """Return ``(district, quarter)`` from a Nominatim address object.

        When no district is present but a quarter is, the quarter is promoted
        to district and the quarter becomes ``None``.
        """
        district = self.get_attribute(raw_json, ["city_district", "suburb", "borough"])
        quarter = self.get_attribute(raw_json, ["quarter", "neighbourhood"])

        if not district and quarter:
            district = quarter
            quarter = None

        return district, quarter
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
class NominatimInterface(Nominatim):
    """``Nominatim`` client whose endpoints come from the ``osm`` section of a config mapping."""

    def __init__(self, config):
        """Read both endpoints from ``config["osm"]`` and initialise the parent client.

        When *config* has no ``"osm"`` key only a warning is logged: the parent
        initialiser is not called and no attributes are set on the instance.
        """
        if "osm" not in config:
            logger.warning("no osm section in config")
            return

        osm_section = config["osm"]
        self.config = osm_section
        self.nominatim_endpoint = osm_section["nominatim_endpoint"]
        self.geonames_endpoint = osm_section["geonames_endpoint"]

        super().__init__(self.nominatim_endpoint, self.geonames_endpoint)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|