datamarket-0.6.0-py3-none-any.whl → datamarket-0.10.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. (The original page linked to more details at this point.)
- datamarket/__init__.py +0 -1
- datamarket/exceptions/__init__.py +1 -0
- datamarket/exceptions/main.py +118 -0
- datamarket/interfaces/alchemy.py +1934 -25
- datamarket/interfaces/aws.py +81 -14
- datamarket/interfaces/azure.py +127 -0
- datamarket/interfaces/drive.py +60 -10
- datamarket/interfaces/ftp.py +37 -14
- datamarket/interfaces/llm.py +1220 -0
- datamarket/interfaces/nominatim.py +314 -42
- datamarket/interfaces/peerdb.py +272 -104
- datamarket/interfaces/proxy.py +354 -50
- datamarket/interfaces/tinybird.py +7 -15
- datamarket/params/nominatim.py +439 -0
- datamarket/utils/__init__.py +1 -1
- datamarket/utils/airflow.py +10 -7
- datamarket/utils/alchemy.py +2 -1
- datamarket/utils/logs.py +88 -0
- datamarket/utils/main.py +138 -10
- datamarket/utils/nominatim.py +201 -0
- datamarket/utils/playwright/__init__.py +0 -0
- datamarket/utils/playwright/async_api.py +274 -0
- datamarket/utils/playwright/sync_api.py +281 -0
- datamarket/utils/requests.py +655 -0
- datamarket/utils/selenium.py +6 -12
- datamarket/utils/strings/__init__.py +1 -0
- datamarket/utils/strings/normalization.py +217 -0
- datamarket/utils/strings/obfuscation.py +153 -0
- datamarket/utils/strings/standardization.py +40 -0
- datamarket/utils/typer.py +2 -1
- datamarket/utils/types.py +1 -0
- datamarket-0.10.3.dist-info/METADATA +172 -0
- datamarket-0.10.3.dist-info/RECORD +38 -0
- {datamarket-0.6.0.dist-info → datamarket-0.10.3.dist-info}/WHEEL +1 -2
- datamarket-0.6.0.dist-info/METADATA +0 -49
- datamarket-0.6.0.dist-info/RECORD +0 -24
- datamarket-0.6.0.dist-info/top_level.txt +0 -1
- {datamarket-0.6.0.dist-info → datamarket-0.10.3.dist-info/licenses}/LICENSE +0 -0
|
datamarket/interfaces/nominatim.py (+314 -42)
@@ -1,111 +1,383 @@
|
|
|
1
1
|
########################################################################################################################
|
|
2
2
|
# IMPORTS
|
|
3
3
|
|
|
4
|
+
import gettext
|
|
4
5
|
import logging
|
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
5
7
|
|
|
8
|
+
import pycountry
|
|
6
9
|
import requests
|
|
10
|
+
from geopy.distance import geodesic
|
|
11
|
+
from jellyfish import jaro_winkler_similarity
|
|
7
12
|
|
|
8
|
-
from ..params.nominatim import
|
|
13
|
+
from ..params.nominatim import (
|
|
14
|
+
CITY_TO_PROVINCE,
|
|
15
|
+
MADRID_DISTRICT_DIRECT_PATCH,
|
|
16
|
+
MADRID_DISTRICT_QUARTER_PATCH,
|
|
17
|
+
MADRID_QUARTER_DIRECT_PATCH,
|
|
18
|
+
POSTCODES,
|
|
19
|
+
)
|
|
20
|
+
from ..utils.nominatim import standardize_admin_division
|
|
21
|
+
from ..utils.strings import normalize
|
|
22
|
+
|
|
23
|
+
########################################################################################################################
|
|
24
|
+
# PARAMETERS
|
|
25
|
+
|
|
26
|
+
JARO_WINKLER_THRESHOLD = 0.85
|
|
27
|
+
CLOSE_KM = 2.0
|
|
9
28
|
|
|
10
29
|
########################################################################################################################
|
|
11
30
|
# CLASSES
|
|
12
31
|
|
|
13
32
|
logger = logging.getLogger(__name__)
|
|
33
|
+
spanish = gettext.translation("iso3166-1", pycountry.LOCALES_DIR, languages=["es"])
|
|
34
|
+
spanish.install()
|
|
14
35
|
|
|
15
36
|
|
|
16
37
|
class GeoNames:
|
|
17
|
-
def __init__(self, endpoint):
|
|
38
|
+
def __init__(self, endpoint: str) -> None:
|
|
18
39
|
self.endpoint = endpoint
|
|
19
40
|
|
|
20
41
|
@staticmethod
|
|
21
|
-
def validate_postcode(postcode):
|
|
42
|
+
def validate_postcode(postcode: Union[int, str]) -> Optional[str]:
|
|
22
43
|
if isinstance(postcode, int):
|
|
23
44
|
postcode = str(postcode)
|
|
24
45
|
|
|
25
46
|
if postcode and len(postcode) == 5 and postcode[:2] in POSTCODES:
|
|
26
47
|
return postcode
|
|
27
|
-
|
|
48
|
+
|
|
28
49
|
if postcode and len(postcode) == 4:
|
|
29
50
|
postcode = f"0{postcode}"
|
|
30
51
|
if postcode[:2] in POSTCODES:
|
|
31
52
|
return postcode
|
|
32
53
|
|
|
33
54
|
@staticmethod
|
|
34
|
-
def get_province_from_postcode(postcode):
|
|
55
|
+
def get_province_from_postcode(postcode: Optional[str]) -> Optional[str]:
|
|
35
56
|
if postcode:
|
|
36
57
|
return POSTCODES[postcode[:2]]
|
|
37
58
|
|
|
38
|
-
def reverse(self, lat, lon):
|
|
39
|
-
return requests.get(f"{self.endpoint}/reverse?lat={lat}&lon={lon}").json()
|
|
59
|
+
def reverse(self, lat: Union[float, str], lon: Union[float, str]) -> Dict[str, Any]:
|
|
60
|
+
return requests.get(f"{self.endpoint}/reverse?lat={lat}&lon={lon}", timeout=30).json()
|
|
40
61
|
|
|
41
62
|
|
|
42
63
|
class Nominatim:
|
|
43
|
-
def __init__(self, nominatim_endpoint, geonames_endpoint):
|
|
64
|
+
def __init__(self, nominatim_endpoint: str, geonames_endpoint: str) -> None:
|
|
44
65
|
self.endpoint = nominatim_endpoint
|
|
45
66
|
self.geonames = GeoNames(geonames_endpoint)
|
|
46
67
|
|
|
47
68
|
@staticmethod
|
|
48
|
-
def
|
|
69
|
+
def _get_attribute(raw_json: Dict[str, Any], keys: List[str]) -> Any:
|
|
49
70
|
for key in keys:
|
|
50
71
|
if key in raw_json:
|
|
51
72
|
return raw_json[key]
|
|
52
73
|
|
|
53
|
-
def
|
|
54
|
-
|
|
74
|
+
def _calculate_distance(
|
|
75
|
+
self, lat_str: Optional[str], lon_str: Optional[str], input_coords: Tuple[float, float]
|
|
76
|
+
) -> float:
|
|
77
|
+
dist = float("inf")
|
|
78
|
+
if lat_str and lon_str:
|
|
79
|
+
try:
|
|
80
|
+
coords = (float(lat_str), float(lon_str))
|
|
81
|
+
dist = geodesic(input_coords, coords).km
|
|
82
|
+
except (ValueError, TypeError):
|
|
83
|
+
logger.warning("Invalid coordinates for distance calculation.")
|
|
84
|
+
return dist
|
|
55
85
|
|
|
56
|
-
def
|
|
57
|
-
|
|
86
|
+
def _parse_nominatim_result(self, nominatim_raw_json: Dict[str, Any]) -> Dict[str, Optional[str]]:
|
|
87
|
+
raw_address = nominatim_raw_json.get("address", {})
|
|
58
88
|
|
|
59
|
-
|
|
60
|
-
|
|
89
|
+
postcode_str = str(raw_address.get("postcode", ""))
|
|
90
|
+
postcode = self.geonames.validate_postcode(postcode_str)
|
|
61
91
|
|
|
62
|
-
|
|
63
|
-
|
|
92
|
+
city = self._get_attribute(raw_address, ["city", "town", "village"])
|
|
93
|
+
district, quarter = self._get_district_quarter(raw_address)
|
|
64
94
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
95
|
+
return {
|
|
96
|
+
"country": raw_address.get("country"),
|
|
97
|
+
"country_code": (raw_address.get("country_code") or "").lower(),
|
|
98
|
+
"state": raw_address.get("state"),
|
|
99
|
+
"province": raw_address.get("province") or CITY_TO_PROVINCE.get(city),
|
|
100
|
+
"city": city,
|
|
101
|
+
"postcode": postcode,
|
|
102
|
+
"district": district,
|
|
103
|
+
"quarter": quarter,
|
|
104
|
+
"street": raw_address.get("road"),
|
|
105
|
+
"number": raw_address.get("house_number"),
|
|
106
|
+
}
|
|
68
107
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
108
|
+
def _parse_geonames_result(self, geonames_raw_json: Dict[str, Any]) -> Dict[str, Optional[str]]:
|
|
109
|
+
geonames_country_code_str = geonames_raw_json.get("country_code")
|
|
110
|
+
country_name = None
|
|
111
|
+
if geonames_country_code_str:
|
|
112
|
+
try:
|
|
113
|
+
country_obj = pycountry.countries.get(alpha_2=geonames_country_code_str.upper())
|
|
114
|
+
if country_obj:
|
|
115
|
+
country_name = spanish.gettext(country_obj.name)
|
|
116
|
+
except LookupError:
|
|
117
|
+
logger.warning(f"Country name not found for code: {geonames_country_code_str} using pycountry.")
|
|
72
118
|
|
|
73
|
-
|
|
119
|
+
postcode_str = str(geonames_raw_json.get("postal_code", ""))
|
|
120
|
+
postcode = self.geonames.validate_postcode(postcode_str)
|
|
121
|
+
province = self.geonames.get_province_from_postcode(postcode) if postcode else None
|
|
122
|
+
city = geonames_raw_json.get("place_name")
|
|
74
123
|
|
|
75
|
-
district, quarter = self.get_district_quarter(raw_json)
|
|
76
124
|
return {
|
|
77
|
-
"country":
|
|
78
|
-
"country_code":
|
|
79
|
-
"state":
|
|
80
|
-
"province":
|
|
125
|
+
"country": country_name,
|
|
126
|
+
"country_code": (geonames_country_code_str or "").lower(),
|
|
127
|
+
"state": geonames_raw_json.get("community"),
|
|
128
|
+
"province": province,
|
|
81
129
|
"city": city,
|
|
82
130
|
"postcode": postcode,
|
|
83
|
-
"district":
|
|
84
|
-
"quarter":
|
|
85
|
-
"street":
|
|
86
|
-
"number":
|
|
131
|
+
"district": None,
|
|
132
|
+
"quarter": None,
|
|
133
|
+
"street": None,
|
|
134
|
+
"number": None,
|
|
87
135
|
}
|
|
88
136
|
|
|
89
|
-
def
|
|
90
|
-
|
|
91
|
-
|
|
137
|
+
def _get_empty_address_result(self) -> Dict[str, None]:
|
|
138
|
+
return {
|
|
139
|
+
"country": None,
|
|
140
|
+
"country_code": None,
|
|
141
|
+
"state": None,
|
|
142
|
+
"province": None,
|
|
143
|
+
"city": None,
|
|
144
|
+
"postcode": None,
|
|
145
|
+
"district": None,
|
|
146
|
+
"quarter": None,
|
|
147
|
+
"street": None,
|
|
148
|
+
"number": None,
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
def _select_postcode_and_derived_province(
|
|
152
|
+
self,
|
|
153
|
+
parsed_nominatim_result: Dict[str, Optional[str]],
|
|
154
|
+
parsed_geonames_result: Dict[str, Optional[str]],
|
|
155
|
+
nominatim_address_province_raw: Optional[str],
|
|
156
|
+
dist_nominatim: float, # distance Nominatim ↔ input (km)
|
|
157
|
+
dist_geonames: float, # distance GeoNames ↔ input (km)
|
|
158
|
+
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
|
|
159
|
+
"""
|
|
160
|
+
Decide the authoritative postcode, the province derived from it and the associated state.
|
|
161
|
+
|
|
162
|
+
Strategy:
|
|
163
|
+
1. Derive province from each postcode.
|
|
164
|
+
2. Validate each postcode–province pair:
|
|
165
|
+
• Nominatim: compare with raw province string (if present).
|
|
166
|
+
• GeoNames: multi-step validation (raw province, then Nominatim-derived
|
|
167
|
+
province when Nominatim coords are close, then distance fallback).
|
|
168
|
+
3. Return the postcode/province that passes validation with precedence:
|
|
169
|
+
Nominatim > GeoNames. Returns (None, None, None) if neither passes.
|
|
170
|
+
"""
|
|
171
|
+
|
|
172
|
+
# --- Extract postcodes ---
|
|
173
|
+
nominatim_postcode = parsed_nominatim_result.get("postcode")
|
|
174
|
+
geonames_postcode = parsed_geonames_result.get("postcode")
|
|
175
|
+
|
|
176
|
+
# --- Province derived from each postcode ---
|
|
177
|
+
province_from_nominatim_pc = self.geonames.get_province_from_postcode(nominatim_postcode)
|
|
178
|
+
province_from_geonames_pc = self.geonames.get_province_from_postcode(geonames_postcode)
|
|
179
|
+
|
|
180
|
+
# --- Normalised strings for similarity comparisons ---
|
|
181
|
+
norm_raw_province = normalize(nominatim_address_province_raw) if nominatim_address_province_raw else ""
|
|
182
|
+
norm_province_from_nominatim_pc = normalize(province_from_nominatim_pc) if province_from_nominatim_pc else ""
|
|
183
|
+
norm_province_from_geonames_pc = normalize(province_from_geonames_pc) if province_from_geonames_pc else ""
|
|
184
|
+
|
|
185
|
+
# --- Distance heuristics ---
|
|
186
|
+
nominatim_is_close = dist_nominatim < CLOSE_KM
|
|
187
|
+
geonames_is_close = dist_geonames < CLOSE_KM
|
|
188
|
+
|
|
189
|
+
# --- Validate Nominatim postcode ---
|
|
190
|
+
nominatim_pc_valid = False
|
|
191
|
+
if norm_province_from_nominatim_pc and norm_raw_province:
|
|
192
|
+
nominatim_pc_valid = (
|
|
193
|
+
jaro_winkler_similarity(norm_province_from_nominatim_pc, norm_raw_province) > JARO_WINKLER_THRESHOLD
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
# --- Validate GeoNames postcode ---
|
|
197
|
+
geonames_pc_valid = False
|
|
198
|
+
|
|
199
|
+
# 1) Compare with raw province string (if exists)
|
|
200
|
+
if norm_province_from_geonames_pc and norm_raw_province:
|
|
201
|
+
geonames_pc_valid = (
|
|
202
|
+
jaro_winkler_similarity(norm_province_from_geonames_pc, norm_raw_province) > JARO_WINKLER_THRESHOLD
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
# 2) If no raw province, compare with province from Nominatim PC **only when** Nominatim is close
|
|
206
|
+
if not geonames_pc_valid and not norm_raw_province and nominatim_is_close: # noqa: SIM102
|
|
207
|
+
if norm_province_from_geonames_pc and norm_province_from_nominatim_pc:
|
|
208
|
+
geonames_pc_valid = (
|
|
209
|
+
jaro_winkler_similarity(norm_province_from_geonames_pc, norm_province_from_nominatim_pc)
|
|
210
|
+
> JARO_WINKLER_THRESHOLD
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
# 3) Fallback: accept GeoNames PC if its coordinates are very close
|
|
214
|
+
if not geonames_pc_valid and geonames_is_close and geonames_postcode:
|
|
215
|
+
geonames_pc_valid = True
|
|
216
|
+
|
|
217
|
+
# --- Select authoritative tuple ---
|
|
218
|
+
postcode = None
|
|
219
|
+
province = None
|
|
220
|
+
state = None
|
|
221
|
+
|
|
222
|
+
if nominatim_pc_valid:
|
|
223
|
+
postcode = nominatim_postcode
|
|
224
|
+
province = province_from_nominatim_pc
|
|
225
|
+
state = parsed_nominatim_result.get("state")
|
|
226
|
+
if not state and geonames_pc_valid:
|
|
227
|
+
state = parsed_geonames_result.get("state")
|
|
228
|
+
elif geonames_pc_valid:
|
|
229
|
+
postcode = geonames_postcode
|
|
230
|
+
province = province_from_geonames_pc
|
|
231
|
+
state = parsed_geonames_result.get("state")
|
|
232
|
+
if not state and nominatim_pc_valid:
|
|
233
|
+
state = parsed_nominatim_result.get("state")
|
|
92
234
|
|
|
93
|
-
|
|
94
|
-
district = quarter
|
|
95
|
-
quarter = None
|
|
235
|
+
return postcode, province, state
|
|
96
236
|
|
|
237
|
+
def _select_final_result(
|
|
238
|
+
self,
|
|
239
|
+
parsed_nominatim_result: Dict[str, Optional[str]],
|
|
240
|
+
parsed_geonames_result: Dict[str, Optional[str]],
|
|
241
|
+
dist_nominatim: float,
|
|
242
|
+
dist_geonames: float,
|
|
243
|
+
authoritative_postcode: Optional[str],
|
|
244
|
+
authoritative_province_from_postcode: Optional[str],
|
|
245
|
+
authoritative_state: Optional[str],
|
|
246
|
+
) -> Dict[str, Optional[str]]:
|
|
247
|
+
"""
|
|
248
|
+
Choose the address block (Nominatim vs GeoNames) based on distance,
|
|
249
|
+
then apply the authoritative postcode/province.
|
|
250
|
+
|
|
251
|
+
Rules:
|
|
252
|
+
• Pick the source with the smaller finite distance.
|
|
253
|
+
• Always overwrite 'postcode' if authoritative_postcode is present.
|
|
254
|
+
• Overwrite 'province' only when authoritative_province_from_postcode is not None.
|
|
255
|
+
• If both distances are ∞, return an empty address.
|
|
256
|
+
"""
|
|
257
|
+
|
|
258
|
+
# ------------------------------------------------------------------ #
|
|
259
|
+
# 1. Decide the base address block #
|
|
260
|
+
# ------------------------------------------------------------------ #
|
|
261
|
+
if dist_nominatim <= dist_geonames and dist_nominatim != float("inf"):
|
|
262
|
+
final_result = parsed_nominatim_result
|
|
263
|
+
elif dist_geonames < dist_nominatim and dist_geonames != float("inf"):
|
|
264
|
+
final_result = parsed_geonames_result
|
|
265
|
+
else:
|
|
266
|
+
return self._get_empty_address_result()
|
|
267
|
+
|
|
268
|
+
# ------------------------------------------------------------------ #
|
|
269
|
+
# 2. Apply authoritative postcode / province #
|
|
270
|
+
# ------------------------------------------------------------------ #
|
|
271
|
+
if authoritative_postcode:
|
|
272
|
+
final_result["postcode"] = authoritative_postcode
|
|
273
|
+
|
|
274
|
+
if authoritative_province_from_postcode:
|
|
275
|
+
final_result["province"] = authoritative_province_from_postcode
|
|
276
|
+
|
|
277
|
+
if authoritative_province_from_postcode:
|
|
278
|
+
final_result["state"] = authoritative_state
|
|
279
|
+
|
|
280
|
+
return final_result
|
|
281
|
+
|
|
282
|
+
@staticmethod
|
|
283
|
+
def _patch_district(raw_district: str, raw_quarter: str = None):
|
|
284
|
+
"""
|
|
285
|
+
Patches the district name, optionally using the quarter for specific patches.
|
|
286
|
+
"""
|
|
287
|
+
if raw_quarter:
|
|
288
|
+
# If raw_quarter is provided, use the tuple (district, quarter) as the key.
|
|
289
|
+
key = (raw_district, raw_quarter)
|
|
290
|
+
return MADRID_DISTRICT_QUARTER_PATCH.get(key, raw_district)
|
|
291
|
+
else:
|
|
292
|
+
return MADRID_DISTRICT_DIRECT_PATCH.get(raw_district, raw_district)
|
|
293
|
+
|
|
294
|
+
@staticmethod
|
|
295
|
+
def _patch_quarter(raw_quarter: str):
|
|
296
|
+
"""
|
|
297
|
+
Patches the quarter name directly.
|
|
298
|
+
"""
|
|
299
|
+
return MADRID_QUARTER_DIRECT_PATCH.get(raw_quarter, raw_quarter)
|
|
300
|
+
|
|
301
|
+
def _get_district_quarter(self, raw_json: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
|
|
302
|
+
district = self._get_attribute(raw_json, ["city_district", "suburb", "borough"])
|
|
303
|
+
quarter = self._get_attribute(raw_json, ["quarter", "neighbourhood"])
|
|
304
|
+
if (city := raw_json.get("city")) and city == "Madrid":
|
|
305
|
+
mid_district = self._patch_district(district)
|
|
306
|
+
quarter = self._patch_quarter(quarter)
|
|
307
|
+
district = self._patch_district(mid_district, quarter)
|
|
97
308
|
return district, quarter
|
|
98
309
|
|
|
310
|
+
def geocode(self, address: str) -> List[Dict[str, Any]]:
|
|
311
|
+
return requests.get(f"{self.endpoint}/search?q={address}&format=json", timeout=30).json()
|
|
312
|
+
|
|
313
|
+
def geocode_parsed(self, address: str) -> Optional[Dict[str, Optional[str]]]:
|
|
314
|
+
results = self.geocode(address)
|
|
315
|
+
|
|
316
|
+
if results:
|
|
317
|
+
return self.reverse_parsed(results[0]["lat"], results[0]["lon"])
|
|
318
|
+
|
|
319
|
+
def reverse(self, lat: Union[float, str], lon: Union[float, str]) -> Dict[str, Any]:
|
|
320
|
+
return requests.get(f"{self.endpoint}/reverse?lat={lat}&lon={lon}&format=json", timeout=30).json()
|
|
321
|
+
|
|
322
|
+
def reverse_parsed(self, lat: Union[float, str], lon: Union[float, str]) -> Dict[str, Optional[str]]:
|
|
323
|
+
nominatim_response = self.reverse(lat, lon)
|
|
324
|
+
geonames_response = self.geonames.reverse(lat, lon)
|
|
325
|
+
|
|
326
|
+
# Initial parsing
|
|
327
|
+
parsed_nominatim_result = self._parse_nominatim_result(nominatim_response)
|
|
328
|
+
parsed_geonames_result = self._parse_geonames_result(geonames_response)
|
|
329
|
+
|
|
330
|
+
# Calculate distances
|
|
331
|
+
nominatim_response_lat = nominatim_response.get("lat")
|
|
332
|
+
nominatim_response_lon = nominatim_response.get("lon")
|
|
333
|
+
geonames_response_lat = geonames_response.get("lat")
|
|
334
|
+
geonames_response_lon = geonames_response.get("lon")
|
|
335
|
+
|
|
336
|
+
input_coords = None
|
|
337
|
+
try:
|
|
338
|
+
input_coords = (float(lat), float(lon))
|
|
339
|
+
except (ValueError, TypeError):
|
|
340
|
+
logger.error(f"Invalid input coordinates for distance calculation: lat={lat}, lon={lon}")
|
|
341
|
+
return self._get_empty_address_result()
|
|
342
|
+
|
|
343
|
+
dist_nominatim = self._calculate_distance(nominatim_response_lat, nominatim_response_lon, input_coords)
|
|
344
|
+
dist_geonames = self._calculate_distance(geonames_response_lat, geonames_response_lon, input_coords)
|
|
345
|
+
|
|
346
|
+
# Determine authoritative postcode
|
|
347
|
+
nominatim_province = parsed_nominatim_result.get("province")
|
|
348
|
+
selected_postcode, selected_province_from_postcode, selected_state = self._select_postcode_and_derived_province(
|
|
349
|
+
parsed_nominatim_result, parsed_geonames_result, nominatim_province, dist_nominatim, dist_geonames
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
# Select final result
|
|
353
|
+
final_result = self._select_final_result(
|
|
354
|
+
parsed_nominatim_result,
|
|
355
|
+
parsed_geonames_result,
|
|
356
|
+
dist_nominatim,
|
|
357
|
+
dist_geonames,
|
|
358
|
+
selected_postcode,
|
|
359
|
+
selected_province_from_postcode,
|
|
360
|
+
selected_state,
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
# Standardize
|
|
364
|
+
final_result["province"] = standardize_admin_division(
|
|
365
|
+
name=final_result["province"], level="province", country_code=final_result["country_code"]
|
|
366
|
+
)
|
|
367
|
+
final_result["state"] = standardize_admin_division(
|
|
368
|
+
name=final_result["state"], level="state", country_code=final_result["country_code"]
|
|
369
|
+
)
|
|
370
|
+
return final_result
|
|
371
|
+
|
|
99
372
|
|
|
100
373
|
class NominatimInterface(Nominatim):
|
|
101
|
-
def __init__(self, config):
|
|
374
|
+
def __init__(self, config: Dict[str, Any]) -> None:
|
|
102
375
|
if "osm" in config:
|
|
103
376
|
self.config = config["osm"]
|
|
104
377
|
|
|
105
378
|
self.nominatim_endpoint = self.config["nominatim_endpoint"]
|
|
106
379
|
self.geonames_endpoint = self.config["geonames_endpoint"]
|
|
107
380
|
|
|
381
|
+
super().__init__(self.nominatim_endpoint, self.geonames_endpoint)
|
|
108
382
|
else:
|
|
109
383
|
logger.warning("no osm section in config")
|
|
110
|
-
|
|
111
|
-
super().__init__(self.nominatim_endpoint, self.geonames_endpoint)
|