datamarket 0.9.27__py3-none-any.whl → 0.9.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/nominatim.py +101 -22
- {datamarket-0.9.27.dist-info → datamarket-0.9.28.dist-info}/METADATA +3 -3
- {datamarket-0.9.27.dist-info → datamarket-0.9.28.dist-info}/RECORD +5 -5
- {datamarket-0.9.27.dist-info → datamarket-0.9.28.dist-info}/LICENSE +0 -0
- {datamarket-0.9.27.dist-info → datamarket-0.9.28.dist-info}/WHEEL +0 -0
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
########################################################################################################################
|
|
2
2
|
# IMPORTS
|
|
3
3
|
|
|
4
|
+
import gettext
|
|
4
5
|
import logging
|
|
6
|
+
import pycountry
|
|
7
|
+
from geopy.distance import geodesic
|
|
5
8
|
|
|
6
9
|
import requests
|
|
7
10
|
|
|
@@ -11,6 +14,8 @@ from ..params.nominatim import POSTCODES
|
|
|
11
14
|
# CLASSES
|
|
12
15
|
|
|
13
16
|
logger = logging.getLogger(__name__)
|
|
17
|
+
spanish = gettext.translation("iso3166-1", pycountry.LOCALES_DIR, languages=["es"])
|
|
18
|
+
spanish.install()
|
|
14
19
|
|
|
15
20
|
|
|
16
21
|
class GeoNames:
|
|
@@ -63,28 +68,102 @@ class Nominatim:
|
|
|
63
68
|
return requests.get(f"{self.endpoint}/reverse?lat={lat}&lon={lon}&format=json").json()
|
|
64
69
|
|
|
65
70
|
def reverse_parsed(self, lat, lon):
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
"
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
71
|
+
nominatim_raw_json = self.reverse(lat, lon)
|
|
72
|
+
geonames_raw_json = self.geonames.reverse(lat, lon)
|
|
73
|
+
|
|
74
|
+
nominatim_res_lat_str = nominatim_raw_json.get("lat")
|
|
75
|
+
nominatim_res_lon_str = nominatim_raw_json.get("lon")
|
|
76
|
+
geonames_res_lat_str = geonames_raw_json.get("lat")
|
|
77
|
+
geonames_res_lon_str = geonames_raw_json.get("lon")
|
|
78
|
+
|
|
79
|
+
dist_nominatim = float("inf")
|
|
80
|
+
dist_geonames = float("inf")
|
|
81
|
+
|
|
82
|
+
try:
|
|
83
|
+
input_coords = (float(lat), float(lon))
|
|
84
|
+
except (ValueError, TypeError):
|
|
85
|
+
logger.error(f"Invalid input coordinates for distance calculation: lat={lat}, lon={lon}")
|
|
86
|
+
else:
|
|
87
|
+
if nominatim_res_lat_str and nominatim_res_lon_str:
|
|
88
|
+
try:
|
|
89
|
+
nominatim_coords = (float(nominatim_res_lat_str), float(nominatim_res_lon_str))
|
|
90
|
+
dist_nominatim = geodesic(input_coords, nominatim_coords).km
|
|
91
|
+
except (ValueError, TypeError):
|
|
92
|
+
logger.warning("Invalid Nominatim coordinates for distance calculation.")
|
|
93
|
+
|
|
94
|
+
if geonames_res_lat_str and geonames_res_lon_str:
|
|
95
|
+
try:
|
|
96
|
+
geonames_coords = (float(geonames_res_lat_str), float(geonames_res_lon_str))
|
|
97
|
+
dist_geonames = geodesic(input_coords, geonames_coords).km
|
|
98
|
+
except (ValueError, TypeError):
|
|
99
|
+
logger.warning("Invalid GeoNames coordinates for distance calculation.")
|
|
100
|
+
|
|
101
|
+
if dist_nominatim <= dist_geonames and nominatim_res_lat_str is not None and nominatim_res_lon_str is not None:
|
|
102
|
+
# Use Nominatim data
|
|
103
|
+
raw_address = nominatim_raw_json.get("address", {})
|
|
104
|
+
postcode_str = str(raw_address.get("postcode", ""))
|
|
105
|
+
postcode = self.geonames.validate_postcode(postcode_str)
|
|
106
|
+
province = self.geonames.get_province_from_postcode(postcode) if postcode else None
|
|
107
|
+
city = self.get_attribute(raw_address, ["city", "town", "village"])
|
|
108
|
+
district, quarter = self.get_district_quarter(raw_address)
|
|
109
|
+
|
|
110
|
+
return {
|
|
111
|
+
"country": raw_address.get("country"),
|
|
112
|
+
"country_code": (raw_address.get("country_code") or "").lower(),
|
|
113
|
+
"state": raw_address.get("state"),
|
|
114
|
+
"province": province,
|
|
115
|
+
"city": city,
|
|
116
|
+
"postcode": postcode,
|
|
117
|
+
"district": district,
|
|
118
|
+
"quarter": quarter,
|
|
119
|
+
"street": raw_address.get("road"),
|
|
120
|
+
"number": raw_address.get("house_number"),
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
elif dist_geonames < dist_nominatim and geonames_res_lat_str is not None and geonames_res_lon_str is not None:
|
|
124
|
+
# Use GeoNames data
|
|
125
|
+
geonames_country_code_str = geonames_raw_json.get("country_code")
|
|
126
|
+
country_name = None
|
|
127
|
+
if geonames_country_code_str:
|
|
128
|
+
try:
|
|
129
|
+
country_obj = pycountry.countries.get(alpha_2=geonames_country_code_str.upper())
|
|
130
|
+
if country_obj:
|
|
131
|
+
country_name = spanish.gettext(country_obj.name)
|
|
132
|
+
except LookupError:
|
|
133
|
+
logger.warning(f"Country name not found for code: {geonames_country_code_str} using pycountry.")
|
|
134
|
+
|
|
135
|
+
postcode_str = str(geonames_raw_json.get("postal_code", ""))
|
|
136
|
+
postcode = self.geonames.validate_postcode(postcode_str)
|
|
137
|
+
province = self.geonames.get_province_from_postcode(postcode) if postcode else None
|
|
138
|
+
city = geonames_raw_json.get("place_name")
|
|
139
|
+
|
|
140
|
+
return {
|
|
141
|
+
"country": country_name,
|
|
142
|
+
"country_code": (geonames_country_code_str or "").lower(),
|
|
143
|
+
"state": geonames_raw_json.get("community"),
|
|
144
|
+
"province": province,
|
|
145
|
+
"city": city,
|
|
146
|
+
"postcode": postcode,
|
|
147
|
+
"district": None,
|
|
148
|
+
"quarter": None,
|
|
149
|
+
"street": None,
|
|
150
|
+
"number": None,
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
else:
|
|
154
|
+
# Neither source provided valid coordinates
|
|
155
|
+
return {
|
|
156
|
+
"country": None,
|
|
157
|
+
"country_code": None,
|
|
158
|
+
"state": None,
|
|
159
|
+
"province": None,
|
|
160
|
+
"city": None,
|
|
161
|
+
"postcode": None,
|
|
162
|
+
"district": None,
|
|
163
|
+
"quarter": None,
|
|
164
|
+
"street": None,
|
|
165
|
+
"number": None,
|
|
166
|
+
}
|
|
88
167
|
|
|
89
168
|
def get_district_quarter(self, raw_json):
|
|
90
169
|
district = self.get_attribute(raw_json, ["city_district", "suburb", "borough"])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.9.27
|
|
3
|
+
Version: 0.9.28
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
5
|
License: GPL-3.0-or-later
|
|
6
6
|
Author: DataMarket
|
|
@@ -28,7 +28,6 @@ Provides-Extra: duckduckgo-search
|
|
|
28
28
|
Provides-Extra: fake-useragent
|
|
29
29
|
Provides-Extra: geoalchemy2
|
|
30
30
|
Provides-Extra: geopandas
|
|
31
|
-
Provides-Extra: geopy
|
|
32
31
|
Provides-Extra: google-api-python-client
|
|
33
32
|
Provides-Extra: google-auth-httplib2
|
|
34
33
|
Provides-Extra: google-auth-oauthlib
|
|
@@ -77,7 +76,7 @@ Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
|
|
|
77
76
|
Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
|
|
78
77
|
Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
|
|
79
78
|
Requires-Dist: geopandas (>=1.0.0,<2.0.0) ; extra == "geopandas"
|
|
80
|
-
Requires-Dist: geopy (>=2.0.0,<3.0.0) ; extra == "geopy"
|
|
79
|
+
Requires-Dist: geopy (>=2.0.0,<3.0.0)
|
|
81
80
|
Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0) ; extra == "google-api-python-client"
|
|
82
81
|
Requires-Dist: google-auth-httplib2 (>=0.2.0,<0.3.0) ; extra == "google-auth-httplib2"
|
|
83
82
|
Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0) ; extra == "google-auth-oauthlib"
|
|
@@ -98,6 +97,7 @@ Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
|
98
97
|
Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
|
|
99
98
|
Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0)
|
|
100
99
|
Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "pyarrow"
|
|
100
|
+
Requires-Dist: pycountry (>=24.0.0,<25.0.0)
|
|
101
101
|
Requires-Dist: pydrive2 (>=1.0.0,<2.0.0) ; extra == "pydrive2" or extra == "drive"
|
|
102
102
|
Requires-Dist: pymupdf (>=1.0.0,<2.0.0) ; extra == "pymupdf"
|
|
103
103
|
Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
|
|
@@ -4,7 +4,7 @@ datamarket/interfaces/alchemy.py,sha256=4q_gLKCKPK437VKOpdBKSrCyy42P_yWxIhE7KuvH
|
|
|
4
4
|
datamarket/interfaces/aws.py,sha256=7KLUeBxmPN7avEMPsu5HC_KHB1N7W6Anp2X8fo43mlw,2383
|
|
5
5
|
datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
|
|
6
6
|
datamarket/interfaces/ftp.py,sha256=o0KlJxtksbop9OjCiQRzyAa2IeG_ExVXagS6apwrAQo,1881
|
|
7
|
-
datamarket/interfaces/nominatim.py,sha256=
|
|
7
|
+
datamarket/interfaces/nominatim.py,sha256=TD4OhWdIwn53Va41BS1ugogHEZw0ANKxWIfc9G2JWLU,7280
|
|
8
8
|
datamarket/interfaces/peerdb.py,sha256=cwYwvO740GyaPo9zLAwJsf3UeJDGDiYzjQVM9Q6s-_g,23652
|
|
9
9
|
datamarket/interfaces/proxy.py,sha256=updoOStKd8-nQBbxWbnD9eOt6HksnYi-5dQ0rEySf5M,3152
|
|
10
10
|
datamarket/interfaces/tinybird.py,sha256=AYrcRGNOCoCt7ojilkWa27POROee9sTCwZ61GGHEPeM,2698
|
|
@@ -19,7 +19,7 @@ datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
|
|
|
19
19
|
datamarket/utils/strings.py,sha256=rEX9NeBG4C7RECgT0EQebgoFoxgZMy9-7EcBSxgBANU,5654
|
|
20
20
|
datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
|
|
21
21
|
datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
|
|
22
|
-
datamarket-0.9.27.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
23
|
-
datamarket-0.9.
|
|
24
|
-
datamarket-0.9.27.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
25
|
-
datamarket-0.9.27.dist-info/RECORD,,
|
|
22
|
+
datamarket-0.9.28.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
23
|
+
datamarket-0.9.28.dist-info/METADATA,sha256=5V61eLQ9HR_8Tl7J0sRKwR_mzKbJctIjvEebNtPtqug,6546
|
|
24
|
+
datamarket-0.9.28.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
25
|
+
datamarket-0.9.28.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|