quantaroute-geocoding 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of quantaroute-geocoding might be problematic. Click here for more details.
- quantaroute_geocoding/__init__.py +32 -0
- quantaroute_geocoding/cli.py +281 -0
- quantaroute_geocoding/client.py +312 -0
- quantaroute_geocoding/csv_processor.py +415 -0
- quantaroute_geocoding/exceptions.py +42 -0
- quantaroute_geocoding/offline.py +280 -0
- quantaroute_geocoding-1.0.0.dist-info/METADATA +349 -0
- quantaroute_geocoding-1.0.0.dist-info/RECORD +12 -0
- quantaroute_geocoding-1.0.0.dist-info/WHEEL +5 -0
- quantaroute_geocoding-1.0.0.dist-info/entry_points.txt +2 -0
- quantaroute_geocoding-1.0.0.dist-info/licenses/LICENSE +21 -0
- quantaroute_geocoding-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CSV processing utilities for bulk geocoding operations
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import os
|
|
7
|
+
from typing import Dict, List, Optional, Callable, Union
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
import time
|
|
10
|
+
|
|
11
|
+
from .client import QuantaRouteClient
|
|
12
|
+
from .offline import OfflineProcessor
|
|
13
|
+
from .exceptions import ValidationError, QuantaRouteError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CSVProcessor:
    """
    CSV processor for bulk geocoding operations

    Supports both online API processing and offline DigiPin operations.
    Every public ``process_*`` method reads an input CSV, appends result
    columns, writes the output CSV and returns a statistics dictionary.
    """

    # Upper bound applied to ``batch_size`` (API limit).
    MAX_BATCH_SIZE = 100

    def __init__(
        self,
        api_key: Optional[str] = None,
        use_offline: bool = False,
        batch_size: int = 50,
        delay_between_batches: float = 1.0
    ):
        """
        Initialize CSV processor

        Args:
            api_key: QuantaRoute API key (required for online processing)
            use_offline: Use offline processing when possible
            batch_size: Number of records to process in each batch
                (clamped to the range 1..MAX_BATCH_SIZE)
            delay_between_batches: Delay in seconds between API batches
                (negative values are treated as 0)

        Raises:
            ValidationError: If neither an API key nor offline mode is given
        """
        self.use_offline = use_offline
        # A batch size below 1 would make range() fail or yield no batches.
        self.batch_size = max(1, min(batch_size, self.MAX_BATCH_SIZE))
        # time.sleep() rejects negative durations.
        self.delay_between_batches = max(delay_between_batches, 0)

        if not use_offline and not api_key:
            raise ValidationError("API key is required for online processing")

        self.client = QuantaRouteClient(api_key) if api_key else None
        self.offline_processor = OfflineProcessor() if use_offline else None

    def process_geocoding_csv(
        self,
        input_file: str,
        output_file: str,
        address_column: str = 'address',
        city_column: Optional[str] = None,
        state_column: Optional[str] = None,
        pincode_column: Optional[str] = None,
        country_column: Optional[str] = None,
        progress_callback: Optional[Callable] = None
    ) -> Dict:
        """
        Process CSV file for address geocoding

        Args:
            input_file: Path to input CSV file
            output_file: Path to output CSV file
            address_column: Name of address column
            city_column: Name of city column (optional)
            state_column: Name of state column (optional)
            pincode_column: Name of pincode column (optional)
            country_column: Name of country column (optional)
            progress_callback: Optional callback invoked after every batch as
                ``callback(processed, total, successes, errors)``

        Returns:
            Dict containing processing statistics

        Raises:
            ValidationError: If the input file is missing, unreadable or
                lacks the address column
            QuantaRouteError: If the output file cannot be written
        """
        df = self._load_csv(input_file, {'Address': address_column})

        # Initialize result columns
        for column in (
            'digipin', 'latitude', 'longitude',
            'confidence', 'geocoding_status', 'geocoding_error'
        ):
            df[column] = None

        total_rows = len(df)
        processed_count = 0
        success_count = 0
        error_count = 0
        use_api = bool(self.client) and not self.use_offline

        # Process in batches
        with tqdm(total=total_rows, desc="Geocoding addresses") as pbar:
            for start_idx in range(0, total_rows, self.batch_size):
                end_idx = min(start_idx + self.batch_size, total_rows)
                batch_df = df.iloc[start_idx:end_idx]

                if use_api:
                    # Online API processing
                    batch_results = self._process_batch_online(
                        batch_df,
                        address_column,
                        city_column,
                        state_column,
                        pincode_column,
                        country_column
                    )
                else:
                    # Offline mode cannot geocode addresses; every row
                    # receives an explanatory error.
                    batch_results = self._process_batch_offline_geocoding(
                        batch_df, address_column
                    )

                # Update dataframe. Rows are addressed by their index label
                # (df.at is label-based) and zip() keeps a short or long
                # result list from spilling into other batches.
                handled = 0
                for label, result in zip(batch_df.index, batch_results):
                    if result['success']:
                        data = result['data']
                        coords = data.get('coordinates') or {}
                        df.at[label, 'digipin'] = data.get('digipin')
                        df.at[label, 'latitude'] = coords.get('latitude')
                        df.at[label, 'longitude'] = coords.get('longitude')
                        df.at[label, 'confidence'] = data.get('confidence')
                        df.at[label, 'geocoding_status'] = 'success'
                        success_count += 1
                    else:
                        df.at[label, 'geocoding_status'] = 'error'
                        df.at[label, 'geocoding_error'] = result['error']
                        error_count += 1
                    handled += 1

                processed_count += handled
                pbar.update(handled)

                if progress_callback:
                    progress_callback(processed_count, total_rows, success_count, error_count)

                # Delay between batches to respect rate limits; pointless
                # when no API call was made.
                if use_api and end_idx < total_rows and self.delay_between_batches > 0:
                    time.sleep(self.delay_between_batches)

        self._save_csv(df, output_file)
        return self._summarize(
            total_rows, processed_count, success_count, error_count, output_file
        )

    def process_coordinates_to_digipin_csv(
        self,
        input_file: str,
        output_file: str,
        latitude_column: str = 'latitude',
        longitude_column: str = 'longitude',
        progress_callback: Optional[Callable] = None
    ) -> Dict:
        """
        Process CSV file to convert coordinates to DigiPin codes

        Args:
            input_file: Path to input CSV file
            output_file: Path to output CSV file
            latitude_column: Name of latitude column
            longitude_column: Name of longitude column
            progress_callback: Optional callback invoked after every row as
                ``callback(processed, total, successes, errors)``

        Returns:
            Dict containing processing statistics

        Raises:
            ValidationError: If the input file is missing, unreadable or
                lacks one of the coordinate columns
            QuantaRouteError: If the output file cannot be written
        """
        df = self._load_csv(
            input_file,
            {'Latitude': latitude_column, 'Longitude': longitude_column}
        )

        # Initialize result columns
        for column in ('digipin', 'processing_status', 'processing_error'):
            df[column] = None

        def convert(row) -> Dict:
            lat = float(row[latitude_column])
            lon = float(row[longitude_column])
            # Empty CSV cells arrive as NaN, which float() accepts silently.
            if pd.isna(lat) or pd.isna(lon):
                raise ValidationError("Latitude or longitude value is missing")

            if self.use_offline and self.offline_processor:
                # Use offline processing
                result = self.offline_processor.coordinates_to_digipin(lat, lon)
            elif self.client:
                # Use API
                result = self.client.coordinates_to_digipin(lat, lon)
            else:
                raise QuantaRouteError("No processing method available")

            return {'digipin': result['digipin']}

        total_rows = len(df)
        processed_count, success_count, error_count = self._convert_rows(
            df, "Converting coordinates to DigiPin", convert, progress_callback
        )

        self._save_csv(df, output_file)
        return self._summarize(
            total_rows, processed_count, success_count, error_count, output_file
        )

    def process_digipin_to_coordinates_csv(
        self,
        input_file: str,
        output_file: str,
        digipin_column: str = 'digipin',
        progress_callback: Optional[Callable] = None
    ) -> Dict:
        """
        Process CSV file to convert DigiPin codes to coordinates

        Args:
            input_file: Path to input CSV file
            output_file: Path to output CSV file
            digipin_column: Name of DigiPin column
            progress_callback: Optional callback invoked after every row as
                ``callback(processed, total, successes, errors)``

        Returns:
            Dict containing processing statistics

        Raises:
            ValidationError: If the input file is missing, unreadable or
                lacks the DigiPin column
            QuantaRouteError: If the output file cannot be written
        """
        df = self._load_csv(input_file, {'DigiPin': digipin_column})

        # Initialize result columns
        for column in ('latitude', 'longitude', 'processing_status', 'processing_error'):
            df[column] = None

        def convert(row) -> Dict:
            raw_code = row[digipin_column]
            # Without this check an empty cell would be looked up as "nan".
            if pd.isna(raw_code):
                raise ValidationError("DigiPin value is missing")
            digipin_code = str(raw_code).strip()

            if self.use_offline and self.offline_processor:
                # Use offline processing
                result = self.offline_processor.digipin_to_coordinates(digipin_code)
                coords = result['coordinates']
                return {
                    'latitude': coords['latitude'],
                    'longitude': coords['longitude']
                }
            if self.client:
                # Use API; absent coordinates are written as None.
                result = self.client.reverse_geocode(digipin_code)
                coords = result.get('coordinates') or {}
                return {
                    'latitude': coords.get('latitude'),
                    'longitude': coords.get('longitude')
                }
            raise QuantaRouteError("No processing method available")

        total_rows = len(df)
        processed_count, success_count, error_count = self._convert_rows(
            df, "Converting DigiPin to coordinates", convert, progress_callback
        )

        self._save_csv(df, output_file)
        return self._summarize(
            total_rows, processed_count, success_count, error_count, output_file
        )

    @staticmethod
    def _load_csv(input_file: str, required_columns: Dict[str, str]) -> pd.DataFrame:
        """Read the input CSV and verify that the required columns exist.

        Args:
            input_file: Path to input CSV file
            required_columns: Maps a human-readable label (used in the error
                message) to the column name that must be present

        Raises:
            ValidationError: If the file is missing, unreadable or a
                required column is absent
        """
        if not os.path.exists(input_file):
            raise ValidationError(f"Input file not found: {input_file}")

        try:
            df = pd.read_csv(input_file)
        except Exception as e:
            raise ValidationError(f"Failed to read CSV file: {str(e)}") from e

        for label, column in required_columns.items():
            if column not in df.columns:
                raise ValidationError(f"{label} column '{column}' not found in CSV")

        return df

    @staticmethod
    def _save_csv(df: pd.DataFrame, output_file: str) -> None:
        """Write the dataframe to ``output_file`` without the index column.

        Raises:
            QuantaRouteError: If writing fails
        """
        try:
            df.to_csv(output_file, index=False)
        except Exception as e:
            raise QuantaRouteError(f"Failed to save output file: {str(e)}") from e

    @staticmethod
    def _summarize(
        total_rows: int,
        processed_count: int,
        success_count: int,
        error_count: int,
        output_file: str
    ) -> Dict:
        """Build the statistics dictionary returned by the process_* methods."""
        return {
            'total_rows': total_rows,
            'processed_rows': processed_count,
            'success_count': success_count,
            'error_count': error_count,
            'success_rate': success_count / total_rows if total_rows > 0 else 0,
            'output_file': output_file
        }

    @staticmethod
    def _convert_rows(
        df: pd.DataFrame,
        description: str,
        convert: Callable,
        progress_callback: Optional[Callable]
    ):
        """Apply ``convert`` to every row and record the outcome in ``df``.

        ``convert(row)`` returns a mapping of column name to value. The
        values are written only after the conversion succeeded, so a failing
        row never ends up half-filled. Any exception marks the row as an
        error instead of aborting the run.

        Returns:
            Tuple ``(processed, succeeded, failed)`` of row counts
        """
        total_rows = len(df)
        processed = succeeded = failed = 0

        with tqdm(total=total_rows, desc=description) as pbar:
            for idx, row in df.iterrows():
                try:
                    values = convert(row)
                    for column, value in values.items():
                        df.at[idx, column] = value
                    df.at[idx, 'processing_status'] = 'success'
                    succeeded += 1
                except Exception as e:
                    # Best effort: keep going and report the failure per row.
                    df.at[idx, 'processing_status'] = 'error'
                    df.at[idx, 'processing_error'] = str(e)
                    failed += 1

                processed += 1
                pbar.update(1)

                if progress_callback:
                    progress_callback(processed, total_rows, succeeded, failed)

        return processed, succeeded, failed

    @staticmethod
    def _cell_to_text(value) -> Optional[str]:
        """Convert an optional CSV cell to text, or None when it is missing.

        A numeric column containing empty cells is read as float, so a
        pincode such as 560001 would otherwise be sent as "560001.0".
        """
        if pd.isna(value):
            return None
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    def _process_batch_online(
        self,
        batch_df: pd.DataFrame,
        address_column: str,
        city_column: Optional[str],
        state_column: Optional[str],
        pincode_column: Optional[str],
        country_column: Optional[str]
    ) -> List[Dict]:
        """Process batch using online API

        Returns exactly one result dict per row of ``batch_df``, in row
        order: ``{'success': True, 'data': ...}`` or
        ``{'success': False, 'error': ...}``. Rows without an address are
        rejected locally and are not sent to the API.
        """
        optional_fields = [
            (field, column)
            for field, column in (
                ('city', city_column),
                ('state', state_column),
                ('pincode', pincode_column),
                ('country', country_column),
            )
            if column and column in batch_df.columns
        ]

        results: List[Optional[Dict]] = [None] * len(batch_df)
        addresses = []
        positions = []  # batch position of each address that is sent

        for position, (_, row) in enumerate(batch_df.iterrows()):
            raw_address = row[address_column]
            # str(NaN) is "nan"; do not geocode that.
            if pd.isna(raw_address) or not str(raw_address).strip():
                results[position] = {
                    'success': False,
                    'error': 'Address is missing or empty'
                }
                continue

            addr_data = {'address': str(raw_address)}
            for field, column in optional_fields:
                addr_data[field] = self._cell_to_text(row[column])

            addresses.append(addr_data)
            positions.append(position)

        if not addresses:
            return results

        try:
            batch_result = self.client.batch_geocode(addresses)
            items = list(batch_result.get('results') or [])
        except Exception as e:
            # Return error for all items that were sent in this batch
            for position in positions:
                results[position] = {'success': False, 'error': str(e)}
            return results

        # NOTE(review): assumes the API returns results in request order,
        # as the original positional matching did - confirm against the API.
        for position, item in zip(positions, items):
            payload = item.get('result') if isinstance(item, dict) else None
            if not isinstance(payload, dict):
                results[position] = {
                    'success': False,
                    'error': 'Malformed result returned by API'
                }
            elif 'error' in payload:
                results[position] = {'success': False, 'error': payload['error']}
            else:
                results[position] = {'success': True, 'data': payload}

        # A short response must not leave rows without a status.
        for position in positions[len(items):]:
            results[position] = {
                'success': False,
                'error': 'No result returned by API for this address'
            }

        return results

    def _process_batch_offline_geocoding(
        self,
        batch_df: pd.DataFrame,
        address_column: str
    ) -> List[Dict]:
        """Process batch using offline processing (limited functionality)

        Offline geocoding is not possible without coordinates; it would
        require a local geocoding database. Every row therefore gets the
        same error result.
        """
        return [
            {
                'success': False,
                'error': 'Offline address geocoding not supported. Use coordinates_to_digipin instead.'
            }
            for _ in range(len(batch_df))
        ]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions for QuantaRoute Geocoding SDK
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class QuantaRouteError(Exception):
    """Common base class for errors raised by the QuantaRoute SDK.

    It adds nothing to ``Exception``; it exists so callers can catch
    SDK errors with a single ``except`` clause.
    """
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class APIError(QuantaRouteError):
    """Error raised for an error response returned by the API."""

    def __init__(self, message: str, status_code: int = None, error_code: str = None):
        """Keep the message together with the optional codes.

        Args:
            message: Text passed on to the base exception
            status_code: Stored on the instance as ``status_code``
            error_code: Stored on the instance as ``error_code``
        """
        self.status_code, self.error_code = status_code, error_code
        super().__init__(message)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RateLimitError(APIError):
    """Error raised when the API rate limit has been exceeded."""

    def __init__(self, message: str, retry_after: int = None):
        """Create the error with status code 429 and code RATE_LIMIT_EXCEEDED.

        Args:
            message: Text passed on to the base exception
            retry_after: Stored on the instance as ``retry_after``
        """
        self.retry_after = retry_after
        super().__init__(
            message,
            status_code=429,
            error_code="RATE_LIMIT_EXCEEDED",
        )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AuthenticationError(APIError):
    """Error raised for an invalid or missing API key."""

    def __init__(self, message: str = "Invalid or missing API key"):
        """Create the error with status code 401 and code AUTHENTICATION_ERROR.

        Args:
            message: Text passed on to the base exception
        """
        status_code, error_code = 401, "AUTHENTICATION_ERROR"
        super().__init__(message, status_code=status_code, error_code=error_code)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ValidationError(QuantaRouteError):
    """Error raised when input fails validation."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class OfflineProcessingError(QuantaRouteError):
    """Error raised when offline processing fails."""
|