quantaroute-geocoding 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of quantaroute-geocoding might be problematic. Click here for more details.

@@ -0,0 +1,415 @@
1
+ """
2
+ CSV processing utilities for bulk geocoding operations
3
+ """
4
+
5
+ import pandas as pd
6
+ import os
7
+ from typing import Dict, List, Optional, Callable, Union
8
+ from tqdm import tqdm
9
+ import time
10
+
11
+ from .client import QuantaRouteClient
12
+ from .offline import OfflineProcessor
13
+ from .exceptions import ValidationError, QuantaRouteError
14
+
15
+
16
class CSVProcessor:
    """
    CSV processor for bulk geocoding operations

    Supports both online API processing and offline DigiPin operations.

    Each public ``process_*`` method reads an input CSV with pandas, adds
    result/status columns, writes the augmented table to ``output_file`` and
    returns a statistics dict with the keys ``total_rows``,
    ``processed_rows``, ``success_count``, ``error_count``, ``success_rate``
    and ``output_file``.
    """

    # Largest batch sent to the batch geocoding endpoint ("API limit").
    MAX_BATCH_SIZE = 100

    def __init__(
        self,
        api_key: Optional[str] = None,
        use_offline: bool = False,
        batch_size: int = 50,
        delay_between_batches: float = 1.0
    ):
        """
        Initialize CSV processor

        Args:
            api_key: QuantaRoute API key (required for online processing)
            use_offline: Use offline processing when possible
            batch_size: Number of records to process in each batch; clamped
                to the range 1..MAX_BATCH_SIZE
            delay_between_batches: Delay in seconds between API batches;
                negative values are treated as 0

        Raises:
            ValidationError: If online processing is requested without an
                API key
        """
        if not use_offline and not api_key:
            raise ValidationError("API key is required for online processing")

        self.use_offline = use_offline
        # A step of 0 would make range() raise and a negative step would
        # silently skip every row, so the lower bound is clamped as well.
        self.batch_size = max(1, min(batch_size, self.MAX_BATCH_SIZE))
        # time.sleep() rejects negative durations.
        self.delay_between_batches = max(0.0, delay_between_batches)

        self.client = QuantaRouteClient(api_key) if api_key else None
        self.offline_processor = OfflineProcessor() if use_offline else None

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------

    def process_geocoding_csv(
        self,
        input_file: str,
        output_file: str,
        address_column: str = 'address',
        city_column: Optional[str] = None,
        state_column: Optional[str] = None,
        pincode_column: Optional[str] = None,
        country_column: Optional[str] = None,
        progress_callback: Optional[Callable] = None
    ) -> Dict:
        """
        Process CSV file for address geocoding

        Rows are handled in batches of ``self.batch_size``. The output gains
        the columns ``digipin``, ``latitude``, ``longitude``, ``confidence``,
        ``geocoding_status`` and ``geocoding_error``.

        Args:
            input_file: Path to input CSV file
            output_file: Path to output CSV file
            address_column: Name of address column
            city_column: Name of city column (optional)
            state_column: Name of state column (optional)
            pincode_column: Name of pincode column (optional)
            country_column: Name of country column (optional)
            progress_callback: Optional callback invoked after every batch as
                ``callback(processed, total, successes, errors)``

        Returns:
            Dict containing processing statistics

        Raises:
            ValidationError: If the input file is missing or unreadable, or
                the address column does not exist
            QuantaRouteError: If the output file cannot be written
        """
        df = self._load_csv(input_file, {'Address': address_column})

        for column in ('digipin', 'latitude', 'longitude', 'confidence',
                       'geocoding_status', 'geocoding_error'):
            df[column] = None

        total_rows = len(df)
        processed_count = 0
        success_count = 0
        error_count = 0
        online = bool(self.client) and not self.use_offline

        with tqdm(total=total_rows, desc="Geocoding addresses") as pbar:
            for start_idx in range(0, total_rows, self.batch_size):
                end_idx = min(start_idx + self.batch_size, total_rows)
                batch_df = df.iloc[start_idx:end_idx]

                if online:
                    batch_results = self._process_batch_online(
                        batch_df,
                        address_column,
                        city_column,
                        state_column,
                        pincode_column,
                        country_column
                    )
                else:
                    # Offline processing (coordinates to DigiPin only)
                    batch_results = self._process_batch_offline_geocoding(
                        batch_df, address_column
                    )

                # zip() against the batch's own index labels guarantees that
                # a result can never be written outside the current batch.
                written = 0
                for row_idx, result in zip(df.index[start_idx:end_idx], batch_results):
                    if result['success']:
                        data = result['data']
                        # 'coordinates' may be absent or explicitly null.
                        coords = data.get('coordinates') or {}
                        df.at[row_idx, 'digipin'] = data.get('digipin')
                        df.at[row_idx, 'latitude'] = coords.get('latitude')
                        df.at[row_idx, 'longitude'] = coords.get('longitude')
                        df.at[row_idx, 'confidence'] = data.get('confidence')
                        df.at[row_idx, 'geocoding_status'] = 'success'
                        success_count += 1
                    else:
                        df.at[row_idx, 'geocoding_status'] = 'error'
                        df.at[row_idx, 'geocoding_error'] = result['error']
                        error_count += 1
                    written += 1

                processed_count += written
                pbar.update(written)

                if progress_callback:
                    progress_callback(processed_count, total_rows, success_count, error_count)

                # Delay between batches to respect rate limits; pointless
                # when no API request was made.
                if online and end_idx < total_rows:
                    time.sleep(self.delay_between_batches)

        self._save_csv(df, output_file)
        return self._build_stats(
            total_rows, processed_count, success_count, error_count, output_file
        )

    def process_coordinates_to_digipin_csv(
        self,
        input_file: str,
        output_file: str,
        latitude_column: str = 'latitude',
        longitude_column: str = 'longitude',
        progress_callback: Optional[Callable] = None
    ) -> Dict:
        """
        Process CSV file to convert coordinates to DigiPin codes

        The output gains the columns ``digipin``, ``processing_status`` and
        ``processing_error``. A failure on one row is recorded on that row
        and does not stop the run.

        Args:
            input_file: Path to input CSV file
            output_file: Path to output CSV file
            latitude_column: Name of latitude column
            longitude_column: Name of longitude column
            progress_callback: Optional callback invoked after every row as
                ``callback(processed, total, successes, errors)``

        Returns:
            Dict containing processing statistics

        Raises:
            ValidationError: If the input file is missing or unreadable, or
                a coordinate column does not exist
            QuantaRouteError: If the output file cannot be written
        """
        def convert_row(row) -> Dict:
            raw_lat = row[latitude_column]
            raw_lon = row[longitude_column]
            # float(NaN) succeeds, so blank cells must be rejected explicitly.
            if pd.isna(raw_lat) or pd.isna(raw_lon):
                raise ValidationError("Latitude/longitude value is missing")
            lat = float(raw_lat)
            lon = float(raw_lon)

            if self.use_offline and self.offline_processor:
                result = self.offline_processor.coordinates_to_digipin(lat, lon)
            elif self.client:
                result = self.client.coordinates_to_digipin(lat, lon)
            else:
                raise QuantaRouteError("No processing method available")
            return {'digipin': result['digipin']}

        return self._convert_rows(
            input_file,
            output_file,
            {'Latitude': latitude_column, 'Longitude': longitude_column},
            ['digipin'],
            "Converting coordinates to DigiPin",
            convert_row,
            progress_callback
        )

    def process_digipin_to_coordinates_csv(
        self,
        input_file: str,
        output_file: str,
        digipin_column: str = 'digipin',
        progress_callback: Optional[Callable] = None
    ) -> Dict:
        """
        Process CSV file to convert DigiPin codes to coordinates

        The output gains the columns ``latitude``, ``longitude``,
        ``processing_status`` and ``processing_error``. A failure on one row
        is recorded on that row and does not stop the run.

        Args:
            input_file: Path to input CSV file
            output_file: Path to output CSV file
            digipin_column: Name of DigiPin column
            progress_callback: Optional callback invoked after every row as
                ``callback(processed, total, successes, errors)``

        Returns:
            Dict containing processing statistics

        Raises:
            ValidationError: If the input file is missing or unreadable, or
                the DigiPin column does not exist
            QuantaRouteError: If the output file cannot be written
        """
        def convert_row(row) -> Dict:
            raw_code = row[digipin_column]
            # str(NaN) is the literal "nan"; never send that as a code.
            digipin_code = '' if pd.isna(raw_code) else str(raw_code).strip()
            if not digipin_code:
                raise ValidationError("DigiPin value is missing")

            if self.use_offline and self.offline_processor:
                result = self.offline_processor.digipin_to_coordinates(digipin_code)
                coords = result['coordinates']
                return {
                    'latitude': coords['latitude'],
                    'longitude': coords['longitude']
                }
            if self.client:
                result = self.client.reverse_geocode(digipin_code)
                coords = result.get('coordinates') or {}
                return {
                    'latitude': coords.get('latitude'),
                    'longitude': coords.get('longitude')
                }
            raise QuantaRouteError("No processing method available")

        return self._convert_rows(
            input_file,
            output_file,
            {'DigiPin': digipin_column},
            ['latitude', 'longitude'],
            "Converting DigiPin to coordinates",
            convert_row,
            progress_callback
        )

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _load_csv(input_file: str, required_columns: Dict[str, str]) -> pd.DataFrame:
        """Read ``input_file`` and check that every required column exists.

        ``required_columns`` maps the label used in the error message
        (e.g. ``'Address'``) to the column name that must be present.
        """
        if not os.path.exists(input_file):
            raise ValidationError(f"Input file not found: {input_file}")

        try:
            df = pd.read_csv(input_file)
        except Exception as e:
            raise ValidationError(f"Failed to read CSV file: {str(e)}") from e

        for label, column in required_columns.items():
            if column not in df.columns:
                raise ValidationError(f"{label} column '{column}' not found in CSV")
        return df

    @staticmethod
    def _save_csv(df: pd.DataFrame, output_file: str) -> None:
        """Write ``df`` to ``output_file`` without the index column."""
        try:
            df.to_csv(output_file, index=False)
        except Exception as e:
            raise QuantaRouteError(f"Failed to save output file: {str(e)}") from e

    @staticmethod
    def _build_stats(
        total_rows: int,
        processed_count: int,
        success_count: int,
        error_count: int,
        output_file: str
    ) -> Dict:
        """Assemble the statistics dict returned by the ``process_*`` methods."""
        return {
            'total_rows': total_rows,
            'processed_rows': processed_count,
            'success_count': success_count,
            'error_count': error_count,
            'success_rate': success_count / total_rows if total_rows > 0 else 0,
            'output_file': output_file
        }

    @staticmethod
    def _to_text(value) -> str:
        """Render a cell as text, dropping the ``.0`` of integral floats.

        pandas stores an integer column that contains blanks as floats, so a
        pincode such as 560001 would otherwise be sent as ``"560001.0"``.
        """
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    def _convert_rows(
        self,
        input_file: str,
        output_file: str,
        required_columns: Dict[str, str],
        result_columns: List[str],
        description: str,
        convert_row: Callable,
        progress_callback: Optional[Callable]
    ) -> Dict:
        """Run a row-by-row conversion over a CSV file.

        ``convert_row(row)`` returns a dict mapping result column names to
        values; any exception it raises is recorded on that row as an error.
        """
        df = self._load_csv(input_file, required_columns)

        for column in list(result_columns) + ['processing_status', 'processing_error']:
            df[column] = None

        total_rows = len(df)
        processed_count = 0
        success_count = 0
        error_count = 0

        with tqdm(total=total_rows, desc=description) as pbar:
            for idx, row in df.iterrows():
                try:
                    values = convert_row(row)
                except Exception as e:
                    # Per-row isolation: one bad row must not abort the file.
                    df.at[idx, 'processing_status'] = 'error'
                    df.at[idx, 'processing_error'] = str(e)
                    error_count += 1
                else:
                    for column, value in values.items():
                        df.at[idx, column] = value
                    df.at[idx, 'processing_status'] = 'success'
                    success_count += 1

                processed_count += 1
                pbar.update(1)

                if progress_callback:
                    progress_callback(processed_count, total_rows, success_count, error_count)

        self._save_csv(df, output_file)
        return self._build_stats(
            total_rows, processed_count, success_count, error_count, output_file
        )

    @staticmethod
    def _interpret_result(result_item) -> Dict:
        """Translate one entry of the batch response into a success/error dict."""
        payload = result_item.get('result') if isinstance(result_item, dict) else None
        if not isinstance(payload, dict):
            return {'success': False, 'error': 'Malformed result returned by API'}
        if 'error' in payload:
            return {'success': False, 'error': payload['error']}
        return {'success': True, 'data': payload}

    def _process_batch_online(
        self,
        batch_df: pd.DataFrame,
        address_column: str,
        city_column: Optional[str],
        state_column: Optional[str],
        pincode_column: Optional[str],
        country_column: Optional[str]
    ) -> List[Dict]:
        """Process batch using online API

        Returns exactly one dict per row of ``batch_df``, in row order:
        ``{'success': True, 'data': ...}`` or
        ``{'success': False, 'error': ...}``. Rows without an address are
        reported as errors and are not sent to the API. Results are matched
        to addresses by position; surplus results are ignored and missing
        ones are reported as errors.
        """
        optional_columns = [
            (key, column)
            for key, column in (
                ('city', city_column),
                ('state', state_column),
                ('pincode', pincode_column),
                ('country', country_column),
            )
            if column and column in batch_df.columns
        ]

        results: List[Optional[Dict]] = []
        addresses = []
        pending = []  # positions in `results` still waiting for an API answer

        for _, row in batch_df.iterrows():
            address = row[address_column]
            if pd.isna(address) or not str(address).strip():
                results.append({'success': False, 'error': 'Address is missing'})
                continue

            addr_data = {'address': self._to_text(address)}
            for key, column in optional_columns:
                value = row[column]
                addr_data[key] = self._to_text(value) if pd.notna(value) else None

            pending.append(len(results))
            results.append(None)
            addresses.append(addr_data)

        if not addresses:
            return results

        try:
            batch_result = self.client.batch_geocode(addresses)
            items = list(batch_result.get('results') or [])
        except Exception as e:
            # Return error for all items that were sent in this batch
            for position in pending:
                results[position] = {'success': False, 'error': str(e)}
            return results

        for offset, position in enumerate(pending):
            if offset < len(items):
                results[position] = self._interpret_result(items[offset])
            else:
                results[position] = {
                    'success': False,
                    'error': 'No result returned by API for this address'
                }

        return results

    def _process_batch_offline_geocoding(
        self,
        batch_df: pd.DataFrame,
        address_column: str
    ) -> List[Dict]:
        """Process batch using offline processing (limited functionality)

        Offline geocoding is not possible without coordinates (it would
        require a local geocoding database), so every row is reported as an
        error.
        """
        return [
            {
                'success': False,
                'error': 'Offline address geocoding not supported. Use coordinates_to_digipin instead.'
            }
            for _ in range(len(batch_df))
        ]
@@ -0,0 +1,42 @@
1
+ """
2
+ Custom exceptions for QuantaRoute Geocoding SDK
3
+ """
4
+
5
+
6
class QuantaRouteError(Exception):
    """Common base class for errors raised by the QuantaRoute SDK."""
9
+
10
+
11
class APIError(QuantaRouteError):
    """Error signalling that the API answered with an error response.

    Attributes:
        status_code: Status code passed by the raiser, or None.
        error_code: Error code string passed by the raiser, or None.
    """

    def __init__(self, message: str, status_code: int = None, error_code: str = None):
        # Only the message is forwarded to Exception, so str(error) is the
        # message; the codes are available as attributes.
        self.status_code = status_code
        self.error_code = error_code
        super().__init__(message)
18
+
19
+
20
class RateLimitError(APIError):
    """Error signalling that the API rate limit was exceeded.

    Always carries status code 429 and error code "RATE_LIMIT_EXCEEDED".

    Attributes:
        retry_after: Value supplied by the raiser, or None.
    """

    def __init__(self, message: str, retry_after: int = None):
        self.retry_after = retry_after
        super().__init__(message, 429, "RATE_LIMIT_EXCEEDED")
26
+
27
+
28
class AuthenticationError(APIError):
    """Error signalling an invalid or missing API key.

    Always carries status code 401 and error code "AUTHENTICATION_ERROR".
    """

    def __init__(self, message: str = "Invalid or missing API key"):
        super().__init__(message, 401, "AUTHENTICATION_ERROR")
33
+
34
+
35
class ValidationError(QuantaRouteError):
    """Error signalling that input validation failed."""
38
+
39
+
40
class OfflineProcessingError(QuantaRouteError):
    """Error signalling that offline processing failed."""