ai-lls-lib 1.4.0rc2__py3-none-any.whl → 1.4.0rc4__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
- ai_lls_lib/__init__.py +1 -1
- ai_lls_lib/auth/__init__.py +4 -4
- ai_lls_lib/auth/context_parser.py +68 -68
- ai_lls_lib/cli/__init__.py +3 -3
- ai_lls_lib/cli/__main__.py +30 -30
- ai_lls_lib/cli/aws_client.py +115 -115
- ai_lls_lib/cli/commands/__init__.py +3 -3
- ai_lls_lib/cli/commands/admin.py +174 -174
- ai_lls_lib/cli/commands/cache.py +142 -142
- ai_lls_lib/cli/commands/stripe.py +377 -377
- ai_lls_lib/cli/commands/test_stack.py +216 -216
- ai_lls_lib/cli/commands/verify.py +111 -111
- ai_lls_lib/cli/env_loader.py +122 -122
- ai_lls_lib/core/__init__.py +3 -3
- ai_lls_lib/core/cache.py +106 -106
- ai_lls_lib/core/models.py +77 -77
- ai_lls_lib/core/processor.py +295 -295
- ai_lls_lib/core/verifier.py +84 -84
- ai_lls_lib/payment/__init__.py +13 -13
- ai_lls_lib/payment/credit_manager.py +186 -193
- ai_lls_lib/payment/models.py +102 -102
- ai_lls_lib/payment/stripe_manager.py +487 -487
- ai_lls_lib/payment/webhook_processor.py +215 -215
- ai_lls_lib/providers/__init__.py +7 -7
- ai_lls_lib/providers/base.py +28 -28
- ai_lls_lib/providers/external.py +87 -87
- ai_lls_lib/providers/stub.py +48 -48
- ai_lls_lib/testing/__init__.py +3 -3
- ai_lls_lib/testing/fixtures.py +104 -104
- {ai_lls_lib-1.4.0rc2.dist-info → ai_lls_lib-1.4.0rc4.dist-info}/METADATA +1 -1
- ai_lls_lib-1.4.0rc4.dist-info/RECORD +33 -0
- ai_lls_lib-1.4.0rc2.dist-info/RECORD +0 -33
- {ai_lls_lib-1.4.0rc2.dist-info → ai_lls_lib-1.4.0rc4.dist-info}/WHEEL +0 -0
- {ai_lls_lib-1.4.0rc2.dist-info → ai_lls_lib-1.4.0rc4.dist-info}/entry_points.txt +0 -0
ai_lls_lib/core/processor.py
CHANGED
@@ -1,295 +1,295 @@

The diff records processor.py as fully rewritten (-295 / +295 lines), but the removed and added lines shown here are identical, so the module source is listed once below, method by method.

"""
Bulk CSV processing for phone verification
"""
import csv
from io import StringIO
from typing import List, Optional, Iterator, Iterable
from aws_lambda_powertools import Logger
from .models import PhoneVerification
from .verifier import PhoneVerifier

logger = Logger()


class BulkProcessor:
    """Process CSV files for bulk phone verification"""

    def __init__(self, verifier: PhoneVerifier):
        self.verifier = verifier

    def process_csv(self, csv_text: str, phone_column: str = "phone") -> List[PhoneVerification]:
        """
        Process CSV text content.
        Returns list of verification results.
        """
        results = []

        try:
            # Use StringIO to parse CSV text
            csv_file = StringIO(csv_text)
            reader = csv.DictReader(csv_file)

            # Find phone column (case-insensitive)
            headers = reader.fieldnames or []
            phone_col = self._find_phone_column(headers, phone_column)

            if not phone_col:
                raise ValueError(f"Phone column '{phone_column}' not found in CSV")

            for row_num, row in enumerate(reader, start=2):  # Start at 2 (header is 1)
                try:
                    phone = row.get(phone_col, "").strip()
                    if not phone:
                        logger.warning(f"Empty phone at row {row_num}")
                        continue

                    # Verify phone
                    result = self.verifier.verify(phone)
                    results.append(result)

                    # Log progress every 100 rows
                    if len(results) % 100 == 0:
                        logger.info(f"Processed {len(results)} phones")

                except ValueError as e:
                    logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
                    continue
                except Exception as e:
                    logger.error(f"Error processing row {row_num}: {str(e)}")
                    continue

            logger.info(f"Completed processing {len(results)} valid phones")

        except Exception as e:
            logger.error(f"CSV processing failed: {str(e)}")
            raise

        return results
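A minimal usage sketch of process_csv follows. FakeLineType, FakeResult, and FakeVerifier are illustrative stand-ins, not part of ai_lls_lib: the real classes are PhoneVerification and PhoneVerifier, whose constructors are not shown in this diff. The sketch relies only on what BulkProcessor itself calls (verifier.verify(), verifier.normalize_phone()) and on the result attributes it reads (phone_number, line_type.value, dnc, cached), and it assumes the library's aws_lambda_powertools dependency is installed.

from dataclasses import dataclass
from enum import Enum

from ai_lls_lib.core.processor import BulkProcessor

class FakeLineType(Enum):          # stand-in for the package's line-type enum
    MOBILE = "mobile"

@dataclass
class FakeResult:                  # mimics only the attributes BulkProcessor reads
    phone_number: str
    line_type: FakeLineType
    dnc: bool = False
    cached: bool = False

class FakeVerifier:                # duck-typed stand-in for PhoneVerifier
    def normalize_phone(self, phone: str) -> str:
        digits = "".join(c for c in phone if c.isdigit())
        return "+1" + digits[-10:]                      # naive E.164-style normalization

    def verify(self, phone: str) -> FakeResult:
        return FakeResult(self.normalize_phone(phone), FakeLineType.MOBILE)

processor = BulkProcessor(FakeVerifier())   # type hint says PhoneVerifier; any object with these methods works
csv_text = "name,phone\nAda,(555) 010-4477\nBob,555.010.9922\n"
results = processor.process_csv(csv_text)
print(len(results), results[0].phone_number)            # 2 +15550104477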
    def _find_phone_column(self, headers: List[str], preferred: str) -> Optional[str]:
        """Find phone column in headers (case-insensitive)"""
        # First try exact match
        for header in headers:
            if header.lower() == preferred.lower():
                return header

        # Common phone column names
        phone_patterns = [
            "phone", "phone_number", "phonenumber", "mobile",
            "cell", "telephone", "tel", "number", "contact"
        ]

        for header in headers:
            header_lower = header.lower()
            for pattern in phone_patterns:
                if pattern in header_lower:
                    logger.info(f"Using column '{header}' as phone column")
                    return header

        return None
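The column lookup tries an exact case-insensitive match first and then falls back to substring matching against the pattern list. Continuing the stand-in objects from the sketch above, the behavior looks like this:

bp = BulkProcessor(FakeVerifier())
print(bp._find_phone_column(["Name", "Phone"], "phone"))        # Phone  (exact, case-insensitive)
print(bp._find_phone_column(["Name", "Cell Number"], "phone"))  # Cell Number  ("cell" substring match)
print(bp._find_phone_column(["Name", "Email"], "phone"))        # None (process_csv would then raise ValueError)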
    def generate_results_csv(
        self,
        original_csv_text: str,
        results: List[PhoneVerification]
    ) -> str:
        """
        Generate CSV with original data plus verification results.
        Adds columns: line_type, dnc, cached
        Returns CSV text string.
        """
        # Create lookup dict
        results_map = {r.phone_number: r for r in results}

        # Parse original CSV
        input_file = StringIO(original_csv_text)
        reader = csv.DictReader(input_file)
        headers = reader.fieldnames or []

        # Add new columns
        output_headers = headers + ["line_type", "dnc", "cached"]

        # Create output CSV in memory
        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=output_headers)
        writer.writeheader()

        phone_col = self._find_phone_column(headers, "phone")

        for row in reader:
            phone = row.get(phone_col, "").strip()

            # Try to normalize for lookup
            try:
                normalized = self.verifier.normalize_phone(phone)
                if normalized in results_map:
                    result = results_map[normalized]
                    row["line_type"] = result.line_type.value
                    row["dnc"] = "true" if result.dnc else "false"
                    row["cached"] = "true" if result.cached else "false"
                else:
                    row["line_type"] = "unknown"
                    row["dnc"] = ""
                    row["cached"] = ""
            except:
                row["line_type"] = "invalid"
                row["dnc"] = ""
                row["cached"] = ""

            writer.writerow(row)

        # Return CSV text
        return output.getvalue()
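generate_results_csv re-reads the original CSV and joins each row to a result via verifier.normalize_phone(phone), so a row only picks up line_type, dnc, and cached when that normalized value matches a result's phone_number; otherwise it falls back to "unknown" or "invalid". Continuing the same sketch:

results = processor.process_csv(csv_text)
annotated = processor.generate_results_csv(csv_text, results)
print(annotated.splitlines()[0])   # name,phone,line_type,dnc,cached
print(annotated.splitlines()[1])   # Ada,(555) 010-4477,mobile,false,false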
    def process_csv_stream(
        self,
        lines: Iterable[str],
        phone_column: str = "phone",
        batch_size: int = 100
    ) -> Iterator[List[PhoneVerification]]:
        """
        Process CSV lines as a stream, yielding batches of results.
        Memory-efficient for large files.

        Args:
            lines: Iterator of CSV lines (including header)
            phone_column: Column name containing phone numbers
            batch_size: Number of results to accumulate before yielding

        Yields:
            Batches of PhoneVerification results
        """
        lines_list = list(lines)  # Need to iterate twice - once for headers, once for data

        if not lines_list:
            logger.error("Empty CSV stream")
            return

        # Parse header
        header_line = lines_list[0]
        reader = csv.DictReader(StringIO(header_line))
        headers = reader.fieldnames or []
        phone_col = self._find_phone_column(headers, phone_column)

        if not phone_col:
            raise ValueError(f"Phone column '{phone_column}' not found in CSV")

        batch = []
        row_num = 2  # Start at 2 (header is 1)
        total_processed = 0

        # Process data lines
        for line in lines_list[1:]:
            if not line.strip():
                continue

            try:
                # Parse single line
                row = next(csv.DictReader(StringIO(line), fieldnames=headers))
                phone = row.get(phone_col, "").strip()

                if not phone:
                    logger.warning(f"Empty phone at row {row_num}")
                    row_num += 1
                    continue

                # Verify phone
                result = self.verifier.verify(phone)
                batch.append(result)
                total_processed += 1

                # Yield batch when full
                if len(batch) >= batch_size:
                    logger.info(f"Processed batch of {len(batch)} phones (total: {total_processed})")
                    yield batch
                    batch = []

            except ValueError as e:
                logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
            except Exception as e:
                logger.error(f"Error processing row {row_num}: {str(e)}")
            finally:
                row_num += 1

        # Yield remaining results
        if batch:
            logger.info(f"Processed final batch of {len(batch)} phones (total: {total_processed})")
            yield batch

        logger.info(f"Stream processing completed. Total processed: {total_processed}")
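process_csv_stream yields results in batches rather than one large list; note that it materializes the incoming lines with list(lines) before parsing, so the streaming benefit is mainly on the output side. A short sketch, again with the stand-in verifier from above:

lines = csv_text.splitlines()                 # header plus data rows
for batch in processor.process_csv_stream(lines, batch_size=1):
    for r in batch:
        print(r.phone_number, r.line_type.value)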
    def generate_results_csv_stream(
        self,
        original_lines: Iterable[str],
        results_stream: Iterator[List[PhoneVerification]],
        phone_column: str = "phone"
    ) -> Iterator[str]:
        """
        Generate CSV results as a stream, line by line.
        Memory-efficient for large files.

        Args:
            original_lines: Iterator of original CSV lines
            results_stream: Iterator of batched PhoneVerification results
            phone_column: Column name containing phone numbers

        Yields:
            CSV lines with verification results added
        """
        lines_iter = iter(original_lines)

        # Read and yield modified header
        try:
            header_line = next(lines_iter)
            reader = csv.DictReader(StringIO(header_line))
            headers = reader.fieldnames or []

            # Add new columns
            output_headers = headers + ["line_type", "dnc", "cached"]
            yield ','.join(output_headers) + '\n'

            phone_col = self._find_phone_column(headers, phone_column)

        except StopIteration:
            return

        # Build results lookup from stream
        results_map = {}
        for batch in results_stream:
            for result in batch:
                results_map[result.phone_number] = result

        # Reset lines iterator
        lines_iter = iter(original_lines)
        next(lines_iter)  # Skip header

        # Process and yield data lines
        for line in lines_iter:
            if not line.strip():
                continue

            row = next(csv.DictReader(StringIO(line), fieldnames=headers))
            phone = row.get(phone_col, "").strip()

            # Add verification results
            try:
                normalized = self.verifier.normalize_phone(phone)
                if normalized in results_map:
                    result = results_map[normalized]
                    row["line_type"] = result.line_type.value
                    row["dnc"] = "true" if result.dnc else "false"
                    row["cached"] = "true" if result.cached else "false"
                else:
                    row["line_type"] = "unknown"
                    row["dnc"] = ""
                    row["cached"] = ""
            except:
                row["line_type"] = "invalid"
                row["dnc"] = ""
                row["cached"] = ""

            # Write row
            output = StringIO()
            writer = csv.DictWriter(output, fieldnames=output_headers)
            writer.writerow(row)
            yield output.getvalue()
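generate_results_csv_stream drains the whole results stream into a lookup dict and walks original_lines twice (iter(original_lines) is taken before and after the header), so a re-iterable sequence such as a list should be passed rather than a one-shot generator or file handle. Wiring the two streaming methods together, still with the stand-ins above:

lines = csv_text.splitlines()                                  # list: safe to iterate twice
batches = processor.process_csv_stream(lines, batch_size=100)
for out_line in processor.generate_results_csv_stream(lines, batches):
    print(out_line, end="")                                    # header, then one annotated CSV row per input row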