ai-lls-lib 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/PKG-INFO +60 -8
  2. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/README.md +59 -7
  3. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/pyproject.toml +1 -1
  4. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/__init__.py +1 -1
  5. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/verify.py +3 -3
  6. ai_lls_lib-1.1.0/src/ai_lls_lib/core/processor.py +295 -0
  7. ai_lls_lib-1.1.0/src/ai_lls_lib/core/verifier.py +84 -0
  8. ai_lls_lib-1.1.0/src/ai_lls_lib/providers/__init__.py +7 -0
  9. ai_lls_lib-1.1.0/src/ai_lls_lib/providers/base.py +28 -0
  10. ai_lls_lib-1.1.0/src/ai_lls_lib/providers/external.py +87 -0
  11. ai_lls_lib-1.1.0/src/ai_lls_lib/providers/stub.py +48 -0
  12. ai_lls_lib-1.0.0/src/ai_lls_lib/core/processor.py +0 -135
  13. ai_lls_lib-1.0.0/src/ai_lls_lib/core/verifier.py +0 -95
  14. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/__init__.py +0 -0
  15. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/__main__.py +0 -0
  16. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/aws_client.py +0 -0
  17. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/__init__.py +0 -0
  18. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/admin.py +0 -0
  19. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/cache.py +0 -0
  20. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/test_stack.py +0 -0
  21. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/core/__init__.py +0 -0
  22. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/core/cache.py +0 -0
  23. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/core/models.py +0 -0
  24. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/testing/__init__.py +0 -0
  25. {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/testing/fixtures.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ai-lls-lib
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: Landline Scrubber core library - phone verification and DNC checking
5
5
  Author: LandlineScrubber Team
6
6
  Requires-Python: >=3.12,<4.0
@@ -20,6 +20,21 @@ Description-Content-Type: text/markdown
20
20
 
21
21
  Core business logic library and CLI tools for Landline Scrubber - phone verification and DNC checking.
22
22
 
23
+ ## Version 2.1.0 - Streaming & Provider Architecture
24
+
25
+ New features:
26
+ - **Streaming support** for large CSV files to reduce memory usage
27
+ - **Provider architecture** for clean separation of verification logic
28
+ - **Contract tests** ensuring all providers behave consistently
29
+
30
+ ## Version 2.0.0 - Breaking Changes
31
+
32
+ This is a greenfield rewrite with no backwards compatibility:
33
+ - All file-based CSV processing replaced with text-based methods
34
+ - Removed `_sync` suffix from all methods (everything is sync)
35
+ - `process_csv_sync(file_path)` → `process_csv(csv_text)`
36
+ - `generate_results_csv(...)` now returns CSV string instead of writing to file
37
+
23
38
  ## Features
24
39
 
25
40
  - Phone number normalization (E.164 format)
@@ -50,7 +65,7 @@ from ai_lls_lib import PhoneVerifier, DynamoDBCache
50
65
  cache = DynamoDBCache(table_name="phone-cache")
51
66
  verifier = PhoneVerifier(cache)
52
67
 
53
- result = verifier.verify_sync("+15551234567")
68
+ result = verifier.verify("+15551234567")
54
69
  print(f"Line type: {result.line_type}")
55
70
  print(f"DNC: {result.dnc}")
56
71
  print(f"From cache: {result.cached}")
@@ -65,12 +80,49 @@ cache = DynamoDBCache(table_name="phone-cache")
65
80
  verifier = PhoneVerifier(cache)
66
81
  processor = BulkProcessor(verifier)
67
82
 
68
- results = processor.process_csv_sync("/path/to/phones.csv")
69
- processor.generate_results_csv(
70
- original_path="/path/to/phones.csv",
71
- results=results,
72
- output_path="/path/to/results.csv"
73
- )
83
+ # Process CSV text content
84
+ csv_text = "name,phone\nJohn,+15551234567\nJane,+15551234568"
85
+ results = processor.process_csv(csv_text)
86
+
87
+ # Generate results CSV
88
+ results_csv = processor.generate_results_csv(csv_text, results)
89
+ print(results_csv) # CSV string with added line_type, dnc, cached columns
90
+ ```
91
+
92
+ ### Streaming Large Files
93
+
94
+ For memory-efficient processing of large CSV files:
95
+
96
+ ```python
97
+ from ai_lls_lib import BulkProcessor, PhoneVerifier, DynamoDBCache
98
+
99
+ cache = DynamoDBCache(table_name="phone-cache")
100
+ verifier = PhoneVerifier(cache)
101
+ processor = BulkProcessor(verifier)
102
+
103
+ # Process CSV as a stream, yielding batches
104
+ csv_lines = open('large_file.csv').readlines()
105
+ for batch in processor.process_csv_stream(csv_lines, batch_size=100):
106
+ print(f"Processed batch of {len(batch)} phones")
107
+ # Each batch is a list of PhoneVerification objects
108
+ ```
109
+
110
+ ### Custom Verification Providers
111
+
112
+ Use different verification providers based on your needs:
113
+
114
+ ```python
115
+ from ai_lls_lib import PhoneVerifier, DynamoDBCache
116
+ from ai_lls_lib.providers import StubProvider
117
+
118
+ # Use stub provider for testing
119
+ cache = DynamoDBCache(table_name="phone-cache")
120
+ provider = StubProvider() # Deterministic testing provider
121
+ verifier = PhoneVerifier(cache, provider=provider)
122
+
123
+ # When external APIs are ready, switch to:
124
+ # from ai_lls_lib.providers.external import ExternalAPIProvider
125
+ # provider = ExternalAPIProvider(phone_api_key="...", dnc_api_key="...")
74
126
  ```
75
127
 
76
128
  ## CLI Usage
@@ -2,6 +2,21 @@
2
2
 
3
3
  Core business logic library and CLI tools for Landline Scrubber - phone verification and DNC checking.
4
4
 
5
+ ## Version 2.1.0 - Streaming & Provider Architecture
6
+
7
+ New features:
8
+ - **Streaming support** for large CSV files to reduce memory usage
9
+ - **Provider architecture** for clean separation of verification logic
10
+ - **Contract tests** ensuring all providers behave consistently
11
+
12
+ ## Version 2.0.0 - Breaking Changes
13
+
14
+ This is a greenfield rewrite with no backwards compatibility:
15
+ - All file-based CSV processing replaced with text-based methods
16
+ - Removed `_sync` suffix from all methods (everything is sync)
17
+ - `process_csv_sync(file_path)` → `process_csv(csv_text)`
18
+ - `generate_results_csv(...)` now returns CSV string instead of writing to file
19
+
5
20
  ## Features
6
21
 
7
22
  - Phone number normalization (E.164 format)
@@ -32,7 +47,7 @@ from ai_lls_lib import PhoneVerifier, DynamoDBCache
32
47
  cache = DynamoDBCache(table_name="phone-cache")
33
48
  verifier = PhoneVerifier(cache)
34
49
 
35
- result = verifier.verify_sync("+15551234567")
50
+ result = verifier.verify("+15551234567")
36
51
  print(f"Line type: {result.line_type}")
37
52
  print(f"DNC: {result.dnc}")
38
53
  print(f"From cache: {result.cached}")
@@ -47,12 +62,49 @@ cache = DynamoDBCache(table_name="phone-cache")
47
62
  verifier = PhoneVerifier(cache)
48
63
  processor = BulkProcessor(verifier)
49
64
 
50
- results = processor.process_csv_sync("/path/to/phones.csv")
51
- processor.generate_results_csv(
52
- original_path="/path/to/phones.csv",
53
- results=results,
54
- output_path="/path/to/results.csv"
55
- )
65
+ # Process CSV text content
66
+ csv_text = "name,phone\nJohn,+15551234567\nJane,+15551234568"
67
+ results = processor.process_csv(csv_text)
68
+
69
+ # Generate results CSV
70
+ results_csv = processor.generate_results_csv(csv_text, results)
71
+ print(results_csv) # CSV string with added line_type, dnc, cached columns
72
+ ```
73
+
74
+ ### Streaming Large Files
75
+
76
+ For memory-efficient processing of large CSV files:
77
+
78
+ ```python
79
+ from ai_lls_lib import BulkProcessor, PhoneVerifier, DynamoDBCache
80
+
81
+ cache = DynamoDBCache(table_name="phone-cache")
82
+ verifier = PhoneVerifier(cache)
83
+ processor = BulkProcessor(verifier)
84
+
85
+ # Process CSV as a stream, yielding batches
86
+ csv_lines = open('large_file.csv').readlines()
87
+ for batch in processor.process_csv_stream(csv_lines, batch_size=100):
88
+ print(f"Processed batch of {len(batch)} phones")
89
+ # Each batch is a list of PhoneVerification objects
90
+ ```
91
+
92
+ ### Custom Verification Providers
93
+
94
+ Use different verification providers based on your needs:
95
+
96
+ ```python
97
+ from ai_lls_lib import PhoneVerifier, DynamoDBCache
98
+ from ai_lls_lib.providers import StubProvider
99
+
100
+ # Use stub provider for testing
101
+ cache = DynamoDBCache(table_name="phone-cache")
102
+ provider = StubProvider() # Deterministic testing provider
103
+ verifier = PhoneVerifier(cache, provider=provider)
104
+
105
+ # When external APIs are ready, switch to:
106
+ # from ai_lls_lib.providers.external import ExternalAPIProvider
107
+ # provider = ExternalAPIProvider(phone_api_key="...", dnc_api_key="...")
56
108
  ```
57
109
 
58
110
  ## CLI Usage
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ai-lls-lib"
3
- version = "1.0.0"
3
+ version = "1.1.0"
4
4
  description = "Landline Scrubber core library - phone verification and DNC checking"
5
5
  authors = ["LandlineScrubber Team"]
6
6
  readme = "README.md"
@@ -13,7 +13,7 @@ from ai_lls_lib.core.verifier import PhoneVerifier
13
13
  from ai_lls_lib.core.processor import BulkProcessor
14
14
  from ai_lls_lib.core.cache import DynamoDBCache
15
15
 
16
- __version__ = "1.0.0"
16
+ __version__ = "1.1.0"
17
17
  __all__ = [
18
18
  "PhoneVerification",
19
19
  "BulkJob",
@@ -35,8 +35,8 @@ def verify_phone(phone_number, stack, skip_cache, profile, region):
35
35
  if skip_cache:
36
36
  # Force fresh lookup
37
37
  normalized = verifier.normalize_phone(phone_number)
38
- line_type = verifier._check_line_type_sync(normalized)
39
- dnc = verifier._check_dnc_sync(normalized)
38
+ line_type = verifier._check_line_type(normalized)
39
+ dnc = verifier._check_dnc(normalized)
40
40
  result = {
41
41
  "phone_number": normalized,
42
42
  "line_type": line_type,
@@ -46,7 +46,7 @@ def verify_phone(phone_number, stack, skip_cache, profile, region):
46
46
  "source": "cli-direct"
47
47
  }
48
48
  else:
49
- result = verifier.verify_sync(phone_number)
49
+ result = verifier.verify(phone_number)
50
50
  result = result.dict() if hasattr(result, 'dict') else result
51
51
 
52
52
  # Display results
@@ -0,0 +1,295 @@
1
+ """
2
+ Bulk CSV processing for phone verification
3
+ """
4
+ import csv
5
+ from io import StringIO
6
+ from typing import List, Optional, Iterator, Iterable
7
+ from aws_lambda_powertools import Logger
8
+ from .models import PhoneVerification
9
+ from .verifier import PhoneVerifier
10
+
11
+ logger = Logger()
12
+
13
+
14
+ class BulkProcessor:
15
+ """Process CSV files for bulk phone verification"""
16
+
17
+ def __init__(self, verifier: PhoneVerifier):
18
+ self.verifier = verifier
19
+
20
+ def process_csv(self, csv_text: str, phone_column: str = "phone") -> List[PhoneVerification]:
21
+ """
22
+ Process CSV text content.
23
+ Returns list of verification results.
24
+ """
25
+ results = []
26
+
27
+ try:
28
+ # Use StringIO to parse CSV text
29
+ csv_file = StringIO(csv_text)
30
+ reader = csv.DictReader(csv_file)
31
+
32
+ # Find phone column (case-insensitive)
33
+ headers = reader.fieldnames or []
34
+ phone_col = self._find_phone_column(headers, phone_column)
35
+
36
+ if not phone_col:
37
+ raise ValueError(f"Phone column '{phone_column}' not found in CSV")
38
+
39
+ for row_num, row in enumerate(reader, start=2): # Start at 2 (header is 1)
40
+ try:
41
+ phone = row.get(phone_col, "").strip()
42
+ if not phone:
43
+ logger.warning(f"Empty phone at row {row_num}")
44
+ continue
45
+
46
+ # Verify phone
47
+ result = self.verifier.verify(phone)
48
+ results.append(result)
49
+
50
+ # Log progress every 100 rows
51
+ if len(results) % 100 == 0:
52
+ logger.info(f"Processed {len(results)} phones")
53
+
54
+ except ValueError as e:
55
+ logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
56
+ continue
57
+ except Exception as e:
58
+ logger.error(f"Error processing row {row_num}: {str(e)}")
59
+ continue
60
+
61
+ logger.info(f"Completed processing {len(results)} valid phones")
62
+
63
+ except Exception as e:
64
+ logger.error(f"CSV processing failed: {str(e)}")
65
+ raise
66
+
67
+ return results
68
+
69
+ def _find_phone_column(self, headers: List[str], preferred: str) -> Optional[str]:
70
+ """Find phone column in headers (case-insensitive)"""
71
+ # First try exact match
72
+ for header in headers:
73
+ if header.lower() == preferred.lower():
74
+ return header
75
+
76
+ # Common phone column names
77
+ phone_patterns = [
78
+ "phone", "phone_number", "phonenumber", "mobile",
79
+ "cell", "telephone", "tel", "number", "contact"
80
+ ]
81
+
82
+ for header in headers:
83
+ header_lower = header.lower()
84
+ for pattern in phone_patterns:
85
+ if pattern in header_lower:
86
+ logger.info(f"Using column '{header}' as phone column")
87
+ return header
88
+
89
+ return None
90
+
91
+ def generate_results_csv(
92
+ self,
93
+ original_csv_text: str,
94
+ results: List[PhoneVerification]
95
+ ) -> str:
96
+ """
97
+ Generate CSV with original data plus verification results.
98
+ Adds columns: line_type, dnc, cached
99
+ Returns CSV text string.
100
+ """
101
+ # Create lookup dict
102
+ results_map = {r.phone_number: r for r in results}
103
+
104
+ # Parse original CSV
105
+ input_file = StringIO(original_csv_text)
106
+ reader = csv.DictReader(input_file)
107
+ headers = reader.fieldnames or []
108
+
109
+ # Add new columns
110
+ output_headers = headers + ["line_type", "dnc", "cached"]
111
+
112
+ # Create output CSV in memory
113
+ output = StringIO()
114
+ writer = csv.DictWriter(output, fieldnames=output_headers)
115
+ writer.writeheader()
116
+
117
+ phone_col = self._find_phone_column(headers, "phone")
118
+
119
+ for row in reader:
120
+ phone = row.get(phone_col, "").strip()
121
+
122
+ # Try to normalize for lookup
123
+ try:
124
+ normalized = self.verifier.normalize_phone(phone)
125
+ if normalized in results_map:
126
+ result = results_map[normalized]
127
+ row["line_type"] = result.line_type.value
128
+ row["dnc"] = "true" if result.dnc else "false"
129
+ row["cached"] = "true" if result.cached else "false"
130
+ else:
131
+ row["line_type"] = "unknown"
132
+ row["dnc"] = ""
133
+ row["cached"] = ""
134
+ except:
135
+ row["line_type"] = "invalid"
136
+ row["dnc"] = ""
137
+ row["cached"] = ""
138
+
139
+ writer.writerow(row)
140
+
141
+ # Return CSV text
142
+ return output.getvalue()
143
+
144
+ def process_csv_stream(
145
+ self,
146
+ lines: Iterable[str],
147
+ phone_column: str = "phone",
148
+ batch_size: int = 100
149
+ ) -> Iterator[List[PhoneVerification]]:
150
+ """
151
+ Process CSV lines as a stream, yielding batches of results.
152
+ Memory-efficient for large files.
153
+
154
+ Args:
155
+ lines: Iterator of CSV lines (including header)
156
+ phone_column: Column name containing phone numbers
157
+ batch_size: Number of results to accumulate before yielding
158
+
159
+ Yields:
160
+ Batches of PhoneVerification results
161
+ """
162
+ lines_list = list(lines) # Need to iterate twice - once for headers, once for data
163
+
164
+ if not lines_list:
165
+ logger.error("Empty CSV stream")
166
+ return
167
+
168
+ # Parse header
169
+ header_line = lines_list[0]
170
+ reader = csv.DictReader(StringIO(header_line))
171
+ headers = reader.fieldnames or []
172
+ phone_col = self._find_phone_column(headers, phone_column)
173
+
174
+ if not phone_col:
175
+ raise ValueError(f"Phone column '{phone_column}' not found in CSV")
176
+
177
+ batch = []
178
+ row_num = 2 # Start at 2 (header is 1)
179
+ total_processed = 0
180
+
181
+ # Process data lines
182
+ for line in lines_list[1:]:
183
+ if not line.strip():
184
+ continue
185
+
186
+ try:
187
+ # Parse single line
188
+ row = next(csv.DictReader(StringIO(line), fieldnames=headers))
189
+ phone = row.get(phone_col, "").strip()
190
+
191
+ if not phone:
192
+ logger.warning(f"Empty phone at row {row_num}")
193
+ row_num += 1
194
+ continue
195
+
196
+ # Verify phone
197
+ result = self.verifier.verify(phone)
198
+ batch.append(result)
199
+ total_processed += 1
200
+
201
+ # Yield batch when full
202
+ if len(batch) >= batch_size:
203
+ logger.info(f"Processed batch of {len(batch)} phones (total: {total_processed})")
204
+ yield batch
205
+ batch = []
206
+
207
+ except ValueError as e:
208
+ logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
209
+ except Exception as e:
210
+ logger.error(f"Error processing row {row_num}: {str(e)}")
211
+ finally:
212
+ row_num += 1
213
+
214
+ # Yield remaining results
215
+ if batch:
216
+ logger.info(f"Processed final batch of {len(batch)} phones (total: {total_processed})")
217
+ yield batch
218
+
219
+ logger.info(f"Stream processing completed. Total processed: {total_processed}")
220
+
221
+ def generate_results_csv_stream(
222
+ self,
223
+ original_lines: Iterable[str],
224
+ results_stream: Iterator[List[PhoneVerification]],
225
+ phone_column: str = "phone"
226
+ ) -> Iterator[str]:
227
+ """
228
+ Generate CSV results as a stream, line by line.
229
+ Memory-efficient for large files.
230
+
231
+ Args:
232
+ original_lines: Iterator of original CSV lines
233
+ results_stream: Iterator of batched PhoneVerification results
234
+ phone_column: Column name containing phone numbers
235
+
236
+ Yields:
237
+ CSV lines with verification results added
238
+ """
239
+ lines_iter = iter(original_lines)
240
+
241
+ # Read and yield modified header
242
+ try:
243
+ header_line = next(lines_iter)
244
+ reader = csv.DictReader(StringIO(header_line))
245
+ headers = reader.fieldnames or []
246
+
247
+ # Add new columns
248
+ output_headers = headers + ["line_type", "dnc", "cached"]
249
+ yield ','.join(output_headers) + '\n'
250
+
251
+ phone_col = self._find_phone_column(headers, phone_column)
252
+
253
+ except StopIteration:
254
+ return
255
+
256
+ # Build results lookup from stream
257
+ results_map = {}
258
+ for batch in results_stream:
259
+ for result in batch:
260
+ results_map[result.phone_number] = result
261
+
262
+ # Reset lines iterator
263
+ lines_iter = iter(original_lines)
264
+ next(lines_iter) # Skip header
265
+
266
+ # Process and yield data lines
267
+ for line in lines_iter:
268
+ if not line.strip():
269
+ continue
270
+
271
+ row = next(csv.DictReader(StringIO(line), fieldnames=headers))
272
+ phone = row.get(phone_col, "").strip()
273
+
274
+ # Add verification results
275
+ try:
276
+ normalized = self.verifier.normalize_phone(phone)
277
+ if normalized in results_map:
278
+ result = results_map[normalized]
279
+ row["line_type"] = result.line_type.value
280
+ row["dnc"] = "true" if result.dnc else "false"
281
+ row["cached"] = "true" if result.cached else "false"
282
+ else:
283
+ row["line_type"] = "unknown"
284
+ row["dnc"] = ""
285
+ row["cached"] = ""
286
+ except:
287
+ row["line_type"] = "invalid"
288
+ row["dnc"] = ""
289
+ row["cached"] = ""
290
+
291
+ # Write row
292
+ output = StringIO()
293
+ writer = csv.DictWriter(output, fieldnames=output_headers)
294
+ writer.writerow(row)
295
+ yield output.getvalue()
@@ -0,0 +1,84 @@
1
+ """
2
+ Phone verification logic - checks line type and DNC status
3
+ """
4
+ import os
5
+ from datetime import datetime, timezone
6
+ from typing import Optional
7
+ import phonenumbers
8
+ from aws_lambda_powertools import Logger
9
+ from .models import PhoneVerification, LineType, VerificationSource
10
+ from .cache import DynamoDBCache
11
+ from ..providers import VerificationProvider, StubProvider
12
+
13
+ logger = Logger()
14
+
15
+
16
+ class PhoneVerifier:
17
+ """Verifies phone numbers for line type and DNC status"""
18
+
19
+ def __init__(self, cache: DynamoDBCache, provider: Optional[VerificationProvider] = None):
20
+ """
21
+ Initialize phone verifier.
22
+
23
+ Args:
24
+ cache: DynamoDB cache for storing results
25
+ provider: Verification provider (defaults to StubProvider)
26
+ """
27
+ self.cache = cache
28
+ self.provider = provider or StubProvider()
29
+
30
+ def normalize_phone(self, phone: str) -> str:
31
+ """Normalize phone to E.164 format"""
32
+ try:
33
+ # Parse with US as default country
34
+ parsed = phonenumbers.parse(phone, "US")
35
+ if not phonenumbers.is_valid_number(parsed):
36
+ raise ValueError(f"Invalid phone number: {phone}")
37
+
38
+ # Format as E.164
39
+ return phonenumbers.format_number(parsed, phonenumbers.PhoneNumberFormat.E164)
40
+ except Exception as e:
41
+ logger.error(f"Phone normalization failed: {str(e)}")
42
+ raise ValueError(f"Invalid phone format: {phone}")
43
+
44
+ def verify(self, phone: str) -> PhoneVerification:
45
+ """Verify phone number for line type and DNC status"""
46
+ normalized = self.normalize_phone(phone)
47
+
48
+ # Check cache first
49
+ cached = self.cache.get(normalized)
50
+ if cached:
51
+ return cached
52
+
53
+ # Use provider to verify
54
+ line_type, dnc_status = self.provider.verify_phone(normalized)
55
+
56
+ result = PhoneVerification(
57
+ phone_number=normalized,
58
+ line_type=line_type,
59
+ dnc=dnc_status,
60
+ cached=False,
61
+ verified_at=datetime.now(timezone.utc),
62
+ source=VerificationSource.API
63
+ )
64
+
65
+ # Store in cache
66
+ self.cache.set(normalized, result)
67
+
68
+ return result
69
+
70
+ def _check_line_type(self, phone: str) -> LineType:
71
+ """
72
+ Check line type (for backwards compatibility with CLI).
73
+ Delegates to provider.
74
+ """
75
+ line_type, _ = self.provider.verify_phone(phone)
76
+ return line_type
77
+
78
+ def _check_dnc(self, phone: str) -> bool:
79
+ """
80
+ Check DNC status (for backwards compatibility with CLI).
81
+ Delegates to provider.
82
+ """
83
+ _, dnc_status = self.provider.verify_phone(phone)
84
+ return dnc_status
@@ -0,0 +1,7 @@
1
+ """
2
+ Verification providers for phone number checking
3
+ """
4
+ from .base import VerificationProvider
5
+ from .stub import StubProvider
6
+
7
+ __all__ = ["VerificationProvider", "StubProvider"]
@@ -0,0 +1,28 @@
1
+ """
2
+ Base protocol for verification providers
3
+ """
4
+ from typing import Protocol, Tuple
5
+ from ..core.models import LineType
6
+
7
+
8
+ class VerificationProvider(Protocol):
9
+ """
10
+ Protocol for phone verification providers.
11
+ All providers must implement this interface.
12
+ """
13
+
14
+ def verify_phone(self, phone: str) -> Tuple[LineType, bool]:
15
+ """
16
+ Verify a phone number's line type and DNC status.
17
+
18
+ Args:
19
+ phone: E.164 formatted phone number
20
+
21
+ Returns:
22
+ Tuple of (line_type, is_on_dnc_list)
23
+
24
+ Raises:
25
+ ValueError: If phone format is invalid
26
+ Exception: For provider-specific errors
27
+ """
28
+ ...
@@ -0,0 +1,87 @@
1
+ """
2
+ External API provider for production phone verification
3
+ """
4
+ import os
5
+ from typing import Tuple, Optional
6
+ import httpx
7
+ from aws_lambda_powertools import Logger
8
+ from ..core.models import LineType
9
+
10
+ logger = Logger()
11
+
12
+
13
+ class ExternalAPIProvider:
14
+ """
15
+ Production provider that calls external verification APIs.
16
+ """
17
+
18
+ def __init__(
19
+ self,
20
+ phone_api_key: Optional[str] = None,
21
+ dnc_api_key: Optional[str] = None,
22
+ timeout: float = 10.0
23
+ ):
24
+ """
25
+ Initialize external API provider.
26
+
27
+ Args:
28
+ phone_api_key: API key for phone line type verification
29
+ dnc_api_key: API key for DNC list checking
30
+ timeout: HTTP request timeout in seconds
31
+ """
32
+ self.phone_api_key = phone_api_key or os.environ.get("PHONE_VERIFY_API_KEY", "")
33
+ self.dnc_api_key = dnc_api_key or os.environ.get("DNC_API_KEY", "")
34
+ self.http_client = httpx.Client(timeout=timeout)
35
+
36
+ def verify_phone(self, phone: str) -> Tuple[LineType, bool]:
37
+ """
38
+ Verify phone using external APIs.
39
+
40
+ Args:
41
+ phone: E.164 formatted phone number
42
+
43
+ Returns:
44
+ Tuple of (line_type, is_on_dnc_list)
45
+
46
+ Raises:
47
+ httpx.HTTPError: For API communication errors
48
+ ValueError: For invalid responses
49
+ """
50
+ line_type = self._check_line_type(phone)
51
+ is_dnc = self._check_dnc(phone)
52
+ return line_type, is_dnc
53
+
54
+ def _check_line_type(self, phone: str) -> LineType:
55
+ """
56
+ Check line type via external API.
57
+
58
+ TODO: Implement actual API call
59
+ - Use self.phone_api_key for authentication
60
+ - Parse API response
61
+ - Map to LineType enum
62
+ """
63
+ logger.info(f"External line type check for {phone[:6]}***")
64
+
65
+ # Placeholder implementation
66
+ # In production, this would make an actual API call
67
+ raise NotImplementedError("External line type API not yet configured")
68
+
69
+ def _check_dnc(self, phone: str) -> bool:
70
+ """
71
+ Check DNC status via external API.
72
+
73
+ TODO: Implement actual API call
74
+ - Use self.dnc_api_key for authentication
75
+ - Parse API response
76
+ - Return boolean status
77
+ """
78
+ logger.info(f"External DNC check for {phone[:6]}***")
79
+
80
+ # Placeholder implementation
81
+ # In production, this would make an actual API call
82
+ raise NotImplementedError("External DNC API not yet configured")
83
+
84
+ def __del__(self):
85
+ """Cleanup HTTP client"""
86
+ if hasattr(self, 'http_client'):
87
+ self.http_client.close()
@@ -0,0 +1,48 @@
1
+ """
2
+ Stub provider for development and testing
3
+ """
4
+ from typing import Tuple
5
+ from aws_lambda_powertools import Logger
6
+ from ..core.models import LineType
7
+
8
+ logger = Logger()
9
+
10
+
11
+ class StubProvider:
12
+ """
13
+ Stub implementation for development and testing.
14
+ Uses deterministic rules based on phone number digits.
15
+ """
16
+
17
+ def verify_phone(self, phone: str) -> Tuple[LineType, bool]:
18
+ """
19
+ Verify using stub logic based on last digit.
20
+
21
+ Line type:
22
+ - Ends in 2 or 0: LANDLINE
23
+ - Otherwise: MOBILE
24
+
25
+ DNC status:
26
+ - Ends in 1 or 0: on DNC list
27
+ - Otherwise: not on DNC
28
+
29
+ Args:
30
+ phone: E.164 formatted phone number
31
+
32
+ Returns:
33
+ Tuple of (line_type, is_on_dnc_list)
34
+ """
35
+ logger.info(f"Stub verification for {phone[:6]}***")
36
+
37
+ last_digit = phone[-1] if phone else '5'
38
+
39
+ # Determine line type
40
+ if last_digit in ['2', '0']:
41
+ line_type = LineType.LANDLINE
42
+ else:
43
+ line_type = LineType.MOBILE
44
+
45
+ # Determine DNC status
46
+ is_dnc = last_digit in ['1', '0']
47
+
48
+ return line_type, is_dnc
@@ -1,135 +0,0 @@
1
- """
2
- Bulk CSV processing for phone verification
3
- """
4
- import csv
5
- from typing import List, Optional
6
- from aws_lambda_powertools import Logger
7
- from .models import PhoneVerification
8
- from .verifier import PhoneVerifier
9
-
10
- logger = Logger()
11
-
12
-
13
- class BulkProcessor:
14
- """Process CSV files for bulk phone verification"""
15
-
16
- def __init__(self, verifier: PhoneVerifier):
17
- self.verifier = verifier
18
-
19
- def process_csv_sync(self, file_path: str, phone_column: str = "phone") -> List[PhoneVerification]:
20
- """
21
- Process CSV file synchronously.
22
- Returns list of verification results.
23
- """
24
- results = []
25
-
26
- try:
27
- with open(file_path, 'r', encoding='utf-8-sig') as f:
28
- reader = csv.DictReader(f)
29
-
30
- # Find phone column (case-insensitive)
31
- headers = reader.fieldnames or []
32
- phone_col = self._find_phone_column(headers, phone_column)
33
-
34
- if not phone_col:
35
- raise ValueError(f"Phone column '{phone_column}' not found in CSV")
36
-
37
- for row_num, row in enumerate(reader, start=2): # Start at 2 (header is 1)
38
- try:
39
- phone = row.get(phone_col, "").strip()
40
- if not phone:
41
- logger.warning(f"Empty phone at row {row_num}")
42
- continue
43
-
44
- # Verify phone
45
- result = self.verifier.verify_sync(phone)
46
- results.append(result)
47
-
48
- # Log progress every 100 rows
49
- if len(results) % 100 == 0:
50
- logger.info(f"Processed {len(results)} phones")
51
-
52
- except ValueError as e:
53
- logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
54
- continue
55
- except Exception as e:
56
- logger.error(f"Error processing row {row_num}: {str(e)}")
57
- continue
58
-
59
- logger.info(f"Completed processing {len(results)} valid phones")
60
-
61
- except Exception as e:
62
- logger.error(f"CSV processing failed: {str(e)}")
63
- raise
64
-
65
- return results
66
-
67
- def _find_phone_column(self, headers: List[str], preferred: str) -> Optional[str]:
68
- """Find phone column in headers (case-insensitive)"""
69
- # First try exact match
70
- for header in headers:
71
- if header.lower() == preferred.lower():
72
- return header
73
-
74
- # Common phone column names
75
- phone_patterns = [
76
- "phone", "phone_number", "phonenumber", "mobile",
77
- "cell", "telephone", "tel", "number", "contact"
78
- ]
79
-
80
- for header in headers:
81
- header_lower = header.lower()
82
- for pattern in phone_patterns:
83
- if pattern in header_lower:
84
- logger.info(f"Using column '{header}' as phone column")
85
- return header
86
-
87
- return None
88
-
89
- def generate_results_csv(
90
- self,
91
- original_path: str,
92
- results: List[PhoneVerification],
93
- output_path: str
94
- ) -> None:
95
- """
96
- Generate CSV with original data plus verification results.
97
- Adds columns: line_type, dnc, cached
98
- """
99
- # Create lookup dict
100
- results_map = {r.phone_number: r for r in results}
101
-
102
- with open(original_path, 'r', encoding='utf-8-sig') as infile:
103
- reader = csv.DictReader(infile)
104
- headers = reader.fieldnames or []
105
-
106
- # Add new columns
107
- output_headers = headers + ["line_type", "dnc", "cached"]
108
-
109
- with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
110
- writer = csv.DictWriter(outfile, fieldnames=output_headers)
111
- writer.writeheader()
112
-
113
- phone_col = self._find_phone_column(headers, "phone")
114
-
115
- for row in reader:
116
- phone = row.get(phone_col, "").strip()
117
-
118
- # Try to normalize for lookup
119
- try:
120
- normalized = self.verifier.normalize_phone(phone)
121
- if normalized in results_map:
122
- result = results_map[normalized]
123
- row["line_type"] = result.line_type
124
- row["dnc"] = "true" if result.dnc else "false"
125
- row["cached"] = "true" if result.cached else "false"
126
- else:
127
- row["line_type"] = "unknown"
128
- row["dnc"] = ""
129
- row["cached"] = ""
130
- except:
131
- row["line_type"] = "invalid"
132
- row["dnc"] = ""
133
- row["cached"] = ""
134
-
135
- writer.writerow(row)
@@ -1,95 +0,0 @@
1
- """
2
- Phone verification logic - checks line type and DNC status
3
- """
4
- import os
5
- import re
6
- from datetime import datetime, timezone
7
- from typing import Optional
8
- import httpx
9
- import phonenumbers
10
- from aws_lambda_powertools import Logger
11
- from .models import PhoneVerification, LineType, VerificationSource
12
- from .cache import DynamoDBCache
13
-
14
- logger = Logger()
15
-
16
-
17
- class PhoneVerifier:
18
- """Verifies phone numbers for line type and DNC status"""
19
-
20
- def __init__(self, cache: DynamoDBCache):
21
- self.cache = cache
22
- self.dnc_api_key = os.environ.get("DNC_API_KEY", "")
23
- self.phone_api_key = os.environ.get("PHONE_VERIFY_API_KEY", "")
24
- self.http_client = httpx.Client(timeout=10.0)
25
-
26
- def normalize_phone(self, phone: str) -> str:
27
- """Normalize phone to E.164 format"""
28
- try:
29
- # Parse with US as default country
30
- parsed = phonenumbers.parse(phone, "US")
31
- if not phonenumbers.is_valid_number(parsed):
32
- raise ValueError(f"Invalid phone number: {phone}")
33
-
34
- # Format as E.164
35
- return phonenumbers.format_number(parsed, phonenumbers.PhoneNumberFormat.E164)
36
- except Exception as e:
37
- logger.error(f"Phone normalization failed: {str(e)}")
38
- raise ValueError(f"Invalid phone format: {phone}")
39
-
40
- def verify_sync(self, phone: str) -> PhoneVerification:
41
- """Synchronous verification for Lambda handlers"""
42
- normalized = self.normalize_phone(phone)
43
-
44
- # Check cache first
45
- cached = self.cache.get(normalized)
46
- if cached:
47
- return cached
48
-
49
- # Call external APIs
50
- line_type = self._check_line_type_sync(normalized)
51
- dnc_status = self._check_dnc_sync(normalized)
52
-
53
- result = PhoneVerification(
54
- phone_number=normalized,
55
- line_type=line_type,
56
- dnc=dnc_status,
57
- cached=False,
58
- verified_at=datetime.now(timezone.utc),
59
- source=VerificationSource.API
60
- )
61
-
62
- # Store in cache
63
- self.cache.set(normalized, result)
64
-
65
- return result
66
-
67
- def _check_line_type_sync(self, phone: str) -> LineType:
68
- """Check if phone is mobile/landline/voip"""
69
- # TODO: Implement actual API call to phone verification service
70
- # Would use self.phone_api_key to authenticate
71
- logger.info(f"Checking line type for {phone[:6]}***")
72
-
73
- # Stub implementation based on last digit
74
- last_digit = phone[-1] if phone else '5'
75
- if last_digit in ['2', '0']:
76
- return LineType.LANDLINE
77
- else:
78
- return LineType.MOBILE
79
-
80
- def _check_dnc_sync(self, phone: str) -> bool:
81
- """Check if phone is on DNC list"""
82
- # TODO: Implement actual DNC API call
83
- # Would use self.dnc_api_key or os.environ.get("DNC_CHECK_API_KEY")
84
- logger.info(f"Checking DNC status for {phone[:6]}***")
85
-
86
- # Stub implementation based on last digit:
87
- # - Ends in 1 or 0: on DNC list
88
- # - Otherwise: not on DNC
89
- last_digit = phone[-1] if phone else '5'
90
- return last_digit in ['1', '0']
91
-
92
- def __del__(self):
93
- """Cleanup HTTP client"""
94
- if hasattr(self, 'http_client'):
95
- self.http_client.close()