ai-lls-lib 1.0.0__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/PKG-INFO +60 -8
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/README.md +59 -7
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/pyproject.toml +1 -1
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/__init__.py +1 -1
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/verify.py +3 -3
- ai_lls_lib-1.1.0/src/ai_lls_lib/core/processor.py +295 -0
- ai_lls_lib-1.1.0/src/ai_lls_lib/core/verifier.py +84 -0
- ai_lls_lib-1.1.0/src/ai_lls_lib/providers/__init__.py +7 -0
- ai_lls_lib-1.1.0/src/ai_lls_lib/providers/base.py +28 -0
- ai_lls_lib-1.1.0/src/ai_lls_lib/providers/external.py +87 -0
- ai_lls_lib-1.1.0/src/ai_lls_lib/providers/stub.py +48 -0
- ai_lls_lib-1.0.0/src/ai_lls_lib/core/processor.py +0 -135
- ai_lls_lib-1.0.0/src/ai_lls_lib/core/verifier.py +0 -95
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/__init__.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/__main__.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/aws_client.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/__init__.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/admin.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/cache.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/cli/commands/test_stack.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/core/__init__.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/core/cache.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/core/models.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/testing/__init__.py +0 -0
- {ai_lls_lib-1.0.0 → ai_lls_lib-1.1.0}/src/ai_lls_lib/testing/fixtures.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: ai-lls-lib
|
3
|
-
Version: 1.0.0
|
3
|
+
Version: 1.1.0
|
4
4
|
Summary: Landline Scrubber core library - phone verification and DNC checking
|
5
5
|
Author: LandlineScrubber Team
|
6
6
|
Requires-Python: >=3.12,<4.0
|
@@ -20,6 +20,21 @@ Description-Content-Type: text/markdown
|
|
20
20
|
|
21
21
|
Core business logic library and CLI tools for Landline Scrubber - phone verification and DNC checking.
|
22
22
|
|
23
|
+
## Version 2.1.0 - Streaming & Provider Architecture
|
24
|
+
|
25
|
+
New features:
|
26
|
+
- **Streaming support** for large CSV files to reduce memory usage
|
27
|
+
- **Provider architecture** for clean separation of verification logic
|
28
|
+
- **Contract tests** ensuring all providers behave consistently
|
29
|
+
|
30
|
+
## Version 2.0.0 - Breaking Changes
|
31
|
+
|
32
|
+
This is a greenfield rewrite with no backwards compatibility:
|
33
|
+
- All file-based CSV processing replaced with text-based methods
|
34
|
+
- Removed `_sync` suffix from all methods (everything is sync)
|
35
|
+
- `process_csv_sync(file_path)` → `process_csv(csv_text)`
|
36
|
+
- `generate_results_csv(...)` now returns CSV string instead of writing to file
|
37
|
+
|
23
38
|
## Features
|
24
39
|
|
25
40
|
- Phone number normalization (E.164 format)
|
@@ -50,7 +65,7 @@ from ai_lls_lib import PhoneVerifier, DynamoDBCache
|
|
50
65
|
cache = DynamoDBCache(table_name="phone-cache")
|
51
66
|
verifier = PhoneVerifier(cache)
|
52
67
|
|
53
|
-
result = verifier.verify_sync("+15551234567")
|
68
|
+
result = verifier.verify("+15551234567")
|
54
69
|
print(f"Line type: {result.line_type}")
|
55
70
|
print(f"DNC: {result.dnc}")
|
56
71
|
print(f"From cache: {result.cached}")
|
@@ -65,12 +80,49 @@ cache = DynamoDBCache(table_name="phone-cache")
|
|
65
80
|
verifier = PhoneVerifier(cache)
|
66
81
|
processor = BulkProcessor(verifier)
|
67
82
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
)
|
83
|
+
# Process CSV text content
|
84
|
+
csv_text = "name,phone\nJohn,+15551234567\nJane,+15551234568"
|
85
|
+
results = processor.process_csv(csv_text)
|
86
|
+
|
87
|
+
# Generate results CSV
|
88
|
+
results_csv = processor.generate_results_csv(csv_text, results)
|
89
|
+
print(results_csv) # CSV string with added line_type, dnc, cached columns
|
90
|
+
```
|
91
|
+
|
92
|
+
### Streaming Large Files
|
93
|
+
|
94
|
+
For memory-efficient processing of large CSV files:
|
95
|
+
|
96
|
+
```python
|
97
|
+
from ai_lls_lib import BulkProcessor, PhoneVerifier, DynamoDBCache
|
98
|
+
|
99
|
+
cache = DynamoDBCache(table_name="phone-cache")
|
100
|
+
verifier = PhoneVerifier(cache)
|
101
|
+
processor = BulkProcessor(verifier)
|
102
|
+
|
103
|
+
# Process CSV as a stream, yielding batches
|
104
|
+
csv_lines = open('large_file.csv').readlines()
|
105
|
+
for batch in processor.process_csv_stream(csv_lines, batch_size=100):
|
106
|
+
print(f"Processed batch of {len(batch)} phones")
|
107
|
+
# Each batch is a list of PhoneVerification objects
|
108
|
+
```
|
109
|
+
|
110
|
+
### Custom Verification Providers
|
111
|
+
|
112
|
+
Use different verification providers based on your needs:
|
113
|
+
|
114
|
+
```python
|
115
|
+
from ai_lls_lib import PhoneVerifier, DynamoDBCache
|
116
|
+
from ai_lls_lib.providers import StubProvider
|
117
|
+
|
118
|
+
# Use stub provider for testing
|
119
|
+
cache = DynamoDBCache(table_name="phone-cache")
|
120
|
+
provider = StubProvider() # Deterministic testing provider
|
121
|
+
verifier = PhoneVerifier(cache, provider=provider)
|
122
|
+
|
123
|
+
# When external APIs are ready, switch to:
|
124
|
+
# from ai_lls_lib.providers.external import ExternalAPIProvider
|
125
|
+
# provider = ExternalAPIProvider(phone_api_key="...", dnc_api_key="...")
|
74
126
|
```
|
75
127
|
|
76
128
|
## CLI Usage
|
@@ -2,6 +2,21 @@
|
|
2
2
|
|
3
3
|
Core business logic library and CLI tools for Landline Scrubber - phone verification and DNC checking.
|
4
4
|
|
5
|
+
## Version 2.1.0 - Streaming & Provider Architecture
|
6
|
+
|
7
|
+
New features:
|
8
|
+
- **Streaming support** for large CSV files to reduce memory usage
|
9
|
+
- **Provider architecture** for clean separation of verification logic
|
10
|
+
- **Contract tests** ensuring all providers behave consistently
|
11
|
+
|
12
|
+
## Version 2.0.0 - Breaking Changes
|
13
|
+
|
14
|
+
This is a greenfield rewrite with no backwards compatibility:
|
15
|
+
- All file-based CSV processing replaced with text-based methods
|
16
|
+
- Removed `_sync` suffix from all methods (everything is sync)
|
17
|
+
- `process_csv_sync(file_path)` → `process_csv(csv_text)`
|
18
|
+
- `generate_results_csv(...)` now returns CSV string instead of writing to file
|
19
|
+
|
5
20
|
## Features
|
6
21
|
|
7
22
|
- Phone number normalization (E.164 format)
|
@@ -32,7 +47,7 @@ from ai_lls_lib import PhoneVerifier, DynamoDBCache
|
|
32
47
|
cache = DynamoDBCache(table_name="phone-cache")
|
33
48
|
verifier = PhoneVerifier(cache)
|
34
49
|
|
35
|
-
result = verifier.verify_sync("+15551234567")
|
50
|
+
result = verifier.verify("+15551234567")
|
36
51
|
print(f"Line type: {result.line_type}")
|
37
52
|
print(f"DNC: {result.dnc}")
|
38
53
|
print(f"From cache: {result.cached}")
|
@@ -47,12 +62,49 @@ cache = DynamoDBCache(table_name="phone-cache")
|
|
47
62
|
verifier = PhoneVerifier(cache)
|
48
63
|
processor = BulkProcessor(verifier)
|
49
64
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
)
|
65
|
+
# Process CSV text content
|
66
|
+
csv_text = "name,phone\nJohn,+15551234567\nJane,+15551234568"
|
67
|
+
results = processor.process_csv(csv_text)
|
68
|
+
|
69
|
+
# Generate results CSV
|
70
|
+
results_csv = processor.generate_results_csv(csv_text, results)
|
71
|
+
print(results_csv) # CSV string with added line_type, dnc, cached columns
|
72
|
+
```
|
73
|
+
|
74
|
+
### Streaming Large Files
|
75
|
+
|
76
|
+
For memory-efficient processing of large CSV files:
|
77
|
+
|
78
|
+
```python
|
79
|
+
from ai_lls_lib import BulkProcessor, PhoneVerifier, DynamoDBCache
|
80
|
+
|
81
|
+
cache = DynamoDBCache(table_name="phone-cache")
|
82
|
+
verifier = PhoneVerifier(cache)
|
83
|
+
processor = BulkProcessor(verifier)
|
84
|
+
|
85
|
+
# Process CSV as a stream, yielding batches
|
86
|
+
csv_lines = open('large_file.csv').readlines()
|
87
|
+
for batch in processor.process_csv_stream(csv_lines, batch_size=100):
|
88
|
+
print(f"Processed batch of {len(batch)} phones")
|
89
|
+
# Each batch is a list of PhoneVerification objects
|
90
|
+
```
|
91
|
+
|
92
|
+
### Custom Verification Providers
|
93
|
+
|
94
|
+
Use different verification providers based on your needs:
|
95
|
+
|
96
|
+
```python
|
97
|
+
from ai_lls_lib import PhoneVerifier, DynamoDBCache
|
98
|
+
from ai_lls_lib.providers import StubProvider
|
99
|
+
|
100
|
+
# Use stub provider for testing
|
101
|
+
cache = DynamoDBCache(table_name="phone-cache")
|
102
|
+
provider = StubProvider() # Deterministic testing provider
|
103
|
+
verifier = PhoneVerifier(cache, provider=provider)
|
104
|
+
|
105
|
+
# When external APIs are ready, switch to:
|
106
|
+
# from ai_lls_lib.providers.external import ExternalAPIProvider
|
107
|
+
# provider = ExternalAPIProvider(phone_api_key="...", dnc_api_key="...")
|
56
108
|
```
|
57
109
|
|
58
110
|
## CLI Usage
|
@@ -13,7 +13,7 @@ from ai_lls_lib.core.verifier import PhoneVerifier
|
|
13
13
|
from ai_lls_lib.core.processor import BulkProcessor
|
14
14
|
from ai_lls_lib.core.cache import DynamoDBCache
|
15
15
|
|
16
|
-
__version__ = "1.0.0"
|
16
|
+
__version__ = "1.1.0"
|
17
17
|
__all__ = [
|
18
18
|
"PhoneVerification",
|
19
19
|
"BulkJob",
|
@@ -35,8 +35,8 @@ def verify_phone(phone_number, stack, skip_cache, profile, region):
|
|
35
35
|
if skip_cache:
|
36
36
|
# Force fresh lookup
|
37
37
|
normalized = verifier.normalize_phone(phone_number)
|
38
|
-
line_type = verifier.
|
39
|
-
dnc = verifier.
|
38
|
+
line_type = verifier._check_line_type(normalized)
|
39
|
+
dnc = verifier._check_dnc(normalized)
|
40
40
|
result = {
|
41
41
|
"phone_number": normalized,
|
42
42
|
"line_type": line_type,
|
@@ -46,7 +46,7 @@ def verify_phone(phone_number, stack, skip_cache, profile, region):
|
|
46
46
|
"source": "cli-direct"
|
47
47
|
}
|
48
48
|
else:
|
49
|
-
result = verifier.verify_sync(phone_number)
|
49
|
+
result = verifier.verify(phone_number)
|
50
50
|
result = result.dict() if hasattr(result, 'dict') else result
|
51
51
|
|
52
52
|
# Display results
|
@@ -0,0 +1,295 @@
|
|
1
|
+
"""
|
2
|
+
Bulk CSV processing for phone verification
|
3
|
+
"""
|
4
|
+
import csv
|
5
|
+
from io import StringIO
|
6
|
+
from typing import List, Optional, Iterator, Iterable
|
7
|
+
from aws_lambda_powertools import Logger
|
8
|
+
from .models import PhoneVerification
|
9
|
+
from .verifier import PhoneVerifier
|
10
|
+
|
11
|
+
logger = Logger()
|
12
|
+
|
13
|
+
|
14
|
+
class BulkProcessor:
|
15
|
+
"""Process CSV files for bulk phone verification"""
|
16
|
+
|
17
|
+
def __init__(self, verifier: PhoneVerifier):
|
18
|
+
self.verifier = verifier
|
19
|
+
|
20
|
+
def process_csv(self, csv_text: str, phone_column: str = "phone") -> List[PhoneVerification]:
|
21
|
+
"""
|
22
|
+
Process CSV text content.
|
23
|
+
Returns list of verification results.
|
24
|
+
"""
|
25
|
+
results = []
|
26
|
+
|
27
|
+
try:
|
28
|
+
# Use StringIO to parse CSV text
|
29
|
+
csv_file = StringIO(csv_text)
|
30
|
+
reader = csv.DictReader(csv_file)
|
31
|
+
|
32
|
+
# Find phone column (case-insensitive)
|
33
|
+
headers = reader.fieldnames or []
|
34
|
+
phone_col = self._find_phone_column(headers, phone_column)
|
35
|
+
|
36
|
+
if not phone_col:
|
37
|
+
raise ValueError(f"Phone column '{phone_column}' not found in CSV")
|
38
|
+
|
39
|
+
for row_num, row in enumerate(reader, start=2): # Start at 2 (header is 1)
|
40
|
+
try:
|
41
|
+
phone = row.get(phone_col, "").strip()
|
42
|
+
if not phone:
|
43
|
+
logger.warning(f"Empty phone at row {row_num}")
|
44
|
+
continue
|
45
|
+
|
46
|
+
# Verify phone
|
47
|
+
result = self.verifier.verify(phone)
|
48
|
+
results.append(result)
|
49
|
+
|
50
|
+
# Log progress every 100 rows
|
51
|
+
if len(results) % 100 == 0:
|
52
|
+
logger.info(f"Processed {len(results)} phones")
|
53
|
+
|
54
|
+
except ValueError as e:
|
55
|
+
logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
|
56
|
+
continue
|
57
|
+
except Exception as e:
|
58
|
+
logger.error(f"Error processing row {row_num}: {str(e)}")
|
59
|
+
continue
|
60
|
+
|
61
|
+
logger.info(f"Completed processing {len(results)} valid phones")
|
62
|
+
|
63
|
+
except Exception as e:
|
64
|
+
logger.error(f"CSV processing failed: {str(e)}")
|
65
|
+
raise
|
66
|
+
|
67
|
+
return results
|
68
|
+
|
69
|
+
def _find_phone_column(self, headers: List[str], preferred: str) -> Optional[str]:
|
70
|
+
"""Find phone column in headers (case-insensitive)"""
|
71
|
+
# First try exact match
|
72
|
+
for header in headers:
|
73
|
+
if header.lower() == preferred.lower():
|
74
|
+
return header
|
75
|
+
|
76
|
+
# Common phone column names
|
77
|
+
phone_patterns = [
|
78
|
+
"phone", "phone_number", "phonenumber", "mobile",
|
79
|
+
"cell", "telephone", "tel", "number", "contact"
|
80
|
+
]
|
81
|
+
|
82
|
+
for header in headers:
|
83
|
+
header_lower = header.lower()
|
84
|
+
for pattern in phone_patterns:
|
85
|
+
if pattern in header_lower:
|
86
|
+
logger.info(f"Using column '{header}' as phone column")
|
87
|
+
return header
|
88
|
+
|
89
|
+
return None
|
90
|
+
|
91
|
+
def generate_results_csv(
|
92
|
+
self,
|
93
|
+
original_csv_text: str,
|
94
|
+
results: List[PhoneVerification]
|
95
|
+
) -> str:
|
96
|
+
"""
|
97
|
+
Generate CSV with original data plus verification results.
|
98
|
+
Adds columns: line_type, dnc, cached
|
99
|
+
Returns CSV text string.
|
100
|
+
"""
|
101
|
+
# Create lookup dict
|
102
|
+
results_map = {r.phone_number: r for r in results}
|
103
|
+
|
104
|
+
# Parse original CSV
|
105
|
+
input_file = StringIO(original_csv_text)
|
106
|
+
reader = csv.DictReader(input_file)
|
107
|
+
headers = reader.fieldnames or []
|
108
|
+
|
109
|
+
# Add new columns
|
110
|
+
output_headers = headers + ["line_type", "dnc", "cached"]
|
111
|
+
|
112
|
+
# Create output CSV in memory
|
113
|
+
output = StringIO()
|
114
|
+
writer = csv.DictWriter(output, fieldnames=output_headers)
|
115
|
+
writer.writeheader()
|
116
|
+
|
117
|
+
phone_col = self._find_phone_column(headers, "phone")
|
118
|
+
|
119
|
+
for row in reader:
|
120
|
+
phone = row.get(phone_col, "").strip()
|
121
|
+
|
122
|
+
# Try to normalize for lookup
|
123
|
+
try:
|
124
|
+
normalized = self.verifier.normalize_phone(phone)
|
125
|
+
if normalized in results_map:
|
126
|
+
result = results_map[normalized]
|
127
|
+
row["line_type"] = result.line_type.value
|
128
|
+
row["dnc"] = "true" if result.dnc else "false"
|
129
|
+
row["cached"] = "true" if result.cached else "false"
|
130
|
+
else:
|
131
|
+
row["line_type"] = "unknown"
|
132
|
+
row["dnc"] = ""
|
133
|
+
row["cached"] = ""
|
134
|
+
except:
|
135
|
+
row["line_type"] = "invalid"
|
136
|
+
row["dnc"] = ""
|
137
|
+
row["cached"] = ""
|
138
|
+
|
139
|
+
writer.writerow(row)
|
140
|
+
|
141
|
+
# Return CSV text
|
142
|
+
return output.getvalue()
|
143
|
+
|
144
|
+
def process_csv_stream(
|
145
|
+
self,
|
146
|
+
lines: Iterable[str],
|
147
|
+
phone_column: str = "phone",
|
148
|
+
batch_size: int = 100
|
149
|
+
) -> Iterator[List[PhoneVerification]]:
|
150
|
+
"""
|
151
|
+
Process CSV lines as a stream, yielding batches of results.
|
152
|
+
Memory-efficient for large files.
|
153
|
+
|
154
|
+
Args:
|
155
|
+
lines: Iterator of CSV lines (including header)
|
156
|
+
phone_column: Column name containing phone numbers
|
157
|
+
batch_size: Number of results to accumulate before yielding
|
158
|
+
|
159
|
+
Yields:
|
160
|
+
Batches of PhoneVerification results
|
161
|
+
"""
|
162
|
+
lines_list = list(lines) # Need to iterate twice - once for headers, once for data
|
163
|
+
|
164
|
+
if not lines_list:
|
165
|
+
logger.error("Empty CSV stream")
|
166
|
+
return
|
167
|
+
|
168
|
+
# Parse header
|
169
|
+
header_line = lines_list[0]
|
170
|
+
reader = csv.DictReader(StringIO(header_line))
|
171
|
+
headers = reader.fieldnames or []
|
172
|
+
phone_col = self._find_phone_column(headers, phone_column)
|
173
|
+
|
174
|
+
if not phone_col:
|
175
|
+
raise ValueError(f"Phone column '{phone_column}' not found in CSV")
|
176
|
+
|
177
|
+
batch = []
|
178
|
+
row_num = 2 # Start at 2 (header is 1)
|
179
|
+
total_processed = 0
|
180
|
+
|
181
|
+
# Process data lines
|
182
|
+
for line in lines_list[1:]:
|
183
|
+
if not line.strip():
|
184
|
+
continue
|
185
|
+
|
186
|
+
try:
|
187
|
+
# Parse single line
|
188
|
+
row = next(csv.DictReader(StringIO(line), fieldnames=headers))
|
189
|
+
phone = row.get(phone_col, "").strip()
|
190
|
+
|
191
|
+
if not phone:
|
192
|
+
logger.warning(f"Empty phone at row {row_num}")
|
193
|
+
row_num += 1
|
194
|
+
continue
|
195
|
+
|
196
|
+
# Verify phone
|
197
|
+
result = self.verifier.verify(phone)
|
198
|
+
batch.append(result)
|
199
|
+
total_processed += 1
|
200
|
+
|
201
|
+
# Yield batch when full
|
202
|
+
if len(batch) >= batch_size:
|
203
|
+
logger.info(f"Processed batch of {len(batch)} phones (total: {total_processed})")
|
204
|
+
yield batch
|
205
|
+
batch = []
|
206
|
+
|
207
|
+
except ValueError as e:
|
208
|
+
logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
|
209
|
+
except Exception as e:
|
210
|
+
logger.error(f"Error processing row {row_num}: {str(e)}")
|
211
|
+
finally:
|
212
|
+
row_num += 1
|
213
|
+
|
214
|
+
# Yield remaining results
|
215
|
+
if batch:
|
216
|
+
logger.info(f"Processed final batch of {len(batch)} phones (total: {total_processed})")
|
217
|
+
yield batch
|
218
|
+
|
219
|
+
logger.info(f"Stream processing completed. Total processed: {total_processed}")
|
220
|
+
|
221
|
+
def generate_results_csv_stream(
|
222
|
+
self,
|
223
|
+
original_lines: Iterable[str],
|
224
|
+
results_stream: Iterator[List[PhoneVerification]],
|
225
|
+
phone_column: str = "phone"
|
226
|
+
) -> Iterator[str]:
|
227
|
+
"""
|
228
|
+
Generate CSV results as a stream, line by line.
|
229
|
+
Memory-efficient for large files.
|
230
|
+
|
231
|
+
Args:
|
232
|
+
original_lines: Iterator of original CSV lines
|
233
|
+
results_stream: Iterator of batched PhoneVerification results
|
234
|
+
phone_column: Column name containing phone numbers
|
235
|
+
|
236
|
+
Yields:
|
237
|
+
CSV lines with verification results added
|
238
|
+
"""
|
239
|
+
lines_iter = iter(original_lines)
|
240
|
+
|
241
|
+
# Read and yield modified header
|
242
|
+
try:
|
243
|
+
header_line = next(lines_iter)
|
244
|
+
reader = csv.DictReader(StringIO(header_line))
|
245
|
+
headers = reader.fieldnames or []
|
246
|
+
|
247
|
+
# Add new columns
|
248
|
+
output_headers = headers + ["line_type", "dnc", "cached"]
|
249
|
+
yield ','.join(output_headers) + '\n'
|
250
|
+
|
251
|
+
phone_col = self._find_phone_column(headers, phone_column)
|
252
|
+
|
253
|
+
except StopIteration:
|
254
|
+
return
|
255
|
+
|
256
|
+
# Build results lookup from stream
|
257
|
+
results_map = {}
|
258
|
+
for batch in results_stream:
|
259
|
+
for result in batch:
|
260
|
+
results_map[result.phone_number] = result
|
261
|
+
|
262
|
+
# Reset lines iterator
|
263
|
+
lines_iter = iter(original_lines)
|
264
|
+
next(lines_iter) # Skip header
|
265
|
+
|
266
|
+
# Process and yield data lines
|
267
|
+
for line in lines_iter:
|
268
|
+
if not line.strip():
|
269
|
+
continue
|
270
|
+
|
271
|
+
row = next(csv.DictReader(StringIO(line), fieldnames=headers))
|
272
|
+
phone = row.get(phone_col, "").strip()
|
273
|
+
|
274
|
+
# Add verification results
|
275
|
+
try:
|
276
|
+
normalized = self.verifier.normalize_phone(phone)
|
277
|
+
if normalized in results_map:
|
278
|
+
result = results_map[normalized]
|
279
|
+
row["line_type"] = result.line_type.value
|
280
|
+
row["dnc"] = "true" if result.dnc else "false"
|
281
|
+
row["cached"] = "true" if result.cached else "false"
|
282
|
+
else:
|
283
|
+
row["line_type"] = "unknown"
|
284
|
+
row["dnc"] = ""
|
285
|
+
row["cached"] = ""
|
286
|
+
except:
|
287
|
+
row["line_type"] = "invalid"
|
288
|
+
row["dnc"] = ""
|
289
|
+
row["cached"] = ""
|
290
|
+
|
291
|
+
# Write row
|
292
|
+
output = StringIO()
|
293
|
+
writer = csv.DictWriter(output, fieldnames=output_headers)
|
294
|
+
writer.writerow(row)
|
295
|
+
yield output.getvalue()
|
@@ -0,0 +1,84 @@
|
|
1
|
+
"""
|
2
|
+
Phone verification logic - checks line type and DNC status
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from datetime import datetime, timezone
|
6
|
+
from typing import Optional
|
7
|
+
import phonenumbers
|
8
|
+
from aws_lambda_powertools import Logger
|
9
|
+
from .models import PhoneVerification, LineType, VerificationSource
|
10
|
+
from .cache import DynamoDBCache
|
11
|
+
from ..providers import VerificationProvider, StubProvider
|
12
|
+
|
13
|
+
logger = Logger()
|
14
|
+
|
15
|
+
|
16
|
+
class PhoneVerifier:
|
17
|
+
"""Verifies phone numbers for line type and DNC status"""
|
18
|
+
|
19
|
+
def __init__(self, cache: DynamoDBCache, provider: Optional[VerificationProvider] = None):
|
20
|
+
"""
|
21
|
+
Initialize phone verifier.
|
22
|
+
|
23
|
+
Args:
|
24
|
+
cache: DynamoDB cache for storing results
|
25
|
+
provider: Verification provider (defaults to StubProvider)
|
26
|
+
"""
|
27
|
+
self.cache = cache
|
28
|
+
self.provider = provider or StubProvider()
|
29
|
+
|
30
|
+
def normalize_phone(self, phone: str) -> str:
|
31
|
+
"""Normalize phone to E.164 format"""
|
32
|
+
try:
|
33
|
+
# Parse with US as default country
|
34
|
+
parsed = phonenumbers.parse(phone, "US")
|
35
|
+
if not phonenumbers.is_valid_number(parsed):
|
36
|
+
raise ValueError(f"Invalid phone number: {phone}")
|
37
|
+
|
38
|
+
# Format as E.164
|
39
|
+
return phonenumbers.format_number(parsed, phonenumbers.PhoneNumberFormat.E164)
|
40
|
+
except Exception as e:
|
41
|
+
logger.error(f"Phone normalization failed: {str(e)}")
|
42
|
+
raise ValueError(f"Invalid phone format: {phone}")
|
43
|
+
|
44
|
+
def verify(self, phone: str) -> PhoneVerification:
|
45
|
+
"""Verify phone number for line type and DNC status"""
|
46
|
+
normalized = self.normalize_phone(phone)
|
47
|
+
|
48
|
+
# Check cache first
|
49
|
+
cached = self.cache.get(normalized)
|
50
|
+
if cached:
|
51
|
+
return cached
|
52
|
+
|
53
|
+
# Use provider to verify
|
54
|
+
line_type, dnc_status = self.provider.verify_phone(normalized)
|
55
|
+
|
56
|
+
result = PhoneVerification(
|
57
|
+
phone_number=normalized,
|
58
|
+
line_type=line_type,
|
59
|
+
dnc=dnc_status,
|
60
|
+
cached=False,
|
61
|
+
verified_at=datetime.now(timezone.utc),
|
62
|
+
source=VerificationSource.API
|
63
|
+
)
|
64
|
+
|
65
|
+
# Store in cache
|
66
|
+
self.cache.set(normalized, result)
|
67
|
+
|
68
|
+
return result
|
69
|
+
|
70
|
+
def _check_line_type(self, phone: str) -> LineType:
|
71
|
+
"""
|
72
|
+
Check line type (for backwards compatibility with CLI).
|
73
|
+
Delegates to provider.
|
74
|
+
"""
|
75
|
+
line_type, _ = self.provider.verify_phone(phone)
|
76
|
+
return line_type
|
77
|
+
|
78
|
+
def _check_dnc(self, phone: str) -> bool:
|
79
|
+
"""
|
80
|
+
Check DNC status (for backwards compatibility with CLI).
|
81
|
+
Delegates to provider.
|
82
|
+
"""
|
83
|
+
_, dnc_status = self.provider.verify_phone(phone)
|
84
|
+
return dnc_status
|
@@ -0,0 +1,28 @@
|
|
1
|
+
"""
|
2
|
+
Base protocol for verification providers
|
3
|
+
"""
|
4
|
+
from typing import Protocol, Tuple
|
5
|
+
from ..core.models import LineType
|
6
|
+
|
7
|
+
|
8
|
+
class VerificationProvider(Protocol):
|
9
|
+
"""
|
10
|
+
Protocol for phone verification providers.
|
11
|
+
All providers must implement this interface.
|
12
|
+
"""
|
13
|
+
|
14
|
+
def verify_phone(self, phone: str) -> Tuple[LineType, bool]:
|
15
|
+
"""
|
16
|
+
Verify a phone number's line type and DNC status.
|
17
|
+
|
18
|
+
Args:
|
19
|
+
phone: E.164 formatted phone number
|
20
|
+
|
21
|
+
Returns:
|
22
|
+
Tuple of (line_type, is_on_dnc_list)
|
23
|
+
|
24
|
+
Raises:
|
25
|
+
ValueError: If phone format is invalid
|
26
|
+
Exception: For provider-specific errors
|
27
|
+
"""
|
28
|
+
...
|
@@ -0,0 +1,87 @@
|
|
1
|
+
"""
|
2
|
+
External API provider for production phone verification
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from typing import Tuple, Optional
|
6
|
+
import httpx
|
7
|
+
from aws_lambda_powertools import Logger
|
8
|
+
from ..core.models import LineType
|
9
|
+
|
10
|
+
logger = Logger()
|
11
|
+
|
12
|
+
|
13
|
+
class ExternalAPIProvider:
|
14
|
+
"""
|
15
|
+
Production provider that calls external verification APIs.
|
16
|
+
"""
|
17
|
+
|
18
|
+
def __init__(
|
19
|
+
self,
|
20
|
+
phone_api_key: Optional[str] = None,
|
21
|
+
dnc_api_key: Optional[str] = None,
|
22
|
+
timeout: float = 10.0
|
23
|
+
):
|
24
|
+
"""
|
25
|
+
Initialize external API provider.
|
26
|
+
|
27
|
+
Args:
|
28
|
+
phone_api_key: API key for phone line type verification
|
29
|
+
dnc_api_key: API key for DNC list checking
|
30
|
+
timeout: HTTP request timeout in seconds
|
31
|
+
"""
|
32
|
+
self.phone_api_key = phone_api_key or os.environ.get("PHONE_VERIFY_API_KEY", "")
|
33
|
+
self.dnc_api_key = dnc_api_key or os.environ.get("DNC_API_KEY", "")
|
34
|
+
self.http_client = httpx.Client(timeout=timeout)
|
35
|
+
|
36
|
+
def verify_phone(self, phone: str) -> Tuple[LineType, bool]:
|
37
|
+
"""
|
38
|
+
Verify phone using external APIs.
|
39
|
+
|
40
|
+
Args:
|
41
|
+
phone: E.164 formatted phone number
|
42
|
+
|
43
|
+
Returns:
|
44
|
+
Tuple of (line_type, is_on_dnc_list)
|
45
|
+
|
46
|
+
Raises:
|
47
|
+
httpx.HTTPError: For API communication errors
|
48
|
+
ValueError: For invalid responses
|
49
|
+
"""
|
50
|
+
line_type = self._check_line_type(phone)
|
51
|
+
is_dnc = self._check_dnc(phone)
|
52
|
+
return line_type, is_dnc
|
53
|
+
|
54
|
+
def _check_line_type(self, phone: str) -> LineType:
|
55
|
+
"""
|
56
|
+
Check line type via external API.
|
57
|
+
|
58
|
+
TODO: Implement actual API call
|
59
|
+
- Use self.phone_api_key for authentication
|
60
|
+
- Parse API response
|
61
|
+
- Map to LineType enum
|
62
|
+
"""
|
63
|
+
logger.info(f"External line type check for {phone[:6]}***")
|
64
|
+
|
65
|
+
# Placeholder implementation
|
66
|
+
# In production, this would make an actual API call
|
67
|
+
raise NotImplementedError("External line type API not yet configured")
|
68
|
+
|
69
|
+
def _check_dnc(self, phone: str) -> bool:
|
70
|
+
"""
|
71
|
+
Check DNC status via external API.
|
72
|
+
|
73
|
+
TODO: Implement actual API call
|
74
|
+
- Use self.dnc_api_key for authentication
|
75
|
+
- Parse API response
|
76
|
+
- Return boolean status
|
77
|
+
"""
|
78
|
+
logger.info(f"External DNC check for {phone[:6]}***")
|
79
|
+
|
80
|
+
# Placeholder implementation
|
81
|
+
# In production, this would make an actual API call
|
82
|
+
raise NotImplementedError("External DNC API not yet configured")
|
83
|
+
|
84
|
+
def __del__(self):
|
85
|
+
"""Cleanup HTTP client"""
|
86
|
+
if hasattr(self, 'http_client'):
|
87
|
+
self.http_client.close()
|
@@ -0,0 +1,48 @@
|
|
1
|
+
"""
|
2
|
+
Stub provider for development and testing
|
3
|
+
"""
|
4
|
+
from typing import Tuple
|
5
|
+
from aws_lambda_powertools import Logger
|
6
|
+
from ..core.models import LineType
|
7
|
+
|
8
|
+
logger = Logger()
|
9
|
+
|
10
|
+
|
11
|
+
class StubProvider:
|
12
|
+
"""
|
13
|
+
Stub implementation for development and testing.
|
14
|
+
Uses deterministic rules based on phone number digits.
|
15
|
+
"""
|
16
|
+
|
17
|
+
def verify_phone(self, phone: str) -> Tuple[LineType, bool]:
|
18
|
+
"""
|
19
|
+
Verify using stub logic based on last digit.
|
20
|
+
|
21
|
+
Line type:
|
22
|
+
- Ends in 2 or 0: LANDLINE
|
23
|
+
- Otherwise: MOBILE
|
24
|
+
|
25
|
+
DNC status:
|
26
|
+
- Ends in 1 or 0: on DNC list
|
27
|
+
- Otherwise: not on DNC
|
28
|
+
|
29
|
+
Args:
|
30
|
+
phone: E.164 formatted phone number
|
31
|
+
|
32
|
+
Returns:
|
33
|
+
Tuple of (line_type, is_on_dnc_list)
|
34
|
+
"""
|
35
|
+
logger.info(f"Stub verification for {phone[:6]}***")
|
36
|
+
|
37
|
+
last_digit = phone[-1] if phone else '5'
|
38
|
+
|
39
|
+
# Determine line type
|
40
|
+
if last_digit in ['2', '0']:
|
41
|
+
line_type = LineType.LANDLINE
|
42
|
+
else:
|
43
|
+
line_type = LineType.MOBILE
|
44
|
+
|
45
|
+
# Determine DNC status
|
46
|
+
is_dnc = last_digit in ['1', '0']
|
47
|
+
|
48
|
+
return line_type, is_dnc
|
@@ -1,135 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Bulk CSV processing for phone verification
|
3
|
-
"""
|
4
|
-
import csv
|
5
|
-
from typing import List, Optional
|
6
|
-
from aws_lambda_powertools import Logger
|
7
|
-
from .models import PhoneVerification
|
8
|
-
from .verifier import PhoneVerifier
|
9
|
-
|
10
|
-
logger = Logger()
|
11
|
-
|
12
|
-
|
13
|
-
class BulkProcessor:
|
14
|
-
"""Process CSV files for bulk phone verification"""
|
15
|
-
|
16
|
-
def __init__(self, verifier: PhoneVerifier):
|
17
|
-
self.verifier = verifier
|
18
|
-
|
19
|
-
def process_csv_sync(self, file_path: str, phone_column: str = "phone") -> List[PhoneVerification]:
|
20
|
-
"""
|
21
|
-
Process CSV file synchronously.
|
22
|
-
Returns list of verification results.
|
23
|
-
"""
|
24
|
-
results = []
|
25
|
-
|
26
|
-
try:
|
27
|
-
with open(file_path, 'r', encoding='utf-8-sig') as f:
|
28
|
-
reader = csv.DictReader(f)
|
29
|
-
|
30
|
-
# Find phone column (case-insensitive)
|
31
|
-
headers = reader.fieldnames or []
|
32
|
-
phone_col = self._find_phone_column(headers, phone_column)
|
33
|
-
|
34
|
-
if not phone_col:
|
35
|
-
raise ValueError(f"Phone column '{phone_column}' not found in CSV")
|
36
|
-
|
37
|
-
for row_num, row in enumerate(reader, start=2): # Start at 2 (header is 1)
|
38
|
-
try:
|
39
|
-
phone = row.get(phone_col, "").strip()
|
40
|
-
if not phone:
|
41
|
-
logger.warning(f"Empty phone at row {row_num}")
|
42
|
-
continue
|
43
|
-
|
44
|
-
# Verify phone
|
45
|
-
result = self.verifier.verify_sync(phone)
|
46
|
-
results.append(result)
|
47
|
-
|
48
|
-
# Log progress every 100 rows
|
49
|
-
if len(results) % 100 == 0:
|
50
|
-
logger.info(f"Processed {len(results)} phones")
|
51
|
-
|
52
|
-
except ValueError as e:
|
53
|
-
logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
|
54
|
-
continue
|
55
|
-
except Exception as e:
|
56
|
-
logger.error(f"Error processing row {row_num}: {str(e)}")
|
57
|
-
continue
|
58
|
-
|
59
|
-
logger.info(f"Completed processing {len(results)} valid phones")
|
60
|
-
|
61
|
-
except Exception as e:
|
62
|
-
logger.error(f"CSV processing failed: {str(e)}")
|
63
|
-
raise
|
64
|
-
|
65
|
-
return results
|
66
|
-
|
67
|
-
def _find_phone_column(self, headers: List[str], preferred: str) -> Optional[str]:
|
68
|
-
"""Find phone column in headers (case-insensitive)"""
|
69
|
-
# First try exact match
|
70
|
-
for header in headers:
|
71
|
-
if header.lower() == preferred.lower():
|
72
|
-
return header
|
73
|
-
|
74
|
-
# Common phone column names
|
75
|
-
phone_patterns = [
|
76
|
-
"phone", "phone_number", "phonenumber", "mobile",
|
77
|
-
"cell", "telephone", "tel", "number", "contact"
|
78
|
-
]
|
79
|
-
|
80
|
-
for header in headers:
|
81
|
-
header_lower = header.lower()
|
82
|
-
for pattern in phone_patterns:
|
83
|
-
if pattern in header_lower:
|
84
|
-
logger.info(f"Using column '{header}' as phone column")
|
85
|
-
return header
|
86
|
-
|
87
|
-
return None
|
88
|
-
|
89
|
-
def generate_results_csv(
    self,
    original_path: str,
    results: List["PhoneVerification"],
    output_path: str,
    phone_column: str = "phone",
) -> None:
    """
    Generate CSV with original data plus verification results.
    Adds columns: line_type, dnc, cached

    Each input row is copied to the output and annotated as follows:
      * phone normalizes and is present in ``results``: the result's
        ``line_type`` plus ``dnc`` / ``cached`` rendered as "true"/"false"
      * phone normalizes but has no entry in ``results``: line_type "unknown"
      * phone is missing or cannot be normalized: line_type "invalid"
    In the last two cases ``dnc`` and ``cached`` are left empty.

    Args:
        original_path: Path of the CSV that was verified.
        results: Verification results, matched to rows by ``phone_number``.
        output_path: Path of the annotated CSV to write.
        phone_column: Preferred name of the phone column; resolved through
            ``self._find_phone_column``. Defaults to "phone".
    """
    # Lookup by normalized phone number
    results_map = {r.phone_number: r for r in results}

    # newline='' lets the csv module handle line endings inside quoted fields
    with open(original_path, 'r', newline='', encoding='utf-8-sig') as infile:
        reader = csv.DictReader(infile)
        headers = list(reader.fieldnames or [])
        phone_col = self._find_phone_column(headers, phone_column)

        # Add new columns
        output_headers = headers + ["line_type", "dnc", "cached"]

        with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=output_headers)
            writer.writeheader()

            for row in reader:
                # DictReader fills cells missing from short rows with None,
                # so only call strip() on real strings.
                raw = row.get(phone_col) if phone_col is not None else None
                phone = raw.strip() if isinstance(raw, str) else ""

                # Only normalization failures mean "invalid"; anything else
                # (including KeyboardInterrupt) must propagate.
                try:
                    normalized = self.verifier.normalize_phone(phone)
                except ValueError:
                    row["line_type"] = "invalid"
                    row["dnc"] = ""
                    row["cached"] = ""
                else:
                    result = results_map.get(normalized)
                    if result is not None:
                        row["line_type"] = result.line_type
                        row["dnc"] = "true" if result.dnc else "false"
                        row["cached"] = "true" if result.cached else "false"
                    else:
                        row["line_type"] = "unknown"
                        row["dnc"] = ""
                        row["cached"] = ""

                writer.writerow(row)
|
@@ -1,95 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Phone verification logic - checks line type and DNC status
|
3
|
-
"""
|
4
|
-
import os
|
5
|
-
import re
|
6
|
-
from datetime import datetime, timezone
|
7
|
-
from typing import Optional
|
8
|
-
import httpx
|
9
|
-
import phonenumbers
|
10
|
-
from aws_lambda_powertools import Logger
|
11
|
-
from .models import PhoneVerification, LineType, VerificationSource
|
12
|
-
from .cache import DynamoDBCache
|
13
|
-
|
14
|
-
logger = Logger()
|
15
|
-
|
16
|
-
|
17
|
-
class PhoneVerifier:
    """Verifies phone numbers for line type and DNC status.

    Results are looked up in the supplied cache first and stored there after
    a fresh verification. The line-type and DNC checks are currently stubs
    driven by the last digit of the normalized number.

    The instance owns an ``httpx.Client``; release it with ``close()`` or by
    using the verifier as a context manager. ``__del__`` remains only as a
    fallback.
    """

    def __init__(self, cache: DynamoDBCache):
        """Store the cache, read API keys from the environment and open the HTTP client."""
        self.cache = cache
        self.dnc_api_key = os.environ.get("DNC_API_KEY", "")
        self.phone_api_key = os.environ.get("PHONE_VERIFY_API_KEY", "")
        self.http_client = httpx.Client(timeout=10.0)

    def __enter__(self) -> "PhoneVerifier":
        """Return self so the verifier can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP client when leaving the ``with`` block."""
        self.close()

    def close(self) -> None:
        """Close the HTTP client if it was created."""
        # getattr guards against __init__ failing before http_client is set
        client = getattr(self, "http_client", None)
        if client is not None:
            client.close()

    def normalize_phone(self, phone: str) -> str:
        """Normalize phone to E.164 format.

        Raises:
            ValueError: "Invalid phone format" when the input cannot be
                parsed, "Invalid phone number" when it parses but is not a
                valid number.
        """
        try:
            # Parse with US as default country
            parsed = phonenumbers.parse(phone, "US")
            valid = phonenumbers.is_valid_number(parsed)
        except Exception as e:
            logger.error(f"Phone normalization failed: {str(e)}")
            raise ValueError(f"Invalid phone format: {phone}") from e

        # Raised outside the try block so the handler above cannot swallow
        # and re-label this error.
        if not valid:
            logger.error(f"Phone normalization failed: Invalid phone number: {phone}")
            raise ValueError(f"Invalid phone number: {phone}")

        # Format as E.164
        return phonenumbers.format_number(parsed, phonenumbers.PhoneNumberFormat.E164)

    def verify_sync(self, phone: str) -> PhoneVerification:
        """Synchronous verification for Lambda handlers.

        Returns the cached entry for the normalized number when one exists;
        otherwise runs the line-type and DNC checks, caches the new result
        and returns it.

        Raises:
            ValueError: If ``phone`` cannot be normalized.
        """
        normalized = self.normalize_phone(phone)

        # Check cache first
        cached = self.cache.get(normalized)
        if cached:
            return cached

        # Call external APIs
        line_type = self._check_line_type_sync(normalized)
        dnc_status = self._check_dnc_sync(normalized)

        result = PhoneVerification(
            phone_number=normalized,
            line_type=line_type,
            dnc=dnc_status,
            cached=False,
            verified_at=datetime.now(timezone.utc),
            source=VerificationSource.API
        )

        # Store in cache
        self.cache.set(normalized, result)

        return result

    def _check_line_type_sync(self, phone: str) -> LineType:
        """Check if phone is mobile/landline/voip (stub)."""
        # TODO: Implement actual API call to phone verification service
        # Would use self.phone_api_key to authenticate
        logger.info(f"Checking line type for {phone[:6]}***")

        # Stub implementation based on last digit:
        # - Ends in 2 or 0: landline
        # - Otherwise: mobile
        last_digit = phone[-1] if phone else '5'
        if last_digit in ('2', '0'):
            return LineType.LANDLINE
        return LineType.MOBILE

    def _check_dnc_sync(self, phone: str) -> bool:
        """Check if phone is on DNC list (stub)."""
        # TODO: Implement actual DNC API call
        # Would use self.dnc_api_key or os.environ.get("DNC_CHECK_API_KEY")
        logger.info(f"Checking DNC status for {phone[:6]}***")

        # Stub implementation based on last digit:
        # - Ends in 1 or 0: on DNC list
        # - Otherwise: not on DNC
        last_digit = phone[-1] if phone else '5'
        return last_digit in ('1', '0')

    def __del__(self):
        """Cleanup HTTP client (fallback; prefer close() or a ``with`` block)."""
        self.close()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|