ai-lls-lib 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_lls_lib/__init__.py +27 -0
- ai_lls_lib/cli/__init__.py +3 -0
- ai_lls_lib/cli/__main__.py +29 -0
- ai_lls_lib/cli/aws_client.py +115 -0
- ai_lls_lib/cli/commands/__init__.py +3 -0
- ai_lls_lib/cli/commands/admin.py +174 -0
- ai_lls_lib/cli/commands/cache.py +142 -0
- ai_lls_lib/cli/commands/test_stack.py +216 -0
- ai_lls_lib/cli/commands/verify.py +111 -0
- ai_lls_lib/core/__init__.py +3 -0
- ai_lls_lib/core/cache.py +106 -0
- ai_lls_lib/core/models.py +77 -0
- ai_lls_lib/core/processor.py +135 -0
- ai_lls_lib/core/verifier.py +95 -0
- ai_lls_lib/testing/__init__.py +3 -0
- ai_lls_lib/testing/fixtures.py +104 -0
- ai_lls_lib-1.0.0.dist-info/METADATA +220 -0
- ai_lls_lib-1.0.0.dist-info/RECORD +20 -0
- ai_lls_lib-1.0.0.dist-info/WHEEL +4 -0
- ai_lls_lib-1.0.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,216 @@
|
|
1
|
+
"""
|
2
|
+
Test stack management commands
|
3
|
+
"""
|
4
|
+
import click
|
5
|
+
import subprocess
|
6
|
+
import os
|
7
|
+
import time
|
8
|
+
from ai_lls_lib.cli.aws_client import AWSClient
|
9
|
+
|
10
|
+
@click.group(name="test-stack")
def test_stack_group():
    """Test stack management"""
    # Intentionally empty: subcommands (deploy/delete/status/test) attach
    # themselves via @test_stack_group.command decorators below.
    # The docstring doubles as the CLI help text shown by click.
    pass
|
14
|
+
|
15
|
+
@test_stack_group.command(name="deploy")
@click.option("--stack-name", default="ai-lls-lib-test", help="Test stack name")
@click.option("--profile", help="AWS profile")
@click.option("--region", help="AWS region")
def deploy_test_stack(stack_name, profile, region):
    """Deploy the test stack"""
    # Template is expected four directories above this module, i.e. the
    # repository root in a source checkout.
    # NOTE(review): when installed as a wheel this path resolves relative to
    # site-packages — confirm deploy is only ever run from a checkout.
    template_path = os.path.join(
        os.path.dirname(__file__),
        "..", "..", "..", "..", "template.yaml"
    )

    if not os.path.exists(template_path):
        click.echo(f"Test stack template not found at {template_path}")
        click.echo("Creating minimal test stack template...")

        # Create a minimal test stack: a TTL-enabled DynamoDB cache table,
        # a short-lived S3 upload bucket, and an SQS queue — the three
        # resources the integration tests exercise.
        template_content = """AWSTemplateFormatVersion: '2010-09-09'
Description: Minimal test stack for ai-lls-lib integration testing

Resources:
  TestPhoneCache:
    Type: AWS::DynamoDB::Table
    Properties:
      TableName: !Sub "${AWS::StackName}-phone-cache"
      BillingMode: PAY_PER_REQUEST
      AttributeDefinitions:
        - AttributeName: phone_number
          AttributeType: S
      KeySchema:
        - AttributeName: phone_number
          KeyType: HASH
      TimeToLiveSpecification:
        AttributeName: ttl
        Enabled: true

  TestUploadBucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub "${AWS::StackName}-uploads-${AWS::AccountId}"
      LifecycleConfiguration:
        Rules:
          - Id: DeleteOldTestFiles
            Status: Enabled
            ExpirationInDays: 1

  TestQueue:
    Type: AWS::SQS::Queue
    Properties:
      QueueName: !Sub "${AWS::StackName}-test-queue"
      MessageRetentionPeriod: 3600 # 1 hour for test

Outputs:
  CacheTableName:
    Value: !Ref TestPhoneCache
  BucketName:
    Value: !Ref TestUploadBucket
  QueueUrl:
    Value: !Ref TestQueue
"""

        os.makedirs(os.path.dirname(template_path), exist_ok=True)
        with open(template_path, 'w') as f:
            f.write(template_content)
        click.echo(f"Created {template_path}")

    # Deploy using AWS CLI (shell=False via arg list — no injection risk).
    cmd = [
        "aws", "cloudformation", "deploy",
        "--template-file", template_path,
        "--stack-name", stack_name,
        "--capabilities", "CAPABILITY_IAM"
    ]

    if profile:
        cmd.extend(["--profile", profile])
    if region:
        cmd.extend(["--region", region])

    click.echo(f"Deploying test stack '{stack_name}'...")
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            click.echo("Test stack deployed successfully!")

            # Show outputs so the user can wire them into test env vars.
            aws = AWSClient(region=region, profile=profile)
            outputs = aws.get_stack_outputs(stack_name)
            if outputs:
                click.echo("\nStack Outputs:")
                for key, value in outputs.items():
                    click.echo(f"  {key}: {value}")
        else:
            click.echo(f"Deployment failed: {result.stderr}", err=True)
    except Exception as e:
        click.echo(f"Error deploying test stack: {e}", err=True)
|
110
|
+
|
111
|
+
@test_stack_group.command(name="delete")
@click.option("--stack-name", default="ai-lls-lib-test", help="Test stack name")
@click.option("--profile", help="AWS profile")
@click.option("--region", help="AWS region")
@click.confirmation_option(prompt="Delete test stack and all resources?")
def delete_test_stack(stack_name, profile, region):
    """Delete the test stack.

    The stack's S3 bucket is emptied first (CloudFormation cannot delete a
    non-empty bucket), then the stack is deleted and we block until the
    delete completes. Errors are reported, never raised, so the CLI exits
    cleanly.
    """
    aws = AWSClient(region=region, profile=profile)

    try:
        # Empty S3 bucket first — CloudFormation refuses to delete a bucket
        # that still contains objects.
        outputs = aws.get_stack_outputs(stack_name)
        if 'BucketName' in outputs:
            bucket_name = outputs['BucketName']
            click.echo(f"Emptying bucket {bucket_name}...")

            # FIX: paginate the listing. A single list_objects_v2 call only
            # returns up to 1000 keys, so larger buckets were left partially
            # full and the stack delete would then fail.
            try:
                paginator = aws.s3.get_paginator('list_objects_v2')
                for page in paginator.paginate(Bucket=bucket_name):
                    for obj in page.get('Contents', []):
                        aws.s3.delete_object(Bucket=bucket_name, Key=obj['Key'])
            except Exception:
                # FIX: narrowed from a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit. Best-effort on purpose:
                pass  # Bucket might not exist

        # Delete stack
        click.echo(f"Deleting stack '{stack_name}'...")
        aws.cloudformation.delete_stack(StackName=stack_name)

        # Wait for deletion so the command only reports success once the
        # stack is actually gone.
        click.echo("Waiting for stack deletion...")
        waiter = aws.cloudformation.get_waiter('stack_delete_complete')
        waiter.wait(StackName=stack_name)

        click.echo("Test stack deleted successfully!")

    except Exception as e:
        click.echo(f"Error deleting test stack: {e}", err=True)
|
149
|
+
|
150
|
+
@test_stack_group.command(name="status")
@click.option("--stack-name", default="ai-lls-lib-test", help="Test stack name")
@click.option("--profile", help="AWS profile")
@click.option("--region", help="AWS region")
def test_stack_status(stack_name, profile, region):
    """Show test stack status"""
    aws = AWSClient(region=region, profile=profile)

    try:
        # describe_stacks raises ClientError when the stack is absent;
        # otherwise the first (only) entry is our stack.
        described = aws.cloudformation.describe_stacks(StackName=stack_name)
        stack_info = described['Stacks'][0]

        click.echo(f"\nTest Stack: {stack_name}")
        click.echo("=" * 50)
        click.echo(f"Status: {stack_info['StackStatus']}")
        click.echo(f"Created: {stack_info.get('CreationTime', 'N/A')}")

        if 'Outputs' in stack_info:
            click.echo("\nOutputs:")
            for entry in stack_info['Outputs']:
                click.echo(f"  {entry['OutputKey']}: {entry['OutputValue']}")

    except aws.cloudformation.exceptions.ClientError as e:
        # Distinguish "stack missing" (normal) from genuine API errors.
        if 'does not exist' in str(e):
            click.echo(f"Test stack '{stack_name}' does not exist")
        else:
            click.echo(f"Error: {e}", err=True)
|
177
|
+
|
178
|
+
@test_stack_group.command(name="test")
@click.option("--stack-name", default="ai-lls-lib-test", help="Test stack name")
@click.option("--profile", help="AWS profile")
@click.option("--region", help="AWS region")
def run_integration_tests(stack_name, profile, region):
    """Run integration tests against test stack.

    Exports the stack's resource identifiers as TEST_* environment
    variables, then runs ``pytest tests/integration`` from the current
    working directory.
    """
    aws = AWSClient(region=region, profile=profile)

    try:
        # Check stack exists before touching the environment.
        outputs = aws.get_stack_outputs(stack_name)
        if not outputs:
            click.echo(f"Test stack '{stack_name}' not found. Deploy it first.")
            return

        # Expose stack resources to the test suite via environment variables.
        os.environ['TEST_STACK_NAME'] = stack_name
        os.environ['TEST_CACHE_TABLE'] = outputs.get('CacheTableName', '')
        os.environ['TEST_BUCKET'] = outputs.get('BucketName', '')
        os.environ['TEST_QUEUE_URL'] = outputs.get('QueueUrl', '')

        if profile:
            os.environ['AWS_PROFILE'] = profile
        if region:
            os.environ['AWS_REGION'] = region

        click.echo(f"Running integration tests against '{stack_name}'...")

        # Run pytest from the current working directory (expected to be the
        # project root containing tests/integration).
        # BUG FIX: this previously passed cwd=os.path.dirname(template_path),
        # but template_path is not defined in this function, so every
        # invocation raised NameError and was reported as
        # "Error running tests: name 'template_path' is not defined".
        cmd = ["pytest", "tests/integration", "-v"]
        result = subprocess.run(cmd)

        if result.returncode == 0:
            click.echo("\nIntegration tests passed!")
        else:
            click.echo("\nSome tests failed", err=True)

    except Exception as e:
        click.echo(f"Error running tests: {e}", err=True)
|
@@ -0,0 +1,111 @@
|
|
1
|
+
"""
|
2
|
+
Verification commands - direct phone verification bypassing API
|
3
|
+
"""
|
4
|
+
import click
|
5
|
+
import json
|
6
|
+
from datetime import datetime
|
7
|
+
from ai_lls_lib.core.verifier import PhoneVerifier
|
8
|
+
from ai_lls_lib.core.cache import DynamoDBCache
|
9
|
+
from ai_lls_lib.cli.aws_client import AWSClient
|
10
|
+
|
11
|
+
@click.group(name="verify")
def verify_group():
    """Phone verification commands"""
    # Intentionally empty: subcommands (phone, bulk) attach themselves via
    # @verify_group.command decorators below. The docstring doubles as the
    # CLI help text shown by click.
    pass
|
15
|
+
|
16
|
+
@verify_group.command(name="phone")
@click.argument("phone_number")
@click.option("--stack", default="landline-api", help="CloudFormation stack name")
@click.option("--skip-cache", is_flag=True, help="Skip cache lookup")
@click.option("--profile", help="AWS profile to use")
@click.option("--region", help="AWS region")
def verify_phone(phone_number, stack, skip_cache, profile, region):
    """Verify a single phone number"""
    aws = AWSClient(region=region, profile=profile)

    # Resolve the DynamoDB cache table backing the given stack.
    cache_table = aws.get_table_name(stack, "PhoneCacheTable")
    click.echo(f"Using cache table: {cache_table}")

    # Wire the verifier up to the stack's cache.
    cache = DynamoDBCache(table_name=cache_table)
    verifier = PhoneVerifier(cache=cache)

    try:
        if not skip_cache:
            # Normal path: cached verification, then coerce a model result
            # to a plain dict for uniform display.
            outcome = verifier.verify_sync(phone_number)
            result = outcome.dict() if hasattr(outcome, 'dict') else outcome
        else:
            # Bypass the cache: normalize, then query line type and DNC
            # directly via the verifier's sync lookups.
            normalized = verifier.normalize_phone(phone_number)
            line_type = verifier._check_line_type_sync(normalized)
            result = {
                "phone_number": normalized,
                "line_type": line_type,
                "dnc": verifier._check_dnc_sync(normalized),
                "cached": False,
                "verified_at": datetime.utcnow().isoformat(),
                "source": "cli-direct",
            }

        # Human-readable summary.
        click.echo("\n" + "=" * 40)
        click.echo(f"Phone: {result['phone_number']}")
        click.echo(f"Line Type: {result['line_type']}")
        click.echo(f"DNC Status: {'Yes' if result['dnc'] else 'No'}")
        click.echo(f"From Cache: {'Yes' if result.get('cached') else 'No'}")
        click.echo(f"Verified: {result.get('verified_at', 'Unknown')}")
        click.echo("=" * 40)

        # Optional machine-readable dump.
        if click.confirm("\nShow JSON output?"):
            click.echo(json.dumps(result, indent=2, default=str))

    except ValueError as e:
        # Normalization/validation failure.
        click.echo(f"Error: {e}", err=True)
    except Exception as e:
        click.echo(f"Verification failed: {e}", err=True)
|
68
|
+
|
69
|
+
@verify_group.command(name="bulk")
@click.argument("csv_file", type=click.Path(exists=True))
@click.option("--output", "-o", help="Output CSV file")
@click.option("--stack", default="landline-api", help="CloudFormation stack name")
@click.option("--profile", help="AWS profile to use")
@click.option("--region", help="AWS region")
def verify_bulk(csv_file, output, stack, profile, region):
    """Process a CSV file for bulk verification"""
    # Imported lazily to keep CLI startup light.
    from ai_lls_lib.core.processor import BulkProcessor

    aws = AWSClient(region=region, profile=profile)
    cache_table = aws.get_table_name(stack, "PhoneCacheTable")

    cache = DynamoDBCache(table_name=cache_table)
    processor = BulkProcessor(verifier=PhoneVerifier(cache=cache))

    click.echo(f"Processing {csv_file}...")

    try:
        # Verify every row in the CSV.
        results = processor.process_csv_sync(csv_file)
        click.echo(f"\nProcessed {len(results)} phone numbers")

        # Tally line types, DNC hits, and cache hits in a single pass.
        mobile_count = landline_count = dnc_count = cached_count = 0
        for item in results:
            if item.line_type == "mobile":
                mobile_count += 1
            if item.line_type == "landline":
                landline_count += 1
            if item.dnc:
                dnc_count += 1
            if item.cached:
                cached_count += 1

        click.echo("\nSummary:")
        click.echo(f"  Mobile: {mobile_count}")
        click.echo(f"  Landline: {landline_count}")
        click.echo(f"  On DNC: {dnc_count}")
        click.echo(f"  From Cache: {cached_count}")

        # Write the annotated CSV only when requested.
        if output:
            processor.generate_results_csv(csv_file, results, output)
            click.echo(f"\nResults saved to: {output}")

    except Exception as e:
        click.echo(f"Bulk processing failed: {e}", err=True)
|
ai_lls_lib/core/cache.py
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
"""
|
2
|
+
DynamoDB cache implementation for phone verifications
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from datetime import datetime, timedelta, timezone
|
6
|
+
from typing import Optional
|
7
|
+
import boto3
|
8
|
+
from aws_lambda_powertools import Logger
|
9
|
+
from .models import PhoneVerification, CacheEntry
|
10
|
+
|
11
|
+
logger = Logger()
|
12
|
+
|
13
|
+
|
14
|
+
class DynamoDBCache:
    """Cache for phone verification results using DynamoDB with TTL.

    All operations are best-effort: a DynamoDB failure is logged and
    reported as a cache miss (or a silent write failure) rather than
    propagated, so verification never breaks because of the cache.
    """

    def __init__(self, table_name: str, ttl_days: int = 30):
        """
        Args:
            table_name: DynamoDB table keyed on "phone_number" (string).
            ttl_days: Entry lifetime; written to the "ttl" attribute as a
                Unix timestamp consumed by DynamoDB's TTL sweeper.
        """
        self.table_name = table_name
        self.ttl_days = ttl_days
        self.dynamodb = boto3.resource("dynamodb")
        self.table = self.dynamodb.Table(table_name)

    def get(self, phone_number: str) -> Optional[PhoneVerification]:
        """Get cached verification result.

        Returns None on a miss or on any DynamoDB error.
        """
        try:
            response = self.table.get_item(Key={"phone_number": phone_number})

            if "Item" not in response:
                # Only the prefix is logged to avoid writing full numbers
                # to the log stream.
                logger.info(f"Cache miss for {phone_number[:6]}***")
                return None

            item = response["Item"]
            logger.info(f"Cache hit for {phone_number[:6]}***")

            return PhoneVerification(
                phone_number=item["phone_number"],
                line_type=item["line_type"],
                dnc=item["dnc"],
                cached=True,
                verified_at=datetime.fromisoformat(item["verified_at"]),
                source="cache"
            )

        except Exception as e:
            logger.error(f"Cache get error: {str(e)}")
            return None

    def set(self, phone_number: str, verification: PhoneVerification) -> None:
        """Store verification result in cache (best-effort)."""
        try:
            # Absolute expiry timestamp for DynamoDB's TTL feature.
            ttl = int((datetime.now(timezone.utc) + timedelta(days=self.ttl_days)).timestamp())

            self.table.put_item(
                Item={
                    "phone_number": phone_number,
                    "line_type": verification.line_type,
                    "dnc": verification.dnc,
                    "verified_at": verification.verified_at.isoformat(),
                    "source": verification.source,
                    "ttl": ttl
                }
            )

            logger.info(f"Cached result for {phone_number[:6]}***")

        except Exception as e:
            logger.error(f"Cache set error: {str(e)}")
            # Don't fail the request if cache write fails

    def batch_get(self, phone_numbers: list[str]) -> dict[str, Optional[PhoneVerification]]:
        """Get multiple cached results.

        Returns a dict mapping every requested number to its cached
        PhoneVerification, or None for misses/errors.
        """
        results = {}

        # DynamoDB batch get (max 100 items per request)
        for i in range(0, len(phone_numbers), 100):
            batch = phone_numbers[i:i+100]

            try:
                request_items = {
                    self.table_name: {
                        "Keys": [{"phone_number": phone} for phone in batch]
                    }
                }

                # BUG FIX: batch_get_item may return UnprocessedKeys when
                # throttled; previously those keys were dropped and thus
                # misreported as cache misses. Retry them a bounded number
                # of times so one throttle can't loop forever.
                attempts = 0
                while request_items and attempts < 5:
                    response = self.dynamodb.batch_get_item(RequestItems=request_items)

                    for item in response.get("Responses", {}).get(self.table_name, []):
                        phone = item["phone_number"]
                        results[phone] = PhoneVerification(
                            phone_number=phone,
                            line_type=item["line_type"],
                            dnc=item["dnc"],
                            cached=True,
                            verified_at=datetime.fromisoformat(item["verified_at"]),
                            source="cache"
                        )

                    request_items = response.get("UnprocessedKeys") or None
                    attempts += 1

            except Exception as e:
                logger.error(f"Batch cache get error: {str(e)}")

        # Fill in None for misses
        for phone in phone_numbers:
            if phone not in results:
                results[phone] = None

        return results
|
@@ -0,0 +1,77 @@
|
|
1
|
+
"""
|
2
|
+
Data models for phone verification
|
3
|
+
"""
|
4
|
+
from datetime import datetime
|
5
|
+
from enum import Enum
|
6
|
+
from typing import Optional
|
7
|
+
from pydantic import BaseModel, Field
|
8
|
+
|
9
|
+
|
10
|
+
class LineType(str, Enum):
    """Phone line type enumeration"""
    # str mixin: members compare equal to their plain string values,
    # e.g. LineType.MOBILE == "mobile". Member order is observable via
    # iteration — do not reorder.
    MOBILE = "mobile"
    LANDLINE = "landline"
    VOIP = "voip"
    # Fallback when the line type cannot be determined.
    UNKNOWN = "unknown"
|
16
|
+
|
17
|
+
|
18
|
+
class VerificationSource(str, Enum):
    """Source of verification data"""
    # str mixin: members compare equal to their plain string values,
    # e.g. VerificationSource.CACHE == "cache".
    API = "api"
    CACHE = "cache"
    BULK_IMPORT = "bulk_import"
|
23
|
+
|
24
|
+
|
25
|
+
class JobStatus(str, Enum):
    """Bulk job status enumeration"""
    # Lifecycle: PENDING -> PROCESSING -> COMPLETED | FAILED
    # (presumed from the names — confirm against the job runner).
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
|
31
|
+
|
32
|
+
|
33
|
+
class PhoneVerification(BaseModel):
    """Result of phone number verification"""
    # NOTE: pydantic serializes fields in declaration order — do not reorder.
    phone_number: str = Field(..., description="E.164 formatted phone number")
    line_type: LineType = Field(
        ..., description="Type of phone line"
    )
    dnc: bool = Field(..., description="Whether number is on DNC list")
    cached: bool = Field(..., description="Whether result came from cache")
    verified_at: datetime = Field(..., description="When verification occurred")
    source: VerificationSource = Field(
        ..., description="Source of verification data"
    )

    class Config:
        # Pydantic v1-style config: emit datetimes as ISO-8601 strings
        # when the model is JSON-serialized.
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }
|
50
|
+
|
51
|
+
|
52
|
+
class BulkJob(BaseModel):
    """Bulk processing job metadata"""
    # Minimal identity + state; progress/result fields live on the
    # BulkJobStatus subclass.
    job_id: str = Field(..., description="Unique job identifier")
    status: JobStatus = Field(
        ..., description="Current job status"
    )
|
58
|
+
|
59
|
+
|
60
|
+
class BulkJobStatus(BulkJob):
    """Extended bulk job status with progress info"""
    # Inherits job_id and status from BulkJob.
    total_rows: Optional[int] = Field(None, description="Total rows to process")
    processed_rows: Optional[int] = Field(None, description="Rows processed so far")
    result_url: Optional[str] = Field(None, description="S3 URL of results")
    created_at: datetime = Field(..., description="Job creation time")
    completed_at: Optional[datetime] = Field(None, description="Job completion time")
    error: Optional[str] = Field(None, description="Error message if failed")
|
68
|
+
|
69
|
+
|
70
|
+
class CacheEntry(BaseModel):
    """DynamoDB cache entry"""
    # Mirrors the raw item shape stored in the phone-cache table:
    # enums and datetimes are flattened to plain strings for storage.
    phone_number: str
    line_type: str  # Stored as string in DynamoDB
    dnc: bool
    verified_at: str  # ISO format string
    source: str  # Stored as string in DynamoDB
    ttl: int  # Unix timestamp for DynamoDB TTL
|
@@ -0,0 +1,135 @@
|
|
1
|
+
"""
|
2
|
+
Bulk CSV processing for phone verification
|
3
|
+
"""
|
4
|
+
import csv
|
5
|
+
from typing import List, Optional
|
6
|
+
from aws_lambda_powertools import Logger
|
7
|
+
from .models import PhoneVerification
|
8
|
+
from .verifier import PhoneVerifier
|
9
|
+
|
10
|
+
logger = Logger()
|
11
|
+
|
12
|
+
|
13
|
+
class BulkProcessor:
    """Process CSV files for bulk phone verification"""

    def __init__(self, verifier: PhoneVerifier):
        # Verifier performs the (possibly cached) per-number lookups.
        self.verifier = verifier

    def process_csv_sync(self, file_path: str, phone_column: str = "phone") -> List[PhoneVerification]:
        """
        Process CSV file synchronously.

        Args:
            file_path: Input CSV path. Read with utf-8-sig so a BOM from
                Excel exports does not corrupt the first header name.
            phone_column: Preferred phone column name; matched
                case-insensitively, with common aliases tried as fallback.

        Returns list of verification results. Rows with empty or invalid
        phones are logged and skipped, not raised.

        Raises:
            ValueError: If no phone column can be identified.
        """
        results = []

        try:
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                reader = csv.DictReader(f)

                # Find phone column (case-insensitive)
                headers = reader.fieldnames or []
                phone_col = self._find_phone_column(headers, phone_column)

                if not phone_col:
                    raise ValueError(f"Phone column '{phone_column}' not found in CSV")

                for row_num, row in enumerate(reader, start=2):  # Start at 2 (header is 1)
                    try:
                        phone = row.get(phone_col, "").strip()
                        if not phone:
                            logger.warning(f"Empty phone at row {row_num}")
                            continue

                        # Verify phone
                        result = self.verifier.verify_sync(phone)
                        results.append(result)

                        # Log progress every 100 rows
                        if len(results) % 100 == 0:
                            logger.info(f"Processed {len(results)} phones")

                    except ValueError as e:
                        # Invalid number: skip the row, keep processing.
                        logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
                        continue
                    except Exception as e:
                        logger.error(f"Error processing row {row_num}: {str(e)}")
                        continue

            logger.info(f"Completed processing {len(results)} valid phones")

        except Exception as e:
            logger.error(f"CSV processing failed: {str(e)}")
            raise

        return results

    def _find_phone_column(self, headers: List[str], preferred: str) -> Optional[str]:
        """Find phone column in headers (case-insensitive).

        Tries an exact (case-insensitive) match on *preferred* first, then
        falls back to substring matches against common phone column names.
        Returns the original header string, or None if nothing matches.
        """
        # First try exact match
        for header in headers:
            if header.lower() == preferred.lower():
                return header

        # Common phone column names
        phone_patterns = [
            "phone", "phone_number", "phonenumber", "mobile",
            "cell", "telephone", "tel", "number", "contact"
        ]

        for header in headers:
            header_lower = header.lower()
            for pattern in phone_patterns:
                if pattern in header_lower:
                    logger.info(f"Using column '{header}' as phone column")
                    return header

        return None

    def generate_results_csv(
        self,
        original_path: str,
        results: List[PhoneVerification],
        output_path: str
    ) -> None:
        """
        Generate CSV with original data plus verification results.
        Adds columns: line_type, dnc, cached
        """
        # Lookup keyed by normalized phone number.
        results_map = {r.phone_number: r for r in results}

        with open(original_path, 'r', encoding='utf-8-sig') as infile:
            reader = csv.DictReader(infile)
            headers = reader.fieldnames or []

            # Add new columns
            output_headers = headers + ["line_type", "dnc", "cached"]

            with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
                writer = csv.DictWriter(outfile, fieldnames=output_headers)
                writer.writeheader()

                phone_col = self._find_phone_column(headers, "phone")

                for row in reader:
                    # FIX: phone_col may be None when no phone-like header
                    # exists; such rows fall through to "invalid" below
                    # instead of passing None as a dict key.
                    phone = row.get(phone_col, "").strip() if phone_col else ""

                    # Try to normalize for lookup
                    try:
                        normalized = self.verifier.normalize_phone(phone)
                        if normalized in results_map:
                            result = results_map[normalized]
                            # FIX: write the enum's underlying value so the
                            # CSV contains "mobile", not "LineType.MOBILE"
                            # (str() of a str+Enum mixin includes the class
                            # name on some Python versions).
                            row["line_type"] = getattr(result.line_type, "value", result.line_type)
                            row["dnc"] = "true" if result.dnc else "false"
                            row["cached"] = "true" if result.cached else "false"
                        else:
                            row["line_type"] = "unknown"
                            row["dnc"] = ""
                            row["cached"] = ""
                    except Exception:
                        # FIX: narrowed from a bare `except:` which also
                        # swallowed KeyboardInterrupt/SystemExit.
                        row["line_type"] = "invalid"
                        row["dnc"] = ""
                        row["cached"] = ""

                    writer.writerow(row)
|