ai-lls-lib 2.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,77 @@
1
+ """
2
+ Data models for phone verification
3
+ """
4
+ from datetime import datetime
5
+ from enum import Enum
6
+ from typing import Optional
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class LineType(str, Enum):
    """Kinds of phone line a verification can report.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    MOBILE = "mobile"
    LANDLINE = "landline"
    VOIP = "voip"
    UNKNOWN = "unknown"
16
+
17
+
18
class VerificationSource(str, Enum):
    """Where the data behind a verification result came from.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    API = "api"
    CACHE = "cache"
    BULK_IMPORT = "bulk_import"
23
+
24
+
25
class JobStatus(str, Enum):
    """States a bulk job can be in.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
31
+
32
+
33
class PhoneVerification(BaseModel):
    """Outcome of verifying a single phone number.

    Every field is required.
    """

    phone_number: str = Field(..., description="E.164 formatted phone number")
    line_type: LineType = Field(..., description="Type of phone line")
    dnc: bool = Field(..., description="Whether number is on DNC list")
    cached: bool = Field(..., description="Whether result came from cache")
    verified_at: datetime = Field(..., description="When verification occurred")
    source: VerificationSource = Field(..., description="Source of verification data")

    class Config:
        # Render datetime values as ISO 8601 strings in JSON output.
        json_encoders = {datetime: lambda value: value.isoformat()}
50
+
51
+
52
class BulkJob(BaseModel):
    """Identity and current status of a bulk processing job.

    Both fields are required.
    """

    job_id: str = Field(..., description="Unique job identifier")
    status: JobStatus = Field(..., description="Current job status")
58
+
59
+
60
class BulkJobStatus(BulkJob):
    """Bulk job metadata extended with progress and outcome details.

    Only ``created_at`` is required in addition to the inherited fields; the
    remaining fields default to ``None``.
    """

    # Progress counters.
    total_rows: Optional[int] = Field(None, description="Total rows to process")
    processed_rows: Optional[int] = Field(None, description="Rows processed so far")

    # Outcome details.
    result_url: Optional[str] = Field(None, description="S3 URL of results")
    created_at: datetime = Field(..., description="Job creation time")
    completed_at: Optional[datetime] = Field(None, description="Job completion time")
    error: Optional[str] = Field(None, description="Error message if failed")
68
+
69
+
70
class CacheEntry(BaseModel):
    """Shape of a verification result as stored in the DynamoDB cache.

    Enum-like and timestamp values are kept as plain strings here.
    """

    phone_number: str
    line_type: str  # kept as a plain string in DynamoDB
    dnc: bool
    verified_at: str  # ISO format timestamp string
    source: str  # kept as a plain string in DynamoDB
    ttl: int  # Unix timestamp used as the DynamoDB TTL
@@ -0,0 +1,295 @@
1
+ """
2
+ Bulk CSV processing for phone verification
3
+ """
4
+ import csv
5
+ from io import StringIO
6
+ from typing import List, Optional, Iterator, Iterable
7
+ from aws_lambda_powertools import Logger
8
+ from .models import PhoneVerification
9
+ from .verifier import PhoneVerifier
10
+
11
+ logger = Logger()
12
+
13
+
14
class BulkProcessor:
    """Process CSV files for bulk phone verification.

    The verifier supplied at construction is used in two ways:
    ``verify(phone)`` produces a result for a raw phone string, and
    ``normalize_phone(phone)`` produces the key under which a result is
    looked up when a results CSV is generated.
    """

    # Columns appended to every row of a generated results CSV.
    _RESULT_COLUMNS = ["line_type", "dnc", "cached"]

    # Substrings that mark a header as a phone column when no header matches
    # the requested column name.
    _PHONE_PATTERNS = (
        "phone", "phone_number", "phonenumber", "mobile",
        "cell", "telephone", "tel", "number", "contact",
    )

    def __init__(self, verifier: "PhoneVerifier"):
        """Store the verifier used for verification and normalization."""
        self.verifier = verifier

    def process_csv(self, csv_text: str, phone_column: str = "phone") -> List["PhoneVerification"]:
        """Verify every phone number found in CSV text.

        Rows with an empty phone cell, and rows whose verification raises,
        are logged and skipped; they do not abort the run.

        Args:
            csv_text: Full CSV content, header row included.
            phone_column: Preferred name of the phone column (matched
                case-insensitively, with a fallback to common phone-like
                header names).

        Returns:
            Verification results, in input order, for the rows that verified.

        Raises:
            ValueError: If no phone column can be found in the header.
        """
        results = []

        try:
            reader = csv.DictReader(StringIO(csv_text))
            phone_col = self._find_phone_column(reader.fieldnames or [], phone_column)
            if not phone_col:
                raise ValueError(f"Phone column '{phone_column}' not found in CSV")

            # Data rows are numbered from 2 because the header is row 1.
            for row_num, row in enumerate(reader, start=2):
                try:
                    phone = self._cell_text(row, phone_col)
                    if not phone:
                        logger.warning(f"Empty phone at row {row_num}")
                        continue

                    results.append(self.verifier.verify(phone))

                    # Log progress every 100 verified rows.
                    if len(results) % 100 == 0:
                        logger.info(f"Processed {len(results)} phones")

                except ValueError as e:
                    logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
                except Exception as e:
                    # Best effort: one failing row must not abort the batch.
                    logger.error(f"Error processing row {row_num}: {str(e)}")

            logger.info(f"Completed processing {len(results)} valid phones")

        except Exception as e:
            logger.error(f"CSV processing failed: {str(e)}")
            raise

        return results

    def _find_phone_column(self, headers: List[str], preferred: str) -> Optional[str]:
        """Pick the header holding phone numbers, or None if nothing fits.

        A case-insensitive match on ``preferred`` wins. Otherwise the first
        header containing one of the common phone-like substrings is used.
        """
        wanted = preferred.lower()
        for header in headers:
            if header.lower() == wanted:
                return header

        for header in headers:
            lowered = header.lower()
            if any(pattern in lowered for pattern in self._PHONE_PATTERNS):
                logger.info(f"Using column '{header}' as phone column")
                return header

        return None

    @staticmethod
    def _cell_text(row: dict, column: Optional[str]) -> str:
        """Return the stripped text of ``row[column]``, or "" if unusable.

        csv.DictReader fills missing cells of short rows with None, and no
        phone column may have been found at all; both cases yield "".
        """
        value = row.get(column) if column is not None else None
        return value.strip() if isinstance(value, str) else ""

    def _result_fields(self, phone: str, results_map: dict) -> dict:
        """Build the line_type/dnc/cached cells for one raw phone value.

        A phone that cannot be normalized is reported as "invalid"; one that
        normalizes but has no entry in ``results_map`` as "unknown".
        """
        try:
            normalized = self.verifier.normalize_phone(phone)
        except Exception:
            # Best effort: a bad phone marks the row instead of aborting the
            # export. Unlike a bare except, this lets KeyboardInterrupt and
            # SystemExit propagate.
            return {"line_type": "invalid", "dnc": "", "cached": ""}

        result = results_map.get(normalized)
        if result is None:
            return {"line_type": "unknown", "dnc": "", "cached": ""}

        return {
            "line_type": result.line_type.value,
            "dnc": "true" if result.dnc else "false",
            "cached": "true" if result.cached else "false",
        }

    def generate_results_csv(
        self,
        original_csv_text: str,
        results: List["PhoneVerification"],
        phone_column: str = "phone"
    ) -> str:
        """Generate CSV text with the original data plus verification results.

        Appends the columns line_type, dnc and cached to every row.

        Args:
            original_csv_text: The CSV text that was verified.
            results: Verification results, matched to rows by normalized
                phone number.
            phone_column: Preferred name of the phone column; use the same
                value that was used to produce ``results``.

        Returns:
            The augmented CSV as a single string.
        """
        results_map = {r.phone_number: r for r in results}

        reader = csv.DictReader(StringIO(original_csv_text))
        headers = list(reader.fieldnames or [])

        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=headers + self._RESULT_COLUMNS)
        writer.writeheader()

        phone_col = self._find_phone_column(headers, phone_column)

        for row in reader:
            phone = self._cell_text(row, phone_col)
            row.update(self._result_fields(phone, results_map))
            writer.writerow(row)

        return output.getvalue()

    def process_csv_stream(
        self,
        lines: Iterable[str],
        phone_column: str = "phone",
        batch_size: int = 100
    ) -> Iterator[List["PhoneVerification"]]:
        """Verify phones from a stream of CSV lines, yielding result batches.

        The input is consumed lazily, one line at a time. Each line is parsed
        as one complete CSV record, so quoted fields that span several lines
        are not supported. Blank lines are skipped and do not count towards
        row numbers.

        Args:
            lines: Iterable of CSV lines (including header).
            phone_column: Column name containing phone numbers.
            batch_size: Number of results to accumulate before yielding.

        Yields:
            Batches of PhoneVerification results.

        Raises:
            ValueError: If no phone column can be found in the header
                (raised when the generator is first advanced).
        """
        lines_iter = iter(lines)

        try:
            header_line = next(lines_iter)
        except StopIteration:
            logger.error("Empty CSV stream")
            return

        headers = csv.DictReader(StringIO(header_line)).fieldnames or []
        phone_col = self._find_phone_column(headers, phone_column)

        if not phone_col:
            raise ValueError(f"Phone column '{phone_column}' not found in CSV")

        batch = []
        total_processed = 0
        row_num = 1  # The header is row 1; data rows start at 2.

        for line in lines_iter:
            if not line.strip():
                continue
            row_num += 1

            try:
                row = next(csv.DictReader(StringIO(line), fieldnames=headers))
                phone = self._cell_text(row, phone_col)

                if not phone:
                    logger.warning(f"Empty phone at row {row_num}")
                    continue

                result = self.verifier.verify(phone)
            except ValueError as e:
                logger.warning(f"Invalid phone at row {row_num}: {str(e)}")
                continue
            except Exception as e:
                # Best effort: one failing row must not abort the stream.
                logger.error(f"Error processing row {row_num}: {str(e)}")
                continue

            batch.append(result)
            total_processed += 1

            # The yield sits outside the try block so that an exception
            # thrown into the generator by its consumer is not logged as a
            # row error.
            if len(batch) >= batch_size:
                logger.info(f"Processed batch of {len(batch)} phones (total: {total_processed})")
                yield batch
                batch = []

        if batch:
            logger.info(f"Processed final batch of {len(batch)} phones (total: {total_processed})")
            yield batch

        logger.info(f"Stream processing completed. Total processed: {total_processed}")

    def generate_results_csv_stream(
        self,
        original_lines: Iterable[str],
        results_stream: Iterator[List["PhoneVerification"]],
        phone_column: str = "phone"
    ) -> Iterator[str]:
        """Generate the results CSV as a stream, one output line at a time.

        The header is yielded first. ``results_stream`` is then drained
        completely into a lookup table before any data line is emitted.
        ``original_lines`` is iterated exactly once, so a one-shot iterator
        is acceptable. Each input line is parsed as one complete CSV record.

        Args:
            original_lines: Iterable of original CSV lines (header first).
            results_stream: Iterator of batched PhoneVerification results.
            phone_column: Column name containing phone numbers.

        Yields:
            CSV lines with the line_type, dnc and cached columns added. The
            header ends with a newline; data rows use the csv module's
            default line terminator.
        """
        lines_iter = iter(original_lines)

        try:
            header_line = next(lines_iter)
        except StopIteration:
            return

        headers = list(csv.DictReader(StringIO(header_line)).fieldnames or [])
        output_headers = headers + self._RESULT_COLUMNS

        # Write the header through csv.writer so names containing commas or
        # quotes are quoted and stay aligned with the data rows.
        header_buffer = StringIO()
        csv.writer(header_buffer, lineterminator="\n").writerow(output_headers)
        yield header_buffer.getvalue()

        phone_col = self._find_phone_column(headers, phone_column)

        results_map = {}
        for batch in results_stream:
            for result in batch:
                results_map[result.phone_number] = result

        # Continue with the same iterator: the header has already been
        # consumed, and calling iter() again on a one-shot iterator would
        # not rewind it.
        for line in lines_iter:
            if not line.strip():
                continue

            row = next(csv.DictReader(StringIO(line), fieldnames=headers), None)
            if row is None:
                # Nothing parseable on this line; a bare next() would leak
                # StopIteration out of the generator as RuntimeError.
                continue

            phone = self._cell_text(row, phone_col)
            row.update(self._result_fields(phone, results_map))

            output = StringIO()
            csv.DictWriter(output, fieldnames=output_headers).writerow(row)
            yield output.getvalue()
@@ -0,0 +1,90 @@
1
+ """
2
+ Phone verification logic - checks line type and DNC status
3
+ """
4
+ import os
5
+ from datetime import datetime, timezone
6
+ from typing import Optional
7
+ import phonenumbers
8
+ from aws_lambda_powertools import Logger
9
+ from .models import PhoneVerification, LineType, VerificationSource
10
+ from .cache import DynamoDBCache
11
+ from ..providers import VerificationProvider, StubProvider, ExternalAPIProvider
12
+
13
+ logger = Logger()
14
+
15
+
16
class PhoneVerifier:
    """Verifies phone numbers for line type and DNC status."""

    def __init__(self, cache: Optional["DynamoDBCache"] = None, provider: Optional["VerificationProvider"] = None):
        """
        Initialize phone verifier.

        Args:
            cache: Optional DynamoDB cache for storing results
            provider: Verification provider (defaults to ExternalAPIProvider)
        """
        self.cache = cache
        self.provider = provider or ExternalAPIProvider()

    def normalize_phone(self, phone: str) -> str:
        """Normalize a phone number to E.164 format.

        Numbers without a country code are parsed as US numbers.

        Args:
            phone: Phone number in any format the phonenumbers parser accepts.

        Returns:
            The number formatted as E.164.

        Raises:
            ValueError: If the number cannot be parsed or is not valid. When
                parsing itself failed, the underlying error is attached as
                the cause.
        """
        try:
            parsed = phonenumbers.parse(phone, "US")
            if phonenumbers.is_valid_number(parsed):
                return phonenumbers.format_number(parsed, phonenumbers.PhoneNumberFormat.E164)
        except Exception as e:
            logger.error(f"Phone normalization failed: {str(e)}")
            raise ValueError(f"Invalid phone format: {phone}") from e

        # Parsed but not a valid number. This is raised outside the try block
        # so the error is not caught and re-wrapped by the handler above.
        logger.error(f"Phone normalization failed: Invalid phone number: {phone}")
        raise ValueError(f"Invalid phone format: {phone}")

    def verify(self, phone: str) -> "PhoneVerification":
        """Verify a phone number for line type and DNC status.

        A truthy cache hit is returned as-is. Otherwise the provider is
        queried and the fresh result is written to the cache on a
        best-effort basis.

        Args:
            phone: Phone number to verify; normalized before lookup.

        Returns:
            The cached result, or a new result with ``cached=False`` and
            ``source=VerificationSource.API``.

        Raises:
            ValueError: If the phone number cannot be normalized.
        """
        normalized = self.normalize_phone(phone)

        if self.cache:
            cached = self.cache.get(normalized)
            if cached:
                return cached

        line_type, dnc_status = self.provider.verify_phone(normalized)

        result = PhoneVerification(
            phone_number=normalized,
            line_type=line_type,
            dnc=dnc_status,
            cached=False,
            verified_at=datetime.now(timezone.utc),
            source=VerificationSource.API
        )

        if self.cache:
            try:
                self.cache.set(normalized, result)
            except Exception as e:
                # A cache write failure must not fail the verification.
                logger.warning(f"Failed to cache result: {e}")

        return result

    def _check_line_type(self, phone: str) -> "LineType":
        """
        Check line type (for backwards compatibility with CLI).
        Delegates to provider; the phone is passed through unnormalized.
        """
        line_type, _ = self.provider.verify_phone(phone)
        return line_type

    def _check_dnc(self, phone: str) -> bool:
        """
        Check DNC status (for backwards compatibility with CLI).
        Delegates to provider; the phone is passed through unnormalized.
        """
        _, dnc_status = self.provider.verify_phone(phone)
        return dnc_status
@@ -0,0 +1,13 @@
1
+ """Payment module for Landline Scrubber."""
2
+
3
+ from .models import Plan, PlanType, SubscriptionStatus
4
+ from .stripe_manager import StripeManager
5
+ from .credit_manager import CreditManager
6
+
7
# Names re-exported as the public interface of the payment package.
__all__ = [
    "Plan",
    "PlanType",
    "SubscriptionStatus",
    "StripeManager",
    "CreditManager",
]
@@ -0,0 +1,186 @@
1
+ """Credit balance management with DynamoDB."""
2
+
3
+ import os
4
+ from typing import Optional, Dict, Any
5
+ from decimal import Decimal
6
+ import logging
7
+ from datetime import datetime
8
+
9
+ try:
10
+ import boto3
11
+ from botocore.exceptions import ClientError
12
+ except ImportError:
13
+ boto3 = None # Handle gracefully for testing
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class CreditManager:
    """Manages user credit balances in the DynamoDB credits table.

    Items are keyed by ``user_id``. This class reads and writes the
    ``credits``, ``updated_at``, ``subscription_status``,
    ``stripe_customer_id`` and ``stripe_subscription_id`` attributes.
    """

    def __init__(self, table_name: Optional[str] = None):
        """Initialize with DynamoDB table.

        Args:
            table_name: Table to use; falls back to the ``CREDITS_TABLE``
                environment variable when empty or omitted.

        Raises:
            RuntimeError: If boto3 could not be imported.
            KeyError: If no table name is given and ``CREDITS_TABLE`` is
                not set.
        """
        if not boto3:
            raise RuntimeError("boto3 is required for CreditManager")

        self.dynamodb = boto3.resource("dynamodb")
        self.table_name = table_name if table_name else os.environ['CREDITS_TABLE']

        try:
            self.table = self.dynamodb.Table(self.table_name)
        except Exception as e:
            logger.error(f"Failed to connect to DynamoDB table {self.table_name}: {e}")
            self.table = None

    def _require_table(self):
        """Return the table handle, or raise RuntimeError if there is none."""
        if not self.table:
            raise RuntimeError(f"DynamoDB table {self.table_name} not accessible")
        return self.table

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO 8601 string.

        The string has the same format datetime.utcnow().isoformat()
        produced (no UTC offset suffix), without calling the deprecated
        utcnow().
        """
        # Local import: the module header only imports ``datetime`` itself.
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    @staticmethod
    def _empty_payment_info() -> Dict[str, Any]:
        """Return the payment info reported when nothing is known."""
        return {
            "credits": 0,
            "stripe_customer_id": None,
            "stripe_subscription_id": None,
            "subscription_status": None
        }

    def get_balance(self, user_id: str) -> int:
        """Get current credit balance for a user.

        Returns 0 when the user has no item or no ``credits`` attribute. A
        DynamoDB client error is logged and also reported as 0.

        Raises:
            RuntimeError: If the table handle is unavailable.
        """
        table = self._require_table()

        try:
            response = table.get_item(Key={"user_id": user_id})
        except ClientError as e:
            logger.error(f"Error getting balance for {user_id}: {e}")
            return 0

        if "Item" in response:
            return int(response["Item"].get("credits", 0))
        return 0

    def add_credits(self, user_id: str, amount: int) -> int:
        """Add credits to user balance and return new balance.

        Raises:
            RuntimeError: If the table handle is unavailable.
            ClientError: If the DynamoDB update fails (logged, then re-raised).
        """
        table = self._require_table()

        try:
            response = table.update_item(
                Key={"user_id": user_id},
                UpdateExpression="ADD credits :amount SET updated_at = :now",
                ExpressionAttributeValues={
                    ":amount": Decimal(amount),
                    ":now": self._utc_timestamp()
                },
                ReturnValues="ALL_NEW"
            )
            return int(response["Attributes"]["credits"])
        except ClientError as e:
            logger.error(f"Error adding credits for {user_id}: {e}")
            raise

    def deduct_credits(self, user_id: str, amount: int) -> bool:
        """
        Deduct credits from user balance.

        The deduction is a single conditional update that only applies when
        the stored balance is at least ``amount``.

        Returns:
            True if successful, False if insufficient balance.

        Raises:
            ValueError: If ``amount`` is negative.
            RuntimeError: If the table handle is unavailable.
            ClientError: For DynamoDB failures other than the failed
                balance condition.
        """
        table = self._require_table()

        # A negative amount would satisfy the balance condition and turn the
        # deduction into a credit grant.
        if amount < 0:
            raise ValueError(f"amount must be non-negative, got {amount}")

        try:
            table.update_item(
                Key={"user_id": user_id},
                UpdateExpression="ADD credits :negative_amount SET updated_at = :now",
                ConditionExpression="credits >= :amount",
                ExpressionAttributeValues={
                    ":negative_amount": Decimal(-amount),
                    ":amount": Decimal(amount),
                    ":now": self._utc_timestamp()
                }
            )
            return True
        except ClientError as e:
            if e.response["Error"]["Code"] == "ConditionalCheckFailedException":
                logger.info(f"Insufficient credits for {user_id}")
                return False
            logger.error(f"Error deducting credits for {user_id}: {e}")
            raise

    def set_subscription_state(
        self,
        user_id: str,
        status: str,
        stripe_customer_id: Optional[str] = None,
        stripe_subscription_id: Optional[str] = None
    ) -> None:
        """Update subscription state in CreditsTable.

        The Stripe identifiers are only written when a non-empty value is
        given; omitted ones leave the stored attributes untouched.

        Raises:
            RuntimeError: If the table handle is unavailable.
            ClientError: If the DynamoDB update fails (logged, then re-raised).
        """
        table = self._require_table()

        assignments = ["subscription_status = :status", "updated_at = :now"]
        expr_values = {
            ":status": status,
            ":now": self._utc_timestamp()
        }

        if stripe_customer_id:
            assignments.append("stripe_customer_id = :customer_id")
            expr_values[":customer_id"] = stripe_customer_id

        if stripe_subscription_id:
            assignments.append("stripe_subscription_id = :subscription_id")
            expr_values[":subscription_id"] = stripe_subscription_id

        try:
            table.update_item(
                Key={"user_id": user_id},
                UpdateExpression="SET " + ", ".join(assignments),
                ExpressionAttributeValues=expr_values
            )
        except ClientError as e:
            logger.error(f"Error updating subscription state for {user_id}: {e}")
            raise

    def get_user_payment_info(self, user_id: str) -> Dict[str, Any]:
        """Get user's payment-related information.

        Returns:
            A dict with the keys ``credits``, ``stripe_customer_id``,
            ``stripe_subscription_id`` and ``subscription_status``. When the
            user has no item, or a DynamoDB client error occurs (logged),
            credits is 0 and the other values are None.

        Raises:
            RuntimeError: If the table handle is unavailable.
        """
        table = self._require_table()

        try:
            response = table.get_item(Key={"user_id": user_id})
        except ClientError as e:
            logger.error(f"Error getting payment info for {user_id}: {e}")
            return self._empty_payment_info()

        if "Item" not in response:
            return self._empty_payment_info()

        item = response["Item"]
        return {
            "credits": int(item.get("credits", 0)),
            "stripe_customer_id": item.get("stripe_customer_id"),
            "stripe_subscription_id": item.get("stripe_subscription_id"),
            "subscription_status": item.get("subscription_status")
        }

    def has_unlimited_access(self, user_id: str) -> bool:
        """Check if user has unlimited access via active subscription."""
        info = self.get_user_payment_info(user_id)
        return info.get("subscription_status") == "active"

    def set_stripe_customer_id(self, user_id: str, stripe_customer_id: str) -> None:
        """Store Stripe customer ID for a user.

        Raises:
            RuntimeError: If the table handle is unavailable.
            ClientError: If the DynamoDB update fails (logged, then re-raised).
        """
        table = self._require_table()

        try:
            table.update_item(
                Key={"user_id": user_id},
                UpdateExpression="SET stripe_customer_id = :customer_id, updated_at = :now",
                ExpressionAttributeValues={
                    ":customer_id": stripe_customer_id,
                    ":now": self._utc_timestamp()
                }
            )
            logger.info(f"Stored Stripe customer ID {stripe_customer_id} for user {user_id}")
        except ClientError as e:
            logger.error(f"Error storing Stripe customer ID for {user_id}: {e}")
            raise