fusesell 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fusesell might be problematic. Click here for more details.

@@ -0,0 +1,416 @@
1
+ """
2
+ Input validation utilities for FuseSell Local
3
+ """
4
+
5
+ import re
6
+ import urllib.parse
7
+ from typing import Any, Dict, List, Optional
8
+ import logging
9
+
10
+
11
class InputValidator:
    """
    Validates input data for FuseSell pipeline execution.

    Provides format checks for URLs, emails, phone numbers and API keys, plus
    structural validation of execution contexts, pipeline configuration and
    per-stage inputs. The ``validate_<format>`` predicates return ``bool``;
    the structural validators return a list of human-readable error messages
    (an empty list means the input is valid).
    """

    # Keys that count as a customer data source in a config / stage input.
    _DATA_SOURCE_FIELDS = (
        'input_website',
        'input_description',
        'input_business_card',
        'input_linkedin_url',
        'input_facebook_url',
        'input_freetext',
    )

    # URL-valued input keys mapped to the label used in error messages.
    _INPUT_URL_FIELDS = {
        'input_website': 'input website URL',
        'input_business_card': 'input business card URL',
        'input_linkedin_url': 'input LinkedIn URL',
        'input_facebook_url': 'input Facebook URL',
    }

    # Optional URL keys checked in addition to the input_* keys above.
    _OPTIONAL_URL_FIELDS = ('business_card_url', 'linkedin_url', 'facebook_url')

    # Required configuration keys mapped to their human-readable description.
    _REQUIRED_CONFIG_FIELDS = {
        'openai_api_key': 'OpenAI API key',
        'org_id': 'Organization ID',
        'org_name': 'Organization name',
    }

    # Host name shape accepted by validate_url: dotted labels ending in an
    # alphabetic TLD. Matched against the already lower-cased host name.
    _DOMAIN_PATTERN = re.compile(r'[a-z0-9.-]+\.[a-z]{2,}')

    def __init__(self):
        """Initialize validator with its logger and compiled regex patterns."""
        self.logger = logging.getLogger("fusesell.validator")

        self.email_pattern = re.compile(
            r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
        )

        # Two accepted shapes, both with an optional leading "+":
        #   1. a bare run of 1-16 digits not starting with 0
        #   2. seven or more digits / spaces / dashes / parentheses
        self.phone_pattern = re.compile(
            r'^\+?[1-9]\d{0,15}$|^\+?[\d\s\-\(\)]{7,}$'
        )

        self.api_key_pattern = re.compile(
            r'^sk-[a-zA-Z0-9\-_]{3,}$'
        )

    def validate_url(self, url: str) -> bool:
        """
        Validate URL format.

        Only the syntax is checked; no network request is made. A URL is
        accepted when it uses the http or https scheme, carries no embedded
        credentials, has a syntactically valid (optional) port and a dotted
        host name ending in an alphabetic TLD.

        Args:
            url: URL string to validate

        Returns:
            True if URL is valid, False otherwise
        """
        if not url or not isinstance(url, str):
            return False

        try:
            parsed = urllib.parse.urlparse(url)

            if parsed.scheme not in ('http', 'https') or not parsed.netloc:
                return False

            # Reject "user:pass@host" style authorities.
            if parsed.username is not None or parsed.password is not None:
                return False

            # Accessing .port raises ValueError for a malformed or
            # out-of-range port; the value itself is not needed.
            parsed.port
            hostname = parsed.hostname
        except ValueError as e:
            self.logger.debug("URL validation failed for %s: %s", url, e)
            return False

        # Match the host name only, so "example.com:8080" is not rejected
        # because of its port.
        return bool(hostname and self._DOMAIN_PATTERN.fullmatch(hostname))

    def validate_email(self, email: str) -> bool:
        """
        Validate email address format.

        Args:
            email: Email address to validate (surrounding whitespace is ignored)

        Returns:
            True if email is valid, False otherwise
        """
        if not email or not isinstance(email, str):
            return False

        return bool(self.email_pattern.match(email.strip()))

    def validate_phone(self, phone: str) -> bool:
        """
        Validate phone number format.

        Characters other than digits, "+", parentheses, dashes and whitespace
        are dropped before matching, so extension labels or stray letters do
        not cause a rejection by themselves.

        Args:
            phone: Phone number to validate

        Returns:
            True if phone is valid, False otherwise
        """
        if not phone or not isinstance(phone, str):
            return False

        cleaned = re.sub(r'[^\d\+\(\)\-\s]', '', phone.strip())

        # A string made only of separators (e.g. "-------") is not a number.
        if not re.search(r'\d', cleaned):
            return False

        return bool(self.phone_pattern.match(cleaned))

    def validate_api_key(self, api_key: str) -> bool:
        """
        Validate OpenAI API key format.

        Args:
            api_key: API key to validate (surrounding whitespace is ignored)

        Returns:
            True if API key format is valid, False otherwise
        """
        if not api_key or not isinstance(api_key, str):
            return False

        return bool(self.api_key_pattern.match(api_key.strip()))

    def validate_execution_context(self, context: Dict[str, Any]) -> List[str]:
        """
        Validate execution context for pipeline stages.

        Args:
            context: Execution context dictionary

        Returns:
            List of validation error messages (empty if valid)
        """
        if not isinstance(context, dict):
            return ["Execution context must be a dictionary"]

        errors = [
            f"Missing required field: {field}"
            for field in ('execution_id', 'config')
            if field not in context
        ]

        # Validate whenever the key is present, including an empty dict:
        # an empty config is missing every required setting.
        if 'config' in context:
            errors.extend(self.validate_config(context['config']))

        return errors

    def validate_config(self, config: Dict[str, Any]) -> List[str]:
        """
        Validate pipeline configuration.

        Args:
            config: Configuration dictionary

        Returns:
            List of validation error messages (empty if valid)
        """
        if not isinstance(config, dict):
            return ["Configuration must be a dictionary"]

        errors = [
            f"Missing required configuration: {description}"
            for field, description in self._REQUIRED_CONFIG_FIELDS.items()
            if not config.get(field)
        ]

        if not self._has_data_source(config):
            errors.append("At least one data source is required (input_website, input_description, input_business_card, input_linkedin_url, input_facebook_url, or input_freetext)")

        if config.get('openai_api_key') and not self.validate_api_key(config['openai_api_key']):
            errors.append("Invalid OpenAI API key format")

        errors.extend(self._input_url_errors(config))

        if config.get('contact_email') and not self.validate_email(config['contact_email']):
            errors.append("Invalid contact email address")

        if config.get('contact_phone') and not self.validate_phone(config['contact_phone']):
            errors.append("Invalid contact phone number")

        for field in self._OPTIONAL_URL_FIELDS:
            if config.get(field) and not self.validate_url(config[field]):
                errors.append(f"Invalid {field.replace('_', ' ')}")

        # bool is a subclass of int, so it is excluded explicitly.
        if 'temperature' in config:
            temp = config['temperature']
            if (
                isinstance(temp, bool)
                or not isinstance(temp, (int, float))
                or not (0.0 <= temp <= 2.0)
            ):
                errors.append("Temperature must be a number between 0.0 and 2.0")

        if 'max_retries' in config:
            retries = config['max_retries']
            if isinstance(retries, bool) or not isinstance(retries, int) or retries < 0:
                errors.append("Max retries must be a non-negative integer")

        return errors

    def validate_stage_input(self, stage_name: str, input_data: Dict[str, Any]) -> List[str]:
        """
        Validate input data for specific pipeline stage.

        Args:
            stage_name: Name of the pipeline stage
            input_data: Input data to validate

        Returns:
            List of validation error messages (empty if valid). Stage names
            without a dedicated validator produce no errors.
        """
        stage_validators = {
            'data_acquisition': self._validate_data_acquisition_input,
            'data_preparation': self._validate_data_preparation_input,
            'lead_scoring': self._validate_lead_scoring_input,
            'initial_outreach': self._validate_initial_outreach_input,
            'follow_up': self._validate_follow_up_input,
        }

        validator = stage_validators.get(stage_name) if isinstance(stage_name, str) else None
        if validator is None:
            return []

        if not isinstance(input_data, dict):
            return [f"Input data for stage '{stage_name}' must be a dictionary"]

        return validator(input_data)

    def _has_data_source(self, data: Dict[str, Any]) -> bool:
        """Return True if any data-source key holds a non-blank value."""
        for field in self._DATA_SOURCE_FIELDS:
            value = data.get(field)
            # Only strings can be "blank"; other truthy values count as given.
            if isinstance(value, str):
                value = value.strip()
            if value:
                return True
        return False

    def _input_url_errors(self, data: Dict[str, Any]) -> List[str]:
        """Return an error message for each provided input_* URL that is invalid."""
        return [
            f"Invalid {label}"
            for field, label in self._INPUT_URL_FIELDS.items()
            if data.get(field) and not self.validate_url(data[field])
        ]

    def _validate_data_acquisition_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate data acquisition stage input."""
        errors = []

        if not self._has_data_source(input_data):
            errors.append("At least one customer data source is required for data acquisition")

        errors.extend(self._input_url_errors(input_data))

        return errors

    def _validate_data_preparation_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate data preparation stage input."""
        # Raw customer data is expected from the previous stage.
        if not input_data.get('raw_customer_data'):
            return ["Raw customer data is required for data preparation"]

        return []

    def _validate_lead_scoring_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate lead scoring stage input."""
        return [
            f"Missing required field for lead scoring: {field}"
            for field in ('companyInfo', 'painPoints')
            if field not in input_data
        ]

    def _validate_initial_outreach_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate initial outreach stage input."""
        errors = [
            f"Missing required field for initial outreach: {field}"
            for field in ('customer_data', 'lead_scores')
            if field not in input_data
        ]

        # A present-but-None (or otherwise non-dict) value is treated as
        # "no contact information" instead of raising AttributeError.
        customer_data = input_data.get('customer_data')
        if not isinstance(customer_data, dict):
            customer_data = {}

        # Old structure: flat contact_* keys on customer_data.
        has_old_contact = customer_data.get('contact_email') or customer_data.get('contact_name')

        # New structure: nested primaryContact dictionary.
        primary_contact = customer_data.get('primaryContact')
        if not isinstance(primary_contact, dict):
            primary_contact = {}
        has_new_contact = primary_contact.get('email') or primary_contact.get('name')

        if not has_old_contact and not has_new_contact:
            errors.append("Contact email or name is required for outreach")

        return errors

    def _validate_follow_up_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate follow-up stage input."""
        if not input_data.get('previous_interactions'):
            return ["Previous interaction data is required for follow-up"]

        return []

    def sanitize_input(self, data: Any) -> Any:
        """
        Strip angle brackets and quote characters from string input.

        Strings lose every ``<``, ``>``, ``"`` and ``'`` character and are
        trimmed; dictionaries (values only, keys are left untouched) and
        lists are processed recursively; any other value is returned as is.

        NOTE(review): this is plain character stripping, not context-aware
        escaping; output destined for SQL, HTML or a shell presumably still
        needs the escaping appropriate to that sink.

        Args:
            data: Input data to sanitize

        Returns:
            Sanitized data
        """
        if isinstance(data, str):
            return re.sub(r'[<>"\']', '', data).strip()

        if isinstance(data, dict):
            return {key: self.sanitize_input(value) for key, value in data.items()}

        if isinstance(data, list):
            return [self.sanitize_input(item) for item in data]

        return data

    def validate_json_schema(self, data: Dict[str, Any], schema: Dict[str, Any]) -> List[str]:
        """
        Validate data against a (simplified) JSON schema.

        Only the top-level ``required`` list and the ``type`` of each entry
        in ``properties`` are checked; nested schemas are not descended into.

        Args:
            data: Data to validate
            schema: JSON schema definition

        Returns:
            List of validation error messages (empty if valid)
        """
        errors = []

        try:
            required = schema.get('required', [])
            properties = schema.get('properties', {})

            for field in required:
                if field not in data:
                    errors.append(f"Missing required field: {field}")

            for field, value in data.items():
                if field in properties:
                    expected_type = properties[field].get('type')
                    if expected_type and not self._check_type(value, expected_type):
                        errors.append(f"Invalid type for field {field}: expected {expected_type}")

        except Exception as e:
            # Deliberate best-effort: a malformed schema or non-dict data is
            # reported as a validation error rather than propagated.
            errors.append(f"Schema validation error: {str(e)}")

        return errors

    def _check_type(self, value: Any, expected_type: Any) -> bool:
        """
        Check if value matches expected JSON schema type.

        ``expected_type`` is a JSON schema type name or a list of such names,
        in which case matching any one of them is sufficient. Unknown type
        names are not enforced and always match.
        """
        if isinstance(expected_type, (list, tuple)):
            return any(self._check_type(value, candidate) for candidate in expected_type)

        # bool is a subclass of int in Python but is a distinct JSON type.
        if expected_type in ('integer', 'number') and isinstance(value, bool):
            return False

        type_mapping = {
            'string': str,
            'integer': int,
            'number': (int, float),
            'boolean': bool,
            'array': list,
            'object': dict,
            'null': type(None),
        }

        expected_python_type = type_mapping.get(expected_type)
        if expected_python_type:
            return isinstance(value, expected_python_type)

        return True