fusesell 1.3.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. fusesell-1.3.42.dist-info/METADATA +873 -0
  2. fusesell-1.3.42.dist-info/RECORD +35 -0
  3. fusesell-1.3.42.dist-info/WHEEL +5 -0
  4. fusesell-1.3.42.dist-info/entry_points.txt +2 -0
  5. fusesell-1.3.42.dist-info/licenses/LICENSE +21 -0
  6. fusesell-1.3.42.dist-info/top_level.txt +2 -0
  7. fusesell.py +20 -0
  8. fusesell_local/__init__.py +37 -0
  9. fusesell_local/api.py +343 -0
  10. fusesell_local/cli.py +1480 -0
  11. fusesell_local/config/__init__.py +11 -0
  12. fusesell_local/config/default_email_templates.json +34 -0
  13. fusesell_local/config/default_prompts.json +19 -0
  14. fusesell_local/config/default_scoring_criteria.json +154 -0
  15. fusesell_local/config/prompts.py +245 -0
  16. fusesell_local/config/settings.py +277 -0
  17. fusesell_local/pipeline.py +978 -0
  18. fusesell_local/stages/__init__.py +19 -0
  19. fusesell_local/stages/base_stage.py +603 -0
  20. fusesell_local/stages/data_acquisition.py +1820 -0
  21. fusesell_local/stages/data_preparation.py +1238 -0
  22. fusesell_local/stages/follow_up.py +1728 -0
  23. fusesell_local/stages/initial_outreach.py +2972 -0
  24. fusesell_local/stages/lead_scoring.py +1452 -0
  25. fusesell_local/utils/__init__.py +36 -0
  26. fusesell_local/utils/agent_context.py +552 -0
  27. fusesell_local/utils/auto_setup.py +361 -0
  28. fusesell_local/utils/birthday_email_manager.py +467 -0
  29. fusesell_local/utils/data_manager.py +4857 -0
  30. fusesell_local/utils/event_scheduler.py +959 -0
  31. fusesell_local/utils/llm_client.py +342 -0
  32. fusesell_local/utils/logger.py +203 -0
  33. fusesell_local/utils/output_helpers.py +2443 -0
  34. fusesell_local/utils/timezone_detector.py +914 -0
  35. fusesell_local/utils/validators.py +436 -0
@@ -0,0 +1,436 @@
1
+ """
2
+ Input validation utilities for FuseSell Local
3
+ """
4
+
5
+ import re
6
+ import urllib.parse
7
+ from typing import Any, Dict, List, Optional
8
+ import logging
9
+
10
+
11
class InputValidator:
    """
    Validates input data for FuseSell pipeline execution.

    Provides format checks for URLs, emails, phone numbers and API keys,
    structural checks for the pipeline configuration / execution context /
    per-stage inputs, a simple string sanitizer, and a minimal JSON-schema
    style validator (required fields + primitive types only).

    All ``validate_*`` methods that return ``List[str]`` report problems as
    human-readable messages and return an empty list when the input is valid.
    """

    # Keys that may carry customer data; at least one must be a non-blank string.
    _DATA_SOURCE_FIELDS = (
        'input_website',
        'input_description',
        'input_business_card',
        'input_linkedin_url',
        'input_facebook_url',
        'input_freetext',
    )

    # URL-valued input fields mapped to the label used in error messages.
    _INPUT_URL_FIELDS = {
        'input_website': 'input website URL',
        'input_business_card': 'input business card URL',
        'input_linkedin_url': 'input LinkedIn URL',
        'input_facebook_url': 'input Facebook URL',
    }

    # Optional URL fields validated in addition to the ``input_*`` ones.
    _OPTIONAL_URL_FIELDS = ('business_card_url', 'linkedin_url', 'facebook_url')

    # Host / port shapes accepted by validate_url (always used with fullmatch).
    _IPV4_PATTERN = re.compile(r'[0-9]{1,3}(?:\.[0-9]{1,3}){3}')
    _DOMAIN_PATTERN = re.compile(r'[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
    _PORT_PATTERN = re.compile(r'[0-9]{1,5}')

    # JSON-schema primitive type names mapped to Python types.
    _JSON_TYPE_MAP = {
        'string': str,
        'integer': int,
        'number': (int, float),
        'boolean': bool,
        'array': list,
        'object': dict,
        'null': type(None),
    }

    def __init__(self):
        """Initialize validator with its logger and compiled regex patterns."""
        self.logger = logging.getLogger("fusesell.validator")

        # Regex patterns
        self.email_pattern = re.compile(
            r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
        )

        self.phone_pattern = re.compile(
            r'^[\+]?[1-9][\d]{0,15}$|^[\(]?[\d\s\-\(\)]{7,}$'
        )

        self.api_key_pattern = re.compile(
            r'^sk-[a-zA-Z0-9\-_]{3,}$'
        )

    def validate_url(self, url: str) -> bool:
        """
        Validate URL format (no network access is performed).

        ``https://`` is prepended when the value has no http/https scheme.
        Accepted hosts are exactly ``localhost``, a dotted-quad IPv4 address
        with every octet in 0-255, or a domain name ending in an alphabetic
        TLD. An optional numeric port (0-65535) is allowed. URLs carrying
        userinfo (``user@host``) and IPv6 literals are rejected.

        Args:
            url: URL string to validate

        Returns:
            True if URL is valid, False otherwise
        """
        if not url or not isinstance(url, str):
            return False

        candidate = url.strip()
        # Schemes are case-insensitive, so compare on a lowered copy.
        if not candidate.lower().startswith(('http://', 'https://')):
            candidate = f'https://{candidate}'

        try:
            parsed = urllib.parse.urlparse(candidate)
        except ValueError as exc:
            # urlparse raises ValueError for malformed netlocs (e.g. bad IPv6).
            self.logger.debug("URL validation failed for %s: %s", url, exc)
            return False

        if parsed.scheme not in ('http', 'https') or not parsed.netloc:
            return False

        netloc = parsed.netloc.lower()

        # With userinfo the text before '@' is NOT the host; reject outright so
        # something like "localhost:x@evil.example" cannot pass as localhost.
        if '@' in netloc:
            return False

        host, has_port, port = netloc.partition(':')
        if has_port:
            if not self._PORT_PATTERN.fullmatch(port) or int(port) > 65535:
                return False

        # localhost (for development) must match exactly, not as a prefix.
        if host == 'localhost':
            return True

        # IPv4 address: shape first, then octet range.
        if self._IPV4_PATTERN.fullmatch(host):
            return all(int(octet) <= 255 for octet in host.split('.'))

        # Standard domain with TLD (e.g. example.com)
        return bool(self._DOMAIN_PATTERN.fullmatch(host))

    def validate_email(self, email: str) -> bool:
        """
        Validate email address format.

        Args:
            email: Email address to validate (surrounding whitespace ignored)

        Returns:
            True if email is valid, False otherwise
        """
        if not email or not isinstance(email, str):
            return False

        return bool(self.email_pattern.match(email.strip()))

    def validate_phone(self, phone: str) -> bool:
        """
        Validate phone number format.

        Characters other than digits, ``+``, parentheses, hyphens and
        whitespace are removed before the pattern is applied.

        Args:
            phone: Phone number to validate

        Returns:
            True if phone is valid, False otherwise
        """
        if not phone or not isinstance(phone, str):
            return False

        # Clean phone number
        cleaned = re.sub(r'[^\d\+\(\)\-\s]', '', phone.strip())

        return bool(self.phone_pattern.match(cleaned))

    def validate_api_key(self, api_key: str) -> bool:
        """
        Validate OpenAI API key format (``sk-`` followed by 3+ key characters).

        Args:
            api_key: API key to validate

        Returns:
            True if API key format is valid, False otherwise
        """
        if not api_key or not isinstance(api_key, str):
            return False

        return bool(self.api_key_pattern.match(api_key.strip()))

    def validate_execution_context(self, context: Dict[str, Any]) -> List[str]:
        """
        Validate execution context for pipeline stages.

        Args:
            context: Execution context dictionary

        Returns:
            List of validation error messages (empty if valid)
        """
        errors = [
            f"Missing required field: {field}"
            for field in ('execution_id', 'config')
            if field not in context
        ]

        # An absent or empty config is not validated further; a missing key is
        # already reported above.
        config = context.get('config', {})
        if config:
            errors.extend(self.validate_config(config))

        return errors

    def validate_config(self, config: Dict[str, Any]) -> List[str]:
        """
        Validate pipeline configuration.

        Checks required fields, that at least one data source is a non-blank
        string, the API key / URL / email / phone formats, and the numeric
        ranges of ``temperature`` and ``max_retries`` (booleans are rejected
        for both even though ``bool`` is an ``int`` subclass).

        Args:
            config: Configuration dictionary

        Returns:
            List of validation error messages (empty if valid)
        """
        if not isinstance(config, dict):
            return ["Configuration must be a dictionary"]

        errors = []

        # Required configuration fields
        required_fields = {
            'openai_api_key': 'OpenAI API key',
            'org_id': 'Organization ID',
            'org_name': 'Organization name'
        }

        for field, description in required_fields.items():
            if not config.get(field):
                errors.append(f"Missing required configuration: {description}")

        # Check that at least one data source is provided (matching new input schema)
        if not self._has_data_source(config):
            errors.append("At least one data source is required (input_website, input_description, input_business_card, input_linkedin_url, input_facebook_url, or input_freetext)")

        # Validate specific fields
        if config.get('openai_api_key') and not self.validate_api_key(config['openai_api_key']):
            errors.append("Invalid OpenAI API key format")

        # Validate URLs if provided (matching new input schema)
        for field, description in self._INPUT_URL_FIELDS.items():
            if config.get(field) and not self.validate_url(config[field]):
                errors.append(f"Invalid {description}")

        if config.get('contact_email') and not self.validate_email(config['contact_email']):
            errors.append("Invalid contact email address")

        if config.get('contact_phone') and not self.validate_phone(config['contact_phone']):
            errors.append("Invalid contact phone number")

        # Validate optional URLs
        for field in self._OPTIONAL_URL_FIELDS:
            if config.get(field) and not self.validate_url(config[field]):
                errors.append(f"Invalid {field.replace('_', ' ')}")

        # Validate numeric ranges
        if 'temperature' in config:
            temp = config['temperature']
            is_number = isinstance(temp, (int, float)) and not isinstance(temp, bool)
            if not is_number or not (0.0 <= temp <= 2.0):
                errors.append("Temperature must be a number between 0.0 and 2.0")

        if 'max_retries' in config:
            retries = config['max_retries']
            is_int = isinstance(retries, int) and not isinstance(retries, bool)
            if not is_int or retries < 0:
                errors.append("Max retries must be a non-negative integer")

        return errors

    def validate_stage_input(self, stage_name: str, input_data: Dict[str, Any]) -> List[str]:
        """
        Validate input data for specific pipeline stage.

        Unknown stage names are not validated and yield an empty list.

        Args:
            stage_name: Name of the pipeline stage
            input_data: Input data to validate

        Returns:
            List of validation error messages (empty if valid)
        """
        stage_validators = {
            'data_acquisition': self._validate_data_acquisition_input,
            'data_preparation': self._validate_data_preparation_input,
            'lead_scoring': self._validate_lead_scoring_input,
            'initial_outreach': self._validate_initial_outreach_input,
            'follow_up': self._validate_follow_up_input,
        }

        validator = stage_validators.get(stage_name)
        if validator is None:
            return []

        return list(validator(input_data))

    def _has_data_source(self, data: Dict[str, Any]) -> bool:
        """Return True if any data-source field holds a non-blank string."""
        for field in self._DATA_SOURCE_FIELDS:
            value = data.get(field)
            # Non-string values are ignored rather than crashing on .strip().
            if isinstance(value, str) and value.strip():
                return True
        return False

    def _validate_data_acquisition_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate data acquisition stage input."""
        errors = []

        # Check that at least one data source is provided (matching new input schema)
        if not self._has_data_source(input_data):
            errors.append("At least one customer data source is required for data acquisition")

        # Validate URLs if provided (matching new input schema)
        for field, description in self._INPUT_URL_FIELDS.items():
            if input_data.get(field) and not self.validate_url(input_data[field]):
                errors.append(f"Invalid {description}")

        return errors

    def _validate_data_preparation_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate data preparation stage input."""
        errors = []

        # Should have raw customer data from previous stage
        if not input_data.get('raw_customer_data'):
            errors.append("Raw customer data is required for data preparation")

        return errors

    def _validate_lead_scoring_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate lead scoring stage input."""
        # Should have structured customer data
        return [
            f"Missing required field for lead scoring: {field}"
            for field in ('companyInfo', 'painPoints')
            if field not in input_data
        ]

    def _validate_initial_outreach_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate initial outreach stage input."""
        # Should have customer data and lead scores
        errors = [
            f"Missing required field for initial outreach: {field}"
            for field in ('customer_data', 'lead_scores')
            if field not in input_data
        ]

        # Validate contact information (check both old and new data structures).
        # A None / non-dict value is treated as "no contact data" instead of
        # raising AttributeError.
        customer_data = input_data.get('customer_data')
        if not isinstance(customer_data, dict):
            customer_data = {}

        # Old structure: flat contact_* keys
        has_old_contact = customer_data.get('contact_email') or customer_data.get('contact_name')

        # New structure: nested primaryContact
        primary_contact = customer_data.get('primaryContact')
        if not isinstance(primary_contact, dict):
            primary_contact = {}
        has_new_contact = primary_contact.get('email') or primary_contact.get('name')

        if not has_old_contact and not has_new_contact:
            errors.append("Contact email or name is required for outreach")

        return errors

    def _validate_follow_up_input(self, input_data: Dict[str, Any]) -> List[str]:
        """Validate follow-up stage input."""
        errors = []

        # Should have previous interaction data
        if not input_data.get('previous_interactions'):
            errors.append("Previous interaction data is required for follow-up")

        return errors

    def sanitize_input(self, data: Any) -> Any:
        """
        Sanitize input data by stripping a small set of risky characters.

        Strings have ``<``, ``>``, ``"`` and ``'`` removed and surrounding
        whitespace trimmed. Dict values and list items are sanitized
        recursively (dict keys are left as-is). Any other value is returned
        unchanged.

        Args:
            data: Input data to sanitize

        Returns:
            Sanitized data
        """
        if isinstance(data, str):
            # Remove potentially dangerous characters
            return re.sub(r'[<>"\']', '', data).strip()

        if isinstance(data, dict):
            return {key: self.sanitize_input(value) for key, value in data.items()}

        if isinstance(data, list):
            return [self.sanitize_input(item) for item in data]

        return data

    def validate_json_schema(self, data: Dict[str, Any], schema: Dict[str, Any]) -> List[str]:
        """
        Validate data against a JSON schema (simplified).

        Only the top-level ``required`` list and the ``type`` of each entry in
        ``properties`` are checked; nested schemas are not traversed.

        Args:
            data: Data to validate
            schema: JSON schema definition

        Returns:
            List of validation error messages (empty if valid)
        """
        errors = []

        try:
            required = schema.get('required', [])
            properties = schema.get('properties', {})

            # Check required fields
            for field in required:
                if field not in data:
                    errors.append(f"Missing required field: {field}")

            # Check field types
            for field, value in data.items():
                if field in properties:
                    expected_type = properties[field].get('type')
                    if expected_type and not self._check_type(value, expected_type):
                        errors.append(f"Invalid type for field {field}: expected {expected_type}")

        except (AttributeError, TypeError) as e:
            # Malformed data/schema (e.g. not a dict) is reported, not raised.
            errors.append(f"Schema validation error: {str(e)}")

        return errors

    def _check_type(self, value: Any, expected_type: Any) -> bool:
        """
        Check if value matches the expected JSON-schema type.

        ``expected_type`` may be a single type name or a list/tuple of names
        (any match passes). Booleans are not accepted as ``integer`` or
        ``number`` even though ``bool`` subclasses ``int`` in Python.
        Unrecognized type names are not enforced and return True.
        """
        if isinstance(expected_type, (list, tuple)):
            return any(self._check_type(value, name) for name in expected_type)

        if isinstance(value, bool) and expected_type in ('integer', 'number'):
            return False

        expected_python_type = self._JSON_TYPE_MAP.get(expected_type)
        if expected_python_type:
            return isinstance(value, expected_python_type)

        return True