fusesell-1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fusesell might be problematic.

@@ -0,0 +1,19 @@
+ """
+ FuseSell Stages - Individual pipeline stage implementations
+ """
+
+ from .base_stage import BaseStage
+ from .data_acquisition import DataAcquisitionStage
+ from .data_preparation import DataPreparationStage
+ from .lead_scoring import LeadScoringStage
+ from .initial_outreach import InitialOutreachStage
+ from .follow_up import FollowUpStage
+
+ __all__ = [
+     'BaseStage',
+     'DataAcquisitionStage',
+     'DataPreparationStage',
+     'LeadScoringStage',
+     'InitialOutreachStage',
+     'FollowUpStage'
+ ]
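
The re-exports above define the public surface of the stages subpackage. As a point of reference, the sketch below shows how these classes might be wired together by hand; it is illustrative only and not part of the released wheel. It assumes the package root is named fusesell, that LocalDataManager is importable from fusesell.utils.data_manager (matching the relative imports in base_stage.py below), that the concrete stages keep the BaseStage constructor signature, and that a caller drives each stage through the execute_with_timing and should_stop_pipeline helpers defined on BaseStage.

# Illustrative sketch only -- not part of the fusesell 1.2.0 wheel.
from fusesell.stages import (  # assumed package root name
    DataAcquisitionStage,
    DataPreparationStage,
    FollowUpStage,
    InitialOutreachStage,
    LeadScoringStage,
)
from fusesell.utils.data_manager import LocalDataManager  # assumed import path

config = {
    'openai_api_key': 'sk-...',    # enables the shared LLM client in BaseStage
    'llm_model': 'gpt-4o-mini',
    'data_dir': './fusesell_data',
    'dry_run': True,
}

# One shared data manager avoids the per-stage warning about creating
# duplicate LocalDataManager instances.
data_manager = LocalDataManager(config['data_dir'])

stages = [
    DataAcquisitionStage(config, data_manager=data_manager),
    DataPreparationStage(config, data_manager=data_manager),
    LeadScoringStage(config, data_manager=data_manager),
    InitialOutreachStage(config, data_manager=data_manager),
    FollowUpStage(config, data_manager=data_manager),
]

context = {'execution_id': 'demo-001', 'input_data': {'customer_name': 'Acme Corp'}}

for stage in stages:
    result = stage.execute_with_timing(context)
    if stage.should_stop_pipeline(result):
        break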
@@ -0,0 +1,602 @@
+ """
+ Base Stage Interface for FuseSell Pipeline Stages
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Dict, Any, Optional
+ import logging
+ import json
+ import time
+ from datetime import datetime
+ import uuid
+
+ from ..utils.llm_client import LLMClient
+ from ..utils.data_manager import LocalDataManager
+
+
+ class BaseStage(ABC):
+     """
+     Abstract base class for all FuseSell pipeline stages.
+     Provides common functionality and interface for stage implementations.
+     """
+
+     def __init__(self, config: Dict[str, Any], data_manager: Optional[LocalDataManager] = None):
+         """
+         Initialize the stage with configuration.
+
+         Args:
+             config: Configuration dictionary containing API keys, settings, etc.
+             data_manager: Optional shared data manager instance. If not provided, creates a new one.
+         """
+         self.config = config
+         # Convert class name to snake_case stage name
+         class_name = self.__class__.__name__.replace('Stage', '')
+         # Convert CamelCase to snake_case
+         import re
+         self.stage_name = re.sub('([a-z0-9])([A-Z])', r'\1_\2', class_name).lower()
+         self.logger = logging.getLogger(f"fusesell.{self.stage_name}")
+
+         # Initialize LLM client if API key provided
+         if config.get('openai_api_key'):
+             try:
+                 # Initialize with base URL if provided
+                 llm_kwargs = {
+                     'api_key': config['openai_api_key'],
+                     'model': config.get('llm_model', 'gpt-4o-mini')
+                 }
+                 if config.get('llm_base_url'):
+                     llm_kwargs['base_url'] = config['llm_base_url']
+
+                 self.llm_client = LLMClient(**llm_kwargs)
+             except ImportError as e:
+                 self.logger.warning(f"LLM client not available: {str(e)}")
+                 self.llm_client = None
+         else:
+             self.llm_client = None
+
+         # Use provided data manager or create new one (for backward compatibility)
+         if data_manager is not None:
+             self.data_manager = data_manager
+             self.logger.debug("Using shared data manager instance")
+         else:
+             self.data_manager = LocalDataManager(config.get('data_dir', './fusesell_data'))
+             self.logger.warning("Created new data manager instance - this may cause performance overhead. Consider using shared data manager.")
+
+     @abstractmethod
+     def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Execute the stage logic and return results.
+
+         Args:
+             context: Execution context containing input data and previous results
+
+         Returns:
+             Dictionary containing stage results and metadata
+         """
+         pass
+
+     @abstractmethod
+     def validate_input(self, context: Dict[str, Any]) -> bool:
+         """
+         Validate input data for this stage.
+
+         Args:
+             context: Execution context to validate
+
+         Returns:
+             True if input is valid, False otherwise
+         """
+         pass
+
+     def call_llm(self, prompt: str, **kwargs) -> str:
+         """
+         Standardized LLM calling interface.
+
+         Args:
+             prompt: The prompt to send to the LLM
+             **kwargs: Additional parameters for the LLM call
+
+         Returns:
+             LLM response text
+
+         Raises:
+             ValueError: If LLM client is not initialized
+         """
+         if not self.llm_client:
+             raise ValueError("LLM client not initialized. Provide openai_api_key in config.")
+
+         self.logger.debug(f"Calling LLM with prompt length: {len(prompt)}")
+
+         try:
+             response = self.llm_client.chat_completion(
+                 messages=[{"role": "user", "content": prompt}],
+                 **kwargs
+             )
+
+             self.logger.debug(f"LLM response length: {len(response)}")
+             return response
+
+         except Exception as e:
+             self.logger.error(f"LLM call failed: {str(e)}")
+             raise
+
+     def call_llm_with_system(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
+         """
+         Call LLM with system and user prompts.
+
+         Args:
+             system_prompt: System prompt to set context
+             user_prompt: User prompt with the actual request
+             **kwargs: Additional parameters for the LLM call
+
+         Returns:
+             LLM response text
+         """
+         if not self.llm_client:
+             raise ValueError("LLM client not initialized. Provide openai_api_key in config.")
+
+         self.logger.debug(f"Calling LLM with system prompt length: {len(system_prompt)}, user prompt length: {len(user_prompt)}")
+
+         try:
+             response = self.llm_client.chat_completion(
+                 messages=[
+                     {"role": "system", "content": system_prompt},
+                     {"role": "user", "content": user_prompt}
+                 ],
+                 **kwargs
+             )
+
+             self.logger.debug(f"LLM response length: {len(response)}")
+             return response
+
+         except Exception as e:
+             self.logger.error(f"LLM call with system prompt failed: {str(e)}")
+             raise
+
+     def call_llm_structured(self, prompt: str, response_format: str = "json", **kwargs) -> Dict[str, Any]:
+         """
+         Call LLM and parse structured response.
+
+         Args:
+             prompt: The prompt to send to the LLM
+             response_format: Expected response format ('json' or 'yaml')
+             **kwargs: Additional parameters for the LLM call
+
+         Returns:
+             Parsed structured response
+
+         Raises:
+             ValueError: If response cannot be parsed
+         """
+         # Add format instruction to prompt
+         if response_format.lower() == "json":
+             formatted_prompt = f"{prompt}\n\nPlease respond with valid JSON format."
+         else:
+             formatted_prompt = prompt
+
+         response = self.call_llm(formatted_prompt, **kwargs)
+
+         if response_format.lower() == "json":
+             return self.parse_json_response(response)
+         else:
+             return {"raw_response": response}
+
+     def parse_json_response(self, response: str) -> Dict[str, Any]:
+         """
+         Parse JSON response from LLM, handling common formatting issues.
+
+         Args:
+             response: Raw LLM response text
+
+         Returns:
+             Parsed JSON dictionary
+
+         Raises:
+             ValueError: If response cannot be parsed as JSON
+         """
+         try:
+             # Try direct parsing first
+             return json.loads(response)
+         except json.JSONDecodeError:
+             # Try to extract JSON from markdown code blocks
+             if "```json" in response:
+                 start = response.find("```json") + 7
+                 end = response.find("```", start)
+                 if end != -1:
+                     json_str = response[start:end].strip()
+                     return json.loads(json_str)
+
+             # Try to extract JSON from the response
+             start = response.find("{")
+             end = response.rfind("}") + 1
+             if start != -1 and end > start:
+                 json_str = response[start:end]
+                 return json.loads(json_str)
+
+             raise ValueError(f"Could not parse JSON from LLM response: {response[:200]}...")
+
+     def log_stage_start(self, context: Dict[str, Any]) -> None:
+         """Log the start of stage execution."""
+         execution_id = context.get('execution_id', 'unknown')
+         self.logger.info(f"Starting {self.stage_name} stage for execution {execution_id}")
+
+     def log_stage_complete(self, context: Dict[str, Any], result: Dict[str, Any]) -> None:
+         """Log the completion of stage execution."""
+         execution_id = context.get('execution_id', 'unknown')
+         status = result.get('status', 'unknown')
+         self.logger.info(f"Completed {self.stage_name} stage for execution {execution_id} with status: {status}")
+
+     def log_stage_error(self, context: Dict[str, Any], error: Exception) -> None:
+         """Log stage execution errors."""
+         execution_id = context.get('execution_id', 'unknown')
+         self.logger.error(f"Error in {self.stage_name} stage for execution {execution_id}: {str(error)}")
+
+     def execute_with_timing(self, context: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Execute the stage with performance timing and consolidated logging.
+
+         Args:
+             context: Execution context containing input data and previous results
+
+         Returns:
+             Dictionary containing stage results and metadata with timing information
+         """
+         execution_id = context.get('execution_id', 'unknown')
+         start_time = time.time()
+
+         # Single start log message
+         self.logger.info(f"Starting {self.stage_name} stage for execution {execution_id}")
+
+         try:
+             # Execute the actual stage logic (stages should NOT log completion themselves)
+             result = self.execute(context)
+
+             # Calculate timing
+             end_time = time.time()
+             duration = end_time - start_time
+
+             # Add timing information to result
+             if isinstance(result, dict):
+                 result['timing'] = {
+                     'start_time': start_time,
+                     'end_time': end_time,
+                     'duration_seconds': duration
+                 }
+
+             # Single completion log message with timing
+             status = result.get('status', 'unknown') if isinstance(result, dict) else 'unknown'
+             self.logger.info(f"Completed {self.stage_name} stage for execution {execution_id} with status: {status} in {duration:.2f} seconds")
+
+             return result
+
+         except Exception as e:
+             end_time = time.time()
+             duration = end_time - start_time
+
+             # Single error log message with timing
+             self.logger.error(f"Error in {self.stage_name} stage for execution {execution_id} after {duration:.2f} seconds: {str(e)}")
+             raise
+
+     def save_stage_result(self, context: Dict[str, Any], result: Dict[str, Any]) -> None:
+         """
+         Save stage result to local database (backward compatibility only).
+
+         Note: Operation tracking is now handled by the pipeline using server-compatible schema.
+         This method only maintains backward compatibility with the old stage_results table.
+
+         Args:
+             context: Execution context
+             result: Stage execution result
+         """
+         try:
+             # Save to stage_results table (backward compatibility only)
+             # The pipeline now handles operation creation with server-compatible schema
+             self.data_manager.save_stage_result(
+                 execution_id=context.get('execution_id'),
+                 stage_name=self.stage_name,
+                 input_data=context.get('input_data', {}),
+                 output_data=result,
+                 status=result.get('status', 'unknown')
+             )
+
+         except Exception as e:
+             self.logger.debug(f"Backward compatibility save failed (expected): {str(e)}")
+
+     def get_prompt_template(self, prompt_key: str) -> str:
+         """
+         Get prompt template from configuration.
+
+         Args:
+             prompt_key: Key for the prompt template
+
+         Returns:
+             Prompt template string
+         """
+         try:
+             prompts = self.data_manager.load_prompts()
+             stage_prompts = prompts.get(self.stage_name, {})
+             return stage_prompts.get(prompt_key, "")
+         except Exception as e:
+             self.logger.warning(f"Failed to load prompt template {prompt_key}: {str(e)}")
+             return ""
+
+     def get_required_fields(self) -> list:
+         """
+         Get list of required input fields for this stage.
+
+         Returns:
+             List of required field names
+         """
+         # Default implementation - stages should override this
+         return []
+
+     def validate_required_fields(self, context: Dict[str, Any]) -> list:
+         """
+         Validate that all required fields are present in the context.
+
+         Args:
+             context: Execution context to validate
+
+         Returns:
+             List of missing required fields
+         """
+         input_data = context.get('input_data', {})
+         required_fields = self.get_required_fields()
+         missing_fields = []
+
+         for field in required_fields:
+             if field not in input_data or input_data[field] is None:
+                 missing_fields.append(field)
+
+         return missing_fields
+
+     def validate_context(self, context: Dict[str, Any]) -> tuple[bool, list]:
+         """
+         Comprehensive context validation.
+
+         Args:
+             context: Execution context to validate
+
+         Returns:
+             Tuple of (is_valid, list_of_errors)
+         """
+         errors = []
+
+         # Check for execution ID
+         if not context.get('execution_id'):
+             errors.append("Missing execution_id in context")
+
+         # Check for input data
+         if 'input_data' not in context:
+             errors.append("Missing input_data in context")
+
+         # Check required fields
+         missing_fields = self.validate_required_fields(context)
+         if missing_fields:
+             errors.append(f"Missing required fields: {', '.join(missing_fields)}")
+
+         # Stage-specific validation
+         if not self.validate_input(context):
+             errors.append("Stage-specific input validation failed")
+
+         return len(errors) == 0, errors
+
+     def format_prompt(self, template: str, **kwargs) -> str:
+         """
+         Format prompt template with provided variables.
+
+         Args:
+             template: Prompt template string
+             **kwargs: Variables to substitute in template
+
+         Returns:
+             Formatted prompt string
+         """
+         try:
+             return template.format(**kwargs)
+         except KeyError as e:
+             self.logger.warning(f"Missing variable in prompt template: {str(e)}")
+             return template
+
+     def generate_execution_id(self) -> str:
+         """Generate unique execution ID."""
+         return f"{self.stage_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}"
+
+     def should_stop_pipeline(self, result: Dict[str, Any]) -> bool:
+         """
+         Determine if pipeline should stop based on stage result.
+
+         Args:
+             result: Stage execution result
+
+         Returns:
+             True if pipeline should stop, False otherwise
+         """
+         # Stop if stage failed
+         if result.get('status') in ['fail', 'error']:
+             return True
+
+         # Stop if explicit stop condition from business logic
+         if result.get('pipeline_stop', False):
+             return True
+
+         # Stop if explicit stop condition (legacy)
+         if result.get('stop_pipeline', False):
+             return True
+
+         return False
+
+     def create_error_result(self, error: Exception, context: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Create standardized error result.
+
+         Args:
+             error: Exception that occurred
+             context: Execution context
+
+         Returns:
+             Error result dictionary
+         """
+         return {
+             'status': 'error',
+             'error_type': type(error).__name__,
+             'error_message': str(error),
+             'stage': self.stage_name,
+             'execution_id': context.get('execution_id'),
+             'timestamp': datetime.now().isoformat()
+         }
+
+     def create_success_result(self, data: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Create standardized success result.
+
+         Args:
+             data: Stage output data
+             context: Execution context
+
+         Returns:
+             Success result dictionary
+         """
+         return {
+             'status': 'success',
+             'stage': self.stage_name,
+             'execution_id': context.get('execution_id'),
+             'timestamp': datetime.now().isoformat(),
+             'data': data
+         }
+
+     def create_skip_result(self, reason: str, context: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Create standardized skip result.
+
+         Args:
+             reason: Reason for skipping the stage
+             context: Execution context
+
+         Returns:
+             Skip result dictionary
+         """
+         return {
+             'status': 'skipped',
+             'reason': reason,
+             'stage': self.stage_name,
+             'execution_id': context.get('execution_id'),
+             'timestamp': datetime.now().isoformat()
+         }
+
+     def handle_stage_error(self, error: Exception, context: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Comprehensive error handling for stage execution.
+
+         Args:
+             error: Exception that occurred
+             context: Execution context
+
+         Returns:
+             Error result dictionary
+         """
+         # Log the error
+         self.log_stage_error(context, error)
+
+         # Save error to database if possible
+         try:
+             self.data_manager.save_stage_result(
+                 execution_id=context.get('execution_id'),
+                 stage_name=self.stage_name,
+                 input_data=context.get('input_data', {}),
+                 output_data={'error': str(error)},
+                 status='error',
+                 error_message=str(error)
+             )
+         except Exception as save_error:
+             self.logger.warning(f"Failed to save error result: {str(save_error)}")
+
+         # Return standardized error result
+         return self.create_error_result(error, context)
+
+     def get_stage_config(self, key: str, default: Any = None) -> Any:
+         """
+         Get stage-specific configuration value.
+
+         Args:
+             key: Configuration key
+             default: Default value if key not found
+
+         Returns:
+             Configuration value
+         """
+         stage_config = self.config.get('stages', {}).get(self.stage_name, {})
+         return stage_config.get(key, default)
+
+     def is_dry_run(self) -> bool:
+         """
+         Check if this is a dry run execution.
+
+         Returns:
+             True if dry run mode is enabled
+         """
+         return self.config.get('dry_run', False)
+
+     def get_team_settings(self, team_id: str = None) -> Optional[Dict[str, Any]]:
+         """
+         Get team settings for the current execution.
+
+         Args:
+             team_id: Team ID to get settings for. If None, uses team_id from config.
+
+         Returns:
+             Team settings dictionary or None if not found
+         """
+         if not team_id:
+             team_id = self.config.get('team_id')
+
+         if not team_id:
+             return None
+
+         try:
+             settings = self.data_manager.get_team_settings(team_id)
+             if settings:
+                 self.logger.debug(f"Loaded team settings for team: {team_id}")
+             else:
+                 self.logger.debug(f"No team settings found for team: {team_id}")
+             return settings
+         except Exception as e:
+             self.logger.warning(f"Failed to load team settings for team {team_id}: {str(e)}")
+             return None
+
+     def get_team_setting(self, setting_name: str, team_id: str = None, default: Any = None) -> Any:
+         """
+         Get a specific team setting value.
+
+         Args:
+             setting_name: Name of the setting to retrieve
+             team_id: Team ID to get settings for. If None, uses team_id from config.
+             default: Default value if setting not found
+
+         Returns:
+             Setting value or default
+         """
+         team_settings = self.get_team_settings(team_id)
+         if team_settings and setting_name in team_settings:
+             return team_settings[setting_name]
+         return default
+
+     def get_execution_metadata(self, context: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Get execution metadata for logging and tracking.
+
+         Args:
+             context: Execution context
+
+         Returns:
+             Metadata dictionary
+         """
+         return {
+             'execution_id': context.get('execution_id'),
+             'stage': self.stage_name,
+             'org_id': self.config.get('org_id'),
+             'org_name': self.config.get('org_name'),
+             'customer_name': context.get('input_data', {}).get('customer_name'),
+             'timestamp': datetime.now().isoformat(),
+             'dry_run': self.is_dry_run()
+         }
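
For context, the two abstract methods above (execute and validate_input) are all a concrete stage must supply; everything else is inherited helper behavior. The subclass below is a hypothetical sketch of the expected shape of such an implementation, using only helpers defined in this file; the class name ExampleScoringStage, its prompt, and the import path are assumptions, not part of the released package.

# Hypothetical subclass, shown only to illustrate the BaseStage contract.
from typing import Any, Dict

from fusesell.stages import BaseStage  # assumed package root name


class ExampleScoringStage(BaseStage):
    """Illustrative stage that scores a lead via the shared LLM helpers."""

    def get_required_fields(self) -> list:
        # Checked by validate_required_fields() / validate_context().
        return ['customer_name']

    def validate_input(self, context: Dict[str, Any]) -> bool:
        # Stage-specific validation; required-field checks are handled
        # separately by validate_context().
        return bool(context.get('input_data', {}).get('customer_name'))

    def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
        if self.is_dry_run():
            return self.create_skip_result('dry run enabled', context)

        is_valid, errors = self.validate_context(context)
        if not is_valid:
            return self.create_error_result(ValueError('; '.join(errors)), context)

        customer = context['input_data']['customer_name']
        scoring = self.call_llm_structured(
            f"Score the sales lead '{customer}' from 0-100 and explain why."
        )
        return self.create_success_result({'scoring': scoring}, context)

Because the class name ends in "Stage", the BaseStage constructor derives the stage name example_scoring, which is then used for logging and for stage-level configuration lookups such as get_stage_config.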