fusesell 1.3.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. fusesell-1.3.42.dist-info/METADATA +873 -0
  2. fusesell-1.3.42.dist-info/RECORD +35 -0
  3. fusesell-1.3.42.dist-info/WHEEL +5 -0
  4. fusesell-1.3.42.dist-info/entry_points.txt +2 -0
  5. fusesell-1.3.42.dist-info/licenses/LICENSE +21 -0
  6. fusesell-1.3.42.dist-info/top_level.txt +2 -0
  7. fusesell.py +20 -0
  8. fusesell_local/__init__.py +37 -0
  9. fusesell_local/api.py +343 -0
  10. fusesell_local/cli.py +1480 -0
  11. fusesell_local/config/__init__.py +11 -0
  12. fusesell_local/config/default_email_templates.json +34 -0
  13. fusesell_local/config/default_prompts.json +19 -0
  14. fusesell_local/config/default_scoring_criteria.json +154 -0
  15. fusesell_local/config/prompts.py +245 -0
  16. fusesell_local/config/settings.py +277 -0
  17. fusesell_local/pipeline.py +978 -0
  18. fusesell_local/stages/__init__.py +19 -0
  19. fusesell_local/stages/base_stage.py +603 -0
  20. fusesell_local/stages/data_acquisition.py +1820 -0
  21. fusesell_local/stages/data_preparation.py +1238 -0
  22. fusesell_local/stages/follow_up.py +1728 -0
  23. fusesell_local/stages/initial_outreach.py +2972 -0
  24. fusesell_local/stages/lead_scoring.py +1452 -0
  25. fusesell_local/utils/__init__.py +36 -0
  26. fusesell_local/utils/agent_context.py +552 -0
  27. fusesell_local/utils/auto_setup.py +361 -0
  28. fusesell_local/utils/birthday_email_manager.py +467 -0
  29. fusesell_local/utils/data_manager.py +4857 -0
  30. fusesell_local/utils/event_scheduler.py +959 -0
  31. fusesell_local/utils/llm_client.py +342 -0
  32. fusesell_local/utils/logger.py +203 -0
  33. fusesell_local/utils/output_helpers.py +2443 -0
  34. fusesell_local/utils/timezone_detector.py +914 -0
  35. fusesell_local/utils/validators.py +436 -0
@@ -0,0 +1,1238 @@
1
+ """
2
+ Data Preparation Stage - Clean and structure customer data using AI
3
+ Converted from fusesell_data_preparation.yml
4
+ """
5
+
6
+ import json
7
+ from typing import Dict, Any, List, Optional
8
+ from datetime import datetime
9
+ from .base_stage import BaseStage
10
+
11
+
12
+ class DataPreparationStage(BaseStage):
13
+ """
14
+ Data Preparation stage for cleaning and structuring customer data using LLM.
15
+ Converts YAML workflow logic to Python implementation.
16
+ """
17
+
18
def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run the data preparation stage end to end.

    Reads the acquisition output, flattens it to text, extracts structured
    data through the LLM, applies the pain-point, financial and research
    enhancers in that order, validates the result and persists it.

    Args:
        context: Execution context

    Returns:
        The success result built from the validated data, or the result of
        handle_stage_error when any step raises
    """
    try:
        # Output of the previous (data acquisition) stage
        source_data = self._get_acquisition_data(context)

        # One text blob that the extraction and enhancement steps share
        info_text = self._prepare_customer_info_text(source_data)

        # Initial structured extraction
        prepared = self._extract_structured_customer_info(info_text)

        # Enrichment passes: each receives the current data plus the raw text
        prepared = self._enhance_pain_point_analysis(prepared, info_text)
        prepared = self._enhance_financial_analysis(prepared, info_text)
        prepared = self._enhance_research_analysis(prepared, info_text)

        # Validate and clean before anything is persisted
        prepared = self._validate_and_clean_data(prepared)

        # Persist the customer record first, then the stage result
        self._save_customer_data(context, prepared)
        self.save_stage_result(context, prepared)

        return self.create_success_result(prepared, context)

    except Exception as exc:
        self.log_stage_error(context, exc)
        return self.handle_stage_error(exc, context)
62
+
63
def _get_acquisition_data(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Fetch the output of the data acquisition stage.

    Prefers the 'data' payload recorded under stage_results; otherwise
    builds an equivalent record from the raw input_data (for testing).
    Whatever is returned is also kept on self._current_acquisition_data
    for fallback use.

    Args:
        context: Execution context

    Returns:
        Data acquisition results
    """
    previous_results = context.get('stage_results', {})
    if 'data_acquisition' in previous_results:
        acquired = previous_results['data_acquisition'].get('data', {})
        self._current_acquisition_data = acquired
        return acquired

    raw = context.get('input_data', {})

    def pick(*keys):
        # First truthy value among the leading keys; the final key is the
        # last resort and defaults to an empty string.
        *preferred, last_resort = keys
        for key in preferred:
            candidate = raw.get(key)
            if candidate:
                return candidate
        return raw.get(last_resort, '')

    rebuilt = {
        'company_name': pick('customer_name'),
        'company_website': pick('customer_website'),
        'customer_description': pick('customer_description'),
        'company_mini_search': pick('company_mini_search'),
        'contact_name': pick('contact_name', 'recipient_name', 'customer_name'),
        'customer_email': pick('contact_email', 'recipient_address', 'customer_email'),
        'customer_phone': pick('contact_phone', 'customer_phone'),
        'customer_address': pick('customer_address'),
        'customer_linkedin': pick('linkedin_url', 'input_linkedin_url'),
        'customer_facebook': pick('facebook_url', 'input_facebook_url'),
        # Not derivable from raw input; left empty
        'company_business': '',
        'company_industries': [],
        'founders': [],
        'branches': [],
    }
    self._current_acquisition_data = rebuilt
    return rebuilt
109
+
110
def _prepare_customer_info_text(self, acquisition_data: Dict[str, Any]) -> str:
    """
    Prepare customer information text for LLM processing.

    Each non-empty field becomes a "Label: value" fragment; fragments are
    joined with '; ' in a fixed order (research, description, name,
    website, contact, business, industries).

    Args:
        acquisition_data: Data from acquisition stage

    Returns:
        Combined customer information text ('' when nothing is available)
    """
    labelled_fields = (
        ('Company Research', 'company_mini_search'),
        ('Customer Description', 'customer_description'),
        ('Company Name', 'company_name'),
        ('Website', 'company_website'),
        ('Contact', 'contact_name'),
        ('Business', 'company_business'),
    )

    info_parts = []
    for label, key in labelled_fields:
        value = acquisition_data.get(key, '')
        if value:
            info_parts.append(f"{label}: {value}")

    industries = acquisition_data.get('company_industries', [])
    if industries:
        # A bare string must not be joined character by character, and
        # non-string entries must not make str.join raise.
        if not isinstance(industries, (list, tuple, set)):
            industries = [industries]
        info_parts.append(f"Industries: {', '.join(str(item) for item in industries)}")

    return '; '.join(info_parts)
157
+
158
def _extract_structured_customer_info(self, customer_info_text: str) -> Dict[str, Any]:
    """
    Extract structured customer information using LLM.

    In dry-run mode the mock structure is returned without calling the LLM.
    Any failure, including an LLM answer that does not parse to a JSON
    object, is logged and replaced by the fallback structure so the stage
    can continue.

    Args:
        customer_info_text: Combined customer information text

    Returns:
        Structured customer information dictionary
    """
    try:
        if self.is_dry_run():
            return self._get_mock_structured_data()

        # Instruction text comes from the original YAML workflow
        instruction = self._get_llm_instruction()
        prompt = f"{instruction}\n\nThe customer information: {customer_info_text}"

        # Temperature carried over from the original YAML
        response = self.call_llm(prompt, temperature=0.3)
        structured_data = self.parse_json_response(response)

        # Later steps call .get() on this value; an array or scalar would
        # break each of them, so route it to the fallback path instead.
        if not isinstance(structured_data, dict):
            raise ValueError(
                f"Expected a JSON object from the LLM, got {type(structured_data).__name__}"
            )

        self.logger.info("Successfully extracted structured customer information")
        return structured_data

    except Exception as e:
        self.logger.error(f"Structured data extraction failed: {str(e)}")
        # Return minimal structure to prevent complete failure
        return self._get_fallback_structured_data(customer_info_text)
191
+
192
def _enhance_pain_point_analysis(self, structured_data: Dict[str, Any], customer_info_text: str) -> Dict[str, Any]:
    """
    Enhance pain point identification with additional analysis.

    When fewer than two pain points exist, or they are not detailed enough,
    a focused generation pass is attempted and its result (if any) replaces
    them. The resulting list is then categorized and prioritized. A missing
    or null 'painPoints' entry is treated as an empty list, so the key is
    always present afterwards.

    Args:
        structured_data: Initial structured data from LLM
        customer_info_text: Original customer information text

    Returns:
        Enhanced structured data with better pain point analysis; on error
        the data is returned as it stood when the error occurred
    """
    try:
        current_pain_points = structured_data.get('painPoints')
        # The key may be absent or null in LLM output; normalise to a list
        if not isinstance(current_pain_points, list):
            current_pain_points = []

        # If pain points are insufficient, enhance them
        if len(current_pain_points) < 2 or not self._are_pain_points_detailed(current_pain_points):
            enhanced_pain_points = self._generate_enhanced_pain_points(structured_data, customer_info_text)
            if enhanced_pain_points:
                current_pain_points = enhanced_pain_points

        # Categorize and prioritize pain points
        structured_data['painPoints'] = self._categorize_and_prioritize_pain_points(current_pain_points)

        return structured_data

    except Exception as e:
        self.logger.error(f"Pain point enhancement failed: {str(e)}")
        return structured_data
220
+
221
def _are_pain_points_detailed(self, pain_points: List[Dict[str, Any]]) -> bool:
    """
    Check if pain points are detailed enough.

    Every entry must be a dictionary whose 'description', ignoring
    surrounding whitespace, is at least 20 characters long. Entries of
    any other shape (plain strings, null descriptions) count as not
    detailed and no longer raise.

    Args:
        pain_points: List of pain point dictionaries

    Returns:
        True if pain points are sufficiently detailed
    """
    if not pain_points:
        return False

    for pain_point in pain_points:
        if not isinstance(pain_point, dict):
            return False
        description = pain_point.get('description') or ''
        if len(str(description).strip()) < 20:  # Too short to be meaningful
            return False

    return True
240
+
241
def _generate_enhanced_pain_points(self, structured_data: Dict[str, Any], customer_info_text: str) -> Optional[List[Dict[str, Any]]]:
    """
    Ask the LLM for a focused list of pain points.

    Dry runs return the mock list without calling the LLM. Otherwise the
    prompt is built from the raw customer text plus the industry and size
    found under 'companyInfo', and the parsed answer is accepted only when
    it is a non-empty list.

    Args:
        structured_data: Current structured data
        customer_info_text: Original customer information

    Returns:
        Enhanced pain points list or None if failed
    """
    try:
        if self.is_dry_run():
            return self._get_mock_pain_points()

        profile = structured_data.get('companyInfo', {})
        industry = profile.get('industry', '')
        company_size = profile.get('size', '')

        # NOTE(review): line-leading whitespace inside this prompt is
        # reproduced as visible in the reviewed rendering - confirm against
        # the packaged file before merging.
        pain_point_prompt = f"""Analyze the following company information and identify specific, actionable pain points:

Company Information: {customer_info_text}
Industry: {industry}
Company Size: {company_size}

Based on this information, identify 3-5 specific pain points this company likely faces. For each pain point, provide:
1. Category (e.g., "Operational Efficiency", "Technology", "Financial", "Market Competition", "Customer Experience")
2. Detailed description of the specific challenge
3. Impact level and explanation (High/Medium/Low with reasoning)

Return as JSON array:
[
{{
"category": "category name",
"description": "detailed description of the pain point",
"impact": "impact level with explanation"
}}
]

Focus on realistic, industry-specific challenges that would resonate with the company."""

        raw_answer = self.call_llm(pain_point_prompt, temperature=0.4)
        parsed = self.parse_json_response(raw_answer)

        # Anything other than a non-empty list counts as a failed generation
        if not isinstance(parsed, list) or len(parsed) == 0:
            return None

        self.logger.info(f"Generated {len(parsed)} enhanced pain points")
        return parsed

    except Exception as exc:
        self.logger.error(f"Enhanced pain point generation failed: {str(exc)}")
        return None
294
+
295
def _get_mock_pain_points(self) -> List[Dict[str, Any]]:
    """
    Provide canned pain points for dry run mode.

    Returns:
        Mock pain points list (four entries, each with category,
        description and impact)
    """
    # (category, description, impact) rows, expanded into dictionaries below
    canned_rows = (
        (
            'Operational Efficiency',
            'Manual processes and lack of automation leading to increased operational costs and slower response times',
            'High - directly affects profitability and customer satisfaction',
        ),
        (
            'Technology Infrastructure',
            'Outdated systems and lack of integration between different business tools',
            'Medium - limiting scalability and data-driven decision making',
        ),
        (
            'Market Competition',
            'Increasing competition from digital-first companies with more agile business models',
            'High - threatening market share and pricing power',
        ),
        (
            'Customer Experience',
            'Inconsistent customer touchpoints and limited self-service options',
            'Medium - affecting customer retention and acquisition costs',
        ),
    )
    return [
        {'category': category, 'description': description, 'impact': impact}
        for category, description, impact in canned_rows
    ]
324
+
325
def _categorize_and_prioritize_pain_points(self, pain_points: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Categorize and prioritize pain points.

    Each pain point gets a numeric 'priority' (3 when its impact text
    mentions "high", 2 for "medium", otherwise 1) and a normalized
    'category'. The list is modified in place and sorted by priority,
    highest first. Null or non-string impact/category values are treated
    as empty text rather than aborting the whole pass.

    Args:
        pain_points: List of pain point dictionaries

    Returns:
        Categorized and prioritized pain points (the same list object)
    """
    try:
        # Checked in this order, so "high" wins over "medium" when an impact
        # text mentions both; anything unmatched gets the lowest priority.
        impact_priority = {
            'high': 3,
            'medium': 2,
            'low': 1
        }

        for pain_point in pain_points:
            impact = str(pain_point.get('impact') or '').lower()
            pain_point['priority'] = next(
                (score for level, score in impact_priority.items() if level in impact),
                impact_priority['low'],
            )

            category = str(pain_point.get('category') or '').strip()
            pain_point['category'] = self._normalize_pain_point_category(category)

        # Sort by priority (highest first)
        pain_points.sort(key=lambda x: x.get('priority', 0), reverse=True)

        return pain_points

    except Exception as e:
        self.logger.error(f"Pain point categorization failed: {str(e)}")
        return pain_points
366
+
367
def _normalize_pain_point_category(self, category: str) -> str:
    """
    Normalize pain point category names.

    Matching is done on whole words, not raw substrings, so short
    keywords such as "it" or "hr" no longer fire inside unrelated words
    ("competition", "throughput"). Names that are already canonical are
    returned unchanged, which keeps the normalization idempotent.

    Resolution order:
        1. the whole text equals a keyword
        2. the whole text equals a canonical category name
        3. any word equals a keyword
        4. any word starts with a keyword of four or more letters
        5. otherwise the title-cased input, or 'General Business' if empty

    Args:
        category: Original category name

    Returns:
        Normalized category name
    """
    category_mapping = {
        'operational': 'Operational Efficiency',
        'operations': 'Operational Efficiency',
        'efficiency': 'Operational Efficiency',
        'technology': 'Technology Infrastructure',
        'tech': 'Technology Infrastructure',
        'it': 'Technology Infrastructure',
        'financial': 'Financial Management',
        'finance': 'Financial Management',
        'money': 'Financial Management',
        'market': 'Market Competition',
        'competition': 'Market Competition',
        'competitive': 'Market Competition',
        'customer': 'Customer Experience',
        'customers': 'Customer Experience',
        'service': 'Customer Experience',
        'sales': 'Sales & Marketing',
        'marketing': 'Sales & Marketing',
        'growth': 'Business Growth',
        'scaling': 'Business Growth',
        'compliance': 'Regulatory Compliance',
        'legal': 'Regulatory Compliance',
        'hr': 'Human Resources',
        'talent': 'Human Resources',
        'staff': 'Human Resources'
    }

    category = str(category or '').strip()
    category_lower = category.lower()

    # 1. Whole text is a keyword
    if category_lower in category_mapping:
        return category_mapping[category_lower]

    # 2. Already canonical (e.g. "Sales & Marketing") - leave it alone
    for canonical in category_mapping.values():
        if canonical.lower() == category_lower:
            return canonical

    # Split on anything that is not a letter or digit
    words = ''.join(ch if ch.isalnum() else ' ' for ch in category_lower).split()

    # 3. A whole word is a keyword (mapping order decides ties)
    for key, value in category_mapping.items():
        if key in words:
            return value

    # 4. A word begins with a longer keyword ("technological", "markets").
    #    Two-letter keywords are excluded: they are too ambiguous as prefixes.
    for key, value in category_mapping.items():
        if len(key) >= 4 and any(word.startswith(key) for word in words):
            return value

    # 5. No match
    return category.title() if category else 'General Business'
417
+
418
def _enhance_financial_analysis(self, structured_data: Dict[str, Any], customer_info_text: str) -> Dict[str, Any]:
    """
    Enhance financial analysis with additional insights.

    When neither revenue history nor profit is present, generated
    estimates are merged into 'financialInfo'. A 'healthAssessment' entry
    is then added. A missing or null 'financialInfo' is replaced by a fresh
    dictionary first, so the enhancement is no longer lost to a KeyError.

    Args:
        structured_data: Current structured data
        customer_info_text: Original customer information

    Returns:
        Enhanced structured data with better financial analysis; on error
        the data is returned as it stood when the error occurred
    """
    try:
        company_info = structured_data.get('companyInfo') or {}

        financial_info = structured_data.get('financialInfo')
        if not isinstance(financial_info, dict):
            financial_info = {}
        # Attach up front so every update below lands in structured_data
        structured_data['financialInfo'] = financial_info

        # If financial info is sparse, enhance it
        if not financial_info.get('revenueLastThreeYears') and not financial_info.get('profit'):
            enhanced_financial = self._generate_financial_estimates(company_info, customer_info_text)
            if enhanced_financial:
                financial_info.update(enhanced_financial)

        # Add financial health assessment
        financial_info['healthAssessment'] = self._assess_financial_health(financial_info, company_info)

        return structured_data

    except Exception as e:
        self.logger.error(f"Financial analysis enhancement failed: {str(e)}")
        return structured_data
449
+
450
def _generate_financial_estimates(self, company_info: Dict[str, Any], customer_info_text: str) -> Optional[Dict[str, Any]]:
    """
    Ask the LLM for financial estimates for the company.

    Dry runs return mock data without calling the LLM. The prompt carries
    the company name, industry, size and the first 500 characters of the
    customer text; the parsed answer is accepted only when it is a dict.

    Args:
        company_info: Company information
        customer_info_text: Original customer information

    Returns:
        Financial estimates or None if failed
    """
    try:
        if self.is_dry_run():
            return self._get_mock_financial_data()

        company_name = company_info.get('name', '')
        industry = company_info.get('industry', '')
        company_size = company_info.get('size', '')

        # NOTE(review): line-leading whitespace inside this prompt is
        # reproduced as visible in the reviewed rendering - confirm against
        # the packaged file before merging.
        financial_prompt = f"""Based on the following company information, provide realistic financial estimates:

Company: {company_name}
Industry: {industry}
Size: {company_size}
Additional Info: {customer_info_text[:500]}

Provide financial estimates in JSON format:
{{
"estimatedAnnualRevenue": "revenue range estimate",
"revenueGrowthTrend": "growth trend analysis",
"profitMarginEstimate": "estimated profit margin percentage",
"fundingStage": "likely funding stage",
"financialChallenges": ["list of likely financial challenges"],
"revenueStreams": ["likely revenue streams"]
}}

Base estimates on industry standards and company size indicators. Be conservative and realistic."""

        raw_answer = self.call_llm(financial_prompt, temperature=0.3)
        estimates = self.parse_json_response(raw_answer)

        # Only a JSON object is usable here
        if not isinstance(estimates, dict):
            return None

        self.logger.info("Generated financial estimates")
        return estimates

    except Exception as exc:
        self.logger.error(f"Financial estimate generation failed: {str(exc)}")
        return None
500
+
501
def _get_mock_financial_data(self) -> Dict[str, Any]:
    """
    Provide canned financial estimates for dry run mode.

    Returns:
        Mock financial data
    """
    challenges = [
        'Cash flow management during growth phases',
        'Balancing investment in growth vs profitability',
        'Managing operational costs as scale increases',
    ]
    streams = ['Product sales', 'Service contracts', 'Recurring subscriptions']

    return dict(
        estimatedAnnualRevenue='$2-5M',
        revenueGrowthTrend='Steady growth of 15-20% annually',
        profitMarginEstimate='12-18%',
        fundingStage='Self-funded or Series A',
        financialChallenges=challenges,
        revenueStreams=streams,
    )
524
+
525
def _assess_financial_health(self, financial_info: Dict[str, Any], company_info: Dict[str, Any]) -> Dict[str, Any]:
    """
    Assess financial health based on available information.

    Revenue growth between the last two entries of 'revenueLastThreeYears'
    sets the rating (above 20% -> 'Good', above 0% -> 'Fair', otherwise
    'Concerning'). The sign of 'profit' adds a strength or a concern, and
    the industry selects recommendations. Null values, numeric strings
    such as "1,000" and bare numbers in the revenue list are tolerated;
    values that cannot be read as numbers are skipped instead of
    discarding the whole assessment.

    Args:
        financial_info: Financial information
        company_info: Company information

    Returns:
        Financial health assessment with 'overallRating', 'strengths',
        'concerns' and 'recommendations'
    """

    def as_number(value):
        # Best-effort numeric coercion; None when the value is not numeric
        if isinstance(value, bool):
            return None
        if isinstance(value, (int, float)):
            return value
        if isinstance(value, str):
            try:
                return float(value.replace(',', '').replace('$', '').strip())
            except ValueError:
                return None
        return None

    def revenue_of(entry):
        # Entries are normally {'revenue': ...}; accept bare numbers too
        if isinstance(entry, dict):
            return as_number(entry.get('revenue', 0))
        return as_number(entry)

    try:
        assessment = {
            'overallRating': 'Unknown',
            'strengths': [],
            'concerns': [],
            'recommendations': []
        }

        # Analyze revenue trend if available
        revenue_years = financial_info.get('revenueLastThreeYears') or []
        if isinstance(revenue_years, (list, tuple)) and len(revenue_years) >= 2:
            recent_revenue = revenue_of(revenue_years[-1])
            previous_revenue = revenue_of(revenue_years[-2])

            # A growth rate needs both figures and a positive base
            if recent_revenue is not None and previous_revenue is not None and previous_revenue > 0:
                growth_rate = ((recent_revenue - previous_revenue) / previous_revenue) * 100

                if growth_rate > 20:
                    assessment['strengths'].append('Strong revenue growth')
                    assessment['overallRating'] = 'Good'
                elif growth_rate > 0:
                    assessment['strengths'].append('Positive revenue growth')
                    assessment['overallRating'] = 'Fair'
                else:
                    assessment['concerns'].append('Declining revenue trend')
                    assessment['overallRating'] = 'Concerning'

        # Analyze profit margins
        profit = as_number(financial_info.get('profit', 0))
        if profit is not None:
            if profit > 0:
                assessment['strengths'].append('Profitable operations')
            elif profit < 0:
                assessment['concerns'].append('Operating at a loss')

        # Industry-specific analysis
        industry = str(company_info.get('industry') or '').lower()
        if 'technology' in industry or 'software' in industry:
            assessment['recommendations'].append('Focus on recurring revenue models')
            assessment['recommendations'].append('Invest in R&D for competitive advantage')
        elif 'manufacturing' in industry:
            assessment['recommendations'].append('Optimize supply chain efficiency')
            assessment['recommendations'].append('Consider automation investments')

        # General recommendations
        if not assessment['recommendations']:
            assessment['recommendations'] = [
                'Diversify revenue streams',
                'Improve operational efficiency',
                'Build cash reserves for growth opportunities'
            ]

        return assessment

    except Exception as e:
        self.logger.error(f"Financial health assessment failed: {str(e)}")
        return {
            'overallRating': 'Unknown',
            'strengths': [],
            'concerns': [],
            'recommendations': ['Conduct detailed financial analysis']
        }
598
+
599
def _enhance_research_analysis(self, structured_data: Dict[str, Any], customer_info_text: str) -> Dict[str, Any]:
    """
    Enhance research and development analysis.

    Merges the technology stack analysis into 'technologyAndInnovation'
    and the development plan analysis into 'developmentPlans', then sets
    'competitiveAnalysis'. Missing or null target sections are created
    when there is something to merge, so results are no longer written to
    a throwaway dictionary or lost to a KeyError.

    Args:
        structured_data: Current structured data
        customer_info_text: Original customer information

    Returns:
        Enhanced structured data with R&D analysis; on error the data is
        returned as it stood when the error occurred
    """

    def merge_section(section_key, additions):
        # Merge into the existing section, creating it when absent or invalid
        section = structured_data.get(section_key)
        if not isinstance(section, dict):
            section = {}
        section.update(additions)
        structured_data[section_key] = section

    try:
        company_info = structured_data.get('companyInfo') or {}

        # Enhance technology stack analysis
        enhanced_tech = self._analyze_technology_stack(company_info, customer_info_text)
        if enhanced_tech:
            merge_section('technologyAndInnovation', enhanced_tech)

        # Enhance development plans
        enhanced_plans = self._analyze_development_plans(structured_data, customer_info_text)
        if enhanced_plans:
            merge_section('developmentPlans', enhanced_plans)

        # Add competitive analysis
        structured_data['competitiveAnalysis'] = self._generate_competitive_analysis(
            company_info, customer_info_text
        )

        return structured_data

    except Exception as e:
        self.logger.error(f"Research analysis enhancement failed: {str(e)}")
        return structured_data
634
+
635
def _analyze_technology_stack(self, company_info: Dict[str, Any], customer_info_text: str) -> Optional[Dict[str, Any]]:
    """
    Ask the LLM for a technology stack and innovation assessment.

    Dry runs return the mock analysis without calling the LLM. The prompt
    carries the industry, company size and the first 400 characters of the
    customer text; the parsed answer is accepted only when it is a dict.

    Args:
        company_info: Company information
        customer_info_text: Original customer information

    Returns:
        Technology analysis or None if failed
    """
    try:
        if self.is_dry_run():
            return self._get_mock_technology_analysis()

        industry = company_info.get('industry', '')
        company_size = company_info.get('size', '')

        # NOTE(review): line-leading whitespace inside this prompt is
        # reproduced as visible in the reviewed rendering - confirm against
        # the packaged file before merging.
        tech_prompt = f"""Analyze the likely technology stack and innovation needs for this company:

Industry: {industry}
Company Size: {company_size}
Company Info: {customer_info_text[:400]}

Provide analysis in JSON format:
{{
"likelyTechStack": ["list of technologies they probably use"],
"technologyGaps": ["areas where they might need technology improvements"],
"innovationOpportunities": ["potential areas for innovation"],
"digitalMaturityLevel": "assessment of digital maturity (Basic/Intermediate/Advanced)",
"recommendedTechnologies": ["technologies that could benefit them"]
}}

Focus on realistic, industry-appropriate technology assessments."""

        raw_answer = self.call_llm(tech_prompt, temperature=0.3)
        analysis = self.parse_json_response(raw_answer)

        # Only a JSON object is usable here
        if not isinstance(analysis, dict):
            return None

        self.logger.info("Generated technology stack analysis")
        return analysis

    except Exception as exc:
        self.logger.error(f"Technology stack analysis failed: {str(exc)}")
        return None
682
+
683
def _get_mock_technology_analysis(self) -> Dict[str, Any]:
    """
    Provide a canned technology analysis for dry run mode.

    Returns:
        Mock technology analysis
    """
    return dict(
        likelyTechStack=['CRM System', 'Email Marketing', 'Basic Analytics', 'Office Suite'],
        technologyGaps=['Marketing Automation', 'Advanced Analytics', 'Customer Support Tools'],
        innovationOpportunities=['AI-powered customer insights', 'Process automation', 'Mobile solutions'],
        digitalMaturityLevel='Intermediate',
        recommendedTechnologies=[
            'Marketing Automation Platform',
            'Business Intelligence Tools',
            'Cloud Infrastructure',
        ],
    )
697
+
698
def _analyze_development_plans(self, structured_data: Dict[str, Any], customer_info_text: str) -> Optional[Dict[str, Any]]:
    """
    Analyze and enhance development plans.

    Builds the analysis from the company information and pain points by
    delegating to the priority-area, timeline, resource and risk helpers.
    Null 'companyInfo' or 'painPoints' values are treated as empty.

    Args:
        structured_data: Current structured data
        customer_info_text: Original customer information (not used by
            this method)

    Returns:
        Enhanced development plans or None if failed
    """
    try:
        company_info = structured_data.get('companyInfo') or {}
        pain_points = structured_data.get('painPoints') or []

        return {
            'priorityAreas': self._identify_priority_development_areas(company_info, pain_points),
            'timelineEstimates': self._estimate_development_timelines(company_info),
            'resourceRequirements': self._estimate_resource_requirements(company_info, pain_points),
            'riskFactors': self._identify_development_risks(company_info, pain_points)
        }

    except Exception as e:
        self.logger.error(f"Development plans analysis failed: {str(e)}")
        return None
728
+
729
def _identify_priority_development_areas(self, company_info: Dict[str, Any], pain_points: List[Dict[str, Any]]) -> List[str]:
    """
    Derive priority development areas from pain point categories.

    Each pain point contributes at most one area: the first rule whose
    keyword occurs in its lower-cased category. Duplicates are dropped,
    keeping first-seen order, and at most five areas are returned.

    Args:
        company_info: Company information (not used by this method)
        pain_points: List of pain points

    Returns:
        List of priority development areas
    """
    # Ordered rules: the first matching keyword group decides the area
    rules = (
        (('operational', 'efficiency'), 'Process Optimization'),
        (('technology',), 'Technology Modernization'),
        (('customer',), 'Customer Experience Enhancement'),
        (('financial',), 'Financial Management Systems'),
        (('market', 'competition'), 'Market Expansion Strategy'),
    )

    ordered_unique = []
    for point in pain_points:
        label = point.get('category', '').lower()
        for keywords, area in rules:
            if any(keyword in label for keyword in keywords):
                if area not in ordered_unique:
                    ordered_unique.append(area)
                break

    return ordered_unique[:5]
758
+
759
def _estimate_development_timelines(self, company_info: Dict[str, Any]) -> Dict[str, str]:
    """
    Pick timeline estimates from the company size label.

    Sizes mentioning "small" or "startup" get the shortest horizons,
    "large" or "enterprise" the longest, and everything else the
    medium-size values.

    Args:
        company_info: Company information

    Returns:
        Timeline estimates keyed by shortTerm, mediumTerm and longTerm
    """
    size_label = company_info.get('size', '').lower()

    if any(tag in size_label for tag in ('small', 'startup')):
        horizons = ('3-6 months', '6-12 months', '1-2 years')
    elif any(tag in size_label for tag in ('large', 'enterprise')):
        horizons = ('6-12 months', '1-2 years', '2-3 years')
    else:
        # Medium size
        horizons = ('4-8 months', '8-18 months', '1.5-2.5 years')

    return dict(zip(('shortTerm', 'mediumTerm', 'longTerm'), horizons))
789
+
790
def _estimate_resource_requirements(self, company_info: Dict[str, Any], pain_points: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Return generic resource requirement estimates.

    The result is fixed; neither argument influences it.

    Args:
        company_info: Company information (not used by this method)
        pain_points: List of pain points (not used by this method)

    Returns:
        Resource requirement estimates
    """
    return dict(
        budgetRange='Varies by project scope',
        keyRoles=['Project Manager', 'Technical Lead', 'Business Analyst'],
        externalSupport='May require consultants for specialized areas',
        trainingNeeds='Staff training on new processes and technologies',
    )
807
+
808
def _identify_development_risks(self, company_info: Dict[str, Any], pain_points: List[Dict[str, Any]]) -> List[str]:
    """
    Return the generic list of development risks.

    The result is fixed; neither argument influences it.

    Args:
        company_info: Company information (not used by this method)
        pain_points: List of pain points (not used by this method)

    Returns:
        List of development risks
    """
    generic_risks = (
        'Resource allocation conflicts with daily operations',
        'Change management resistance from staff',
        'Technology integration challenges',
        'Budget overruns due to scope creep',
        'Timeline delays due to unforeseen complications',
    )
    return list(generic_risks)
826
+
827
def _generate_competitive_analysis(self, company_info: Dict[str, Any], customer_info_text: str) -> Dict[str, Any]:
    """
    Assemble competitive analysis insights.

    Combines the position, trend, advantage, threat and opportunity
    helpers, driven by the company's industry and size. If any helper
    raises, the error is logged and a placeholder analysis is returned.

    Args:
        company_info: Company information
        customer_info_text: Original customer information (not used by
            this method)

    Returns:
        Competitive analysis insights
    """
    try:
        industry = company_info.get('industry', '')
        company_size = company_info.get('size', '')

        # Evaluated in the same order as the keys of the result
        position = self._assess_competitive_position(industry, company_size)
        trends = self._identify_market_trends(industry)
        advantages = self._identify_potential_advantages(company_info)
        threats = self._identify_competitive_threats(industry, company_size)
        opportunities = self._identify_market_opportunities(industry, company_size)

    except Exception as exc:
        self.logger.error(f"Competitive analysis failed: {str(exc)}")
        position, trends, advantages, threats, opportunities = 'Analysis pending', [], [], [], []

    return {
        'competitivePosition': position,
        'marketTrends': trends,
        'competitiveAdvantages': advantages,
        'threats': threats,
        'opportunities': opportunities,
    }
859
+
860
+ def _assess_competitive_position(self, industry: str, company_size: str) -> str:
861
+ """Assess competitive position based on industry and size."""
862
+ if 'small' in company_size.lower():
863
+ return 'Niche player with agility advantages'
864
+ elif 'large' in company_size.lower():
865
+ return 'Established player with resource advantages'
866
+ else:
867
+ return 'Mid-market player with growth potential'
868
+
869
+ def _identify_market_trends(self, industry: str) -> List[str]:
870
+ """Identify relevant market trends."""
871
+ industry_lower = industry.lower()
872
+
873
+ if 'technology' in industry_lower or 'software' in industry_lower:
874
+ return ['Digital transformation acceleration', 'AI/ML adoption', 'Cloud migration', 'Remote work tools']
875
+ elif 'retail' in industry_lower or 'ecommerce' in industry_lower:
876
+ return ['Omnichannel experiences', 'Personalization', 'Sustainability focus', 'Mobile commerce']
877
+ elif 'healthcare' in industry_lower:
878
+ return ['Telemedicine growth', 'Digital health records', 'Patient experience focus', 'Regulatory compliance']
879
+ else:
880
+ return ['Digital transformation', 'Customer experience focus', 'Operational efficiency', 'Sustainability']
881
+
882
+ def _identify_potential_advantages(self, company_info: Dict[str, Any]) -> List[str]:
883
+ """Identify potential competitive advantages."""
884
+ return [
885
+ 'Local market knowledge',
886
+ 'Personalized customer service',
887
+ 'Agile decision making',
888
+ 'Specialized expertise'
889
+ ]
890
+
891
+ def _identify_competitive_threats(self, industry: str, company_size: str) -> List[str]:
892
+ """Identify competitive threats."""
893
+ return [
894
+ 'Larger competitors with more resources',
895
+ 'New market entrants with innovative solutions',
896
+ 'Price competition from low-cost providers',
897
+ 'Technology disruption changing industry dynamics'
898
+ ]
899
+
900
+ def _identify_market_opportunities(self, industry: str, company_size: str) -> List[str]:
901
+ """Identify market opportunities."""
902
+ return [
903
+ 'Underserved market segments',
904
+ 'Technology adoption gaps',
905
+ 'Partnership opportunities',
906
+ 'Geographic expansion potential'
907
+ ]
908
+
909
+ def _get_llm_instruction(self) -> str:
910
+ """
911
+ Get the LLM instruction from the original YAML workflow: a fixed prompt that sets the analyst role, embeds the target JSON schema and asks for JSON-only output.
912
+
913
+ Returns:
914
+ LLM instruction text (a constant string literal; no runtime values are interpolated into it)
915
+ """
916
+ return """Role: Customer research analyst conducting comprehensive data gathering on provided companies.
917
+
918
+ Objective: Based on the provided customer information. Conduct a comprehensive search to infer detailed customer information. Use online search tools, company databases, and public sources to gather accurate, up-to-date data. Ensure all fields in the JSON schema below are completed with reliable information.
919
+
920
+ If information is unavailable, use an empty string ('') for string fields. However, painPoints must always contain relevant data inferred from the company's description, industry, or general challenges associated with its sector.
921
+
922
+ Return only the JSON result, strictly following the schema, without any additional explanation.
923
+
924
+ **JSON Schema**:
925
+ ```
926
+ {'companyInfo':{'name':'','industry':'','size':'','annualRevenue':'','address':'','website':''},'primaryContact':{'name':'','position':'','email':'','phone':'','linkedIn':''},'currentTechStack':[],'painPoints':[{'category':'','description':'','impact':''}],'financialInfo':{'revenueLastThreeYears':[{'year':0,'revenue':0}],'profit':0,'fundingSources':[]},'legalInfo':{'taxCode':'','businessLicense':'','foundingYear':0},'productsAndServices':{'mainProducts':[],'targetMarket':[]},'developmentPlans':{'shortTermGoals':[],'longTermGoals':[]},'technologyAndInnovation':{'rdProjects':[],'patents':[{'name':'','number':'','filingDate':''}]}}
927
+ ```
928
+ **Key Focus Areas**:
929
+ 1. Pain Points: Highlight specific issues the company may face, such as financial challenges, operational inefficiencies, market positioning struggles, or customer satisfaction concerns. Always include specific issues the company may face, inferred from its description, industry, or general market challenges.
930
+ 2. Accuracy: Ensure all provided data is reliable and up-to-date.
931
+ 3. Fallbacks: For unavailable data, fill fields with empty strings ('') or empty arrays ([]).
932
+ Note: Return only the JSON output, without the json keyword or additional commentary."""
933
+
934
+ def _get_mock_structured_data(self) -> Dict[str, Any]:
935
+ """
936
+ Get mock structured data for dry run mode.
937
+
938
+ Returns:
939
+ Mock structured customer data
940
+ """
941
+ return {
942
+ 'companyInfo': {
943
+ 'name': 'Example Corp',
944
+ 'industry': 'Technology',
945
+ 'size': 'Medium (50-200 employees)',
946
+ 'annualRevenue': '$5-10M',
947
+ 'address': '123 Main St, City, State',
948
+ 'website': 'https://example.com'
949
+ },
950
+ 'primaryContact': {
951
+ 'name': 'John Doe',
952
+ 'position': 'CEO',
953
+ 'email': 'john@example.com',
954
+ 'phone': '+1-555-0123',
955
+ 'linkedIn': 'https://linkedin.com/in/johndoe'
956
+ },
957
+ 'currentTechStack': ['CRM', 'Email Marketing', 'Analytics'],
958
+ 'painPoints': [
959
+ {
960
+ 'category': 'Operational Efficiency',
961
+ 'description': 'Manual processes causing delays and errors',
962
+ 'impact': 'High - affecting customer satisfaction and costs'
963
+ },
964
+ {
965
+ 'category': 'Data Management',
966
+ 'description': 'Scattered data across multiple systems',
967
+ 'impact': 'Medium - limiting insights and decision making'
968
+ }
969
+ ],
970
+ 'financialInfo': {
971
+ 'revenueLastThreeYears': [
972
+ {'year': 2023, 'revenue': 8500000},
973
+ {'year': 2022, 'revenue': 7200000},
974
+ {'year': 2021, 'revenue': 6100000}
975
+ ],
976
+ 'profit': 1200000,
977
+ 'fundingSources': ['Self-funded', 'Bank loan']
978
+ },
979
+ 'legalInfo': {
980
+ 'taxCode': 'TC123456789',
981
+ 'businessLicense': 'BL987654321',
982
+ 'foundingYear': 2018
983
+ },
984
+ 'productsAndServices': {
985
+ 'mainProducts': ['Software Solutions', 'Consulting Services'],
986
+ 'targetMarket': ['SMB', 'Enterprise']
987
+ },
988
+ 'developmentPlans': {
989
+ 'shortTermGoals': ['Improve operational efficiency', 'Expand customer base'],
990
+ 'longTermGoals': ['International expansion', 'Product diversification']
991
+ },
992
+ 'technologyAndInnovation': {
993
+ 'rdProjects': ['AI Integration', 'Mobile App Development'],
994
+ 'patents': [
995
+ {
996
+ 'name': 'Automated Process Management',
997
+ 'number': 'US123456789',
998
+ 'filingDate': '2023-01-15'
999
+ }
1000
+ ]
1001
+ }
1002
+ }
1003
+
1004
+ def _get_fallback_structured_data(self, customer_info_text: str) -> Dict[str, Any]:
1005
+ """
1006
+ Get fallback structured data when LLM extraction fails.
1007
+ Uses data from acquisition stage if available.
1008
+
1009
+ Args:
1010
+ customer_info_text: Original customer information text
1011
+
1012
+ Returns:
1013
+ Minimal structured customer data with available contact info
1014
+ """
1015
+ # Try to get acquisition data from context
1016
+ acquisition_data = getattr(self, '_current_acquisition_data', {})
1017
+
1018
+ return {
1019
+ 'companyInfo': {
1020
+ 'name': acquisition_data.get('company_name', ''),
1021
+ 'industry': '',
1022
+ 'size': '',
1023
+ 'annualRevenue': '',
1024
+ 'address': acquisition_data.get('customer_address', ''),
1025
+ 'website': acquisition_data.get('company_website', '')
1026
+ },
1027
+ 'primaryContact': {
1028
+ 'name': acquisition_data.get('contact_name', ''),
1029
+ 'position': '',
1030
+ 'email': acquisition_data.get('customer_email', ''),
1031
+ 'phone': acquisition_data.get('customer_phone', ''),
1032
+ 'linkedIn': acquisition_data.get('customer_linkedin', '')
1033
+ },
1034
+ 'currentTechStack': [],
1035
+ 'painPoints': [
1036
+ {
1037
+ 'category': 'General Business Challenges',
1038
+ 'description': 'Common business challenges that may affect operational efficiency and growth',
1039
+ 'impact': 'Medium - typical for businesses in competitive markets'
1040
+ }
1041
+ ],
1042
+ 'financialInfo': {
1043
+ 'revenueLastThreeYears': [],
1044
+ 'profit': 0,
1045
+ 'fundingSources': []
1046
+ },
1047
+ 'legalInfo': {
1048
+ 'taxCode': '',
1049
+ 'businessLicense': '',
1050
+ 'foundingYear': 0
1051
+ },
1052
+ 'productsAndServices': {
1053
+ 'mainProducts': [],
1054
+ 'targetMarket': []
1055
+ },
1056
+ 'developmentPlans': {
1057
+ 'shortTermGoals': [],
1058
+ 'longTermGoals': []
1059
+ },
1060
+ 'technologyAndInnovation': {
1061
+ 'rdProjects': [],
1062
+ 'patents': []
1063
+ },
1064
+ 'rawCustomerInfo': customer_info_text[:1000] + "..." if len(customer_info_text) > 1000 else customer_info_text
1065
+ }
1066
+
1067
+ def _validate_and_clean_data(self, structured_data: Dict[str, Any]) -> Dict[str, Any]:
1068
+ """
1069
+ Validate and clean the structured data.
1070
+
1071
+ Args:
1072
+ structured_data: Raw structured data from LLM
1073
+
1074
+ Returns:
1075
+ Validated and cleaned structured data
1076
+ """
1077
+ try:
1078
+ # Ensure all required sections exist
1079
+ required_sections = [
1080
+ 'companyInfo', 'primaryContact', 'currentTechStack', 'painPoints',
1081
+ 'financialInfo', 'legalInfo', 'productsAndServices',
1082
+ 'developmentPlans', 'technologyAndInnovation'
1083
+ ]
1084
+
1085
+ for section in required_sections:
1086
+ if section not in structured_data:
1087
+ structured_data[section] = {}
1088
+
1089
+ # Validate companyInfo
1090
+ company_info = structured_data.get('companyInfo', {})
1091
+ required_company_fields = ['name', 'industry', 'size', 'annualRevenue', 'address', 'website']
1092
+ for field in required_company_fields:
1093
+ if field not in company_info:
1094
+ company_info[field] = ''
1095
+
1096
+ # Validate primaryContact
1097
+ contact = structured_data.get('primaryContact', {})
1098
+ required_contact_fields = ['name', 'position', 'email', 'phone', 'linkedIn']
1099
+ for field in required_contact_fields:
1100
+ if field not in contact:
1101
+ contact[field] = ''
1102
+
1103
+ # Ensure painPoints is always a list with at least one item
1104
+ pain_points = structured_data.get('painPoints', [])
1105
+ if not pain_points or not isinstance(pain_points, list):
1106
+ pain_points = [
1107
+ {
1108
+ 'category': 'Business Operations',
1109
+ 'description': 'General operational challenges common in the industry',
1110
+ 'impact': 'Medium'
1111
+ }
1112
+ ]
1113
+ structured_data['painPoints'] = pain_points
1114
+
1115
+ # Validate financial info
1116
+ financial_info = structured_data.get('financialInfo', {})
1117
+ if 'revenueLastThreeYears' not in financial_info:
1118
+ financial_info['revenueLastThreeYears'] = []
1119
+ if 'profit' not in financial_info:
1120
+ financial_info['profit'] = 0
1121
+ if 'fundingSources' not in financial_info:
1122
+ financial_info['fundingSources'] = []
1123
+
1124
+ # Validate legal info
1125
+ legal_info = structured_data.get('legalInfo', {})
1126
+ required_legal_fields = ['taxCode', 'businessLicense', 'foundingYear']
1127
+ for field in required_legal_fields:
1128
+ if field not in legal_info:
1129
+ legal_info[field] = '' if field != 'foundingYear' else 0
1130
+
1131
+ # Ensure lists are actually lists
1132
+ list_fields = [
1133
+ ('currentTechStack', []),
1134
+ ('productsAndServices', {'mainProducts': [], 'targetMarket': []}),
1135
+ ('developmentPlans', {'shortTermGoals': [], 'longTermGoals': []}),
1136
+ ('technologyAndInnovation', {'rdProjects': [], 'patents': []})
1137
+ ]
1138
+
1139
+ for field, default in list_fields:
1140
+ if field not in structured_data:
1141
+ structured_data[field] = default
1142
+ elif isinstance(default, dict):
1143
+ for subfield, subdefault in default.items():
1144
+ if subfield not in structured_data[field]:
1145
+ structured_data[field][subfield] = subdefault
1146
+
1147
+ self.logger.info("Successfully validated and cleaned structured data")
1148
+ return structured_data
1149
+
1150
+ except Exception as e:
1151
+ self.logger.error(f"Data validation failed: {str(e)}")
1152
+ return structured_data # Return as-is if validation fails
1153
+
1154
+ def _save_customer_data(self, context: Dict[str, Any], structured_data: Dict[str, Any]) -> None:
1155
+ """
1156
+ Save customer data to local database.
1157
+
1158
+ Args:
1159
+ context: Execution context
1160
+ structured_data: Structured customer data
1161
+ """
1162
+ try:
1163
+ execution_id = context.get('execution_id')
1164
+ task_id = context.get('task_id', execution_id)
1165
+ company_info = structured_data.get('companyInfo', {})
1166
+ contact_info = structured_data.get('primaryContact', {})
1167
+
1168
+ # Save to customers table (basic customer info)
1169
+ customer_data = {
1170
+ 'customer_id': execution_id,
1171
+ 'org_id': self.config.get('org_id', ''),
1172
+ 'company_name': company_info.get('name', ''),
1173
+ 'website': company_info.get('website', ''),
1174
+ 'industry': company_info.get('industry', ''),
1175
+ 'contact_name': contact_info.get('name', ''),
1176
+ 'contact_email': contact_info.get('email', ''),
1177
+ 'contact_phone': contact_info.get('phone', ''),
1178
+ 'address': company_info.get('address', ''),
1179
+ 'profile_data': json.dumps(structured_data)
1180
+ }
1181
+
1182
+ # Save customer data to customers table
1183
+ self.data_manager.save_customer(customer_data)
1184
+ self.logger.info(f"Customer data saved to customers table: {execution_id}")
1185
+
1186
+ # Save to gs_customer_llmtask table (server-compatible)
1187
+ customer_task_data = {
1188
+ 'task_id': task_id,
1189
+ 'customer_id': execution_id,
1190
+ 'customer_name': company_info.get('name', ''),
1191
+ 'customer_phone': contact_info.get('phone', ''),
1192
+ 'customer_address': company_info.get('address', ''),
1193
+ 'customer_email': contact_info.get('email', ''),
1194
+ 'customer_industry': company_info.get('industry', ''),
1195
+ 'customer_taxcode': company_info.get('taxCode', ''),
1196
+ 'customer_website': company_info.get('website', ''),
1197
+ 'contact_name': contact_info.get('name', ''),
1198
+ 'org_id': self.config.get('org_id', ''),
1199
+ 'org_name': self.config.get('org_name', ''),
1200
+ 'project_code': 'FUSESELL',
1201
+ 'crm_dob': contact_info.get('dateOfBirth'),
1202
+ 'image_url': ''
1203
+ }
1204
+
1205
+ # Save customer task data to gs_customer_llmtask table
1206
+ self.data_manager.save_customer_task(customer_task_data)
1207
+ self.logger.info(f"Customer task data saved to gs_customer_llmtask table: {task_id}")
1208
+
1209
+ except Exception as e:
1210
+ self.logger.warning(f"Failed to save customer data: {str(e)}")
1211
+
1212
def validate_input(self, context: Dict[str, Any]) -> bool:
    """
    Decide whether the data preparation stage has enough input to run.

    Input is accepted when the context already carries a data_acquisition
    stage result, or, failing that, when the raw input supplies a
    non-empty customer website or customer description.

    Args:
        context: Execution context

    Returns:
        True if input is valid
    """
    # Preferred source: output of the data acquisition stage.
    if 'data_acquisition' in context.get('stage_results', {}):
        return True

    # Otherwise fall back to the basic input fields.
    raw_input = context.get('input_data', {})
    fallback_keys = ('customer_website', 'customer_description')
    return any(raw_input.get(key) for key in fallback_keys)
1230
+
1231
def get_required_fields(self) -> List[str]:
    """
    Report the input fields this stage requires.

    Returns:
        An empty list: no field is mandatory because this stage depends
        on data_acquisition stage output.
    """
    required_fields: List[str] = []
    return required_fields