fin-infra 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,710 @@
1
+ """
2
+ LLM-powered portfolio rebalancing recommendations.
3
+
4
+ Uses LLM to generate intelligent, personalized rebalancing suggestions
5
+ that consider:
6
+ - Portfolio diversification
7
+ - Risk tolerance
8
+ - Tax efficiency
9
+ - Investment goals
10
+ - Market conditions
11
+
12
+ Caching:
13
+ Uses svc-infra's cache_read decorator for persistent caching.
14
+ Cache key is based on portfolio STRUCTURE (symbols + 5% allocation buckets),
15
+ not exact values. This means small price fluctuations don't trigger new LLM calls.
16
+ TTL: 24 hours (rebalancing advice doesn't need real-time updates).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import hashlib
22
+ import json
23
+ import logging
24
+ from decimal import Decimal
25
+ from typing import TYPE_CHECKING, Any
26
+
27
+ from pydantic import BaseModel, ConfigDict, Field
28
+
29
+ # svc-infra caching - uses Redis in production, in-memory in dev
30
+ try:
31
+ from svc_infra.cache import cache_read
32
+
33
+ HAS_SVC_CACHE = True
34
+ except ImportError:
35
+ HAS_SVC_CACHE = False
36
+ cache_read = None # type: ignore
37
+
38
+ if TYPE_CHECKING:
39
+ from fin_infra.models.brokerage import Position
40
+
41
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Cache TTL: 24 hours - rebalancing advice doesn't change frequently
# Expressed in seconds (86400 s); passed as ``ttl`` to ``cache_read`` where the
# cached LLM call is built further down in this module.
REBALANCE_CACHE_TTL = 86400
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Output Schema
49
+ # ---------------------------------------------------------------------------
50
+
51
+
52
class LLMTrade(BaseModel):
    """One buy or sell instruction inside an LLM-generated rebalancing plan.

    All four fields are required; none of them declares a default.
    """

    # Ticker of the security the trade applies to.
    symbol: str = Field(description="Security ticker symbol to trade")
    # Trade direction as free text; the schema itself does not restrict the value.
    action: str = Field(description="'buy' or 'sell'")
    # Trade size expressed as a share of the whole portfolio, in percent.
    percentage_of_portfolio: float = Field(
        description="Percentage of portfolio value for this trade (e.g., 5.0 = 5%)",
    )
    # Human-readable justification for the trade.
    reasoning: str = Field(description="Clear explanation for this trade")
61
+
62
+
63
class LLMRebalancingPlan(BaseModel):
    """Structured rebalancing advice produced by the LLM.

    ``summary`` and ``analysis`` are required, length-capped strings. The three
    list fields default to empty lists and are capped at 5, 3 and 3 entries.
    ``is_balanced`` defaults to ``False``.
    """

    # Example payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "summary": "Your portfolio is heavily concentrated in one stock (SBSI at 29%). Consider diversifying into broad index funds for better risk management.",
                "analysis": "Current allocation: 48% cash, 29% single stock (SBSI), 23% diversified funds/ETFs. High single-stock risk. Excessive cash position.",
                "trades": [
                    {
                        "symbol": "VTI",
                        "action": "buy",
                        "percentage_of_portfolio": 20.0,
                        "reasoning": "Deploy excess cash into total US stock market index for diversification",
                    },
                    {
                        "symbol": "SBSI",
                        "action": "sell",
                        "percentage_of_portfolio": 15.0,
                        "reasoning": "Reduce single-stock concentration risk from 29% to ~14%",
                    },
                ],
                "recommendations": [
                    "Consider tax-loss harvesting on underperforming positions",
                    "Set up automatic monthly investments into index funds",
                    "Review asset allocation quarterly",
                ],
                "risk_warnings": [
                    "29% concentration in SBSI exposes you to company-specific risk",
                    "48% cash is likely underperforming inflation long-term",
                ],
                "is_balanced": False,
            }
        }
    )

    # Short overview of the portfolio and the headline advice (at most 500 chars).
    summary: str = Field(
        max_length=500,
        description="High-level summary of portfolio state and key recommendations",
    )
    # Longer discussion of the current holdings (at most 800 chars).
    analysis: str = Field(
        max_length=800,
        description="Analysis of current portfolio: diversification, risk exposure, concerns",
    )
    # Concrete trades; an empty list means nothing needs to be traded.
    trades: list[LLMTrade] = Field(
        default_factory=list,
        max_length=5,
        description="Recommended trades (max 5). Empty if portfolio is balanced.",
    )
    # Advice that is not tied to a single trade.
    recommendations: list[str] = Field(
        default_factory=list,
        max_length=3,
        description="Strategic recommendations beyond specific trades (max 3)",
    )
    # Risks the user should be aware of.
    risk_warnings: list[str] = Field(
        default_factory=list,
        max_length=3,
        description="Important risk warnings or concerns (max 3)",
    )
    # Flag telling the consumer that no action is required.
    is_balanced: bool = Field(
        default=False,
        description="True if portfolio is already well-balanced and no trades needed",
    )
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # Prompts
132
+ # ---------------------------------------------------------------------------
133
+
134
# System prompt passed as ``system=`` on every rebalancing LLM request in this
# module. It is sent verbatim (no placeholders are substituted into it).
# NOTE(review): the prompt asks for mathematically correct dollar amounts, but
# LLMTrade only carries percentage_of_portfolio - confirm that is intended.
REBALANCING_SYSTEM_PROMPT = """
You are an expert financial advisor optimizing investment portfolios.

CRITICAL RULES (in order of priority):
1. DEPLOY CASH FIRST: Before selling any position, use available cash to buy. Selling triggers taxes; buying with cash does not.
2. TAX AWARENESS: Positions with large unrealized gains should be sold last. Check cost_basis vs current value.
3. MATH ACCURACY: Verify percentages add up. If buying 20% of portfolio, calculate exact dollar amount from total value.
4. CONCENTRATION LIMITS: Any single stock >15% is risky. Broad ETFs (VTI, VOO) don't count as concentration.
5. CASH TARGET: Keep 2-5% cash for liquidity; excess cash loses to inflation.

TRADE LOGIC:
- If cash > 10%: Deploy into target allocation BEFORE any sells
- If position has gains > 20%: Avoid selling unless critically overweight
- Prefer buying broad index funds: VTI (US), VXUS (Intl), BND (Bonds)
- Limit to 5 trades maximum; prioritize highest-impact moves

ASSET CLASSIFICATION:
- "U S Dollar", "USD", "Cash" = Cash position
- Individual stocks (AAPL, SBSI, etc.) = Concentration risk if >15%
- ETFs (VTI, SPY, BND) = Diversified, lower risk
- Mutual funds (ending in X) = Check if broad or concentrated
- Crypto = High volatility, keep <5%

OUTPUT REQUIREMENTS:
- is_balanced=true only if allocation matches target within 5%
- Each trade must have clear reasoning tied to target allocation
- Dollar amounts must be mathematically correct
"""

# User prompt template, filled with ``str.format``. Placeholders:
#   {holdings_json}     - JSON text describing the holdings
#   {total_value:,.2f}  - numeric portfolio total, rendered with thousands separators
#   {target_allocation} - preformatted target allocation text
REBALANCING_USER_PROMPT = """
User's current portfolio holdings:
{holdings_json}

Total portfolio value: ${total_value:,.2f}

Target allocation (if specified): {target_allocation}

Please analyze this portfolio and provide rebalancing recommendations.
Consider diversification, risk management, and tax efficiency.
If the portfolio is already well-balanced, indicate that no trades are needed.
"""
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # Generator Class
179
+ # ---------------------------------------------------------------------------
180
+
181
+
182
class RebalancingInsightsGenerator:
    """
    LLM-powered portfolio rebalancing recommendations.

    Uses ai-infra LLM to generate intelligent trade recommendations
    based on portfolio analysis, diversification principles, and risk management.

    ``generate`` is the public entry point; every other method is a private helper.
    """

    # Upper bound, in seconds, applied to every LLM request issued by this class.
    _LLM_TIMEOUT_SECONDS = 60.0

    def __init__(
        self,
        provider: str = "anthropic",
        model_name: str | None = "claude-3-5-haiku-latest",
        cache_ttl: int = 86400,  # 24 hours - portfolio structure rarely changes
        enable_cache: bool = True,
        max_cost_per_day: float = 0.10,
        max_cost_per_month: float = 2.00,
    ):
        """
        Initialize rebalancing generator.

        Args:
            provider: LLM provider ("google", "openai", "anthropic")
            model_name: Model override (default: "claude-3-5-haiku-latest")
            cache_ttl: Cache TTL in seconds (default: 86400 = 24 hours).
                Stored on the instance only; the cached call used by
                ``generate`` takes its TTL from ``REBALANCE_CACHE_TTL``.
            enable_cache: Enable caching (default: True)
            max_cost_per_day: Daily budget cap in USD (default: $0.10).
                Stored on the instance; nothing in this class reads it.
            max_cost_per_month: Monthly budget cap in USD (default: $2.00).
                Stored on the instance; nothing in this class reads it.
        """
        # Imported lazily so the module can be imported without ai-infra installed.
        from ai_infra.llm import LLM

        self.provider = provider
        self.model_name = model_name
        self.cache_ttl = cache_ttl
        self.enable_cache = enable_cache
        self.max_cost_per_day = max_cost_per_day
        self.max_cost_per_month = max_cost_per_month

        # Initialize LLM
        self.llm = LLM()

    async def generate(
        self,
        positions: list[Position],
        target_allocation: dict[str, Decimal] | None = None,
        user_id: str | None = None,
    ) -> LLMRebalancingPlan:
        """
        Generate LLM-powered rebalancing recommendations.

        Uses svc-infra caching to avoid redundant LLM calls.
        Cache key is based on portfolio STRUCTURE, not exact values.

        Flow: empty portfolio -> canned response; otherwise try the cached call
        (when caching is enabled and available), then a direct LLM call, and
        finally a rule-based fallback if the direct call raises.

        Args:
            positions: Current portfolio positions
            target_allocation: Optional target allocation by asset class
            user_id: Optional user ID for caching

        Returns:
            LLMRebalancingPlan with trades and recommendations
        """
        # Convert positions to serializable format
        holdings = self._positions_to_holdings(positions)

        if not holdings:
            return self._empty_portfolio_response()

        # Generate cache key from portfolio structure
        cache_key = self._generate_cache_key(holdings, target_allocation, user_id)

        # Use svc-infra cached function if available
        if self.enable_cache and HAS_SVC_CACHE:
            logger.info("[REBALANCE_CACHE] Checking cache, key=%s", cache_key)
            try:
                result = await _cached_rebalance_llm_call(
                    cache_key=cache_key,
                    holdings_json=json.dumps(holdings, sort_keys=True),
                    target_allocation_json=json.dumps(
                        {k: float(v) for k, v in (target_allocation or {}).items()},
                        sort_keys=True,
                    ),
                    provider=self.provider,
                    model_name=self.model_name,
                )
            except Exception as e:
                # A failing cache layer must not break the request; use the direct path.
                logger.warning("[REBALANCE_CACHE] Cached call raised %s: %s", type(e).__name__, e)
                result = None
            if result is not None:
                return result  # type: ignore[no-any-return]
            # Fallback if cached call failed
            logger.warning("[REBALANCE_CACHE] Cached call returned None, trying direct")

        # Direct call (no cache or fallback)
        try:
            result = await self._call_llm(holdings, target_allocation)
            logger.info("Generated LLM rebalancing recommendations")
            return result
        except Exception as e:
            logger.error("LLM call failed for rebalancing: %s", e)
            return self._fallback_response(holdings)

    def _positions_to_holdings(self, positions: list[Position]) -> list[dict[str, Any]]:
        """Convert Position objects to a serializable holdings list.

        Each holding carries ``symbol``, ``name``, ``quantity``, ``market_value``,
        ``asset_class`` and ``percentage`` (share of the summed market value,
        rounded to two decimals). The list is sorted by market value, largest first.

        ``percentage`` is always present: it is 0.0 for every holding when the
        summed market value is not positive, so later consumers can read it safely.
        """
        holdings: list[dict[str, Any]] = [
            {
                "symbol": pos.symbol,
                # ``name`` / ``asset_class`` are optional on the position object.
                "name": getattr(pos, "name", pos.symbol),
                "quantity": float(pos.qty),
                "market_value": float(pos.market_value),
                "asset_class": getattr(pos, "asset_class", "unknown"),
            }
            for pos in positions
        ]

        # Calculate percentages
        total: float = sum(float(h["market_value"]) for h in holdings)
        for h in holdings:
            if total > 0:
                h["percentage"] = round(float(h["market_value"]) / total * 100, 2)
            else:
                h["percentage"] = 0.0

        # Sort by value descending
        holdings.sort(key=lambda x: x["market_value"], reverse=True)

        return holdings

    def _generate_cache_key(
        self,
        holdings: list[dict[str, Any]],
        target_allocation: dict[str, Decimal] | None,
        user_id: str | None,
    ) -> str:
        """Generate cache key from portfolio STRUCTURE, not current values.

        Rebalancing advice depends on:
        - Which holdings you have (symbols)
        - Approximate allocation buckets (not exact dollar amounts)
        - Target allocation

        It does NOT need to change when:
        - Stock prices fluctuate (daily noise)
        - Small quantity changes that stay within the same 5% bucket

        Returns:
            ``"rebalance:<user_id>:<hash>"`` when ``user_id`` is truthy, otherwise
            ``"rebalance:<hash>"``; ``<hash>`` is 12 hex characters.
        """
        total_value = sum(h["market_value"] for h in holdings)

        # Allocation in 5% buckets (e.g., 48.5% -> 50%, 29% -> 30%); positions at
        # or below 2% are left out. round() rounds exact midpoints to the even
        # multiple (12.5% -> 10%), which is still deterministic.
        allocations: dict[str, int] = {}
        if total_value > 0:
            allocations = {
                h["symbol"]: round(h["market_value"] / total_value * 20) * 5
                for h in holdings
                if h["market_value"] / total_value > 0.02
            }

        cache_data = {
            # Symbols you own (sorted for consistency)
            "symbols": sorted(h["symbol"] for h in holdings if h["market_value"] > 0),
            "allocations": allocations,
            # Target allocation
            "target": {k: float(v) for k, v in (target_allocation or {}).items()},
        }

        data_json = json.dumps(cache_data, sort_keys=True)
        # Security: B324 skip justified - MD5 used for cache key generation only.
        portfolio_hash = hashlib.md5(data_json.encode()).hexdigest()[:12]

        # Include user_id for user-specific caching
        if user_id:
            return f"rebalance:{user_id}:{portfolio_hash}"

        return f"rebalance:{portfolio_hash}"

    async def _call_llm(
        self,
        holdings: list[dict[str, Any]],
        target_allocation: dict[str, Decimal] | None,
    ) -> LLMRebalancingPlan:
        """Call LLM for rebalancing recommendations.

        First asks for structured output. If that raises ``ValueError`` or yields
        something other than an ``LLMRebalancingPlan``, a second request without
        ``output_schema`` is made and its text is parsed as JSON.

        Raises:
            asyncio.TimeoutError: a request exceeded ``_LLM_TIMEOUT_SECONDS``.
            ValueError: no usable JSON could be extracted from the raw response
                (also covers JSON decoding errors).
            Exception: any other error raised by the LLM client is re-raised.
        """
        import asyncio
        import time

        # Calculate total value
        total_value = sum(h["market_value"] for h in holdings)

        # Format holdings for prompt
        holdings_json = json.dumps(holdings, indent=2)

        # Format target allocation
        if target_allocation:
            target_str = ", ".join(f"{k}: {v}%" for k, v in target_allocation.items())
        else:
            target_str = "Not specified (recommend based on moderate risk tolerance)"

        user_prompt = REBALANCING_USER_PROMPT.format(
            holdings_json=holdings_json,
            total_value=total_value,
            target_allocation=target_str,
        )

        # Try with structured output first, fall back to raw content parsing
        try:
            logger.info("Calling LLM: provider=%s, model=%s", self.provider, self.model_name)
            start_time = time.monotonic()
            response = await asyncio.wait_for(
                self.llm.achat(
                    user_msg=user_prompt,
                    provider=self.provider,
                    model_name=self.model_name,
                    system=REBALANCING_SYSTEM_PROMPT,
                    output_schema=LLMRebalancingPlan,
                    output_method="prompt",  # Use prompt for cross-provider compatibility
                    temperature=0.3,  # Some creativity for recommendations
                    max_tokens=4000,  # Increased for complex portfolio analysis
                ),
                timeout=self._LLM_TIMEOUT_SECONDS,  # 60 second timeout for Gemini (can be slow)
            )
            elapsed = time.monotonic() - start_time

            # ai-infra LLM.achat with output_schema returns the Pydantic model directly
            if isinstance(response, LLMRebalancingPlan):
                logger.info(
                    "LLM returned structured LLMRebalancingPlan directly in %.2fs", elapsed
                )
                return response

        except asyncio.TimeoutError:
            # asyncio.TimeoutError is the builtin TimeoutError only from Python 3.11
            # on; naming the asyncio alias catches the timeout on older versions too.
            logger.error("LLM call timed out after %.0f seconds", self._LLM_TIMEOUT_SECONDS)
            raise
        except ValueError as e:
            # ai-infra coerce_structured_result failed - try raw content parsing
            logger.warning("Structured output parsing failed: %s - trying raw content", e)
        except Exception as e:
            logger.error("LLM call failed with %s: %s", type(e).__name__, e)
            raise

        # Fall back to calling without output_schema and parsing manually
        logger.info("Attempting raw LLM call without output_schema")
        raw_response = await asyncio.wait_for(
            self.llm.achat(
                user_msg=user_prompt,
                provider=self.provider,
                model_name=self.model_name,
                system=REBALANCING_SYSTEM_PROMPT
                + "\n\nIMPORTANT: Respond with ONLY valid JSON, no markdown or explanation.",
                temperature=0.3,
                max_tokens=4000,  # Increased for complex portfolio analysis
            ),
            # Same bound as the structured request so this path cannot hang forever.
            timeout=self._LLM_TIMEOUT_SECONDS,
        )

        # Parse raw content
        content = getattr(raw_response, "content", str(raw_response))
        if content:
            text = self._content_to_text(content)

            logger.debug("Raw LLM content length=%d, first 500 chars: %s", len(text), text[:500])
            logger.debug(
                "Raw LLM content last 200 chars: %s",
                text[-200:] if len(text) > 200 else text,
            )

            data = self._extract_json_object(text)
            logger.info("Successfully parsed LLM JSON response manually")

            # Normalize data to match schema - handle missing fields and truncate lists
            normalized = self._normalize_llm_response(data)
            return LLMRebalancingPlan(**normalized)

        raise ValueError("Could not extract valid JSON from LLM response")

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """Flatten an LLM ``content`` payload into one string.

        Handles Gemini's list content format (``[{'type': 'text', 'text': '...'}]``)
        by joining the ``text`` entries and bare strings; any other value is
        passed through ``str()`` unless it already is a string.
        """
        if isinstance(content, list):
            text_parts = []
            for part in content:
                if isinstance(part, dict) and "text" in part:
                    text_parts.append(str(part["text"]))
                elif isinstance(part, str):
                    text_parts.append(part)
            return "".join(text_parts) if text_parts else str(content)
        return content if isinstance(content, str) else str(content)

    @staticmethod
    def _extract_json_object(text: str) -> Any:
        """Decode the JSON object spanning the first ``{`` to the last ``}`` in ``text``.

        Markdown code fences need no separate handling: they contain no braces,
        so they always fall outside the extracted span.

        Raises:
            ValueError: no brace-delimited span exists, or the span is not valid
                JSON (``json.JSONDecodeError`` is a ``ValueError``).
        """
        start_idx = text.find("{")
        end_idx = text.rfind("}") + 1
        if start_idx < 0 or end_idx <= start_idx:
            raise ValueError("Could not extract valid JSON from LLM response")
        return json.loads(text[start_idx:end_idx])

    @staticmethod
    def _truncate_text(text: str, limit: int) -> str:
        """Return ``text`` cut to ``limit`` characters, ending in ``...`` when cut."""
        if len(text) <= limit:
            return text
        return text[: limit - 3] + "..."

    @staticmethod
    def _item_to_text(item: Any) -> str:
        """Render one list entry as a string (dicts contribute their ``reasoning``)."""
        if isinstance(item, str):
            return item
        if isinstance(item, dict):
            return str(item.get("reasoning", item))
        return str(item)

    def _normalize_llm_response(self, data: dict) -> dict:
        """Normalize LLM response to match schema requirements.

        Returns a new dict (the input is not modified) in which:
        - ``summary`` / ``analysis`` are strings within 500 / 800 characters; a
          missing or ``None`` value borrows the other field, else a stock sentence;
        - ``trades`` holds at most 5 entries, ``recommendations`` and
          ``risk_warnings`` at most 3 string entries; non-list values become ``[]``;
        - ``is_balanced`` is a real bool (strings such as ``"false"`` count as False).
        """
        normalized = dict(data)

        # Read both raw values first so each can serve as the other's fallback.
        raw_summary = data.get("summary")
        raw_analysis = data.get("analysis")

        summary = raw_summary if raw_summary is not None else raw_analysis
        if summary is None:
            summary = "Portfolio analysis completed."
        analysis = raw_analysis if raw_analysis is not None else raw_summary
        if analysis is None:
            analysis = "Analysis unavailable."

        # Truncate summary and analysis to max lengths
        normalized["summary"] = self._truncate_text(str(summary), 500)
        normalized["analysis"] = self._truncate_text(str(analysis), 800)

        # Truncate trades to max 5
        trades = data.get("trades")
        normalized["trades"] = trades[:5] if isinstance(trades, list) else []

        # Truncate recommendations to max 3
        recommendations = data.get("recommendations")
        normalized["recommendations"] = (
            [self._item_to_text(r) for r in recommendations[:3]]
            if isinstance(recommendations, list)
            else []
        )

        # Truncate risk_warnings to max 3
        risk_warnings = data.get("risk_warnings")
        normalized["risk_warnings"] = (
            [self._item_to_text(w) for w in risk_warnings[:3]]
            if isinstance(risk_warnings, list)
            else []
        )

        # Ensure is_balanced is boolean
        balanced = data.get("is_balanced", False)
        if isinstance(balanced, str):
            # bool("false") would be True, so interpret the text explicitly.
            balanced = balanced.strip().lower() in {"true", "yes", "1"}
        normalized["is_balanced"] = bool(balanced)

        return normalized

    def _empty_portfolio_response(self) -> LLMRebalancingPlan:
        """Response for empty portfolio."""
        return LLMRebalancingPlan(
            summary="Your portfolio is empty. Add funds to start investing.",
            analysis="No holdings to analyze.",
            trades=[],
            recommendations=[
                "Open a brokerage account if you haven't already",
                "Consider starting with a low-cost index fund like VTI",
                "Set up automatic monthly contributions",
            ],
            risk_warnings=[],
            is_balanced=True,
        )

    def _fallback_response(self, holdings: list[dict[str, Any]]) -> LLMRebalancingPlan:
        """Fallback response when LLM unavailable.

        Builds a plan without trades from simple rules: warns when the first
        holding exceeds 30% of the portfolio and when cash-like holdings exceed 30%.
        Holdings without a ``percentage`` entry are treated as 0%.
        """
        total_value = sum(h["market_value"] for h in holdings)

        # Basic analysis; the first entry is taken as the top holding
        # (holdings from _positions_to_holdings are sorted largest first).
        top_holding = holdings[0] if holdings else None
        warnings = []

        if top_holding and top_holding.get("percentage", 0.0) > 30:
            warnings.append(
                f"High concentration in {top_holding['symbol']} "
                f"({top_holding['percentage']:.1f}% of portfolio)"
            )

        # Calculate asset class breakdown
        cash_pct = sum(
            h.get("percentage", 0.0)
            for h in holdings
            if h.get("asset_class") in ["cash", "currency", "money_market"]
            or "dollar" in h["symbol"].lower()
        )
        if cash_pct > 30:
            warnings.append(f"High cash allocation ({cash_pct:.1f}%) may underperform inflation")

        # NOTE(review): is_balanced stays True even when warnings are present -
        # confirm consumers expect that from the fallback.
        return LLMRebalancingPlan(
            summary=f"Portfolio has {len(holdings)} holdings worth ${total_value:,.2f}. "
            "LLM analysis temporarily unavailable.",
            analysis="Basic analysis performed. Detailed recommendations require LLM.",
            trades=[],
            recommendations=[
                "Consider consulting a financial advisor for personalized advice",
                "Review your portfolio for diversification across asset classes",
            ],
            risk_warnings=warnings,
            is_balanced=True,  # Conservative default
        )
572
+
573
+
574
+ # ---------------------------------------------------------------------------
575
+ # Cached LLM Call (svc-infra caching)
576
+ # ---------------------------------------------------------------------------
577
+
578
+
579
async def _do_rebalance_llm_call_impl(
    cache_key: str,
    holdings_json: str,
    target_allocation_json: str,
    provider: str,
    model_name: str | None,
) -> LLMRebalancingPlan | None:
    """
    Actually call the LLM for rebalancing (no caching, called by cached wrapper).

    All arguments are plain strings (or None) so that the call can be wrapped
    by a cache decorator.

    Args:
        cache_key: Key identifying this portfolio structure; only logged here.
        holdings_json: JSON array of holdings, each with a ``market_value``.
        target_allocation_json: JSON object mapping a label to a percentage;
            an empty object means "no target specified".
        provider: LLM provider passed through to ``LLM.achat``.
        model_name: Model name passed through to ``LLM.achat``.

    Returns:
        The plan returned by the LLM, or None when the request timed out,
        raised, or did not yield an ``LLMRebalancingPlan``.
    """
    import asyncio
    import time

    # Imported lazily so the module can be imported without ai-infra installed.
    from ai_infra.llm import LLM

    logger.info("[REBALANCE_CACHE] MISS - calling LLM (key=%s)", cache_key)

    holdings = json.loads(holdings_json)
    target_allocation_raw = json.loads(target_allocation_json)
    # Uses the module-level Decimal import; a local re-import is unnecessary.
    target_allocation = (
        {k: Decimal(str(v)) for k, v in target_allocation_raw.items()}
        if target_allocation_raw
        else None
    )

    # Calculate total value
    total_value = sum(h["market_value"] for h in holdings)

    # Format holdings for prompt
    formatted_holdings_json = json.dumps(holdings, indent=2)

    # Format target allocation
    if target_allocation:
        target_str = ", ".join(f"{k}: {v}%" for k, v in target_allocation.items())
    else:
        target_str = "Not specified (recommend based on moderate risk tolerance)"

    user_prompt = REBALANCING_USER_PROMPT.format(
        holdings_json=formatted_holdings_json,
        total_value=total_value,
        target_allocation=target_str,
    )

    llm = LLM()

    try:
        logger.info("Calling LLM: provider=%s, model=%s", provider, model_name)
        start_time = time.monotonic()
        response = await asyncio.wait_for(
            llm.achat(
                user_msg=user_prompt,
                provider=provider,
                model_name=model_name,
                system=REBALANCING_SYSTEM_PROMPT,
                output_schema=LLMRebalancingPlan,
                output_method="prompt",
                temperature=0.3,
                max_tokens=4000,
            ),
            timeout=60.0,
        )
        elapsed = time.monotonic() - start_time

        if isinstance(response, LLMRebalancingPlan):
            logger.info("[REBALANCE_CACHE] LLM returned result in %.2fs", elapsed)
            return response

        logger.warning("LLM response was not LLMRebalancingPlan: %s", type(response))
        return None

    except asyncio.TimeoutError:
        # asyncio.TimeoutError is the builtin TimeoutError only from Python 3.11
        # on; naming the asyncio alias reports the timeout correctly everywhere.
        logger.error("LLM call timed out after 60 seconds")
        return None
    except Exception as e:
        # Best effort: the caller treats None as "no cached result available".
        logger.error("LLM call failed: %s: %s", type(e).__name__, e)
        return None
656
+
657
+
658
# Apply svc-infra caching decorator if available
# Runs once at import time and binds ``_cached_rebalance_llm_call`` either to the
# decorated implementation or to the bare implementation; one log line records
# which variant is active.
# NOTE(review): cache keys produced by ``_generate_cache_key`` already start
# with "rebalance:", so if the template below substitutes ``cache_key`` verbatim
# the stored key is prefixed twice - confirm against svc-infra's cache_read.
if HAS_SVC_CACHE and cache_read is not None:
    _cached_rebalance_llm_call = cache_read(
        key="rebalance:{cache_key}",
        ttl=REBALANCE_CACHE_TTL,
    )(_do_rebalance_llm_call_impl)
    logger.info("[REBALANCE_CACHE] Using svc-infra cache_read decorator")
else:
    # Fallback: no caching
    _cached_rebalance_llm_call = _do_rebalance_llm_call_impl
    logger.warning("[REBALANCE_CACHE] svc-infra cache not available, caching disabled")
669
+
670
+
671
+ # ---------------------------------------------------------------------------
672
+ # Convenience Function
673
+ # ---------------------------------------------------------------------------
674
+
675
# Module-level singleton to preserve cache across calls
_generator_instance: RebalancingInsightsGenerator | None = None


def _get_generator(provider: str, model_name: str) -> RebalancingInsightsGenerator:
    """Get or create the shared generator for the requested configuration.

    The shared instance is reused while ``provider`` and ``model_name`` match
    the ones it was built with. A differing request replaces it, so callers
    never silently receive a generator configured for another provider or model.

    Args:
        provider: LLM provider the generator must use.
        model_name: Model name the generator must use.

    Returns:
        A generator whose ``provider`` and ``model_name`` equal the arguments.
    """
    global _generator_instance
    if (
        _generator_instance is None
        or _generator_instance.provider != provider
        or _generator_instance.model_name != model_name
    ):
        _generator_instance = RebalancingInsightsGenerator(provider=provider, model_name=model_name)
    return _generator_instance
685
+
686
+
687
async def generate_rebalancing_plan_llm(
    positions: list[Position],
    target_allocation: dict[str, Decimal] | None = None,
    user_id: str | None = None,
    provider: str = "anthropic",
    model_name: str = "claude-3-5-haiku-latest",
) -> LLMRebalancingPlan:
    """
    Generate LLM-powered rebalancing recommendations.

    Convenience wrapper: obtains a generator through ``_get_generator`` and
    awaits its ``generate`` method with the given portfolio data.

    Args:
        positions: Current portfolio positions
        target_allocation: Optional target allocation by asset class
        user_id: Optional user ID for caching
        provider: LLM provider to use
        model_name: Model name to use

    Returns:
        LLMRebalancingPlan with trades and recommendations
    """
    shared_generator = _get_generator(provider=provider, model_name=model_name)
    plan = await shared_generator.generate(positions, target_allocation, user_id)
    return plan