repr-cli 0.1.0__py3-none-any.whl

@@ -0,0 +1,597 @@
+ """
+ OpenAI-based analysis for repository profiling.
+
+ This module implements a direct OpenAI integration for analyzing git repositories
+ using a two-phase approach:
+ 1. EXTRACTION: Process batches of commits with diffs using gpt-5-nano
+ 2. SYNTHESIS: Combine summaries into final profile using gpt-5.2
+ """
+
+ import asyncio
+ from typing import Any, Callable, Optional
+
+ from openai import AsyncOpenAI
+
+ from .tools import get_commits_with_diffs
+ from .discovery import RepoInfo
+ from .config import get_litellm_config, get_llm_config, get_api_base
+
+
+ # Model configuration (defaults for OpenAI)
+ DEFAULT_EXTRACTION_MODEL = "openai/gpt-5-nano-2025-08-07"
+ DEFAULT_SYNTHESIS_MODEL = "openai/gpt-5.2-2025-12-11"
+ EXTRACTION_TEMPERATURE = 0.3
+ SYNTHESIS_TEMPERATURE = 0.7
+ COMMITS_PER_BATCH = 25
+
+
+ def get_openai_client(api_key: Optional[str] = None, base_url: Optional[str] = None) -> AsyncOpenAI:
+     """
+     Get an OpenAI-compatible client that proxies through our backend.
+
+     Args:
+         api_key: API key (optional, for local LLM mode)
+         base_url: Base URL for the API (optional, for local LLM mode)
+
+     Returns:
+         AsyncOpenAI client
+     """
+     # If explicit parameters are provided, use them (local mode)
+     if api_key:
+         kwargs = {"api_key": api_key}
+         if base_url:
+             kwargs["base_url"] = base_url
+         return AsyncOpenAI(**kwargs)
+
+     # Use our backend as the proxy - it will forward to LiteLLM.
+     # The rf_* token is used to authenticate with our backend.
+     _, litellm_key = get_litellm_config()
+     if not litellm_key:
+         raise ValueError("Not logged in. Please run 'rf login' first.")
+
+     # Point to our backend's LLM proxy endpoint
+     backend_url = get_api_base().replace("/api/cli", "")
+
+     return AsyncOpenAI(
+         api_key=litellm_key,
+         base_url=f"{backend_url}/api/llm/v1"
+     )
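+ # Example: pointing the client at a local OpenAI-compatible server (a sketch;
+ # the URL and key are illustrative - Ollama, for instance, exposes a /v1
+ # endpoint and accepts any non-empty key):
+ #
+ #     client = get_openai_client(api_key="ollama", base_url="http://localhost:11434/v1")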
+
+
+ async def extract_commit_batch(
+     client: AsyncOpenAI,
+     commits: list[dict[str, Any]],
+     batch_num: int,
+     total_batches: int,
+     model: Optional[str] = None,
+ ) -> str:
+     """
+     Extraction phase: Extract accomplishments from a batch of commits.
+
+     Args:
+         client: OpenAI client
+         commits: List of commits with diffs
+         batch_num: Current batch number (for context)
+         total_batches: Total number of batches
+         model: Model name to use (defaults to stored config or DEFAULT_EXTRACTION_MODEL)
+
+     Returns:
+         Summary of technical accomplishments in this batch
+     """
+     if not model:
+         llm_config = get_llm_config()
+         model = llm_config.get("extraction_model") or DEFAULT_EXTRACTION_MODEL
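+
+     # Expected commit shape (inferred from the field accesses below; values
+     # are illustrative):
+     #     {"sha": "abc1234", "date": "2025-01-15", "message": "Add JWT auth",
+     #      "files": [{"change_type": "M", "path": "api/auth.py", "diff": "@@ ..."}]}
+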
+     # Format commits for the prompt
+     commits_text = []
+     for commit in commits:
+         commit_text = f"""
+ Commit: {commit['sha']}
+ Date: {commit['date']}
+ Message: {commit['message']}
+
+ Files changed:"""
+
+         for file_info in commit['files'][:10]:  # Limit files per commit
+             change_type = {
+                 'A': 'Added',
+                 'D': 'Deleted',
+                 'M': 'Modified',
+                 'R': 'Renamed'
+             }.get(file_info['change_type'], 'Changed')
+
+             commit_text += f"\n {change_type}: {file_info['path']}"
+
+             if file_info['diff']:
+                 # Truncate diff if too long (for token management)
+                 diff = file_info['diff'][:2000]
+                 commit_text += f"\n```diff\n{diff}\n```"
+
+         commits_text.append(commit_text)
+
+     commits_formatted = "\n\n---\n".join(commits_text)
+
+     system_prompt = """You are analyzing a developer's actual code commits to extract specific technical accomplishments WITH the reasoning behind them.
+
+ Your job: Read the commit messages and diffs, then list CONCRETE technical accomplishments with SPECIFIC details AND infer WHY those decisions were made.
+
+ For each accomplishment, capture:
+ 1. WHAT was built (the technical implementation)
+ 2. WHY it was needed (the problem being solved, the user/business need, or the technical constraint)
+
+ Rules:
+ - Use EXACT technology names from the code (FastAPI, React, SQLAlchemy, not "web framework")
+ - Describe SPECIFIC features built (e.g., "JWT authentication with refresh tokens", not "auth system")
+ - INFER the motivation when possible:
+   - Performance changes → what latency/throughput problem was being solved?
+   - New features → what user capability was being enabled?
+   - Refactors → what maintainability or scalability issue was being addressed?
+   - Error handling → what failure mode was being prevented?
+ - Mention architectural patterns when evident (microservices, event-driven, REST API, etc.)
+ - Include scale indicators (number of endpoints, integrations, etc.)
+ - Be concise but specific - bullet points are fine
+
+ What NOT to do:
+ - Don't write vague statements like "worked on backend"
+ - Don't guess technologies not shown in the diffs
+ - Don't include process/methodology unless there's evidence
+ - Don't fabricate motivations that aren't supported by the code/commits"""
+
+     user_prompt = f"""Analyze commits batch {batch_num}/{total_batches} and extract technical accomplishments:
+
+ {commits_formatted}
+
+ List the specific technical work done in this batch. For each item:
+ 1. What was BUILT (the concrete implementation)
+ 2. Why it was needed (infer from context: what problem was solved? what user need? what constraint?)
+
+ Focus on substance, not process."""
+
+     response = await client.chat.completions.create(
+         model=model,
+         messages=[
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt},
+         ],
+         temperature=EXTRACTION_TEMPERATURE,
+         max_tokens=16000,  # Increased for reasoning models that use tokens for thinking
+     )
+
+     return response.choices[0].message.content or ""
+
+
+ async def synthesize_profile(
+     client: AsyncOpenAI,
+     summaries: list[str],
+     repo_info: dict[str, Any],
+     model: Optional[str] = None,
+ ) -> str:
+     """
+     Synthesis phase: Combine batch summaries into the final developer profile.
+
+     Args:
+         client: OpenAI client
+         summaries: List of batch summaries from the extraction phase
+         repo_info: Repository metadata
+         model: Model name to use (defaults to stored config or DEFAULT_SYNTHESIS_MODEL)
+
+     Returns:
+         Final developer profile in markdown
+     """
+     if not model:
+         llm_config = get_llm_config()
+         model = llm_config.get("synthesis_model") or DEFAULT_SYNTHESIS_MODEL
+
+     summaries_text = "\n\n---\n\n".join([
+         f"## Batch {i+1}\n\n{summary}"
+         for i, summary in enumerate(summaries)
+     ])
+
+     system_prompt = """You are an expert technical resume writer creating a developer profile from their ACTUAL code commits.
+
+ Transform the batch analyses into COMPELLING RESUME CONTENT that shows not just WHAT was built, but WHY decisions were made.
+
+ CRITICAL - NO GENERIC STATEMENTS:
+ - ❌ "Experience with web frameworks" → ✅ "Built REST APIs with FastAPI including WebSocket support for real-time updates"
+ - ❌ "Strong Python skills" → ✅ "Architected async Python backend with SQLAlchemy, Celery task queues, and Redis caching"
+ - ❌ "Agile methodologies" → Don't mention process/methodology
+
+ CRITICAL - INCLUDE THE WHY:
+ For significant technical work, explain the reasoning:
+ - ✅ "Built WebSocket token streaming—users expect ChatGPT-like instant feedback; REST endpoints that return only after full completion feel broken for 10-30 second responses"
+ - ✅ "Implemented Redis-backed auth caching to short-circuit repeated Supabase validation—every API call was adding 50-100ms of overhead"
+ - ✅ "Added explicit rollback paths in DB transactions—SQLAlchemy's implicit rollback doesn't always fire when expected, causing connection pool pollution"
+
+ The WHY demonstrates engineering judgment:
+ - What problem was being solved?
+ - What tradeoffs were considered?
+ - What would have happened without this change?
+ - What user/business need drove this?
+
+ STRUCTURE:
+ 1. **Summary**: 2-3 sentences capturing UNIQUE expertise (not generic "versatile developer")
+ 2. **Key Technical Skills (used in this codebase)**: ONLY technologies ACTUALLY used, with context of HOW they were used
+ 3. **Notable Projects & Contributions**: SPECIFIC features/achievements with technical details AND the reasoning behind key decisions. Group related work under descriptive subsection headers. For each major piece of work, include a "**Why**:" line explaining the problem/motivation.
+ 4. **Development Philosophy (evidence-based)**: ONLY if there's clear evidence (comprehensive tests, specific patterns). Include *Why?* explanations that show the thinking.
+
+ Use strong action verbs: Built, Architected, Implemented, Designed, Optimized, Integrated
+ Every claim must be backed by evidence from the commits."""
+
+     # Build metadata header (injected directly, not LLM-generated)
+     languages = repo_info.get('languages', {})
+     languages_str = ", ".join([f"{k} ({v}%)" for k, v in languages.items()]) if languages else "Unknown"
+
+     # Calculate age display
+     age_months = repo_info.get('age_months', 0)
+     if age_months < 1:
+         age_str = "< 1 month"
+     elif age_months < 12:
+         age_str = f"{age_months} months"
+     else:
+         years = age_months // 12
+         remaining_months = age_months % 12
+         age_str = f"{years} year{'s' if years > 1 else ''}" + (f", {remaining_months} months" if remaining_months else "")
+
+     # Format remote URL (clean up if present)
+     remote_url = repo_info.get('remote_url', '')
+     if remote_url:
+         remote_display = remote_url.replace('git@github.com:', 'github.com/').removesuffix('.git')
+         if remote_display.startswith('https://'):
+             remote_display = remote_display[8:]
+     else:
+         remote_display = None
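+     # e.g., "git@github.com:user/repo.git" -> "github.com/user/repo";
+     #       "https://github.com/user/repo" -> "github.com/user/repo"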
+
+     # Build the metadata header to prepend
+     metadata_lines = [
+         f"- **Repository**: {repo_info.get('name', 'Unknown')}",
+         f"- **Languages**: {languages_str}",
+         f"- **Total Commits**: {repo_info.get('commit_count', 'Unknown')}",
+         f"- **Contributors**: {repo_info.get('contributors', 'Unknown')}",
+         f"- **Active Period**: {repo_info.get('first_commit_date', 'Unknown')} to {repo_info.get('last_commit_date', 'Unknown')} ({age_str})",
+     ]
+     if remote_display:
+         metadata_lines.append(f"- **Remote**: {remote_display}")
+     if repo_info.get('is_fork'):
+         metadata_lines.append("- **Fork**: Yes")
+
+     metadata_header = "\n".join(metadata_lines)
+
+     user_prompt = f"""Create a developer profile from these commit analyses:
+
+ ## Technical Work (from commit analysis):
+
+ {summaries_text}
+
+ ---
+
+ Synthesize this into a cohesive developer profile in Markdown format starting with Summary, then Key Technical Skills, Notable Projects & Contributions, and Development Philosophy.
+
+ Focus on CONCRETE technical accomplishments AND the reasoning behind key decisions. For each major feature or system, explain WHY it was built that way—what problem it solved, what user need it addressed, or what technical constraint it navigated."""
+
+     response = await client.chat.completions.create(
+         model=model,
+         messages=[
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt},
+         ],
+         temperature=SYNTHESIS_TEMPERATURE,
+         max_tokens=16000,  # Increased for reasoning models
+     )
+
+     llm_content = response.choices[0].message.content or ""
+
+     # Prepend metadata header
+     return f"{metadata_header}\n\n---\n\n{llm_content}"
+
+
+ async def analyze_repo_openai(
+     repo: RepoInfo,
+     api_key: Optional[str] = None,
+     base_url: Optional[str] = None,
+     extraction_model: Optional[str] = None,
+     synthesis_model: Optional[str] = None,
+     verbose: bool = False,
+     progress_callback: Optional[Callable] = None,
+ ) -> str:
+     """
+     Analyze a single repository using an OpenAI-compatible API.
+
+     Args:
+         repo: Repository information
+         api_key: API key (optional; when omitted, requests are routed through the
+             backend LLM proxy using the stored login token)
+         base_url: Base URL for the API (for local LLMs like Ollama)
+         extraction_model: Model for extracting accomplishments (defaults to DEFAULT_EXTRACTION_MODEL)
+         synthesis_model: Model for synthesizing the profile (defaults to DEFAULT_SYNTHESIS_MODEL)
+         verbose: Whether to print verbose output
+         progress_callback: Optional callback for progress updates.
+             Signature: callback(step: str, detail: str, repo: str, progress: float)
+
+     Returns:
+         Repository analysis/narrative in markdown
+     """
+     client = get_openai_client(api_key=api_key, base_url=base_url)
+
+     if progress_callback:
+         progress_callback(
+             step="Extracting",
+             detail=f"Reading git history ({repo.commit_count} commits)",
+             repo=repo.name,
+             progress=5.0,
+         )
+
+     # Get commits with diffs
+     commits = get_commits_with_diffs(
+         repo_path=repo.path,
+         count=200,  # Last 200 commits
+         days=730,  # Last 2 years
+     )
+
+     if not commits:
+         return f"No commits found in {repo.name}"
+
+     if progress_callback:
+         progress_callback(
+             step="Preparing",
+             detail=f"Found {len(commits)} commits with diffs to analyze",
+             repo=repo.name,
+             progress=10.0,
+         )
+
+     # Split into batches
+     batches = [
+         commits[i:i + COMMITS_PER_BATCH]
+         for i in range(0, len(commits), COMMITS_PER_BATCH)
+     ]
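+     # e.g., 137 commits with COMMITS_PER_BATCH=25 -> six batches: 25, 25, 25, 25, 25, 12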
+
+     total_batches = len(batches)
+
+     if progress_callback:
+         progress_callback(
+             step="Analyzing",
+             detail=f"Processing {total_batches} batches ({COMMITS_PER_BATCH} commits each)",
+             repo=repo.name,
+             progress=15.0,
+         )
+
+     # EXTRACTION phase: Process batches with progress tracking
+     async def process_batch_with_progress(batch, batch_num):
+         """Process a single batch and report progress."""
+         result = await extract_commit_batch(client, batch, batch_num, total_batches, model=extraction_model)
+         if progress_callback:
+             # Progress goes from 15% to 75% during the extraction phase
+             batch_progress = 15.0 + (60.0 * batch_num / total_batches)
+             progress_callback(
+                 step="Analyzing",
+                 detail=f"Batch {batch_num}/{total_batches} complete",
+                 repo=repo.name,
+                 progress=batch_progress,
+             )
+         return result
+
+     # Process batches concurrently but track progress
+     # (batches can complete out of order, so reported progress may be briefly non-monotonic)
+     extraction_tasks = [
+         process_batch_with_progress(batch, i + 1)
+         for i, batch in enumerate(batches)
+     ]
+
+     summaries = await asyncio.gather(*extraction_tasks)
+
+     # Filter out empty summaries
+     summaries = [s for s in summaries if s.strip()]
+
+     if not summaries:
+         return f"Could not extract meaningful information from {repo.name}"
+
+     if progress_callback:
+         progress_callback(
+             step="Synthesizing",
+             detail="Generating developer profile from analysis...",
+             repo=repo.name,
+             progress=80.0,
+         )
+
+     # SYNTHESIS phase: Combine into final profile
+     repo_dict = {
+         "name": repo.name,
+         "path": str(repo.path),
+         "languages": repo.languages,
+         "primary_language": repo.primary_language,
+         "commit_count": repo.commit_count,
+         "contributors": repo.contributors,
+         "first_commit_date": repo.first_commit_date.isoformat() if repo.first_commit_date else None,
+         "last_commit_date": repo.last_commit_date.isoformat() if repo.last_commit_date else None,
+         "remote_url": repo.remote_url,
+         "is_fork": repo.is_fork,
+         "age_months": repo.age_months,
+     }
+
+     profile = await synthesize_profile(client, summaries, repo_dict, model=synthesis_model)
+
+     if progress_callback:
+         progress_callback(
+             step="Complete",
+             detail=f"Profile generated for {repo.name}",
+             repo=repo.name,
+             progress=100.0,
+         )
+
+     return profile
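+ # Example progress callback (an illustrative sketch; any callable with this
+ # signature works):
+ #
+ #     def on_progress(step: str, detail: str, repo: str, progress: float) -> None:
+ #         print(f"[{progress:5.1f}%] {repo or 'all'}: {step} - {detail}")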
+
+
+ async def analyze_repos_openai(
+     repos: list[RepoInfo],
+     api_key: Optional[str] = None,
+     base_url: Optional[str] = None,
+     extraction_model: Optional[str] = None,
+     synthesis_model: Optional[str] = None,
+     verbose: bool = False,
+     progress_callback: Optional[Callable] = None,
+ ) -> str:
+     """
+     Analyze multiple repositories and create a combined profile.
+
+     Args:
+         repos: List of repositories to analyze
+         api_key: API key (optional; when omitted, requests are routed through the
+             backend LLM proxy using the stored login token)
+         base_url: Base URL for the API (for local LLMs like Ollama)
+         extraction_model: Model for extracting accomplishments (defaults to DEFAULT_EXTRACTION_MODEL)
+         synthesis_model: Model for synthesizing the profile (defaults to DEFAULT_SYNTHESIS_MODEL)
+         verbose: Whether to print verbose output
+         progress_callback: Optional callback for progress updates.
+             Signature: callback(step: str, detail: str, repo: str, progress: float)
+
+     Returns:
+         Combined developer profile in markdown
+     """
+     if not repos:
+         return "No repositories to analyze"
+
+     total_repos = len(repos)
+
+     if progress_callback:
+         progress_callback(
+             step="Starting",
+             detail=f"Analyzing {total_repos} {'repository' if total_repos == 1 else 'repositories'}",
+             repo="",
+             progress=0.0,
+         )
+
+     # Analyze each repo
+     repo_profiles = []
+     for i, repo in enumerate(repos):
+         # Create a scoped progress callback for this repo
+         def make_repo_callback(repo_idx, repo_name):
+             def repo_callback(step, detail, repo, progress):
+                 # Scale progress: each repo gets an equal share
+                 repo_start = (repo_idx / total_repos) * 90  # Save 10% for final merge
+                 repo_end = ((repo_idx + 1) / total_repos) * 90
+                 scaled_progress = repo_start + (progress / 100) * (repo_end - repo_start)
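+                 # e.g., repo 2 of 3 (repo_idx=1) at 50% local progress:
+                 #     repo_start=30, repo_end=60 -> scaled_progress=45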
+
+                 if progress_callback:
+                     progress_callback(
+                         step=step,
+                         detail=f"[{repo_idx + 1}/{total_repos}] {detail}",
+                         repo=repo_name,
+                         progress=scaled_progress,
+                     )
+             return repo_callback
+
+         profile = await analyze_repo_openai(
+             repo,
+             api_key=api_key,
+             base_url=base_url,
+             extraction_model=extraction_model,
+             synthesis_model=synthesis_model,
+             verbose=verbose,
+             progress_callback=make_repo_callback(i, repo.name),
+         )
+         repo_profiles.append({
+             "name": repo.name,
+             "profile": profile,
+         })
+
+     # If only one repo, return its profile directly
+     if len(repos) == 1:
+         return repo_profiles[0]["profile"]
+
+     # Multiple repos: combine them
+     if progress_callback:
+         progress_callback(
+             step="Merging",
+             detail=f"Combining profiles from {total_repos} repositories...",
+             repo="all",
+             progress=92.0,
+         )
+
+     client = get_openai_client(api_key=api_key, base_url=base_url)
+
+     # Aggregate metadata from all repos (injected directly, not LLM-generated)
+     total_commits = sum(r.commit_count for r in repos)
+     all_languages = {}
+     for repo in repos:
+         if repo.languages:
+             for lang, pct in repo.languages.items():
+                 all_languages[lang] = all_languages.get(lang, 0) + pct
+
+     # Normalize percentages
+     if all_languages:
+         total_pct = sum(all_languages.values())
+         all_languages = {k: round(v * 100 / total_pct) for k, v in sorted(all_languages.items(), key=lambda x: -x[1])}
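+     # e.g., repo A = {Python: 80, TypeScript: 20} and repo B = {Python: 50, Go: 50}
+     # sum to {Python: 130, Go: 50, TypeScript: 20}; total_pct=200 gives
+     # {Python: 65, Go: 25, TypeScript: 10}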
+
+     # Find date range across all repos
+     first_dates = [r.first_commit_date for r in repos if r.first_commit_date]
+     last_dates = [r.last_commit_date for r in repos if r.last_commit_date]
+     earliest_date = min(first_dates).isoformat() if first_dates else "Unknown"
+     latest_date = max(last_dates).isoformat() if last_dates else "Unknown"
+
+     # Build metadata header to prepend
+     repos_list = ", ".join(r.name for r in repos)
+     languages_str = ", ".join([f"{k} ({v}%)" for k, v in all_languages.items()]) if all_languages else "Unknown"
+
+     metadata_header = f"""- **Repositories**: {repos_list}
+ - **Total Commits**: {total_commits}
+ - **Languages**: {languages_str}
+ - **Active Period**: {earliest_date} to {latest_date}"""
+
+     profiles_text = "\n\n---\n\n".join([
+         f"## Repository: {rp['name']}\n\n{rp['profile']}"
+         for rp in repo_profiles
+     ])
+
+     system_prompt = """You are creating a unified developer profile from multiple project analyses.
+
+ Combine the insights into a single cohesive profile that:
+ 1. Highlights the breadth of technical skills across projects
+ 2. Identifies common patterns and expertise areas
+ 3. Showcases the most impressive accomplishments WITH the reasoning behind them
+ 4. Maintains specificity - don't generalize away the concrete details
+ 5. Preserves the "why" explanations that demonstrate engineering judgment
+
+ Structure:
+ 1. **Summary**: Overall technical profile (2-3 sentences)
+ 2. **Key Technical Skills (used across these codebases)**: Technologies used across projects, with context on HOW they were used
+ 3. **Notable Projects & Contributions**: One section per major project with key accomplishments. For significant work, include "**Why**:" explanations that show the problem being solved or the motivation behind the decision.
+ 4. **Development Philosophy (evidence-based)**: Patterns that emerge across the work, with evidence-based reasoning (e.g., "Instrument first, optimize with data—introduced timing utilities before optimization to avoid guessing at bottlenecks")"""
+
+     user_prompt = f"""Combine these repository analyses into a unified developer profile:
+
+ {profiles_text}
+
+ Create a cohesive markdown profile that represents the developer's complete body of work, starting with Summary.
+
+ Preserve and highlight the "why" explanations that demonstrate engineering judgment—these show the developer thinks about problems, not just code."""
+
+     # Get model for final synthesis
+     final_synthesis_model = synthesis_model
+     if not final_synthesis_model:
+         llm_config = get_llm_config()
+         final_synthesis_model = llm_config.get("synthesis_model") or DEFAULT_SYNTHESIS_MODEL
+
+     if progress_callback:
+         progress_callback(
+             step="Finalizing",
+             detail="Generating unified developer profile...",
+             repo="all",
+             progress=95.0,
+         )
+
+     response = await client.chat.completions.create(
+         model=final_synthesis_model,
+         messages=[
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt},
+         ],
+         temperature=SYNTHESIS_TEMPERATURE,
+         max_tokens=16000,
+     )
+
+     if progress_callback:
+         progress_callback(
+             step="Complete",
+             detail="Profile ready!",
+             repo="",
+             progress=100.0,
+         )
+
+     llm_content = response.choices[0].message.content or ""
+
+     # Prepend metadata header
+     return f"{metadata_header}\n\n---\n\n{llm_content}"
+