repr-cli 0.1.0-py3-none-any.whl → 0.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repr/__init__.py +1 -1
- repr/api.py +127 -1
- repr/auth.py +66 -2
- repr/cli.py +2143 -663
- repr/config.py +658 -32
- repr/discovery.py +5 -0
- repr/doctor.py +458 -0
- repr/hooks.py +634 -0
- repr/keychain.py +255 -0
- repr/llm.py +506 -0
- repr/openai_analysis.py +92 -21
- repr/privacy.py +333 -0
- repr/storage.py +527 -0
- repr/templates.py +229 -0
- repr/tools.py +202 -0
- repr/ui.py +79 -364
- repr_cli-0.2.1.dist-info/METADATA +263 -0
- repr_cli-0.2.1.dist-info/RECORD +23 -0
- {repr_cli-0.1.0.dist-info → repr_cli-0.2.1.dist-info}/licenses/LICENSE +1 -1
- repr/analyzer.py +0 -915
- repr/highlights.py +0 -712
- repr_cli-0.1.0.dist-info/METADATA +0 -326
- repr_cli-0.1.0.dist-info/RECORD +0 -18
- {repr_cli-0.1.0.dist-info → repr_cli-0.2.1.dist-info}/WHEEL +0 -0
- {repr_cli-0.1.0.dist-info → repr_cli-0.2.1.dist-info}/entry_points.txt +0 -0
- {repr_cli-0.1.0.dist-info → repr_cli-0.2.1.dist-info}/top_level.txt +0 -0
repr/highlights.py
DELETED
@@ -1,712 +0,0 @@
-"""
-Extract resume-worthy highlights from repositories.
-
-Goes beyond stats to identify:
-- What was built (features, systems)
-- Technical challenges solved
-- Integrations implemented
-- Impact and scale indicators
-- Domain expertise demonstrated
-"""
-
-import re
-from collections import Counter, defaultdict
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any
-
-from git import Repo
-
-
-# ============================================================================
-# Feature & Integration Detection
-# ============================================================================
-
-# What they BUILT (features/systems)
-FEATURES = {
-    "authentication": {
-        "patterns": [
-            r"auth", r"login", r"logout", r"signup", r"register",
-            r"oauth", r"jwt", r"session", r"password", r"2fa", r"mfa",
-            r"sso", r"saml", r"ldap", r"token",
-        ],
-        "description": "Authentication system",
-        "skills": ["Security", "OAuth", "JWT"],
-    },
-    "payment": {
-        "patterns": [
-            r"payment", r"checkout", r"billing", r"invoice",
-            r"subscription", r"stripe", r"paypal", r"charge",
-            r"refund", r"transaction", r"pricing",
-        ],
-        "description": "Payment processing",
-        "skills": ["Payment APIs", "PCI compliance"],
-    },
-    "real_time": {
-        "patterns": [
-            r"websocket", r"socket\.io", r"realtime", r"real-time",
-            r"live\s*update", r"push\s*notification", r"sse",
-            r"pubsub", r"broadcast", r"channel",
-        ],
-        "description": "Real-time communication",
-        "skills": ["WebSockets", "Event-driven architecture"],
-    },
-    "search": {
-        "patterns": [
-            r"elasticsearch", r"solr", r"algolia", r"meilisearch",
-            r"full.?text.?search", r"search.?index", r"fuzzy.?search",
-        ],
-        "description": "Search functionality",
-        "skills": ["Search engines", "Full-text search"],
-    },
-    "file_upload": {
-        "patterns": [
-            r"upload", r"s3", r"blob", r"storage", r"presigned",
-            r"multipart", r"file.?handling", r"asset",
-        ],
-        "description": "File upload system",
-        "skills": ["Cloud storage", "File handling"],
-    },
-    "email": {
-        "patterns": [
-            r"sendgrid", r"mailgun", r"ses", r"smtp",
-            r"email.?template", r"newsletter", r"transactional",
-        ],
-        "description": "Email system",
-        "skills": ["Email APIs", "Template systems"],
-    },
-    "notifications": {
-        "patterns": [
-            r"notification", r"push", r"firebase.?cloud", r"fcm",
-            r"apns", r"alert", r"in.?app.?message",
-        ],
-        "description": "Notification system",
-        "skills": ["Push notifications", "Real-time updates"],
-    },
-    "caching": {
-        "patterns": [
-            r"redis", r"memcache", r"cache", r"memoiz",
-            r"ttl", r"invalidat", r"cdn",
-        ],
-        "description": "Caching layer",
-        "skills": ["Redis", "Performance optimization"],
-    },
-    "queue": {
-        "patterns": [
-            r"celery", r"rq", r"bullmq", r"rabbitmq", r"sqs",
-            r"background.?job", r"worker", r"task.?queue", r"async.?task",
-        ],
-        "description": "Background job system",
-        "skills": ["Message queues", "Async processing"],
-    },
-    "api": {
-        "patterns": [
-            r"rest.?api", r"graphql", r"openapi", r"swagger",
-            r"endpoint", r"rate.?limit", r"api.?version",
-        ],
-        "description": "API development",
-        "skills": ["REST API", "API design"],
-    },
-    "database": {
-        "patterns": [
-            r"migration", r"schema", r"model", r"orm",
-            r"query.?optim", r"index", r"foreign.?key",
-            r"transaction", r"rollback",
-        ],
-        "description": "Database design",
-        "skills": ["Database design", "SQL", "ORM"],
-    },
-    "ci_cd": {
-        "patterns": [
-            r"github.?action", r"jenkins", r"circleci", r"travis",
-            r"deploy", r"pipeline", r"ci.?cd", r"docker",
-            r"kubernetes", r"helm", r"terraform",
-        ],
-        "description": "CI/CD pipeline",
-        "skills": ["DevOps", "CI/CD", "Docker"],
-    },
-    "testing": {
-        "patterns": [
-            r"test.?suite", r"unit.?test", r"integration.?test",
-            r"e2e", r"coverage", r"mock", r"fixture",
-        ],
-        "description": "Test suite",
-        "skills": ["Testing", "TDD"],
-    },
-    "analytics": {
-        "patterns": [
-            r"analytics", r"tracking", r"metrics", r"dashboard",
-            r"report", r"insight", r"visualization",
-        ],
-        "description": "Analytics system",
-        "skills": ["Data analysis", "Visualization"],
-    },
-    "ml_ai": {
-        "patterns": [
-            r"machine.?learning", r"neural", r"train", r"model",
-            r"predict", r"tensorflow", r"pytorch", r"sklearn",
-            r"llm", r"gpt", r"embedding", r"vector",
-        ],
-        "description": "ML/AI features",
-        "skills": ["Machine Learning", "AI"],
-    },
-    "scraping": {
-        "patterns": [
-            r"scrape", r"crawl", r"spider", r"beautifulsoup",
-            r"selenium", r"playwright", r"puppeteer",
-        ],
-        "description": "Web scraping",
-        "skills": ["Web scraping", "Data extraction"],
-    },
-    "security": {
-        "patterns": [
-            r"encrypt", r"decrypt", r"hash", r"salt",
-            r"csrf", r"xss", r"sanitiz", r"vulnerability",
-            r"permission", r"role.?based", r"rbac", r"acl",
-        ],
-        "description": "Security implementation",
-        "skills": ["Security", "Encryption"],
-    },
-}
-
-# Third-party integrations
-INTEGRATIONS = {
-    # Payment
-    "Stripe": [r"stripe", r"sk_live", r"pk_live"],
-    "PayPal": [r"paypal", r"braintree"],
-    "Square": [r"square", r"squareup"],
-
-    # Cloud
-    "AWS": [r"aws", r"boto3", r"s3", r"ec2", r"lambda", r"dynamodb", r"sqs", r"sns"],
-    "Google Cloud": [r"gcp", r"google.cloud", r"bigquery", r"firestore"],
-    "Azure": [r"azure", r"microsoft.azure"],
-    "Vercel": [r"vercel", r"@vercel"],
-    "Cloudflare": [r"cloudflare", r"cf-"],
-
-    # Database
-    "PostgreSQL": [r"postgres", r"psycopg", r"pg_"],
-    "MongoDB": [r"mongodb", r"mongoose", r"pymongo"],
-    "Redis": [r"redis", r"ioredis"],
-    "Elasticsearch": [r"elasticsearch", r"elastic.co"],
-
-    # Communication
-    "Twilio": [r"twilio"],
-    "SendGrid": [r"sendgrid"],
-    "Mailgun": [r"mailgun"],
-    "Slack": [r"slack", r"slack-sdk"],
-    "Discord": [r"discord", r"discord.py"],
-
-    # Auth
-    "Auth0": [r"auth0"],
-    "Firebase Auth": [r"firebase.auth", r"firebase/auth"],
-    "Okta": [r"okta"],
-    "Clerk": [r"@clerk"],
-    "Supabase": [r"supabase"],
-
-    # AI/ML
-    "OpenAI": [r"openai", r"gpt-4", r"gpt-3"],
-    "Anthropic": [r"anthropic", r"claude"],
-    "Hugging Face": [r"huggingface", r"transformers"],
-    "LangChain": [r"langchain"],
-
-    # Analytics
-    "Segment": [r"segment", r"analytics.js"],
-    "Mixpanel": [r"mixpanel"],
-    "Amplitude": [r"amplitude"],
-    "PostHog": [r"posthog"],
-
-    # Monitoring
-    "Sentry": [r"sentry", r"@sentry"],
-    "Datadog": [r"datadog", r"dd-trace"],
-    "New Relic": [r"newrelic"],
-
-    # Other
-    "GitHub API": [r"github.api", r"octokit", r"pygithub"],
-    "Shopify": [r"shopify"],
-    "Contentful": [r"contentful"],
-    "Algolia": [r"algolia", r"algoliasearch"],
-}
-
-# Scale/impact indicators
-SCALE_INDICATORS = {
-    "high_traffic": [
-        r"rate.?limit", r"throttl", r"load.?balanc", r"horizontal.?scal",
-        r"replica", r"shard", r"partition", r"concurrent",
-    ],
-    "data_intensive": [
-        r"batch.?process", r"etl", r"pipeline", r"stream",
-        r"million", r"billion", r"terabyte", r"petabyte",
-    ],
-    "performance": [
-        r"optimi[zs]", r"latency", r"throughput", r"benchmark",
-        r"profil", r"memory.?leak", r"garbage.?collect",
-    ],
-    "reliability": [
-        r"fault.?toleran", r"failover", r"redundan", r"backup",
-        r"disaster.?recovery", r"high.?availability", r"sla",
-    ],
-}
-
-
-# ============================================================================
-# Data Classes
-# ============================================================================
-
-@dataclass
-class Highlight:
-    """A resume-worthy highlight."""
-    category: str  # feature, integration, achievement
-    title: str
-    description: str
-    evidence: list[str] = field(default_factory=list)  # file paths, commit messages
-    skills: list[str] = field(default_factory=list)
-    confidence: float = 0.0  # 0-1 how confident we are
-
-
-@dataclass
-class RepoHighlights:
-    """Highlights extracted from a repository."""
-    repo_name: str
-    repo_path: str
-
-    # What was built
-    features: list[Highlight] = field(default_factory=list)
-    integrations: list[Highlight] = field(default_factory=list)
-
-    # Impact
-    scale_indicators: list[str] = field(default_factory=list)
-
-    # From commits
-    key_achievements: list[str] = field(default_factory=list)
-
-    # Domain
-    domain: str = ""  # e-commerce, fintech, healthcare, etc.
-
-    # Summary for LLM
-    summary_context: str = ""
-
-
-# ============================================================================
-# Extraction Functions
-# ============================================================================
-
-def extract_highlights(repo_path: Path) -> RepoHighlights:
-    """
-    Extract resume-worthy highlights from a repository.
-
-    Analyzes:
-    - Code patterns to identify features built
-    - Imports and configs for integrations
-    - Commit messages for achievements
-    - README for project purpose
-    """
-    highlights = RepoHighlights(
-        repo_name=repo_path.name,
-        repo_path=str(repo_path),
-    )
-
-    # Collect all source content for analysis
-    all_content = _collect_source_content(repo_path)
-
-    # Detect features
-    highlights.features = _detect_features(all_content, repo_path)
-
-    # Detect integrations
-    highlights.integrations = _detect_integrations(all_content, repo_path)
-
-    # Detect scale indicators
-    highlights.scale_indicators = _detect_scale(all_content)
-
-    # Analyze commits for achievements
-    highlights.key_achievements = _extract_achievements_from_commits(repo_path)
-
-    # Detect domain
-    highlights.domain = _detect_domain(all_content, repo_path)
-
-    # Build summary context for LLM
-    highlights.summary_context = _build_summary_context(highlights, repo_path)
-
-    return highlights
-
-
-def _collect_source_content(repo_path: Path) -> str:
-    """Collect content from source files for pattern matching."""
-    content_parts = []
-
-    skip_dirs = {
-        ".git", "node_modules", "venv", ".venv", "__pycache__",
-        "dist", "build", ".next", "target", "coverage",
-    }
-
-    extensions = {".py", ".js", ".ts", ".tsx", ".jsx", ".go", ".rs", ".java", ".rb"}
-
-    # Also check config files
-    config_files = [
-        "package.json", "requirements.txt", "pyproject.toml",
-        "Cargo.toml", "go.mod", "Gemfile", "docker-compose.yml",
-        "Dockerfile", ".env.example", "README.md",
-    ]
-
-    for config in config_files:
-        config_path = repo_path / config
-        if config_path.exists():
-            try:
-                content_parts.append(config_path.read_text(errors="ignore"))
-            except Exception:
-                pass
-
-    # Sample source files (limit to avoid memory issues)
-    file_count = 0
-    max_files = 100
-
-    for file_path in repo_path.rglob("*"):
-        if file_count >= max_files:
-            break
-
-        if not file_path.is_file():
-            continue
-
-        parts = file_path.relative_to(repo_path).parts
-        if any(skip in parts for skip in skip_dirs):
-            continue
-
-        if file_path.suffix.lower() in extensions:
-            try:
-                content = file_path.read_text(errors="ignore")
-                # Limit per file
-                content_parts.append(content[:50000])
-                file_count += 1
-            except Exception:
-                pass
-
-    return "\n".join(content_parts)
-
-
-def _detect_features(content: str, repo_path: Path) -> list[Highlight]:
-    """Detect features built based on code patterns."""
-    detected = []
-    content_lower = content.lower()
-
-    for feature_id, feature_info in FEATURES.items():
-        matches = 0
-        evidence = []
-
-        for pattern in feature_info["patterns"]:
-            found = re.findall(pattern, content_lower)
-            if found:
-                matches += len(found)
-                # Find file evidence
-                for f in _find_files_with_pattern(repo_path, pattern):
-                    if f not in evidence:
-                        evidence.append(f)
-
-        if matches >= 3:  # Minimum threshold
-            confidence = min(1.0, matches / 20)  # More matches = higher confidence
-
-            detected.append(Highlight(
-                category="feature",
-                title=feature_info["description"],
-                description=f"Implemented {feature_info['description'].lower()}",
-                evidence=evidence[:5],  # Top 5 files
-                skills=feature_info["skills"],
-                confidence=confidence,
-            ))
-
-    # Sort by confidence
-    detected.sort(key=lambda x: x.confidence, reverse=True)
-
-    return detected[:10]  # Top 10 features
-
-
-def _detect_integrations(content: str, repo_path: Path) -> list[Highlight]:
-    """Detect third-party integrations."""
-    detected = []
-    content_lower = content.lower()
-
-    for integration, patterns in INTEGRATIONS.items():
-        for pattern in patterns:
-            if re.search(pattern, content_lower):
-                evidence = _find_files_with_pattern(repo_path, pattern)
-
-                detected.append(Highlight(
-                    category="integration",
-                    title=integration,
-                    description=f"Integrated {integration}",
-                    evidence=evidence[:3],
-                    skills=[integration],
-                    confidence=0.8,
-                ))
-                break  # Only add once per integration
-
-    return detected
-
-
-def _detect_scale(content: str) -> list[str]:
-    """Detect scale/impact indicators."""
-    indicators = []
-    content_lower = content.lower()
-
-    for indicator_type, patterns in SCALE_INDICATORS.items():
-        for pattern in patterns:
-            if re.search(pattern, content_lower):
-                indicators.append(indicator_type)
-                break
-
-    return indicators
-
-
-def _find_files_with_pattern(repo_path: Path, pattern: str) -> list[str]:
-    """Find files containing a pattern."""
-    files = []
-    skip_dirs = {".git", "node_modules", "venv", ".venv", "__pycache__", "dist", "build"}
-
-    try:
-        regex = re.compile(pattern, re.IGNORECASE)
-    except re.error:
-        return files
-
-    for file_path in repo_path.rglob("*"):
-        if len(files) >= 10:
-            break
-
-        if not file_path.is_file():
-            continue
-
-        parts = file_path.relative_to(repo_path).parts
-        if any(skip in parts for skip in skip_dirs):
-            continue
-
-        try:
-            content = file_path.read_text(errors="ignore")
-            if regex.search(content):
-                files.append(str(file_path.relative_to(repo_path)))
-        except Exception:
-            pass
-
-    return files
-
-
-def _extract_achievements_from_commits(repo_path: Path) -> list[str]:
-    """Extract key achievements from commit messages."""
-    achievements = []
-
-    # Keywords that indicate achievements
-    achievement_patterns = [
-        r"implement(?:ed|s)?\s+(.+)",
-        r"add(?:ed|s)?\s+(.+)",
-        r"create(?:d|s)?\s+(.+)",
-        r"build(?:s)?\s+(.+)",
-        r"introduc(?:e|ed|es)\s+(.+)",
-        r"develop(?:ed|s)?\s+(.+)",
-        r"design(?:ed|s)?\s+(.+)",
-        r"integrat(?:e|ed|es)\s+(.+)",
-        r"migrat(?:e|ed|es)\s+(.+)",
-        r"optimi[zs](?:e|ed|es)\s+(.+)",
-        r"refactor(?:ed|s)?\s+(.+)",
-        r"fix(?:ed|es)?\s+(.+)",
-        r"improv(?:e|ed|es)\s+(.+)",
-    ]
-
-    try:
-        repo = Repo(repo_path)
-
-        # Get commit messages
-        commit_messages = []
-        for commit in repo.iter_commits(max_count=200):
-            msg = commit.message.strip().split("\n")[0]  # First line
-            if len(msg) > 10:  # Skip short messages
-                commit_messages.append(msg)
-
-        # Extract achievements
-        seen = set()
-        for msg in commit_messages:
-            msg_lower = msg.lower()
-
-            # Skip common non-achievement messages
-            if any(skip in msg_lower for skip in [
-                "merge", "update readme", "fix typo", "wip", "temp",
-                "clean up", "formatting", "lint", "bump version",
-            ]):
-                continue
-
-            for pattern in achievement_patterns:
-                match = re.search(pattern, msg_lower)
-                if match:
-                    achievement = match.group(1).strip()
-                    # Clean up
-                    achievement = re.sub(r"\s+", " ", achievement)
-                    achievement = achievement[:100]  # Limit length
-
-                    if achievement and achievement not in seen and len(achievement) > 5:
-                        seen.add(achievement)
-                        achievements.append(msg)  # Use original message
-                        break
-
-    except Exception:
-        pass
-
-    # Return most meaningful achievements
-    # Prioritize longer, more descriptive commits
-    achievements.sort(key=lambda x: len(x), reverse=True)
-
-    return achievements[:15]  # Top 15
-
-
-def _detect_domain(content: str, repo_path: Path) -> str:
-    """Detect the domain/industry of the project."""
-    content_lower = content.lower()
-
-    domains = {
-        "e-commerce": [r"shop", r"cart", r"checkout", r"product", r"inventory", r"order"],
-        "fintech": [r"payment", r"banking", r"transaction", r"wallet", r"financial"],
-        "healthcare": [r"patient", r"medical", r"health", r"doctor", r"appointment"],
-        "education": [r"course", r"student", r"lesson", r"quiz", r"learning"],
-        "social": [r"post", r"comment", r"follow", r"friend", r"feed", r"like"],
-        "productivity": [r"task", r"project", r"calendar", r"reminder", r"schedule"],
-        "media": [r"video", r"stream", r"playlist", r"podcast", r"audio"],
-        "developer-tools": [r"cli", r"sdk", r"api", r"framework", r"library"],
-        "real-estate": [r"property", r"listing", r"agent", r"rent", r"tenant"],
-        "travel": [r"booking", r"hotel", r"flight", r"destination", r"itinerary"],
-        "food": [r"restaurant", r"menu", r"delivery", r"recipe", r"order"],
-        "fitness": [r"workout", r"exercise", r"gym", r"fitness", r"health"],
-        "analytics": [r"dashboard", r"metric", r"report", r"insight", r"chart"],
-    }
-
-    scores = Counter()
-    for domain, patterns in domains.items():
-        for pattern in patterns:
-            matches = len(re.findall(pattern, content_lower))
-            scores[domain] += matches
-
-    if scores:
-        top_domain = scores.most_common(1)[0]
-        if top_domain[1] >= 5:  # Minimum threshold
-            return top_domain[0]
-
-    return ""
-
-
-def _build_summary_context(highlights: RepoHighlights, repo_path: Path) -> str:
-    """Build a summary context for LLM to generate narrative."""
-    parts = []
-
-    # README content
-    readme_content = ""
-    for readme_name in ["README.md", "README.rst", "README.txt", "README"]:
-        readme_path = repo_path / readme_name
-        if readme_path.exists():
-            try:
-                readme_content = readme_path.read_text()[:2000]
-                break
-            except Exception:
-                pass
-
-    if readme_content:
-        parts.append(f"README:\n{readme_content}\n")
-
-    # Features
-    if highlights.features:
-        feature_list = ", ".join(f.title for f in highlights.features[:5])
-        parts.append(f"Features built: {feature_list}")
-
-    # Integrations
-    if highlights.integrations:
-        integration_list = ", ".join(i.title for i in highlights.integrations[:5])
-        parts.append(f"Integrations: {integration_list}")
-
-    # Domain
-    if highlights.domain:
-        parts.append(f"Domain: {highlights.domain}")
-
-    # Scale
-    if highlights.scale_indicators:
-        parts.append(f"Scale indicators: {', '.join(highlights.scale_indicators)}")
-
-    # Key commits
-    if highlights.key_achievements:
-        parts.append(f"Key achievements from commits:\n" + "\n".join(f"- {a}" for a in highlights.key_achievements[:10]))
-
-    return "\n\n".join(parts)
-
-
-# ============================================================================
-# Public API
-# ============================================================================
-
-def get_highlights(repo_path: Path) -> dict[str, Any]:
-    """
-    Get highlights as a dictionary for easy consumption.
-
-    Returns structured data about:
-    - Features built
-    - Integrations used
-    - Technical achievements
-    - Skills demonstrated
-    """
-    highlights = extract_highlights(repo_path)
-
-    # Collect all skills
-    all_skills = set()
-    for f in highlights.features:
-        all_skills.update(f.skills)
-    for i in highlights.integrations:
-        all_skills.update(i.skills)
-
-    return {
-        "features": [
-            {
-                "name": f.title,
-                "description": f.description,
-                "confidence": round(f.confidence, 2),
-                "skills": f.skills,
-                "evidence_files": f.evidence[:3],
-            }
-            for f in highlights.features
-        ],
-        "integrations": [
-            {
-                "name": i.title,
-                "description": i.description,
-            }
-            for i in highlights.integrations
-        ],
-        "achievements": highlights.key_achievements[:10],
-        "domain": highlights.domain,
-        "scale_indicators": highlights.scale_indicators,
-        "skills_demonstrated": sorted(all_skills),
-        "summary_context": highlights.summary_context,
-    }
-
-
-def format_highlights_for_resume(highlights: dict[str, Any]) -> str:
-    """
-    Format highlights as resume bullet points.
-    """
-    bullets = []
-
-    # Features as accomplishments
-    for feature in highlights.get("features", [])[:5]:
-        if feature["confidence"] >= 0.5:
-            skills = ", ".join(feature["skills"][:3]) if feature["skills"] else ""
-            bullet = f"• {feature['description']}"
-            if skills:
-                bullet += f" ({skills})"
-            bullets.append(bullet)
-
-    # Integrations
-    integrations = highlights.get("integrations", [])
-    if integrations:
-        integration_names = [i["name"] for i in integrations[:5]]
-        bullets.append(f"• Integrated with {', '.join(integration_names)}")
-
-    # Scale indicators
-    scale = highlights.get("scale_indicators", [])
-    if "high_traffic" in scale:
-        bullets.append("• Built for high-traffic production environment")
-    if "performance" in scale:
-        bullets.append("• Implemented performance optimizations")
-    if "reliability" in scale:
-        bullets.append("• Designed for high availability and fault tolerance")
-
-    return "\n".join(bullets)
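
For context, a minimal sketch of how this module's public API could be consumed in 0.1.0 (not part of the diff; the repository path below is hypothetical, and the import no longer works on 0.2.1 since the module was deleted):

from pathlib import Path

from repr.highlights import format_highlights_for_resume, get_highlights

# Hypothetical local git checkout to analyze.
repo = Path("~/code/my-project").expanduser()

# Returns a dict with "features", "integrations", "achievements", "domain",
# "scale_indicators", "skills_demonstrated", and "summary_context".
data = get_highlights(repo)
print(data["skills_demonstrated"])

# Renders the dict as resume bullet points (features with confidence >= 0.5,
# integrations, and scale indicators).
print(format_highlights_for_resume(data))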