devtime-ei 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devtime/__init__.py +9 -0
- devtime/ai/__init__.py +0 -0
- devtime/ai/local.py +11 -0
- devtime/ai/prompts.py +24 -0
- devtime/ai/providers.py +41 -0
- devtime/assets/devtimeignore.starter +23 -0
- devtime/cli.py +374 -0
- devtime/config.py +67 -0
- devtime/db/__init__.py +0 -0
- devtime/db/connection.py +16 -0
- devtime/db/migrations.py +114 -0
- devtime/db/repository.py +351 -0
- devtime/db/schema.sql +145 -0
- devtime/fixtures/__init__.py +0 -0
- devtime/fixtures/assertions.py +51 -0
- devtime/fixtures/loader.py +52 -0
- devtime/fixtures/runner.py +73 -0
- devtime/intelligence/__init__.py +0 -0
- devtime/intelligence/claims.py +235 -0
- devtime/intelligence/concepts.py +483 -0
- devtime/intelligence/context_pack.py +276 -0
- devtime/intelligence/evidence.py +127 -0
- devtime/intelligence/lineage.py +21 -0
- devtime/intelligence/risk.py +267 -0
- devtime/intelligence/scoring.py +99 -0
- devtime/mcp/__init__.py +0 -0
- devtime/mcp/schemas.py +39 -0
- devtime/mcp/server.py +35 -0
- devtime/mcp/tools.py +90 -0
- devtime/output/__init__.py +0 -0
- devtime/output/json_export.py +50 -0
- devtime/output/markdown.py +50 -0
- devtime/output/terminal.py +208 -0
- devtime/paths.py +40 -0
- devtime/privacy.py +96 -0
- devtime/scanner/__init__.py +0 -0
- devtime/scanner/extractors/__init__.py +0 -0
- devtime/scanner/extractors/base.py +83 -0
- devtime/scanner/extractors/config_files.py +41 -0
- devtime/scanner/extractors/docs.py +35 -0
- devtime/scanner/extractors/nextjs.py +82 -0
- devtime/scanner/extractors/python.py +81 -0
- devtime/scanner/extractors/tests.py +61 -0
- devtime/scanner/extractors/typescript.py +99 -0
- devtime/scanner/file_walker.py +96 -0
- devtime/scanner/ignore.py +96 -0
- devtime/scanner/language.py +36 -0
- devtime/scanner/signals.py +252 -0
- devtime_ei-0.1.0.dist-info/METADATA +289 -0
- devtime_ei-0.1.0.dist-info/RECORD +54 -0
- devtime_ei-0.1.0.dist-info/WHEEL +5 -0
- devtime_ei-0.1.0.dist-info/entry_points.txt +2 -0
- devtime_ei-0.1.0.dist-info/licenses/LICENSE +201 -0
- devtime_ei-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
"""Concept detection (Builder Edition, Chapter 9).
|
|
2
|
+
|
|
3
|
+
Concepts are stable units of software meaning. V0 detection is pragmatic and
|
|
4
|
+
pattern-based: match strong signals against known templates, merge overlaps, and
|
|
5
|
+
refuse generic concept names.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
|
|
13
|
+
from devtime.scanner.extractors.base import Signal
|
|
14
|
+
|
|
15
|
+
# Known concept templates (Chapter 9), keyed by concept slug. Each template holds:
#   display_name - human-readable concept name
#   kind         - concept category
#   names        - keyword hints, matched as substrings of a signal's text
#   signals      - signal kinds that may support the concept
#   min_score    - minimum match score needed before the concept is emitted
CONCEPT_TEMPLATES: dict[str, dict] = {
    "authentication": {
        "display_name": "Authentication",
        "kind": "system_concept",
        "names": ["auth", "authentication", "login", "session", "jwt", "token"],
        "signals": ["auth_dependency", "middleware", "route", "test", "config", "token_usage"],
        "min_score": 0.4,
    },
    "billing_webhooks": {
        "display_name": "Billing Webhooks",
        "kind": "system_concept",
        "names": ["stripe", "webhook", "billing", "subscription", "invoice"],
        "signals": ["route", "webhook_signature_verification", "dependency", "test"],
        "min_score": 0.4,
    },
    "background_jobs": {
        "display_name": "Background Jobs",
        "kind": "system_concept",
        "names": ["queue", "worker", "celery", "bullmq", "sidekiq", "bgtask",
                  "bg_task", "bg-task", "bgtasks", "cron job"],
        "signals": ["background_job", "queue", "dependency", "config", "test"],
        "min_score": 0.4,
    },
    "data_export": {
        "display_name": "Data Export",
        "kind": "system_concept",
        "names": ["export", "download", "csv", "report"],
        "signals": ["route", "handler", "test", "schema"],
        "min_score": 0.4,
    },
    "admin_permissions": {
        "display_name": "Admin Permissions",
        "kind": "system_concept",
        "names": ["admin", "permission", "role", "rbac", "authorize"],
        "signals": ["middleware", "route", "test", "config"],
        "min_score": 0.4,
    },
    "file_uploads": {
        "display_name": "File Uploads",
        "kind": "system_concept",
        "names": ["upload", "multipart", "s3", "attachment", "file"],
        # "upload_endpoint" must be listed here: only listed (or weak) kinds can
        # reach the matched-signal set, and both ANCHOR_KINDS and the file_uploads
        # anchor check look for this kind. Without it that check is unreachable.
        "signals": ["route", "handler", "dependency", "test", "upload_endpoint"],
        "min_score": 0.4,
    },
}
|
|
62
|
+
|
|
63
|
+
# Concept names too generic to be meaningful (Chapter 9); candidates whose
# lower-cased display name appears here are discarded.
FORBIDDEN_NAMES = {
    "utils",
    "helpers",
    "api",
    "database",
    "files",
    "code",
    "services",
    "components",
}


# Signal kinds that demonstrate behavior rather than mere presence.
ANCHOR_KINDS = {
    "route", "auth_dependency", "middleware", "webhook_signature_verification",
    "background_job", "token_usage", "queue", "upload_endpoint",
}
|
|
80
|
+
|
|
81
|
+
# Evidence vocabulary for the "Billing Webhooks" concept.
# Reality Hardening (v0.0.2): billing evidence has to be *file-local* to webhook
# evidence. Trust Repair (v0.0.6): a bare "subscription" is NOT billing (think
# calendar subscriptions) - a payment provider or an explicit billing/payment
# term is required.
BILLING_PROVIDER_TOKENS = (
    "stripe",
    "paypal",
    "braintree",
    "chargebee",
    "lemonsqueezy",
    "paddle",
    "razorpay",
)
# Explicit payment/billing vocabulary (Evidence Precision v0.0.7). Plain
# "subscription" and "customer" are deliberately absent: they also show up in
# calendar/CRM code.
BILLING_TERM_TOKENS = (
    "billing",
    "invoice",
    "invoices",
    "checkout",
    "payment",
    "payments",
    "charge",
    "charges",
    "customer.subscription",
)
BILLING_TOKENS = (*BILLING_PROVIDER_TOKENS, *BILLING_TERM_TOKENS)
WEBHOOK_TOKENS = ("webhook",)

# Contexts in which a webhook route is clearly NOT a payment webhook; only an
# explicit payment provider overrides them.
# NOTE(review): the billing checks in this file test these with `in` against the
# signal text, i.e. as substrings - short entries such as "ses" therefore also hit
# words like "sessions". Confirm that is intended.
NEGATIVE_BILLING_CONTEXTS = (
    "calendar",
    "credential",
    "connector",
    "monitor",
    "scheduler",
    "cron",
    "webhooktrigger",
    "webhook-trigger",
    "webhook_trigger",
    "github",
    "fireflies",
    "recall",
    "resend",
    "ses",
    "oauth",
    "cleanup",
)
|
|
104
|
+
|
|
105
|
+
# Unambiguous authentication vocabulary (Evidence Precision v0.0.7). A weak signal
# (test/config/dependency) is accepted as Authentication evidence only when it
# contains one of these. Bare "auth"/"token"/"session"/"signing" and
# "nextauth_url" are left out on purpose: the real NextAuth handler is a route,
# not a URL constant in a permalink test.
STRONG_AUTH_TERMS = (
    # sign-in / sign-out flows
    "login", "log in", "logout", "log out", "signin", "sign in", "sign-in",
    "register",
    # credentials and tokens
    "oauth", "bearer", "cookie", "password",
    "access token", "access_token", "accesstoken", "jwt",
    # explicit authentication wording
    "authenticate", "authentication", "auth middleware",
    "api key", "api_key", "apikey",
    # session lifecycle and request forgery protection
    "session creation", "session verification", "csrf",
)
|
|
116
|
+
|
|
117
|
+
# Employment / person taxonomy; text containing any of these must never be read
# as evidence for Background Jobs.
EMPLOYMENT_NEG_TOKENS = (
    "job title", "job-title", "jobtitle", "job_title",
    "job role", "job-role", "jobrole", "job_role",
    "job class", "job-class", "job_class",
    "job sub-role", "sub-role",
    "employment", "occupation", "position title",
    "role options", "title options", "role-options", "title-options",
)

# Job-execution dependencies that are enough, on their own, to justify a
# Background Jobs concept.
JOB_EXECUTION_DEPS = (
    "celery",
    "bullmq",
    "sidekiq",
    "dramatiq",
    "rq",
    "kombu",
    "bull",
    "agenda",
    "sqs",
    "kafka",
    "rabbitmq",
    "resque",
)
# Dependencies related to file uploads.
UPLOAD_DEPS = (
    "multer",
    "busboy",
    "formidable",
    "multipart",
    "minio",
)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass
class ConceptCandidate:
    """A concept proposed by template matching, together with its supporting signals."""

    slug: str  # key of the template in CONCEPT_TEMPLATES that produced this candidate
    name: str  # the template's display_name
    kind: str  # the template's kind
    confidence: float  # match score; detect_concepts caps it at 0.45 when weak_only
    signals: list[Signal] = field(default_factory=list)  # evidence kept after filtering
    weak_only: bool = False  # supported only by presence evidence, not behavior
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _slugify(name: str) -> str:
    """Lower-case *name* and turn each run of non-alphanumerics into one hyphen.

    Leading and trailing hyphens are trimmed from the result.
    """
    lowered = name.lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    return hyphenated.strip("-")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _signal_haystack(s: Signal) -> str:
    """Build the lower-cased search text for a signal.

    The text is the signal's name, value, relative file path and every metadata
    value, in that order, each converted with ``str`` and joined by single
    spaces. ``None`` entries are skipped.
    """
    fields = [s.name, s.value, s.file_rel_path]
    fields.extend(s.metadata.values())
    parts = [str(item).lower() for item in fields if item is not None]
    return " ".join(parts)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def signal_matches_template(s: Signal, template: dict) -> bool:
    """Decide whether signal *s* counts as evidence for *template*.

    A signal matches when one of the following holds:

    * its kind is listed in ``template["signals"]`` and its text mentions one of
      the template's keyword hints (a generic kind such as "route" needs the
      name hit);
    * its kind is listed and is one of the strong, concept-specific kinds, which
      count on their own;
    * it is a weak kind (dependency/config/doc/decision) whose text mentions one
      of the template's keyword hints.
    """
    strong_kinds = {
        "auth_dependency",
        "webhook_signature_verification",
        "background_job",
        "token_usage",
    }
    keyword_only_kinds = {"dependency", "config", "doc", "decision"}

    listed = s.kind in template["signals"]
    if not listed and s.kind not in keyword_only_kinds:
        return False

    haystack = _signal_haystack(s)
    if any(hint in haystack for hint in template["names"]):
        return True
    # No keyword hit: only a listed strong kind still qualifies.
    return listed and s.kind in strong_kinds
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def score_template_match(template: dict, matched: list[Signal]) -> float:
    """Score how well *matched* supports a concept, in the range 0.0-0.99.

    The score weights two things equally: how many distinct signal kinds are
    present (saturating at four) and the mean confidence of the signals. Raw
    signal count does not raise it (detection is correlation, not magic). The
    result is capped at 0.99 and rounded to two decimals; an empty *matched*
    scores 0.0. *template* is accepted but not consulted.
    """
    if not matched:
        return 0.0
    distinct_kinds = len({sig.kind for sig in matched})
    diversity = min(distinct_kinds, 4) / 4.0
    mean_confidence = sum(sig.confidence for sig in matched) / len(matched)
    raw = 0.5 * diversity + 0.5 * mean_confidence
    capped = min(raw, 0.99)
    return round(capped, 2)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _merge_overlapping(candidates: list[ConceptCandidate]) -> list[ConceptCandidate]:
    """Keep one candidate per slug: the one with the highest confidence.

    On a confidence tie the earlier candidate wins. The result is ordered by the
    first appearance of each slug.
    """
    best: dict[str, ConceptCandidate] = {}
    for candidate in candidates:
        current = best.get(candidate.slug)
        if current is not None and not candidate.confidence > current.confidence:
            continue
        best[candidate.slug] = candidate
    return list(best.values())
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _remove_generic(candidates: list[ConceptCandidate]) -> list[ConceptCandidate]:
    """Return the candidates whose lower-cased name is not in FORBIDDEN_NAMES."""
    kept = []
    for candidate in candidates:
        if candidate.name.lower() in FORBIDDEN_NAMES:
            continue
        kept.append(candidate)
    return kept
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _meaningful_signals(matched: list[Signal]) -> list[Signal]:
    """Return the signals that may legitimately define a concept.

    Doc signals are dropped, as are test signals whose metadata carries a truthy
    "e2e" entry. Everything else is kept in its original order.
    """
    return [
        sig
        for sig in matched
        if sig.kind != "doc" and not (sig.kind == "test" and sig.metadata.get("e2e"))
    ]
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _has_concept_anchor(slug: str, matched: list[Signal]) -> bool:
    """Check that *matched* holds a behavior anchor fitting the concept *slug*.

    Word-sense protection (Trust Repair v0.0.6): a concept is emitted only when at
    least one signal really demonstrates its behavior, rather than a coincidental
    keyword (job *title*, avatar *URL*, *session_id* trace, model *download*, ...).

    Slugs without a dedicated rule here (billing_webhooks is gated separately by
    _passes_billing_gate) always pass.
    """
    present_kinds = {sig.kind for sig in matched}

    def mentions(sig: Signal, tokens) -> bool:
        text = _signal_haystack(sig)
        return any(tok in text for tok in tokens)

    def dependency_mentions(tokens) -> bool:
        return any(sig.kind == "dependency" and mentions(sig, tokens) for sig in matched)

    if slug == "background_jobs":
        if present_kinds & {"background_job", "queue"}:
            return True
        if dependency_mentions(JOB_EXECUTION_DEPS):
            return True
        # Otherwise accept a direct background-task reference (path or import
        # mentioning bg_tasks/celery/etc.), ignoring employment taxonomy.
        job_terms = ("bgtask", "bg_task", "bg-task", "bgtasks",
                     "celery", "sidekiq", "bullmq", "worker",
                     "queue", "cron job", "scheduler")
        for sig in matched:
            text = _signal_haystack(sig)
            if not _is_employment(text) and any(term in text for term in job_terms):
                return True
        return False

    if slug == "file_uploads":
        if "upload_endpoint" in present_kinds or dependency_mentions(UPLOAD_DEPS):
            return True
        # A route whose own path/handler is about uploading.
        return any(sig.kind == "route" and mentions(sig, ("upload",)) for sig in matched)

    if slug == "data_export":
        # Tell user-data export apart from model/artifact/dependency downloads.
        export_terms = ("export", "csv", "report", "backup")
        artifact_terms = ("model", "artifact", "asset", "package", "dependency",
                          "plugin", "binary", "weights", "checkpoint")
        route_texts = (_signal_haystack(sig) for sig in matched if sig.kind == "route")
        return any(
            any(term in text for term in export_terms)
            or ("download" in text and not any(term in text for term in artifact_terms))
            for text in route_texts
        )

    if slug == "admin_permissions":
        admin_terms = ("admin", "superuser", "rbac", "owner", "role", "authorize")
        return any(
            sig.kind in ("middleware", "route") and mentions(sig, admin_terms)
            for sig in matched
        )

    if slug == "authentication":
        if present_kinds & {"auth_dependency", "token_usage"}:
            return True
        if any(sig.kind == "middleware" for sig in matched):
            return True
        auth_route_terms = ("login", "logout", "signin", "sign-in", "/auth", "oauth",
                            "token", "register", "password", "session/")
        return any(
            sig.kind == "route" and mentions(sig, auth_route_terms) for sig in matched
        )

    # billing_webhooks is gated separately by _passes_billing_gate.
    return True
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _is_employment(hay: str) -> bool:
    """Return True when *hay* contains any employment/person taxonomy token."""
    for token in EMPLOYMENT_NEG_TOKENS:
        if token in hay:
            return True
    return False
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _passes_billing_gate(slug: str, matched: list[Signal]) -> bool:
    """Gate for Billing Webhooks: webhook evidence and payment evidence must be
    local to each other (found in the same file).

    Evidence Precision (v0.0.7): a webhook in a negative context (calendar,
    credential, connector, monitor, scheduler, cron, generic trigger) is NOT
    billing unless an explicit payment provider (Stripe/PayPal/...) is local.
    "subscription" and "customer" alone do not count.

    Every slug other than "billing_webhooks" passes unconditionally.
    """
    if slug != "billing_webhooks":
        return True

    # A provider signature-verification handler (Stripe constructEvent, etc.) is
    # already a local payment-provider signal.
    for sig in matched:
        if sig.kind == "webhook_signature_verification":
            return True

    # Group the evidence per file so tokens are only combined within one file.
    grouped: dict[str, list[Signal]] = {}
    for sig in matched:
        grouped.setdefault(sig.file_rel_path, []).append(sig)

    def contains_any(text: str, tokens) -> bool:
        return any(tok in text for tok in tokens)

    for path, group in grouped.items():
        text = path.lower() + " " + " ".join(_signal_haystack(sig) for sig in group)
        if not contains_any(text, WEBHOOK_TOKENS):
            continue
        # An explicit payment provider always qualifies.
        if contains_any(text, BILLING_PROVIDER_TOKENS):
            return True
        # A payment/billing term qualifies only outside a negative context.
        if contains_any(text, BILLING_TERM_TOKENS) and not contains_any(
            text, NEGATIVE_BILLING_CONTEXTS
        ):
            return True
    return False
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _is_false_sense(slug: str, s: Signal) -> bool:
    """Return True when *s* matches concept *slug* only via a misleading keyword
    and should be dropped (Evidence Precision v0.0.7).

    Real behavior signals are never dropped. Slugs without a rule here never
    drop anything.
    """
    text = _signal_haystack(s)

    def text_has(tokens) -> bool:
        return any(tok in text for tok in tokens)

    if slug == "background_jobs":
        return _is_employment(text)

    if slug == "authentication":
        # Real auth behavior kinds are headline evidence: always keep them.
        if s.kind in ("auth_dependency", "middleware", "token_usage"):
            return False
        # A route only counts when it is genuinely an auth route - a `[token]`
        # path segment on an upload/file route is not authentication.
        if s.kind == "route":
            return not _is_auth_route(text)
        # Weak kinds (test/config/dependency/doc) are judged by FILE PATH, so a
        # passing mention of "auth" in storage/signing code cannot leak in
        # (s3SigningDiagnostics.test.ts is storage/signing, not authentication).
        path = s.file_rel_path.lower()
        if any(domain in path for domain in AUTH_NEGATIVE_DOMAIN):
            return True
        path_is_auth = any(term in path for term in AUTH_POSITIVE_PATH_TERMS)
        return not path_is_auth and not text_has(STRONG_AUTH_TERMS)

    if slug == "billing_webhooks":
        # A payment-provider signature handler, or any explicit provider mention,
        # is always real billing evidence.
        if s.kind == "webhook_signature_verification" or text_has(BILLING_PROVIDER_TOKENS):
            return False
        # Calendar/credential/connector/cron/generic-trigger contexts without a
        # local payment provider are NOT billing evidence (Codex blocker 1 /
        # Cal.com).
        if text_has(NEGATIVE_BILLING_CONTEXTS):
            return True
        if text_has(BILLING_TERM_TOKENS):
            return False
        # No payment term at all: a bare webhook or subscription is misleading.
        return "webhook" in text or "subscription" in text

    return False
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# Path vocabulary for weak Authentication signals, where the file path decides.
# Files about storage/signing/upload/diagnostics are not authentication even when
# their content mentions "auth".
AUTH_NEGATIVE_DOMAIN = (
    "s3",
    "signing",
    "signed",
    "storage",
    "bucket",
    "minio",
    "cdn",
    "upload",
    "diagnostic",
    "billing",
    "calendar",
    "webhook",
    "export",
    "avatar",
    "image",
    "media",
    "trace",
    "permalink",
    "monitor",
    "analytics",
    "telemetry",
)
# Path fragments that mark a file as authentication-related.
AUTH_POSITIVE_PATH_TERMS = (
    "auth",
    "login",
    "logout",
    "signin",
    "signup",
    "oauth",
    "nextauth",
    "session",
    "password",
    "mfa",
    "2fa",
    "/sso",
    "saml",
    "credential",
)

# Route text fragments that identify a genuine authentication route.
_AUTH_ROUTE_POSITIVE = (
    "/auth",
    "login",
    "logout",
    "signin",
    "sign-in",
    "signup",
    "sign-up",
    "oauth",
    "register",
    "password",
    "nextauth",
    "/session",
    "forgot",
    "reset-password",
    "2fa",
    "mfa",
    "/sso",
    "saml",
    "verify-email",
    "magic-link",
)
# Domains in which a bare token/jwt route segment is not authentication.
_AUTH_ROUTE_NEGATIVE_DOMAIN = (
    "upload",
    "album",
    "file",
    "avatar",
    "export",
    "download",
    "calendar",
    "billing",
    "webhook",
    "invoice",
    "image",
    "media",
    "asset",
)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _is_auth_route(hay: str) -> bool:
    """Return True when route text *hay* describes a genuine authentication route.

    Any positive auth fragment qualifies immediately. Failing that, a bare
    "token"/"jwt" mention qualifies only when no clearly non-auth domain is
    present (e.g. /app-upload/[token] is an upload route, not authentication).
    """
    for fragment in _AUTH_ROUTE_POSITIVE:
        if fragment in hay:
            return True
    if "token" not in hay and "jwt" not in hay:
        return False
    return all(domain not in hay for domain in _AUTH_ROUTE_NEGATIVE_DOMAIN)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _sense_filter(slug: str, signals: list[Signal]) -> list[Signal]:
    """Return *signals* minus those _is_false_sense flags for concept *slug*."""
    kept = []
    for sig in signals:
        if _is_false_sense(slug, sig):
            continue
        kept.append(sig)
    return kept
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def detect_concepts(signals: list[Signal]) -> list[ConceptCandidate]:
    """Detect concepts supported by *signals*, most confident first.

    Each template in CONCEPT_TEMPLATES is tried in turn and emitted only if its
    evidence survives every gate below and reaches the template's min_score.
    """
    found: list[ConceptCandidate] = []
    for slug, template in CONCEPT_TEMPLATES.items():
        raw_matches = [sig for sig in signals if signal_matches_template(sig, template)]
        if not raw_matches:
            continue

        # Gate 1: a concept cannot rest on e2e specs or docs alone.
        # Gate 1b: strip word-sense pollution (session_id traces, NEXTAUTH_URL,
        # employment job-title taxonomy, ...) so it never becomes evidence.
        evidence = _sense_filter(slug, _meaningful_signals(raw_matches))
        if not evidence:
            continue

        # Gate 2: "Billing" Webhooks needs billing evidence local to webhook
        # evidence. Only the cleaned evidence counts - an e2e spec under a path
        # like tests-e2e/specs/billing.e2e.spec.ts must not satisfy this gate.
        # Gate 3: a concept-appropriate behavior anchor is required; a
        # coincidental keyword (job title, avatar URL, session_id trace) is not
        # enough to emit the concept.
        if not (_passes_billing_gate(slug, evidence) and _has_concept_anchor(slug, evidence)):
            continue

        # Scoring uses the cleaned evidence only, so word-sense pollution never
        # inflates confidence or drives headline evidence.
        confidence = score_template_match(template, evidence)
        if confidence < template["min_score"]:
            continue

        # Presence evidence (deps/config) without any anchor kind is weak-only
        # and has its confidence capped.
        has_behavior = any(sig.kind in ANCHOR_KINDS for sig in evidence)
        if not has_behavior:
            confidence = min(confidence, 0.45)

        found.append(
            ConceptCandidate(
                slug=slug,
                name=template["display_name"],
                kind=template["kind"],
                confidence=confidence,
                signals=evidence,
                weak_only=not has_behavior,
            )
        )

    ranked = _remove_generic(_merge_overlapping(found))
    ranked.sort(key=lambda cand: cand.confidence, reverse=True)
    return ranked
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def confidence_label(score: float) -> str:
    """Map a confidence score to "high" (>= 0.75), "medium" (>= 0.5) or "low"."""
    for threshold, label in ((0.75, "high"), (0.5, "medium")):
        if score >= threshold:
            return label
    return "low"
|