delimit-cli 4.1.7 → 4.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,624 +0,0 @@
1
- """
2
- Spec Health Score Engine.
3
-
4
- Scores an OpenAPI spec on five dimensions (0-100 each):
5
- - completeness: endpoints with descriptions, examples, response schemas
6
- - security: auth schemes, HTTPS, no PII patterns
7
- - consistency: naming convention uniformity, response structure patterns
8
- - documentation: info metadata, contact, license, tag descriptions
9
- - best_practices: $ref reuse, schema depth, proper HTTP methods
10
-
11
- Returns an overall weighted score and letter grade (A-F).
12
- """
13
-
14
- import re
15
- from typing import Any, Dict, List, Optional, Set, Tuple
16
-
17
-
18
# Relative weight of each dimension in the overall score (the values sum to 1.0).
DIMENSION_WEIGHTS = {
    "completeness": 0.30,
    "security": 0.20,
    "consistency": 0.20,
    "documentation": 0.15,
    "best_practices": 0.15,
}

# PII patterns to flag
PII_PATTERNS = [
    re.compile(r"\b\d{3}-\d{2}-\d{4}\b"),  # SSN
    re.compile(r"\b\d{16}\b"),  # Credit card (simple)
    # Email (excludes example.com). The TLD class is [A-Za-z]: inside a
    # character class "|" is a literal pipe, not alternation, so it must not
    # appear there.
    re.compile(r"\b[A-Za-z0-9._%+-]+@(?!example\.com\b)[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
    re.compile(r"password\s*[:=]\s*['\"][^'\"]+['\"]", re.IGNORECASE),  # Hardcoded passwords
]

# HTTP methods that are standard for REST
STANDARD_METHODS = {"get", "post", "put", "patch", "delete", "head", "options"}

# Methods that should not have request bodies per HTTP semantics
NO_BODY_METHODS = {"get", "head", "delete"}
40
-
41
-
42
- def _letter_grade(score: float) -> str:
43
- """Convert a 0-100 score to a letter grade."""
44
- if score >= 90:
45
- return "A"
46
- elif score >= 80:
47
- return "B"
48
- elif score >= 70:
49
- return "C"
50
- elif score >= 60:
51
- return "D"
52
- else:
53
- return "F"
54
-
55
-
56
- def _get_all_operations(spec: Dict[str, Any]) -> List[Dict[str, Any]]:
57
- """Extract all operations from the spec with their path and method."""
58
- ops = []
59
- paths = spec.get("paths") or {}
60
- for path, path_obj in paths.items():
61
- if not isinstance(path_obj, dict):
62
- continue
63
- for method in STANDARD_METHODS:
64
- if method in path_obj and isinstance(path_obj[method], dict):
65
- ops.append({
66
- "path": path,
67
- "method": method,
68
- "operation": path_obj[method],
69
- })
70
- return ops
71
-
72
-
73
- def _count_refs(obj: Any, depth: int = 0) -> int:
74
- """Count $ref usages in an object tree."""
75
- if depth > 50:
76
- return 0
77
- count = 0
78
- if isinstance(obj, dict):
79
- if "$ref" in obj:
80
- count += 1
81
- for v in obj.values():
82
- count += _count_refs(v, depth + 1)
83
- elif isinstance(obj, list):
84
- for item in obj:
85
- count += _count_refs(item, depth + 1)
86
- return count
87
-
88
-
89
- def _max_inline_depth(obj: Any, current: int = 0, limit: int = 20) -> int:
90
- """Find the maximum nesting depth of inline (non-$ref) schemas."""
91
- if current > limit or not isinstance(obj, dict):
92
- return current
93
- if "$ref" in obj:
94
- return current # refs don't count as inline depth
95
- max_d = current
96
- # Check properties (object nesting)
97
- props = obj.get("properties", {})
98
- if isinstance(props, dict):
99
- for v in props.values():
100
- if isinstance(v, dict) and "$ref" not in v:
101
- d = _max_inline_depth(v, current + 1, limit)
102
- max_d = max(max_d, d)
103
- # Check items (array nesting)
104
- items = obj.get("items")
105
- if isinstance(items, dict) and "$ref" not in items:
106
- d = _max_inline_depth(items, current + 1, limit)
107
- max_d = max(max_d, d)
108
- # Check additionalProperties
109
- addl = obj.get("additionalProperties")
110
- if isinstance(addl, dict) and "$ref" not in addl:
111
- d = _max_inline_depth(addl, current + 1, limit)
112
- max_d = max(max_d, d)
113
- return max_d
114
-
115
-
116
- def _extract_path_segments(path: str) -> List[str]:
117
- """Extract non-parameter segments from a path like /users/{id}/posts."""
118
- segments = []
119
- for part in path.strip("/").split("/"):
120
- if part and not part.startswith("{"):
121
- segments.append(part)
122
- return segments
123
-
124
-
125
- def _detect_naming_style(name: str) -> Optional[str]:
126
- """Detect if a name is camelCase, snake_case, kebab-case, or PascalCase."""
127
- if "_" in name:
128
- return "snake_case"
129
- if "-" in name:
130
- return "kebab-case"
131
- if name and name[0].isupper() and any(c.islower() for c in name):
132
- return "PascalCase"
133
- if name and name[0].islower() and any(c.isupper() for c in name):
134
- return "camelCase"
135
- return None # single word or ambiguous
136
-
137
-
138
def _media_objects(holder: Any) -> List[Dict[str, Any]]:
    """Return the dict-valued media type objects under ``holder["content"]``."""
    if not isinstance(holder, dict):
        return []
    content = holder.get("content")
    if not isinstance(content, dict):
        return []
    return [media for media in content.values() if isinstance(media, dict)]


def _has_response_schema(responses: Dict[str, Any]) -> bool:
    """Tell whether at least one response declares a schema."""
    for response in responses.values():
        if not isinstance(response, dict):
            continue
        # Old-style: the schema sits directly on the response object.
        if "schema" in response:
            return True
        if any("schema" in media for media in _media_objects(response)):
            return True
    return False


def _has_any_example(op: Dict[str, Any], responses: Dict[str, Any]) -> bool:
    """Tell whether an operation has an example on a parameter, request body or response."""
    parameters = op.get("parameters")
    if not isinstance(parameters, list):
        # Covers a missing key as well as an explicit null.
        parameters = []
    for param in parameters:
        if not isinstance(param, dict):
            continue
        if "example" in param or "examples" in param:
            return True
        schema = param.get("schema", {})
        if isinstance(schema, dict) and "example" in schema:
            return True

    holders = [op.get("requestBody")]
    holders.extend(responses.values())
    return any(
        "example" in media or "examples" in media
        for holder in holders
        for media in _media_objects(holder)
    )


def _missing_summary(labels: List[str], total: int, action: str, lacking: str) -> str:
    """Build one recommendation: name up to three endpoints, otherwise give a count."""
    if len(labels) <= 3:
        return f"{action}: {', '.join(labels)}"
    return f"{len(labels)} of {total} endpoints lack {lacking}"


def score_completeness(spec: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score completeness: descriptions, examples, response schemas.

    Each operation is checked three times: it has a description or summary,
    at least one response declares a schema, and an example exists somewhere
    on it. The score is the percentage of checks passed.

    Args:
        spec: Parsed OpenAPI/Swagger document.

    Returns:
        ``(score, recommendations)`` with ``score`` in 0-100. A spec without
        operations scores 0 with a single recommendation.
    """
    ops = _get_all_operations(spec)
    if not ops:
        return 0, ["No endpoints found in spec"]

    without_description: List[str] = []
    without_response_schema: List[str] = []
    without_examples: List[str] = []

    for op_info in ops:
        op = op_info["operation"]
        label = f"{op_info['method'].upper()} {op_info['path']}"

        responses = op.get("responses")
        if not isinstance(responses, dict):
            # A missing, null or malformed responses block counts as "no responses".
            responses = {}

        if not (op.get("description") or op.get("summary")):
            without_description.append(label)
        if not _has_response_schema(responses):
            without_response_schema.append(label)
        if not _has_any_example(op, responses):
            without_examples.append(label)

    recommendations: List[str] = []
    total = len(ops)
    if without_description:
        recommendations.append(_missing_summary(
            without_description, total, "Add description/summary to", "description/summary"))
    if without_response_schema:
        recommendations.append(_missing_summary(
            without_response_schema, total, "Add response schema to", "response schemas"))
    if without_examples:
        recommendations.append(_missing_summary(
            without_examples, total, "Add examples to", "examples"))

    # Three checks per operation; every label recorded above is one failed check.
    total_checks = 3 * total
    failed_checks = len(without_description) + len(without_response_schema) + len(without_examples)
    passed_checks = total_checks - failed_checks
    score = round((passed_checks / total_checks) * 100)
    return score, recommendations
240
-
241
-
242
def score_security(spec: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score security: auth schemes, HTTPS, PII patterns.

    Four checks share a 100-point budget:
      * security schemes are declared (30),
      * a global ``security`` requirement is present (20),
      * no server URL uses plain HTTP (25; 10 when no servers are listed),
      * no PII-looking text appears anywhere in the spec (25).

    Args:
        spec: Parsed OpenAPI/Swagger document.

    Returns:
        ``(score, recommendations)`` with ``score`` in 0-100.
    """
    recommendations: List[str] = []
    points = 0
    max_points = 0

    # Check: security schemes defined
    max_points += 30
    components = spec.get("components")
    if not isinstance(components, dict):
        components = {}
    # OpenAPI 3 keeps schemes under components.securitySchemes; Swagger 2.0
    # documents declare them in a top-level securityDefinitions object.
    if components.get("securitySchemes") or spec.get("securityDefinitions"):
        points += 30
    else:
        recommendations.append("Define securitySchemes in components (e.g., bearerAuth, apiKey, oauth2)")

    # Check: global security applied
    max_points += 20
    global_security = spec.get("security")
    if isinstance(global_security, list) and global_security:
        points += 20
    else:
        recommendations.append("Add global security requirement (e.g., security: [bearerAuth: []])")

    # Check: server URLs use HTTPS
    max_points += 25
    servers = spec.get("servers") or []
    if not servers:
        # No servers defined -- partial credit (relative URLs are fine)
        points += 10
        recommendations.append("Define server URLs with HTTPS")
    else:
        # URI schemes are case-insensitive, so compare on the lowercased URL.
        uses_plain_http = any(
            isinstance(server, dict)
            and isinstance(server.get("url"), str)
            and server["url"].lower().startswith("http://")
            for server in servers
        )
        if uses_plain_http:
            recommendations.append("Use HTTPS for all server URLs")
        else:
            points += 25

    # Check: no PII patterns in examples or descriptions
    max_points += 25
    spec_text = _spec_to_text(spec)
    if any(pattern.search(spec_text) for pattern in PII_PATTERNS):
        recommendations.append("Potential PII detected in spec content -- use placeholder values for examples")
    else:
        points += 25

    score = round((points / max_points) * 100) if max_points > 0 else 0
    return score, recommendations
298
-
299
-
300
- def _spec_to_text(spec: Dict[str, Any], depth: int = 0) -> str:
301
- """Convert a spec to flat text for pattern scanning. Limits recursion depth."""
302
- if depth > 15:
303
- return ""
304
- parts = []
305
- if isinstance(spec, dict):
306
- for k, v in spec.items():
307
- if k == "$ref":
308
- continue
309
- parts.append(str(k))
310
- parts.append(_spec_to_text(v, depth + 1))
311
- elif isinstance(spec, list):
312
- for item in spec:
313
- parts.append(_spec_to_text(item, depth + 1))
314
- elif isinstance(spec, str):
315
- parts.append(spec)
316
- return " ".join(parts)
317
-
318
-
319
def _naming_uniformity(names: List[Any], weight: int, subject: str) -> Tuple[int, Optional[str]]:
    """Award up to ``weight`` points for how uniformly ``names`` follow one naming style.

    Returns ``(points, recommendation)``; the recommendation is ``None``
    unless the dominant style covers less than 90% of the styled names.
    Names with no detectable style, and non-string names, are ignored.
    """
    counts: Dict[str, int] = {}
    for name in names:
        if not isinstance(name, str):
            continue
        style = _detect_naming_style(name)
        if style:
            counts[style] = counts.get(style, 0) + 1
    if not counts:
        # Only single-word or unclassifiable names: nothing to be inconsistent about.
        return weight, None

    dominant = max(counts, key=counts.get)
    ratio = counts[dominant] / sum(counts.values())
    advice = None
    if ratio < 0.9:
        advice = (
            f"{subject} naming inconsistency: mixed {', '.join(counts)}. "
            f"Standardize on {dominant}"
        )
    return round(weight * ratio), advice


def score_consistency(spec: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score consistency: naming conventions, response structure patterns.

    Three checks share a 100-point budget: path segment naming (35),
    parameter and schema property naming (35), and the variety of shapes
    used by 2xx response schemas (30).

    Args:
        spec: Parsed OpenAPI/Swagger document.

    Returns:
        ``(score, recommendations)`` with ``score`` in 0-100.
    """
    recommendations: List[str] = []
    points = 0
    max_points = 0

    # Check: path segment naming consistency
    max_points += 35
    paths = spec.get("paths") or {}
    segments: List[str] = []
    for path in paths:
        if isinstance(path, str):
            segments.extend(_extract_path_segments(path))
    earned, advice = _naming_uniformity(segments, 35, "Path")
    points += earned
    if advice:
        recommendations.append(advice)

    # Check: parameter naming consistency (operation parameters plus schema property names)
    max_points += 35
    ops = _get_all_operations(spec)
    names: List[Any] = []
    for op_info in ops:
        parameters = op_info["operation"].get("parameters")
        if not isinstance(parameters, list):
            # Missing or null parameters: nothing to collect.
            continue
        names.extend(
            param["name"] for param in parameters
            if isinstance(param, dict) and "name" in param
        )
    components = spec.get("components")
    schemas = components.get("schemas") if isinstance(components, dict) else None
    if isinstance(schemas, dict):
        for schema in schemas.values():
            properties = schema.get("properties") if isinstance(schema, dict) else None
            if isinstance(properties, dict):
                names.extend(properties)
    earned, advice = _naming_uniformity(names, 35, "Parameter/property")
    points += earned
    if advice:
        recommendations.append(advice)

    # Check: response structure consistency (all success responses have similar shape)
    max_points += 30
    response_shapes: List[str] = []
    for op_info in ops:
        responses = op_info["operation"].get("responses")
        if not isinstance(responses, dict):
            continue
        for code, response in responses.items():
            if not isinstance(response, dict) or not str(code).startswith("2"):
                continue
            content = response.get("content")
            if not isinstance(content, dict):
                continue
            for media in content.values():
                if not isinstance(media, dict):
                    continue
                schema = media.get("schema")
                if not isinstance(schema, dict):
                    # Boolean or null schemas have no shape to classify.
                    continue
                # Classify shape: object, array, ref, primitive
                if "$ref" in schema:
                    response_shapes.append("ref")
                elif schema.get("type") == "array":
                    response_shapes.append("array")
                elif schema.get("type") == "object" or "properties" in schema:
                    response_shapes.append("object")
                else:
                    response_shapes.append("primitive")

    if len(response_shapes) >= 2 and len(set(response_shapes)) > 2:
        points += 15
        recommendations.append(
            "Response structures use mixed shapes. Consider a consistent envelope pattern"
        )
    else:
        # Either at most two distinct shapes, or too few responses to judge.
        points += 30

    score = round((points / max_points) * 100) if max_points > 0 else 0
    return score, recommendations
429
-
430
-
431
def score_documentation(spec: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score documentation: info metadata, contact, license, tags.

    Points out of 100: info.description (25), info.contact (20),
    info.license (20), described tags (20) and a semver-style
    info.version (15).

    Returns:
        ``(score, recommendations)`` with ``score`` in 0-100.
    """
    info = spec.get("info") or {}
    tips: List[str] = []
    earned = 0
    budget = 100  # 25 + 20 + 20 + 20 + 15

    # Simple presence checks: (weight, satisfied?, advice when missing).
    contact = info.get("contact")
    license_info = info.get("license")
    presence_checks = (
        (25, bool(info.get("description")),
         "Add info.description to explain what this API does"),
        (20, bool(contact) and isinstance(contact, dict),
         "Add info.contact with name and email/url"),
        (20, bool(license_info) and isinstance(license_info, dict),
         "Add info.license to specify API license terms"),
    )
    for weight, satisfied, advice in presence_checks:
        if satisfied:
            earned += weight
        else:
            tips.append(advice)

    # Tags: full credit only when every tag carries a description.
    tags = spec.get("tags") or []
    if not (tags and isinstance(tags, list)):
        tips.append("Define tags with descriptions to organize endpoints")
    else:
        described = len([tag for tag in tags if isinstance(tag, dict) and tag.get("description")])
        if described == len(tags):
            earned += 20
        elif described:
            earned += 10
            tips.append("Add descriptions to all tags")
        else:
            earned += 5
            tips.append("Add descriptions to tags")

    # Version: full credit when it starts with MAJOR.MINOR.PATCH digits.
    version = info.get("version", "")
    if not version:
        tips.append("Set info.version")
    elif re.match(r"^\d+\.\d+\.\d+", str(version)):
        earned += 15
    else:
        earned += 5
        tips.append("Use semantic versioning for info.version (e.g., 1.0.0)")

    return round((earned / budget) * 100), tips
489
-
490
-
491
def score_best_practices(spec: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score best practices: $ref reuse, schema depth, HTTP methods.

    Four checks share a 100-point budget: use of ``$ref``/component schemas
    (30), inline schema nesting depth in request and response bodies (25),
    no request body on GET/HEAD/DELETE (25), and ``operationId`` coverage (20).

    Args:
        spec: Parsed OpenAPI/Swagger document.

    Returns:
        ``(score, recommendations)`` with ``score`` in 0-100.
    """
    recommendations: List[str] = []
    points = 0
    max_points = 0

    # Collected once and shared by the depth, method and operationId checks.
    ops = _get_all_operations(spec)

    # Check: uses $ref for reuse
    max_points += 30
    ref_count = _count_refs(spec)
    schemas = (spec.get("components") or {}).get("schemas") or {}
    if ref_count >= 3 or len(schemas) >= 2:
        points += 30
    elif ref_count >= 1 or len(schemas) >= 1:
        points += 15
        recommendations.append("Increase use of $ref and component schemas to reduce duplication")
    else:
        recommendations.append("Define reusable schemas in components/schemas and reference with $ref")

    # Check: no deeply nested inline schemas (>3 levels)
    max_points += 25
    max_depth = 0
    for op_info in ops:
        op = op_info["operation"]
        # The request body and every response can each hold media-type schemas.
        holders = [op.get("requestBody")]
        holders.extend((op.get("responses") or {}).values())
        for holder in holders:
            if not isinstance(holder, dict):
                continue
            for media in (holder.get("content") or {}).values():
                if isinstance(media, dict) and "schema" in media:
                    max_depth = max(max_depth, _max_inline_depth(media["schema"]))

    if max_depth <= 3:
        points += 25
    elif max_depth <= 5:
        points += 15
        recommendations.append(f"Inline schema nesting depth of {max_depth} -- extract nested schemas to components")
    else:
        recommendations.append(f"Deeply nested inline schemas (depth {max_depth}) -- refactor to $ref components")

    # Check: proper HTTP method usage (GET/HEAD/DELETE should not have requestBody)
    max_points += 25
    method_issues = [
        f"{op_info['method'].upper()} {op_info['path']} has requestBody"
        for op_info in ops
        if op_info["method"] in NO_BODY_METHODS and op_info["operation"].get("requestBody")
    ]
    if not method_issues:
        points += 25
    else:
        points += 10
        if len(method_issues) <= 2:
            recommendations.append(f"HTTP method misuse: {'; '.join(method_issues)}")
        else:
            recommendations.append(f"{len(method_issues)} endpoints misuse HTTP methods (e.g., GET with requestBody)")

    # Check: operationId defined for all operations
    max_points += 20
    if ops:
        with_id = sum(1 for op_info in ops if op_info["operation"].get("operationId"))
        ratio = with_id / len(ops)
        points += round(20 * ratio)
        if ratio < 1.0:
            missing = len(ops) - with_id
            recommendations.append(f"{missing} endpoint(s) missing operationId -- needed for SDK generation")
    else:
        points += 20

    score = round((points / max_points) * 100) if max_points > 0 else 0
    return score, recommendations
577
-
578
-
579
def score_spec(spec: Dict[str, Any]) -> Dict[str, Any]:
    """Score an OpenAPI spec across all dimensions.

    Runs every dimension scorer, grades each result, and combines the scores
    into a weighted overall score using ``DIMENSION_WEIGHTS``.

    Returns:
        Dict with overall_score, grade, dimensions, recommendations,
        endpoint_count, spec_version and api_title.
    """
    scorers = (
        ("completeness", score_completeness),
        ("security", score_security),
        ("consistency", score_consistency),
        ("documentation", score_documentation),
        ("best_practices", score_best_practices),
    )

    dimensions: Dict[str, Dict[str, Any]] = {}
    recommendations: List[Dict[str, str]] = []
    for name, scorer in scorers:
        value, advice = scorer(spec)
        dimensions[name] = {
            "score": value,
            "grade": _letter_grade(value),
            "weight": DIMENSION_WEIGHTS[name],
        }
        recommendations.extend(
            {"dimension": name, "recommendation": text} for text in advice
        )

    # Weighted average
    weighted_total = sum(
        entry["score"] * DIMENSION_WEIGHTS[name]
        for name, entry in dimensions.items()
    )
    overall_score = round(weighted_total)

    info = spec.get("info") or {}
    version = spec.get("openapi") or spec.get("swagger") or "unknown"
    endpoint_count = len(_get_all_operations(spec))

    return {
        "overall_score": overall_score,
        "grade": _letter_grade(overall_score),
        "dimensions": dimensions,
        "recommendations": recommendations,
        "endpoint_count": endpoint_count,
        "spec_version": version,
        "api_title": info.get("title", "Unknown API"),
    }