confab-framework 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
confab/__init__.py ADDED
@@ -0,0 +1,44 @@
1
+ """Confabulation Framework — structural detection and prevention for multi-agent systems.
2
+
3
+ Solves the cascade propagation problem: agents state falsehoods confidently,
4
+ other agents copy them forward indefinitely. This framework makes verification
5
+ structural (enforced by code) rather than aspirational (suggested by docs).
6
+
7
+ Quick start (CLI)::
8
+
9
+ pip install confab-framework
10
+ confab init # generate a confab.toml
11
+ confab gate # run the cascade gate
12
+
13
+ Quick start (Python API)::
14
+
15
+ from confab import ConfabGate
16
+
17
+ gate = ConfabGate("confab.toml")
18
+ report = gate.run()
19
+
20
+ if report.has_failures:
21
+ print(report.format_report())
22
+
23
+ See DESIGN.md for architecture.
24
+ """
25
+
26
+ from .config import ConfabConfig, get_config, load_config, set_config
27
+ from .gate import run_gate, quick_check, GateReport, ConfabGate
28
+ from .claims import extract_claims, extract_claims_from_file, Claim, ClaimType
29
+ from .verify import verify_claim, verify_all, VerificationResult, VerificationOutcome
30
+
31
+ __version__ = "0.2.0"
32
+
33
+ __all__ = [
34
+ # High-level API
35
+ "ConfabGate",
36
+ # Configuration
37
+ "ConfabConfig", "get_config", "load_config", "set_config",
38
+ # Gate (function-based)
39
+ "run_gate", "quick_check", "GateReport",
40
+ # Claims
41
+ "extract_claims", "extract_claims_from_file", "Claim", "ClaimType",
42
+ # Verification
43
+ "verify_claim", "verify_all", "VerificationResult", "VerificationOutcome",
44
+ ]
confab/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ """Allow running as: python -m confab gate"""
2
+ from .cli import main
3
+
4
+ main()
confab/claims.py ADDED
@@ -0,0 +1,553 @@
1
+ """Claim extraction and classification for the confabulation framework.
2
+
3
+ Parses agent priority files and handoff text to identify carry-forward claims,
4
+ classify them by type, and determine which are auto-verifiable.
5
+
6
+ The key insight: most cascade-propagating confabulations in the ia system are
7
+ verifiable claims about system state (file exists, env var present, pipeline
8
+ works/blocked) that persist because no agent checks reality. This module
9
+ extracts those claims so the verification engine can test them.
10
+ """
11
+
12
+ import re
13
+ from dataclasses import dataclass, field
14
+ from datetime import datetime, timezone
15
+ from enum import Enum
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Optional, Tuple
18
+
19
+
20
class ClaimType(Enum):
    """Types of claims agents make in priority files.

    The string values are a stable serialization contract — they appear in
    ``Claim.to_dict()`` output and downstream reports, so do not rename them.
    """
    FILE_EXISTS = "file_exists"                # "X file exists / is ready"
    FILE_MISSING = "file_missing"              # "X file is missing / doesn't exist"
    ENV_VAR = "env_var"                        # "needs ENV_VAR" / "ENV_VAR is set"
    PIPELINE_WORKS = "pipeline_works"          # "pipeline X is working"
    PIPELINE_BLOCKED = "pipeline_blocked"      # "X is blocked on Y"
    SCRIPT_RUNS = "script_runs"                # "script X works / runs"
    SCRIPT_BROKEN = "script_broken"            # "script X fails / is broken"
    CONFIG_PRESENT = "config_present"          # "config X is present / configured"
    COUNT_CLAIM = "count_claim"                # "X entries / N items / count of Y"
    STATUS_CLAIM = "status_claim"              # general status assertions
    FACT_CLAIM = "fact_claim"                  # factual claims (dates, numbers)
    REGISTRY_VIOLATION = "registry_violation"  # file/db not in SYSTEM_REGISTRY.md
    SUBJECTIVE = "subjective"                  # opinions, assessments
35
+
36
+
37
class VerifiabilityLevel(Enum):
    """How automatically verifiable a claim is.

    Used by extract_claims() to sort output (AUTO first) and by the gate to
    decide which claims the verification engine can test without a human.
    """
    AUTO = "auto"      # Can be verified by code right now
    SEMI = "semi"      # Partially verifiable (needs some context)
    MANUAL = "manual"  # Requires human/agent judgment
42
+
43
+
44
@dataclass
class Claim:
    """A single extracted claim from agent text.

    Carries the original wording, its classification, how automatically it
    can be verified, provenance (file/line), and any entities pulled out of
    the text (paths, env vars, numbers, config keys) for the verifier.
    """
    text: str                                # original text of the claim
    claim_type: ClaimType                    # classification
    verifiability: VerifiabilityLevel        # how verifiable
    source_file: Optional[str] = None        # file the claim was extracted from
    source_line: Optional[int] = None        # 1-based line number
    verification_tag: Optional[str] = None   # existing [v1]/[v2]/[unverified] tag
    extracted_paths: List[str] = field(default_factory=list)        # file paths mentioned
    extracted_env_vars: List[str] = field(default_factory=list)     # env vars mentioned
    extracted_numbers: List[str] = field(default_factory=list)      # numbers/counts
    extracted_config_keys: List[str] = field(default_factory=list)  # config keys to check
    context: str = ""                        # surrounding text for context
    age_builds: int = 0                      # builds this claim has persisted

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (enums flattened to their values)."""
        return dict(
            text=self.text,
            type=self.claim_type.value,
            verifiability=self.verifiability.value,
            source_file=self.source_file,
            source_line=self.source_line,
            verification_tag=self.verification_tag,
            paths=self.extracted_paths,
            env_vars=self.extracted_env_vars,
            numbers=self.extracted_numbers,
            config_keys=self.extracted_config_keys,
            age_builds=self.age_builds,
        )
74
+
75
+
76
# ---------------------------------------------------------------------------
# Pattern definitions for claim extraction
# ---------------------------------------------------------------------------

# File path pattern (matches common project paths).
# Group 1: a backtick-quoted path with a recognized extension.
# Group 2: a bare slash-separated path with a recognized extension.
FILE_PATH_RE = re.compile(
    r'`([^`]+\.(?:py|md|json|html|txt|yaml|yml|sh|js|ts|swift|css|db|conf|env|toml|cfg))`'
    r'|(?:^|\s)((?:[\w./-]+/)+[\w.-]+\.(?:py|md|json|html|txt|yaml|yml|sh|js|ts|swift|css|db|conf|env|toml|cfg))',
)

# Environment variable pattern: UPPER_SNAKE names of 3+ characters,
# optionally ending in a credential-style suffix (_KEY, _TOKEN, ...).
ENV_VAR_RE = re.compile(
    r'\b([A-Z][A-Z0-9_]{2,}(?:_KEY|_TOKEN|_SECRET|_URL|_PATH|_API|_ID|_PASSWORD|_COOKIE)?)\b'
)

# Default known env var names (always checked).
# These are common env vars found in most projects.
# Extended at runtime with project-specific env vars via _get_all_known_env_vars()
# (from confab.toml or ia-repo defaults).
_DEFAULT_KNOWN_ENV_VARS = {
    'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'CLAUDE_API_KEY',
    'GITHUB_TOKEN', 'DATABASE_URL',
    'SECRET_KEY', 'API_KEY', 'AWS_ACCESS_KEY_ID',
    'AWS_SECRET_ACCESS_KEY', 'GOOGLE_API_KEY',
}

# Backwards-compatible alias — public name kept for existing importers.
KNOWN_ENV_VARS = _DEFAULT_KNOWN_ENV_VARS
104
+
105
+
106
def _get_all_known_env_vars() -> set:
    """Merge default known env vars with any configured extras.

    Falls back to the defaults when the config module is unavailable or the
    lookup fails for any reason (best-effort by design).
    """
    try:
        from .config import get_config
    except Exception:
        return _DEFAULT_KNOWN_ENV_VARS
    try:
        return _DEFAULT_KNOWN_ENV_VARS | get_config().known_env_vars
    except Exception:
        return _DEFAULT_KNOWN_ENV_VARS
113
+
114
# Verification tag patterns: matches [v1: ...]/[v2: ...] (optionally dated),
# [unverified], [verified] (optionally dated), and [FAILED: reason].
VERIFICATION_TAG_RE = re.compile(
    r'\[(v[12]):\s*(?:checked\s+)?(.+?)(?:\s+\d{4}-\d{2}-\d{2})?\]'
    r'|\[(unverified)\]'
    r'|\[(verified(?::\s*\d{4}-\d{2}-\d{2})?)\]'
    r'|\[FAILED:\s*(.+?)\]'
)

# Blocker/blocked patterns — capture what a task claims to be blocked on,
# up to the first sentence delimiter (period, EOL, newline, or em dash).
BLOCKER_RE = re.compile(
    r'(?:blocked\s+(?:on|by)|waiting\s+(?:on|for)|needs|requires|depends\s+on|missing)\s+(.+?)(?:\.|$|\n|—)',
    re.IGNORECASE,
)

# Pipeline/script status patterns — positive or negative health assertions.
PIPELINE_STATUS_RE = re.compile(
    r'(?:pipeline|script|cron|process|service)\s+(?:is\s+)?'
    r'(?:working|running|operational|active|healthy|broken|failing|down|blocked|stopped)',
    re.IGNORECASE,
)

# Count/quantity claims, e.g. "42 entries", "7 tests".
COUNT_RE = re.compile(
    r'(\d+)\s+(?:entries|items|posts|notes|files|tests|builds|sprints|days|hours|commits|'
    r'observations|ideas|principles|scripts|databases|subscribers|views)',
    re.IGNORECASE,
)

# Build section header pattern (to track claim age across builds).
BUILD_HEADER_RE = re.compile(
    r'^##\s+(?:Latest|Previous|Current)\s+Build\s+\((.+?)\)',
    re.MULTILINE,
)

# Meta-rule pattern — lines that describe how to handle claims, not claims themselves.
# e.g. "**Staleness rule:** ...", "**Rules:** ...", "**Size rule:** ..."
META_RULE_RE = re.compile(
    r'^\w[\w\s]*rules?\s*:',
    re.IGNORECASE,
)

# Config file detection — extensions treated as configuration files.
CONFIG_FILE_EXTS = {'.json', '.yaml', '.yml', '.toml', '.cfg', '.conf', '.ini'}

# Words signalling that a line asserts something about configuration.
CONFIG_ASSERTION_RE = re.compile(
    r'\b(?:config(?:ured|uration)?|setting|key\b|has\s+key|contains?\s+key)\b',
    re.IGNORECASE,
)

# Config key pattern: backtick-enclosed identifiers that aren't file paths.
CONFIG_KEY_RE = re.compile(r'`([a-zA-Z_][a-zA-Z0-9_.]*)`')

# Optional/conditional language — when present, file references are not existence assertions.
# e.g. "loads confab.toml or falls back to defaults", "reads config.yaml if present"
OPTIONAL_FILE_RE = re.compile(
    r'\b(?:if\s+(?:\w+\s+)?(?:present|exists?|found|available)'
    r'|or\s+(?:falls?\s+back|defaults?\s+to|uses?\s+defaults?)'
    r'|optional(?:ly)?'
    r'|when\s+(?:present|available|found)'
    r'|(?:falls?\s+back|defaults?)\s+(?:to|if))\b',
    re.IGNORECASE,
)
176
+
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # Claim extraction
180
+ # ---------------------------------------------------------------------------
181
+
182
+ def _get_exclude_patterns() -> List[re.Pattern]:
183
+ """Load section exclusion patterns from config."""
184
+ try:
185
+ from .config import get_config
186
+ patterns = get_config().exclude_sections
187
+ except Exception:
188
+ patterns = []
189
+ return [re.compile(p, re.IGNORECASE) for p in patterns] if patterns else []
190
+
191
+
192
# Markdown heading pattern for section tracking.
# Group 1: the '#' run (its length is the heading level, 1-6).
# Group 2: the heading text.
_HEADING_RE = re.compile(r'^(#{1,6})\s+(.+)')
194
+
195
+
196
def extract_claims(
    text: str,
    source_file: Optional[str] = None,
    exclude_sections: Optional[List[str]] = None,
) -> List[Claim]:
    """Extract verifiable claims from agent text.

    Scans text for patterns that indicate testable assertions:
    - File existence/absence claims
    - Environment variable requirements
    - Pipeline/script status claims
    - Blocker assertions
    - Count/quantity claims
    - General status claims with verification tags

    Args:
        text: The text to scan for claims.
        source_file: Path to the source file (for reporting).
        exclude_sections: Optional list of regex patterns for section headings
            to skip during extraction. If None, loads from config.

    Returns a list of Claim objects, sorted by verifiability (auto first);
    within a tier, claims that have persisted across more builds come first.
    """
    claims: List[Claim] = []
    lines = text.split('\n')

    # Build section exclusion patterns (explicit argument wins over config).
    if exclude_sections is not None:
        excl_patterns = [re.compile(p, re.IGNORECASE) for p in exclude_sections]
    else:
        excl_patterns = _get_exclude_patterns()

    # Track which section we're in for exclusion filtering.
    in_excluded_section = False
    excluded_heading_level = 0  # depth of the heading that started the exclusion

    # Precompute each line's starting character offset so build-section
    # tracking below is O(1) amortized per line. (Previously a prefix sum
    # over all earlier lines was recomputed for every line and every build
    # section, making extraction quadratic in document size.)
    line_starts = [0]
    for raw_line in lines:
        line_starts.append(line_starts[-1] + len(raw_line) + 1)  # +1 for '\n'

    # Track build sections for age estimation.
    build_sections = list(BUILD_HEADER_RE.finditer(text))
    current_build_idx = 0

    for line_num, line in enumerate(lines, 1):
        # Advance to the latest build section starting at or before this
        # line. Lines and regex matches are both in document order, so a
        # single forward-moving pointer is equivalent to rescanning.
        line_start = line_starts[line_num - 1]
        while (current_build_idx + 1 < len(build_sections)
               and build_sections[current_build_idx + 1].start() <= line_start):
            current_build_idx += 1

        # Check if this line is a heading — update section tracking.
        stripped = line.strip()
        heading_match = _HEADING_RE.match(stripped)
        if heading_match:
            heading_level = len(heading_match.group(1))
            heading_text = heading_match.group(2).strip()

            if in_excluded_section:
                # A heading at the same or higher level ends the exclusion.
                if heading_level <= excluded_heading_level:
                    in_excluded_section = False
                # Fall through to check if THIS heading starts a new exclusion.

            if not in_excluded_section:
                # Check if this heading matches an exclusion pattern.
                for pattern in excl_patterns:
                    if pattern.search(heading_text):
                        in_excluded_section = True
                        excluded_heading_level = heading_level
                        break

        # Skip lines in excluded sections.
        if in_excluded_section:
            continue

        # Skip headers, empty lines, and table formatting.
        if not stripped or stripped.startswith('#') or stripped.startswith('|---'):
            continue

        # Skip meta-rules about claim handling (e.g. "**Staleness rule:** ...").
        # These describe how to process claims, not assertions about system state.
        clean_for_rule_check = stripped.replace('*', '').strip()
        if META_RULE_RE.match(clean_for_rule_check):
            continue

        # Extract existing verification tags.
        vtag_match = VERIFICATION_TAG_RE.search(line)
        vtag = vtag_match.group(0) if vtag_match else None

        # --- Blocker claims (highest priority — these are the cascade propagators) ---
        blocker_matches = BLOCKER_RE.findall(line)
        if blocker_matches:
            for blocker_text in blocker_matches:
                claim = _classify_blocker_claim(
                    line, blocker_text.strip(), source_file, line_num, vtag, current_build_idx
                )
                if claim:
                    claims.append(claim)
            continue  # Don't double-count this line as another claim type

        # --- Pipeline/script status claims ---
        if PIPELINE_STATUS_RE.search(line):
            claim = _classify_status_claim(
                line, source_file, line_num, vtag, current_build_idx
            )
            if claim:
                claims.append(claim)
            continue

        # --- File path and config file references in assertion context ---
        file_paths = _extract_file_paths(line)
        if file_paths and _is_assertion_context(line) and not _is_optional_reference(line):
            if _is_config_assertion(line, file_paths):
                claim = Claim(
                    text=stripped,
                    claim_type=ClaimType.CONFIG_PRESENT,
                    verifiability=VerifiabilityLevel.AUTO,
                    source_file=source_file,
                    source_line=line_num,
                    verification_tag=vtag,
                    extracted_paths=file_paths,
                    extracted_config_keys=_extract_config_keys(line, file_paths),
                    age_builds=current_build_idx,
                )
            else:
                claim = Claim(
                    text=stripped,
                    claim_type=ClaimType.FILE_EXISTS,
                    verifiability=VerifiabilityLevel.AUTO,
                    source_file=source_file,
                    source_line=line_num,
                    verification_tag=vtag,
                    extracted_paths=file_paths,
                    age_builds=current_build_idx,
                )
            claims.append(claim)
            continue

        # --- Count/quantity claims ---
        count_matches = COUNT_RE.findall(line)
        if count_matches and _is_assertion_context(line):
            claims.append(Claim(
                text=stripped,
                claim_type=ClaimType.COUNT_CLAIM,
                verifiability=VerifiabilityLevel.SEMI,
                source_file=source_file,
                source_line=line_num,
                verification_tag=vtag,
                extracted_numbers=count_matches,
                age_builds=current_build_idx,
            ))

    # Sort: auto-verifiable first, then semi, then manual; older claims first.
    priority = {VerifiabilityLevel.AUTO: 0, VerifiabilityLevel.SEMI: 1, VerifiabilityLevel.MANUAL: 2}
    claims.sort(key=lambda c: (priority[c.verifiability], -c.age_builds))

    return claims
353
+
354
+
355
def _extract_file_paths(text: str) -> List[str]:
    """Return every file path FILE_PATH_RE finds in *text*, in match order.

    Either capture group may hold the path (backticked vs. bare form).
    """
    return [
        m.group(1) or m.group(2)
        for m in FILE_PATH_RE.finditer(text)
        if m.group(1) or m.group(2)
    ]
363
+
364
+
365
+ def _is_assertion_context(line: str) -> bool:
366
+ """Check if a line contains an assertion (not just a reference)."""
367
+ assertion_words = {
368
+ 'exists', 'ready', 'working', 'works', 'runs', 'running',
369
+ 'blocked', 'broken', 'failing', 'missing', 'needs', 'requires',
370
+ 'present', 'configured', 'deployed', 'operational', 'healthy',
371
+ 'queued', 'pending', 'complete', 'completed', 'done', 'fixed',
372
+ 'added', 'created', 'updated', 'verified', 'confirmed',
373
+ 'status', 'still', 'not', 'should', 'must',
374
+ }
375
+ lower = line.lower()
376
+ return any(word in lower for word in assertion_words)
377
+
378
+
379
def _is_optional_reference(line: str) -> bool:
    """Check if a line describes a file as optional/conditional.

    Lines like "loads confab.toml or falls back to ia defaults" should not
    be treated as assertions that the file must exist. The file reference
    is conditional — the system works without it.
    """
    return OPTIONAL_FILE_RE.search(line) is not None
387
+
388
+
389
def _is_config_assertion(line: str, file_paths: List[str]) -> bool:
    """Check if a line is a config-related assertion about config files.

    True only when the line both uses config vocabulary and references at
    least one file with a config-style extension.
    """
    if not CONFIG_ASSERTION_RE.search(line):
        return False
    return any(Path(p).suffix.lower() in CONFIG_FILE_EXTS for p in file_paths)
396
+
397
+
398
def _extract_config_keys(line: str, file_paths: List[str]) -> List[str]:
    """Extract config key names from backticked text in a line.

    Returns identifiers in backticks that aren't file paths. These are
    candidate config keys to verify in the referenced config file.
    """
    known_paths = set(file_paths)
    file_exts = {'.py', '.md', '.json', '.yaml', '.yml', '.toml', '.html',
                 '.js', '.ts', '.css', '.sh', '.swift', '.txt', '.db',
                 '.conf', '.env', '.cfg', '.ini'}

    def looks_like_file(token: str) -> bool:
        # Anything already detected as a path, containing a slash, or
        # ending in a known file extension is not a config key.
        if token in known_paths or '/' in token:
            return True
        if '.' in token:
            ext = '.' + token.rsplit('.', 1)[-1]
            return ext.lower() in file_exts
        return False

    return [
        m.group(1)
        for m in CONFIG_KEY_RE.finditer(line)
        if not looks_like_file(m.group(1))
    ]
424
+
425
+
426
def _classify_blocker_claim(
    line: str,
    blocker_text: str,
    source_file: Optional[str],
    line_num: int,
    vtag: Optional[str],
    build_idx: int,
) -> Optional[Claim]:
    """Classify a blocker claim by what it's blocked on.

    Returns an ENV_VAR claim (auto-verifiable) when an environment variable
    is named, a FILE_MISSING claim (auto-verifiable) when the blocker text
    names a file, and a generic PIPELINE_BLOCKED claim (semi) otherwise.
    """
    # Scan the FULL LINE for env vars, not just the blocker capture group.
    # The regex captures up to the first delimiter, but env var names often
    # appear later in the line (e.g., "needs cookie — fails without SUBSTACK_COOKIE").
    scan_text = line
    # Hoisted: lowercase once instead of once per known variable. The old
    # `or var in scan_text` branch was redundant — an exact (ASCII) match
    # implies the case-folded match.
    scan_lower = scan_text.lower()

    # Check for env var blockers among the known names.
    env_vars = []
    all_known = _get_all_known_env_vars()
    for var in all_known:
        if var.lower() in scan_lower:
            env_vars.append(var)

    # Check for generic env var pattern in full line.
    for match in ENV_VAR_RE.finditer(scan_text):
        candidate = match.group(1)
        if candidate in all_known or candidate.endswith(('_KEY', '_TOKEN', '_SECRET', '_COOKIE')):
            if candidate not in env_vars:
                env_vars.append(candidate)

    if env_vars:
        return Claim(
            text=line.strip(),
            claim_type=ClaimType.ENV_VAR,
            verifiability=VerifiabilityLevel.AUTO,
            source_file=source_file,
            source_line=line_num,
            verification_tag=vtag,
            extracted_env_vars=env_vars,
            age_builds=build_idx,
        )

    # Check for file-based blockers.
    file_paths = _extract_file_paths(blocker_text)
    if file_paths:
        return Claim(
            text=line.strip(),
            claim_type=ClaimType.FILE_MISSING,
            verifiability=VerifiabilityLevel.AUTO,
            source_file=source_file,
            source_line=line_num,
            verification_tag=vtag,
            extracted_paths=file_paths,
            age_builds=build_idx,
        )

    # General blocker — semi-verifiable.
    return Claim(
        text=line.strip(),
        claim_type=ClaimType.PIPELINE_BLOCKED,
        verifiability=VerifiabilityLevel.SEMI,
        source_file=source_file,
        source_line=line_num,
        verification_tag=vtag,
        age_builds=build_idx,
    )
490
+
491
+
492
def _classify_status_claim(
    line: str,
    source_file: Optional[str],
    line_num: int,
    vtag: Optional[str],
    build_idx: int,
) -> Optional[Claim]:
    """Classify a pipeline/script status claim.

    Negative status words yield PIPELINE_BLOCKED; anything else yields
    PIPELINE_WORKS. Auto-verifiable only when the line names a script file.
    """
    lower = line.lower()

    # Only the negative word set matters for classification — the previous
    # implementation also computed a `positive_words`/`is_positive` pair
    # that was never used.
    negative_words = {'broken', 'failing', 'down', 'blocked', 'stopped'}
    is_negative = any(w in lower for w in negative_words)

    claim_type = ClaimType.PIPELINE_BLOCKED if is_negative else ClaimType.PIPELINE_WORKS

    # Extract any file paths (scripts being referenced).
    file_paths = _extract_file_paths(line)

    return Claim(
        text=line.strip(),
        claim_type=claim_type,
        verifiability=VerifiabilityLevel.AUTO if file_paths else VerifiabilityLevel.SEMI,
        source_file=source_file,
        source_line=line_num,
        verification_tag=vtag,
        extracted_paths=file_paths,
        age_builds=build_idx,
    )
524
+
525
+
526
def extract_claims_from_file(
    file_path: str,
    exclude_sections: Optional[List[str]] = None,
) -> List[Claim]:
    """Extract claims from a file on disk.

    Returns an empty list when the file does not exist (EAFP: reading and
    catching FileNotFoundError avoids the exists()/read race). Reads as
    UTF-8 so results don't vary with the platform's locale encoding.
    """
    path = Path(file_path)
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return []
    return extract_claims(text, source_file=str(path), exclude_sections=exclude_sections)
536
+
537
+
538
def summarize_claims(claims: List[Claim]) -> Dict[str, Any]:
    """Generate a summary of extracted claims.

    Returns totals, per-type and per-verifiability counts, the number of
    auto-verifiable claims, the oldest build age seen, and the number of
    claims carrying no verification tag.
    """
    by_type: Dict[str, int] = {}
    by_verifiability: Dict[str, int] = {}
    untagged = 0
    oldest = 0

    # Single pass: accumulate every statistic at once.
    for claim in claims:
        t = claim.claim_type.value
        v = claim.verifiability.value
        by_type[t] = by_type.get(t, 0) + 1
        by_verifiability[v] = by_verifiability.get(v, 0) + 1
        if claim.verification_tag is None:
            untagged += 1
        if claim.age_builds > oldest:
            oldest = claim.age_builds

    return {
        "total": len(claims),
        "by_type": by_type,
        "by_verifiability": by_verifiability,
        "auto_verifiable": by_verifiability.get("auto", 0),
        "oldest_build_age": oldest,
        "untagged": untagged,
    }