cognitive-modules 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cognitive/validator.py CHANGED
@@ -1,11 +1,11 @@
1
1
  """
2
2
  Module Validator - Validate cognitive module structure and examples.
3
- Supports both old and new module formats.
3
+ Supports v0, v1, v2.1, and v2.2 module formats.
4
4
  """
5
5
 
6
6
  import json
7
7
  from pathlib import Path
8
- from typing import Optional
8
+ from typing import Optional, Literal
9
9
 
10
10
  import jsonschema
11
11
  import yaml
@@ -13,10 +13,21 @@ import yaml
13
13
  from .registry import find_module
14
14
 
15
15
 
16
- def validate_module(name_or_path: str) -> tuple[bool, list[str], list[str]]:
16
+ # =============================================================================
17
+ # Main Validation Entry Point
18
+ # =============================================================================
19
+
20
+ def validate_module(
21
+ name_or_path: str,
22
+ v22: bool = False
23
+ ) -> tuple[bool, list[str], list[str]]:
17
24
  """
18
25
  Validate a cognitive module's structure and examples.
19
- Supports both old and new formats.
26
+ Supports all formats.
27
+
28
+ Args:
29
+ name_or_path: Module name or path
30
+ v22: If True, validate v2.2 specific requirements
20
31
 
21
32
  Returns:
22
33
  Tuple of (is_valid, errors, warnings)
@@ -34,22 +45,256 @@ def validate_module(name_or_path: str) -> tuple[bool, list[str], list[str]]:
34
45
  return False, [f"Module not found: {name_or_path}"], []
35
46
 
36
47
  # Detect format
37
- has_new = (module_path / "MODULE.md").exists()
38
- has_old = (module_path / "module.md").exists()
48
+ has_module_yaml = (module_path / "module.yaml").exists()
49
+ has_module_md = (module_path / "MODULE.md").exists()
50
+ has_old_module_md = (module_path / "module.md").exists()
51
+
52
+ if has_module_yaml:
53
+ # v2.x format
54
+ if v22:
55
+ return _validate_v22_format(module_path)
56
+ else:
57
+ return _validate_v2_format(module_path)
58
+ elif has_module_md:
59
+ # v1 format
60
+ if v22:
61
+ errors.append("Module is v1 format. Use 'cogn migrate' to upgrade to v2.2")
62
+ return False, errors, warnings
63
+ return _validate_new_format(module_path)
64
+ elif has_old_module_md:
65
+ # v0 format
66
+ if v22:
67
+ errors.append("Module is v0 format. Use 'cogn migrate' to upgrade to v2.2")
68
+ return False, errors, warnings
69
+ return _validate_old_format(module_path)
70
+ else:
71
+ return False, ["Missing module.yaml, MODULE.md, or module.md"], []
72
+
73
+
74
+ # =============================================================================
75
+ # v2.2 Validation
76
+ # =============================================================================
77
+
78
+ def _validate_v22_format(module_path: Path) -> tuple[bool, list[str], list[str]]:
79
+ """Validate v2.2 format (module.yaml + prompt.md + schema.json with meta)."""
80
+ errors = []
81
+ warnings = []
39
82
 
40
- if not has_new and not has_old:
41
- return False, ["Missing MODULE.md or module.md"], []
83
+ # Check module.yaml
84
+ module_yaml = module_path / "module.yaml"
85
+ try:
86
+ with open(module_yaml, 'r', encoding='utf-8') as f:
87
+ manifest = yaml.safe_load(f)
88
+ except yaml.YAMLError as e:
89
+ errors.append(f"Invalid YAML in module.yaml: {e}")
90
+ return False, errors, warnings
42
91
 
43
- format_type = "new" if has_new else "old"
92
+ # Check v2.2 required fields
93
+ v22_required_fields = ['name', 'version', 'responsibility']
94
+ for field in v22_required_fields:
95
+ if field not in manifest:
96
+ errors.append(f"module.yaml missing required field: {field}")
44
97
 
45
- if format_type == "new":
46
- return _validate_new_format(module_path)
98
+ # Check tier (v2.2 specific)
99
+ tier = manifest.get('tier')
100
+ if tier is None:
101
+ warnings.append("module.yaml missing 'tier' (recommended: exec | decision | exploration)")
102
+ elif tier not in ['exec', 'decision', 'exploration']:
103
+ errors.append(f"Invalid tier: {tier}. Must be exec | decision | exploration")
104
+
105
+ # Check schema_strictness
106
+ schema_strictness = manifest.get('schema_strictness')
107
+ if schema_strictness and schema_strictness not in ['high', 'medium', 'low']:
108
+ errors.append(f"Invalid schema_strictness: {schema_strictness}. Must be high | medium | low")
109
+
110
+ # Check overflow config
111
+ overflow = manifest.get('overflow', {})
112
+ if overflow.get('enabled'):
113
+ if overflow.get('require_suggested_mapping') is None:
114
+ warnings.append("overflow.require_suggested_mapping not set (recommended for recoverable insights)")
115
+
116
+ # Check enums config
117
+ enums = manifest.get('enums', {})
118
+ strategy = enums.get('strategy')
119
+ if strategy and strategy not in ['strict', 'extensible']:
120
+ errors.append(f"Invalid enums.strategy: {strategy}. Must be strict | extensible")
121
+
122
+ # Check compat config
123
+ compat = manifest.get('compat', {})
124
+ if not compat:
125
+ warnings.append("module.yaml missing 'compat' section (recommended for migration)")
126
+
127
+ # Check excludes
128
+ excludes = manifest.get('excludes', [])
129
+ if not excludes:
130
+ warnings.append("'excludes' list is empty (should list what module won't do)")
131
+
132
+ # Check prompt.md
133
+ prompt_path = module_path / "prompt.md"
134
+ if not prompt_path.exists():
135
+ errors.append("Missing prompt.md (required for v2.2)")
47
136
  else:
48
- return _validate_old_format(module_path)
137
+ with open(prompt_path, 'r', encoding='utf-8') as f:
138
+ prompt = f.read()
139
+
140
+ # Check for v2.2 envelope format instructions
141
+ if 'meta' not in prompt.lower() and 'envelope' not in prompt.lower():
142
+ warnings.append("prompt.md should mention v2.2 envelope format with meta/data separation")
143
+
144
+ if len(prompt) < 100:
145
+ warnings.append("prompt.md seems too short (< 100 chars)")
146
+
147
+ # Check schema.json
148
+ schema_path = module_path / "schema.json"
149
+ if not schema_path.exists():
150
+ errors.append("Missing schema.json (required for v2.2)")
151
+ else:
152
+ try:
153
+ with open(schema_path, 'r', encoding='utf-8') as f:
154
+ schema = json.load(f)
155
+
156
+ # Check for meta schema (v2.2 required)
157
+ if 'meta' not in schema:
158
+ errors.append("schema.json missing 'meta' schema (required for v2.2)")
159
+ else:
160
+ meta_schema = schema['meta']
161
+ meta_required = meta_schema.get('required', [])
162
+
163
+ if 'confidence' not in meta_required:
164
+ errors.append("meta schema must require 'confidence'")
165
+ if 'risk' not in meta_required:
166
+ errors.append("meta schema must require 'risk'")
167
+ if 'explain' not in meta_required:
168
+ errors.append("meta schema must require 'explain'")
169
+
170
+ # Check explain maxLength
171
+ explain_props = meta_schema.get('properties', {}).get('explain', {})
172
+ if explain_props.get('maxLength', 999) > 280:
173
+ warnings.append("meta.explain should have maxLength <= 280")
174
+
175
+ # Check for input schema
176
+ if 'input' not in schema:
177
+ warnings.append("schema.json missing 'input' definition")
178
+
179
+ # Check for data schema (v2.2 uses 'data' instead of 'output')
180
+ if 'data' not in schema and 'output' not in schema:
181
+ errors.append("schema.json missing 'data' (or 'output') definition")
182
+ elif 'data' in schema:
183
+ data_schema = schema['data']
184
+ data_required = data_schema.get('required', [])
185
+
186
+ if 'rationale' not in data_required:
187
+ warnings.append("data schema should require 'rationale' for audit")
188
+
189
+ # Check for error schema
190
+ if 'error' not in schema:
191
+ warnings.append("schema.json missing 'error' definition")
192
+
193
+ # Check for $defs/extensions (v2.2 overflow)
194
+ if overflow.get('enabled'):
195
+ defs = schema.get('$defs', {})
196
+ if 'extensions' not in defs:
197
+ warnings.append("schema.json missing '$defs.extensions' (needed for overflow)")
198
+
199
+ except json.JSONDecodeError as e:
200
+ errors.append(f"Invalid JSON in schema.json: {e}")
201
+
202
+ # Check tests directory
203
+ tests_path = module_path / "tests"
204
+ if not tests_path.exists():
205
+ warnings.append("Missing tests directory (recommended)")
206
+ else:
207
+ # Check for v2.2 format in expected files
208
+ expected_files = list(tests_path.glob("*.expected.json"))
209
+ for expected_file in expected_files:
210
+ try:
211
+ with open(expected_file, 'r', encoding='utf-8') as f:
212
+ expected = json.load(f)
213
+
214
+ # Check if example uses v2.2 format
215
+ example = expected.get('$example', {})
216
+ if example.get('ok') is True and 'meta' not in example:
217
+ warnings.append(f"{expected_file.name}: $example missing 'meta' (v2.2 format)")
218
+
219
+ except json.JSONDecodeError:
220
+ pass
221
+
222
+ return len(errors) == 0, errors, warnings
223
+
224
+
225
+ # =============================================================================
226
+ # v2.x (non-strict) Validation
227
+ # =============================================================================
228
+
229
+ def _validate_v2_format(module_path: Path) -> tuple[bool, list[str], list[str]]:
230
+ """Validate v2.x format without strict v2.2 requirements."""
231
+ errors = []
232
+ warnings = []
233
+
234
+ # Check module.yaml
235
+ module_yaml = module_path / "module.yaml"
236
+ try:
237
+ with open(module_yaml, 'r', encoding='utf-8') as f:
238
+ manifest = yaml.safe_load(f)
239
+ except yaml.YAMLError as e:
240
+ errors.append(f"Invalid YAML in module.yaml: {e}")
241
+ return False, errors, warnings
242
+
243
+ # Check required fields
244
+ required_fields = ['name', 'version', 'responsibility']
245
+ for field in required_fields:
246
+ if field not in manifest:
247
+ errors.append(f"module.yaml missing required field: {field}")
248
+
249
+ # Check excludes
250
+ excludes = manifest.get('excludes', [])
251
+ if not excludes:
252
+ warnings.append("'excludes' list is empty")
253
+
254
+ # Check prompt.md or prompt existence in MODULE.md
255
+ prompt_path = module_path / "prompt.md"
256
+ module_md_path = module_path / "MODULE.md"
257
+
258
+ if not prompt_path.exists() and not module_md_path.exists():
259
+ errors.append("Missing prompt.md or MODULE.md")
260
+ elif prompt_path.exists():
261
+ with open(prompt_path, 'r', encoding='utf-8') as f:
262
+ prompt = f.read()
263
+ if len(prompt) < 50:
264
+ warnings.append("prompt.md seems too short (< 50 chars)")
265
+
266
+ # Check schema.json
267
+ schema_path = module_path / "schema.json"
268
+ if not schema_path.exists():
269
+ warnings.append("Missing schema.json (recommended)")
270
+ else:
271
+ try:
272
+ with open(schema_path, 'r', encoding='utf-8') as f:
273
+ schema = json.load(f)
274
+
275
+ if 'input' not in schema:
276
+ warnings.append("schema.json missing 'input' definition")
277
+
278
+ # Accept both 'data' and 'output'
279
+ if 'data' not in schema and 'output' not in schema:
280
+ warnings.append("schema.json missing 'data' or 'output' definition")
281
+
282
+ except json.JSONDecodeError as e:
283
+ errors.append(f"Invalid JSON in schema.json: {e}")
284
+
285
+ # Check for v2.2 features and suggest upgrade
286
+ if manifest.get('tier') is None:
287
+ warnings.append("Consider adding 'tier' for v2.2 (use 'cogn validate --v22' for full check)")
288
+
289
+ return len(errors) == 0, errors, warnings
49
290
 
50
291
 
292
+ # =============================================================================
293
+ # v1 Format Validation (MODULE.md + schema.json)
294
+ # =============================================================================
295
+
51
296
  def _validate_new_format(module_path: Path) -> tuple[bool, list[str], list[str]]:
52
- """Validate new format (MODULE.md + schema.json)."""
297
+ """Validate v1 format (MODULE.md + schema.json)."""
53
298
  errors = []
54
299
  warnings = []
55
300
 
@@ -93,7 +338,7 @@ def _validate_new_format(module_path: Path) -> tuple[bool, list[str], list[str]]
93
338
  except yaml.YAMLError as e:
94
339
  errors.append(f"Invalid YAML in MODULE.md: {e}")
95
340
 
96
- # Check schema.json (optional but recommended)
341
+ # Check schema.json
97
342
  schema_path = module_path / "schema.json"
98
343
  if not schema_path.exists():
99
344
  warnings.append("Missing schema.json (recommended for validation)")
@@ -118,56 +363,25 @@ def _validate_new_format(module_path: Path) -> tuple[bool, list[str], list[str]]
118
363
  except json.JSONDecodeError as e:
119
364
  errors.append(f"Invalid JSON in schema.json: {e}")
120
365
 
121
- # Check examples (optional but recommended)
366
+ # Check examples
122
367
  examples_path = module_path / "examples"
123
368
  if not examples_path.exists():
124
369
  warnings.append("Missing examples directory (recommended)")
125
370
  else:
126
- if not (examples_path / "input.json").exists():
127
- warnings.append("Missing examples/input.json")
128
- if not (examples_path / "output.json").exists():
129
- warnings.append("Missing examples/output.json")
130
-
131
- # Validate examples against schema if both exist
132
- if schema_path.exists():
133
- try:
134
- with open(schema_path, 'r', encoding='utf-8') as f:
135
- schema = json.load(f)
136
-
137
- # Validate input example
138
- input_example_path = examples_path / "input.json"
139
- if input_example_path.exists() and "input" in schema:
140
- with open(input_example_path, 'r', encoding='utf-8') as f:
141
- input_example = json.load(f)
142
- try:
143
- jsonschema.validate(instance=input_example, schema=schema["input"])
144
- except jsonschema.ValidationError as e:
145
- errors.append(f"Example input fails schema: {e.message}")
146
-
147
- # Validate output example
148
- output_example_path = examples_path / "output.json"
149
- if output_example_path.exists() and "output" in schema:
150
- with open(output_example_path, 'r', encoding='utf-8') as f:
151
- output_example = json.load(f)
152
- try:
153
- jsonschema.validate(instance=output_example, schema=schema["output"])
154
- except jsonschema.ValidationError as e:
155
- errors.append(f"Example output fails schema: {e.message}")
156
-
157
- # Check confidence
158
- if "confidence" in output_example:
159
- conf = output_example["confidence"]
160
- if not (0 <= conf <= 1):
161
- errors.append(f"Confidence must be 0-1, got: {conf}")
162
-
163
- except (json.JSONDecodeError, KeyError):
164
- pass
371
+ _validate_examples(examples_path, schema_path, errors, warnings)
372
+
373
+ # Suggest v2.2 upgrade
374
+ warnings.append("Consider upgrading to v2.2 format for better Control/Data separation")
165
375
 
166
376
  return len(errors) == 0, errors, warnings
167
377
 
168
378
 
379
+ # =============================================================================
380
+ # v0 Format Validation (6-file format)
381
+ # =============================================================================
382
+
169
383
  def _validate_old_format(module_path: Path) -> tuple[bool, list[str], list[str]]:
170
- """Validate old format (6 files)."""
384
+ """Validate v0 format (6 files)."""
171
385
  errors = []
172
386
  warnings = []
173
387
 
@@ -226,77 +440,121 @@ def _validate_old_format(module_path: Path) -> tuple[bool, list[str], list[str]]
226
440
  except yaml.YAMLError as e:
227
441
  errors.append(f"Invalid YAML in module.md: {e}")
228
442
 
229
- # Load and validate schemas
230
- input_schema = None
231
- output_schema = None
443
+ # Suggest v2.2 upgrade
444
+ warnings.append("v0 format is deprecated. Consider upgrading to v2.2")
232
445
 
233
- try:
234
- with open(module_path / "input.schema.json", 'r', encoding='utf-8') as f:
235
- input_schema = json.load(f)
236
- if input_schema.get('additionalProperties') != False:
237
- warnings.append("input.schema.json should have additionalProperties: false")
238
- except json.JSONDecodeError as e:
239
- errors.append(f"Invalid JSON in input.schema.json: {e}")
446
+ return len(errors) == 0, errors, warnings
447
+
448
+
449
+ # =============================================================================
450
+ # Helper Functions
451
+ # =============================================================================
452
+
453
+ def _validate_examples(
454
+ examples_path: Path,
455
+ schema_path: Path,
456
+ errors: list[str],
457
+ warnings: list[str]
458
+ ) -> None:
459
+ """Validate example files against schema."""
460
+ if not (examples_path / "input.json").exists():
461
+ warnings.append("Missing examples/input.json")
462
+ if not (examples_path / "output.json").exists():
463
+ warnings.append("Missing examples/output.json")
240
464
 
241
- try:
242
- with open(module_path / "output.schema.json", 'r', encoding='utf-8') as f:
243
- output_schema = json.load(f)
244
- required_output_fields = ['confidence', 'rationale']
245
- if 'required' in output_schema:
246
- for field in required_output_fields:
247
- if field not in output_schema['required']:
248
- warnings.append(f"output.schema.json should require '{field}'")
249
- except json.JSONDecodeError as e:
250
- errors.append(f"Invalid JSON in output.schema.json: {e}")
251
-
252
- # Validate constraints
253
- try:
254
- with open(module_path / "constraints.yaml", 'r', encoding='utf-8') as f:
255
- constraints = yaml.safe_load(f)
465
+ # Validate examples against schema if both exist
466
+ if schema_path.exists():
467
+ try:
468
+ with open(schema_path, 'r', encoding='utf-8') as f:
469
+ schema = json.load(f)
470
+
471
+ # Validate input example
472
+ input_example_path = examples_path / "input.json"
473
+ if input_example_path.exists() and "input" in schema:
474
+ with open(input_example_path, 'r', encoding='utf-8') as f:
475
+ input_example = json.load(f)
476
+ try:
477
+ jsonschema.validate(instance=input_example, schema=schema["input"])
478
+ except jsonschema.ValidationError as e:
479
+ errors.append(f"Example input fails schema: {e.message}")
480
+
481
+ # Validate output example
482
+ output_example_path = examples_path / "output.json"
483
+ output_schema = schema.get("output", schema.get("data"))
484
+ if output_example_path.exists() and output_schema:
485
+ with open(output_example_path, 'r', encoding='utf-8') as f:
486
+ output_example = json.load(f)
487
+ try:
488
+ jsonschema.validate(instance=output_example, schema=output_schema)
489
+ except jsonschema.ValidationError as e:
490
+ errors.append(f"Example output fails schema: {e.message}")
491
+
492
+ # Check confidence
493
+ if "confidence" in output_example:
494
+ conf = output_example["confidence"]
495
+ if not (0 <= conf <= 1):
496
+ errors.append(f"Confidence must be 0-1, got: {conf}")
497
+
498
+ except (json.JSONDecodeError, KeyError):
499
+ pass
500
+
501
+
502
+ def validate_v22_envelope(response: dict) -> tuple[bool, list[str]]:
503
+ """
504
+ Validate a response against v2.2 envelope format.
505
+
506
+ Args:
507
+ response: The response dict to validate
256
508
 
257
- required_constraints = ['no_external_network', 'no_side_effects', 'no_inventing_data']
258
- if 'operational' in constraints:
259
- for constraint in required_constraints:
260
- if constraint not in constraints['operational']:
261
- warnings.append(f"Missing operational constraint: {constraint}")
262
- elif not constraints['operational'][constraint]:
263
- warnings.append(f"Constraint '{constraint}' is set to false")
264
- else:
265
- warnings.append("Missing 'operational' section in constraints")
266
- except yaml.YAMLError as e:
267
- errors.append(f"Invalid YAML in constraints.yaml: {e}")
509
+ Returns:
510
+ Tuple of (is_valid, errors)
511
+ """
512
+ errors = []
268
513
 
269
- # Check prompt.txt
270
- with open(module_path / "prompt.txt", 'r', encoding='utf-8') as f:
271
- prompt = f.read()
272
- if len(prompt) < 100:
273
- warnings.append("prompt.txt seems too short (< 100 chars)")
514
+ # Check ok field
515
+ if 'ok' not in response:
516
+ errors.append("Missing 'ok' field")
517
+ return False, errors
274
518
 
275
- # Validate example input against schema
276
- if input_schema:
277
- try:
278
- with open(examples_path / "input.json", 'r', encoding='utf-8') as f:
279
- example_input = json.load(f)
280
- jsonschema.validate(instance=example_input, schema=input_schema)
281
- except json.JSONDecodeError as e:
282
- errors.append(f"Invalid JSON in examples/input.json: {e}")
283
- except jsonschema.ValidationError as e:
284
- errors.append(f"Example input fails schema validation: {e.message}")
519
+ # Check meta
520
+ if 'meta' not in response:
521
+ errors.append("Missing 'meta' field (required for v2.2)")
522
+ else:
523
+ meta = response['meta']
524
+
525
+ if 'confidence' not in meta:
526
+ errors.append("meta missing 'confidence'")
527
+ elif not isinstance(meta['confidence'], (int, float)):
528
+ errors.append("meta.confidence must be a number")
529
+ elif not (0 <= meta['confidence'] <= 1):
530
+ errors.append("meta.confidence must be between 0 and 1")
531
+
532
+ if 'risk' not in meta:
533
+ errors.append("meta missing 'risk'")
534
+ elif meta['risk'] not in ['none', 'low', 'medium', 'high']:
535
+ errors.append(f"meta.risk must be none|low|medium|high, got: {meta['risk']}")
536
+
537
+ if 'explain' not in meta:
538
+ errors.append("meta missing 'explain'")
539
+ elif len(meta.get('explain', '')) > 280:
540
+ errors.append(f"meta.explain exceeds 280 chars ({len(meta['explain'])} chars)")
285
541
 
286
- # Validate example output against schema
287
- if output_schema:
288
- try:
289
- with open(examples_path / "output.json", 'r', encoding='utf-8') as f:
290
- example_output = json.load(f)
291
- jsonschema.validate(instance=example_output, schema=output_schema)
292
-
293
- if 'confidence' in example_output:
294
- conf = example_output['confidence']
295
- if not (0 <= conf <= 1):
296
- errors.append(f"Confidence must be between 0 and 1, got: {conf}")
297
- except json.JSONDecodeError as e:
298
- errors.append(f"Invalid JSON in examples/output.json: {e}")
299
- except jsonschema.ValidationError as e:
300
- errors.append(f"Example output fails schema validation: {e.message}")
542
+ # Check data or error
543
+ if response['ok']:
544
+ if 'data' not in response:
545
+ errors.append("Success response missing 'data' field")
546
+ else:
547
+ data = response['data']
548
+ if 'rationale' not in data:
549
+ errors.append("data missing 'rationale' (recommended for audit)")
550
+ else:
551
+ if 'error' not in response:
552
+ errors.append("Error response missing 'error' field")
553
+ else:
554
+ error = response['error']
555
+ if 'code' not in error:
556
+ errors.append("error missing 'code'")
557
+ if 'message' not in error:
558
+ errors.append("error missing 'message'")
301
559
 
302
- return len(errors) == 0, errors, warnings
560
+ return len(errors) == 0, errors