dotmd-parser 0.7.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {dotmd_parser-0.7.0/src/dotmd_parser.egg-info → dotmd_parser-0.9.0}/PKG-INFO +108 -2
  2. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/README.md +107 -1
  3. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/pyproject.toml +1 -1
  4. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/__init__.py +68 -1
  5. dotmd_parser-0.9.0/src/dotmd_parser/cache_order.py +60 -0
  6. dotmd_parser-0.9.0/src/dotmd_parser/checks.py +233 -0
  7. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/cli.py +206 -13
  8. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/index_md.py +27 -5
  9. dotmd_parser-0.9.0/src/dotmd_parser/ledger.py +156 -0
  10. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/parser.py +25 -11
  11. dotmd_parser-0.9.0/src/dotmd_parser/plan.py +256 -0
  12. dotmd_parser-0.9.0/src/dotmd_parser/scan.py +127 -0
  13. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0/src/dotmd_parser.egg-info}/PKG-INFO +108 -2
  14. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/SOURCES.txt +17 -0
  15. dotmd_parser-0.9.0/tests/test_cache_order.py +78 -0
  16. dotmd_parser-0.9.0/tests/test_checks.py +253 -0
  17. dotmd_parser-0.9.0/tests/test_cli_check.py +98 -0
  18. dotmd_parser-0.9.0/tests/test_cli_ledger.py +109 -0
  19. dotmd_parser-0.9.0/tests/test_cli_plan.py +78 -0
  20. dotmd_parser-0.9.0/tests/test_cli_resolve_scan.py +48 -0
  21. dotmd_parser-0.9.0/tests/test_cli_stability.py +64 -0
  22. dotmd_parser-0.9.0/tests/test_index_md_order.py +53 -0
  23. dotmd_parser-0.9.0/tests/test_ledger.py +164 -0
  24. dotmd_parser-0.9.0/tests/test_plan.py +352 -0
  25. dotmd_parser-0.9.0/tests/test_resolve_scan.py +51 -0
  26. dotmd_parser-0.9.0/tests/test_scan.py +116 -0
  27. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_skill_integration.py +1 -1
  28. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/LICENSE +0 -0
  29. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/setup.cfg +0 -0
  30. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/analyze.py +0 -0
  31. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/digest.py +0 -0
  32. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/index.py +0 -0
  33. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/inventory.py +0 -0
  34. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/openrag.py +0 -0
  35. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/SKILL.md +0 -0
  36. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/__init__.py +0 -0
  37. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/dotmd_index/SKILL.md +0 -0
  38. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/dotmd_index/__init__.py +0 -0
  39. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/prompts/__init__.py +0 -0
  40. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/prompts/analyze-dependencies.md +0 -0
  41. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/dependency_links.txt +0 -0
  42. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/entry_points.txt +0 -0
  43. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/requires.txt +0 -0
  44. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/top_level.txt +0 -0
  45. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_aggregate.py +0 -0
  46. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_analyze.py +0 -0
  47. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_cli_dotmd_index.py +0 -0
  48. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_cost_estimate.py +0 -0
  49. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_empty_warnings.py +0 -0
  50. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_host_agent_plan.py +0 -0
  51. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_index_md.py +0 -0
  52. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_index_scope.py +0 -0
  53. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_inventory.py +0 -0
  54. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_openrag_push.py +0 -0
  55. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_orchestrator_detection.py +0 -0
  56. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_parser.py +0 -0
  57. {dotmd_parser-0.7.0 → dotmd_parser-0.9.0}/tests/test_token_savings.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dotmd-parser
3
- Version: 0.7.0
3
+ Version: 0.9.0
4
4
  Summary: Dependency graph parser, single-file folder index (dotmd-index.md), and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, generate RAG-friendly overviews, and ingest into OpenRAG
5
5
  Author: dotmd-projects
6
6
  License-Expression: MIT
@@ -167,6 +167,24 @@ print(result["placeholders"]) # Unresolved {{variable}} names
167
167
  print(result["warnings"]) # Circular refs, missing files, etc.
168
168
  ```
169
169
 
170
+ #### Injection scanning
171
+
172
+ `resolve` scans content pulled in via `@include` for prompt-injection
173
+ patterns (role spoofing like `System:`, instruction overrides like "ignore
174
+ previous instructions"). Findings print to stderr; the expanded content is
175
+ unchanged by default.
176
+
177
+ ```bash
178
+ dotmd-parser resolve ./skill/SKILL.md # scan on, warn (default)
179
+ dotmd-parser resolve ./skill/SKILL.md --no-scan # disable scanning
180
+ dotmd-parser resolve ./skill/SKILL.md --scan-rule tool-exfil # add an opt-in rule
181
+ dotmd-parser resolve ./skill/SKILL.md --block # replace injected includes with a placeholder
182
+ ```
183
+
184
+ The root/entry file is trusted and not scanned — only `@include`-pulled
185
+ files are. Matches inside fenced code blocks are ignored, and
186
+ `<!-- dotmd-allow: role-spoof -->` (or `all`) in a file suppresses that rule.
187
+
170
188
  ### dependents_of — Reverse dependency query
171
189
 
172
190
  ```python
@@ -221,11 +239,12 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
221
239
  | `dotmd-parser dotmd-index <path> --push-openrag` | After writing, ingest into OpenRAG (`pip install dotmd-parser[openrag]`) |
222
240
  | `dotmd-parser index <path>` | Build and save `.claude/dotmd-index.json` |
223
241
  | `dotmd-parser index <path> --scope <subdir>` | Incrementally re-index one subfolder, merge into the existing index |
224
- | `dotmd-parser check <path>` | Exit non-zero on cycles / missing refs (CI-friendly) |
242
+ | `dotmd-parser check <path>` | Health gate (CI): cycles, missing refs, unresolved placeholders, conflicts |
225
243
  | `dotmd-parser affects <path> <file>` | Reverse dependencies of `<file>` |
226
244
  | `dotmd-parser deps <path> <file>` | Direct dependencies of `<file>` |
227
245
  | `dotmd-parser digest <path>` | Token-efficient text summary for LLM context |
228
246
  | `dotmd-parser tree <path>` | ASCII dependency tree |
247
+ | `dotmd-parser plan <path>` | Parallel delegation plan (JSON) |
229
248
  | `dotmd-parser resolve <file> [--var k=v]` | Recursively expand `@include` |
230
249
  | `dotmd-parser analyze <path>` | AI dependency detection (requires `ANTHROPIC_API_KEY`) |
231
250
  | `dotmd-parser analyze <path> --dry-run` | **API-free**: estimate tokens and USD cost |
@@ -243,6 +262,71 @@ dotmd-parser digest ./my-skill/ # compact summary for the LLM
243
262
  dotmd-parser affects ./my-skill/ shared/role.md
244
263
  ```
245
264
 
265
+ ### `ledger` / `risk` — edit-risk governance
266
+
267
+ Record per-file risk history in an append-only JSONL ledger
268
+ (`.claude/dotmd-ledger.jsonl`) and query it before editing. `risk` combines
269
+ reverse-dependency impact (`affects`) with active risk tags (ledger replay ∪
270
+ frontmatter `risk:`).
271
+
272
+ ```bash
273
+ dotmd-parser ledger add . shared/role.md --tag fix-failed --note "retry hung"
274
+ dotmd-parser ledger clear . shared/role.md --tag fix-failed # or --all
275
+ dotmd-parser risk . shared/role.md # text report
276
+ dotmd-parser risk . shared/role.md --json
277
+ ```
278
+
279
+ Tags: `fix-failed`, `fragile`, `security-sensitive`, `deprecated` (the first
280
+ two are "high"). `--fail-on high|any|never` controls the exit code, so a
281
+ PreToolUse hook can warn before risky edits:
282
+
283
+ ```bash
284
+ dotmd-parser risk . "$FILE_PATH" --fail-on high \
285
+ || echo "[dotmd] high-risk file (last fix failed / security-sensitive) — review before editing"
286
+ ```
287
+
288
+ ### `check` — guidance health gate (CI)
289
+
290
+ Deterministic health check over the dependency graph. Detects cycles and
291
+ missing references (errors), plus unresolved `{{placeholders}}` and
292
+ conflicting directives (warnings). Optionally flags orphan files.
293
+
294
+ ```bash
295
+ dotmd-parser check ./my-skill # text, fails on errors
296
+ dotmd-parser check ./my-skill --fail-on warning # also fail on warnings
297
+ dotmd-parser check ./my-skill --format json
298
+ dotmd-parser check ./my-skill --format sarif --out dotmd.sarif
299
+ dotmd-parser check ./my-skill --check orphans # opt-in orphan detection
300
+ ```
301
+
302
+ `--fail-on` chooses the exit-code threshold (`error` default, `warning`, or
303
+ `never`). Use `--format sarif` with GitHub's `upload-sarif` action to get
304
+ inline PR annotations:
305
+
306
+ ```yaml
307
+ - run: dotmd-parser check . --format sarif --out dotmd.sarif --fail-on never
308
+ - uses: github/codeql-action/upload-sarif@v3
309
+ with: { sarif_file: dotmd.sarif }
310
+ - run: dotmd-parser check . --fail-on warning # gate the PR
311
+ ```
312
+ ### `plan` — parallel delegation plan
313
+
314
+ Generate a static execution plan from the `@delegate` graph: topological
315
+ batches (parallel levels), per-task subtree context, plus conflict and cycle
316
+ pre-detection. Intended for a parent agent that fans out subagents.
317
+
318
+ ```bash
319
+ dotmd-parser plan ./my-skill # plan(JSON) to stdout
320
+ dotmd-parser plan ./my-skill --ascii # human-readable view
321
+ dotmd-parser plan ./my-skill --out plan.json
322
+ dotmd-parser plan ./my-skill --strict # exit 1 on cycles/conflicts (CI)
323
+ ```
324
+
325
+ Each task in the JSON carries a `context` array (the subtree files to hand the
326
+ subagent). Same-batch shared dependencies are reported in `conflicts[]` as
327
+ warnings — the batch stays parallel. Mutual `@delegate` references are reported
328
+ in `cycles[]` and excluded from batches.
329
+
246
330
  ## `dotmd-index.md` — folder overview in a single file
247
331
 
248
332
  `dotmd-parser dotmd-index <path>` writes `<path>/dotmd-index.md`, a
@@ -305,6 +389,28 @@ dotmd-parser dotmd-index ./docs/ --push-openrag
305
389
  **search index** (full-content semantic retrieval). Register OpenRAG's
306
390
  MCP server with Claude Code to use both surfaces from the same client.
307
391
 
392
+ ### Cache-affine order (`--order cache`)
393
+
394
+ `dotmd-index --order cache` lists the `## Files` section with the
395
+ least-frequently-changed files first (estimated from git history), so the
396
+ generated `dotmd-index.md` keeps a stable prefix across regenerations — better
397
+ KV-cache reuse for LLMs that read it. Default `--order alpha` is unchanged.
398
+
399
+ ```bash
400
+ dotmd-parser dotmd-index ./skill --order cache
401
+ dotmd-parser dotmd-index ./skill --order cache --stdout
402
+ ```
403
+
404
+ Measure the effect with `stability` (compare two generations):
405
+
406
+ ```bash
407
+ dotmd-parser stability old-index.md new-index.md # prefix stable: 42/50 lines (0.84)
408
+ dotmd-parser stability old-index.md new-index.md --json
409
+ ```
410
+
411
+ Outside a git repo (or for untracked files) frequency is treated as 0, so
412
+ `cache` degrades gracefully to alphabetical order.
413
+
308
414
  ## `analyze` — AI-assisted dependency detection
309
415
 
310
416
  Use when a folder of markdown has **no explicit directives yet**. `analyze`
@@ -130,6 +130,24 @@ print(result["placeholders"]) # Unresolved {{variable}} names
130
130
  print(result["warnings"]) # Circular refs, missing files, etc.
131
131
  ```
132
132
 
133
+ #### Injection scanning
134
+
135
+ `resolve` scans content pulled in via `@include` for prompt-injection
136
+ patterns (role spoofing like `System:`, instruction overrides like "ignore
137
+ previous instructions"). Findings print to stderr; the expanded content is
138
+ unchanged by default.
139
+
140
+ ```bash
141
+ dotmd-parser resolve ./skill/SKILL.md # scan on, warn (default)
142
+ dotmd-parser resolve ./skill/SKILL.md --no-scan # disable scanning
143
+ dotmd-parser resolve ./skill/SKILL.md --scan-rule tool-exfil # add an opt-in rule
144
+ dotmd-parser resolve ./skill/SKILL.md --block # replace injected includes with a placeholder
145
+ ```
146
+
147
+ The root/entry file is trusted and not scanned — only `@include`-pulled
148
+ files are. Matches inside fenced code blocks are ignored, and
149
+ `<!-- dotmd-allow: role-spoof -->` (or `all`) in a file suppresses that rule.
150
+
133
151
  ### dependents_of — Reverse dependency query
134
152
 
135
153
  ```python
@@ -184,11 +202,12 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
184
202
  | `dotmd-parser dotmd-index <path> --push-openrag` | After writing, ingest into OpenRAG (`pip install dotmd-parser[openrag]`) |
185
203
  | `dotmd-parser index <path>` | Build and save `.claude/dotmd-index.json` |
186
204
  | `dotmd-parser index <path> --scope <subdir>` | Incrementally re-index one subfolder, merge into the existing index |
187
- | `dotmd-parser check <path>` | Exit non-zero on cycles / missing refs (CI-friendly) |
205
+ | `dotmd-parser check <path>` | Health gate (CI): cycles, missing refs, unresolved placeholders, conflicts |
188
206
  | `dotmd-parser affects <path> <file>` | Reverse dependencies of `<file>` |
189
207
  | `dotmd-parser deps <path> <file>` | Direct dependencies of `<file>` |
190
208
  | `dotmd-parser digest <path>` | Token-efficient text summary for LLM context |
191
209
  | `dotmd-parser tree <path>` | ASCII dependency tree |
210
+ | `dotmd-parser plan <path>` | Parallel delegation plan (JSON) |
192
211
  | `dotmd-parser resolve <file> [--var k=v]` | Recursively expand `@include` |
193
212
  | `dotmd-parser analyze <path>` | AI dependency detection (requires `ANTHROPIC_API_KEY`) |
194
213
  | `dotmd-parser analyze <path> --dry-run` | **API-free**: estimate tokens and USD cost |
@@ -206,6 +225,71 @@ dotmd-parser digest ./my-skill/ # compact summary for the LLM
206
225
  dotmd-parser affects ./my-skill/ shared/role.md
207
226
  ```
208
227
 
228
+ ### `ledger` / `risk` — edit-risk governance
229
+
230
+ Record per-file risk history in an append-only JSONL ledger
231
+ (`.claude/dotmd-ledger.jsonl`) and query it before editing. `risk` combines
232
+ reverse-dependency impact (`affects`) with active risk tags (ledger replay ∪
233
+ frontmatter `risk:`).
234
+
235
+ ```bash
236
+ dotmd-parser ledger add . shared/role.md --tag fix-failed --note "retry hung"
237
+ dotmd-parser ledger clear . shared/role.md --tag fix-failed # or --all
238
+ dotmd-parser risk . shared/role.md # text report
239
+ dotmd-parser risk . shared/role.md --json
240
+ ```
241
+
242
+ Tags: `fix-failed`, `fragile`, `security-sensitive`, `deprecated` (the first
243
+ two are "high"). `--fail-on high|any|never` controls the exit code, so a
244
+ PreToolUse hook can warn before risky edits:
245
+
246
+ ```bash
247
+ dotmd-parser risk . "$FILE_PATH" --fail-on high \
248
+ || echo "[dotmd] high-risk file (last fix failed / security-sensitive) — review before editing"
249
+ ```
250
+
251
+ ### `check` — guidance health gate (CI)
252
+
253
+ Deterministic health check over the dependency graph. Detects cycles and
254
+ missing references (errors), plus unresolved `{{placeholders}}` and
255
+ conflicting directives (warnings). Optionally flags orphan files.
256
+
257
+ ```bash
258
+ dotmd-parser check ./my-skill # text, fails on errors
259
+ dotmd-parser check ./my-skill --fail-on warning # also fail on warnings
260
+ dotmd-parser check ./my-skill --format json
261
+ dotmd-parser check ./my-skill --format sarif --out dotmd.sarif
262
+ dotmd-parser check ./my-skill --check orphans # opt-in orphan detection
263
+ ```
264
+
265
+ `--fail-on` chooses the exit-code threshold (`error` default, `warning`, or
266
+ `never`). Use `--format sarif` with GitHub's `upload-sarif` action to get
267
+ inline PR annotations:
268
+
269
+ ```yaml
270
+ - run: dotmd-parser check . --format sarif --out dotmd.sarif --fail-on never
271
+ - uses: github/codeql-action/upload-sarif@v3
272
+ with: { sarif_file: dotmd.sarif }
273
+ - run: dotmd-parser check . --fail-on warning # gate the PR
274
+ ```
275
+ ### `plan` — parallel delegation plan
276
+
277
+ Generate a static execution plan from the `@delegate` graph: topological
278
+ batches (parallel levels), per-task subtree context, plus conflict and cycle
279
+ pre-detection. Intended for a parent agent that fans out subagents.
280
+
281
+ ```bash
282
+ dotmd-parser plan ./my-skill # plan(JSON) to stdout
283
+ dotmd-parser plan ./my-skill --ascii # human-readable view
284
+ dotmd-parser plan ./my-skill --out plan.json
285
+ dotmd-parser plan ./my-skill --strict # exit 1 on cycles/conflicts (CI)
286
+ ```
287
+
288
+ Each task in the JSON carries a `context` array (the subtree files to hand the
289
+ subagent). Same-batch shared dependencies are reported in `conflicts[]` as
290
+ warnings — the batch stays parallel. Mutual `@delegate` references are reported
291
+ in `cycles[]` and excluded from batches.
292
+
209
293
  ## `dotmd-index.md` — folder overview in a single file
210
294
 
211
295
  `dotmd-parser dotmd-index <path>` writes `<path>/dotmd-index.md`, a
@@ -268,6 +352,28 @@ dotmd-parser dotmd-index ./docs/ --push-openrag
268
352
  **search index** (full-content semantic retrieval). Register OpenRAG's
269
353
  MCP server with Claude Code to use both surfaces from the same client.
270
354
 
355
+ ### Cache-affine order (`--order cache`)
356
+
357
+ `dotmd-index --order cache` lists the `## Files` section with the
358
+ least-frequently-changed files first (estimated from git history), so the
359
+ generated `dotmd-index.md` keeps a stable prefix across regenerations — better
360
+ KV-cache reuse for LLMs that read it. Default `--order alpha` is unchanged.
361
+
362
+ ```bash
363
+ dotmd-parser dotmd-index ./skill --order cache
364
+ dotmd-parser dotmd-index ./skill --order cache --stdout
365
+ ```
366
+
367
+ Measure the effect with `stability` (compare two generations):
368
+
369
+ ```bash
370
+ dotmd-parser stability old-index.md new-index.md # prefix stable: 42/50 lines (0.84)
371
+ dotmd-parser stability old-index.md new-index.md --json
372
+ ```
373
+
374
+ Outside a git repo (or for untracked files) frequency is treated as 0, so
375
+ `cache` degrades gracefully to alphabetical order.
376
+
271
377
  ## `analyze` — AI-assisted dependency detection
272
378
 
273
379
  Use when a folder of markdown has **no explicit directives yet**. `analyze`
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dotmd-parser"
3
- version = "0.7.0"
3
+ version = "0.9.0"
4
4
  description = "Dependency graph parser, single-file folder index (dotmd-index.md), and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, generate RAG-friendly overviews, and ingest into OpenRAG"
5
5
  requires-python = ">=3.10"
6
6
  license = "MIT"
@@ -10,7 +10,7 @@ API:
10
10
  from dotmd_parser import digest, tree, affects
11
11
  """
12
12
 
13
- __version__ = "0.7.0"
13
+ __version__ = "0.9.0"
14
14
 
15
15
  from dotmd_parser.parser import (
16
16
  build_graph,
@@ -70,7 +70,43 @@ from dotmd_parser.index_md import (
70
70
  DEFAULT_INDEX_FILENAME,
71
71
  INDEX_MD_SCHEMA,
72
72
  )
73
+ from dotmd_parser.cache_order import (
74
+ git_change_counts,
75
+ order_key,
76
+ prefix_stability,
77
+ )
78
+ from dotmd_parser.ledger import (
79
+ append_event,
80
+ read_events,
81
+ active_tags,
82
+ static_tags,
83
+ all_active_tags,
84
+ risk_level,
85
+ risk_report,
86
+ default_ledger_path,
87
+ RISK_TAGS,
88
+ HIGH_TAGS,
89
+ )
90
+ from dotmd_parser.checks import (
91
+ run_checks,
92
+ summarize,
93
+ exit_code,
94
+ format_text,
95
+ format_json,
96
+ format_sarif,
97
+ CHECK_SCHEMA,
98
+ )
73
99
  from dotmd_parser.openrag import push_to_openrag
100
+ from dotmd_parser.scan import (
101
+ scan_content,
102
+ DEFAULT_RULES,
103
+ OPTIONAL_RULES,
104
+ ALL_RULES,
105
+ )
106
+ from dotmd_parser.plan import (
107
+ build_plan,
108
+ render_ascii,
109
+ )
74
110
 
75
111
  __all__ = [
76
112
  "__version__",
@@ -126,6 +162,37 @@ __all__ = [
126
162
  "extract_frontmatter",
127
163
  "DEFAULT_INDEX_FILENAME",
128
164
  "INDEX_MD_SCHEMA",
165
+ # cache_order
166
+ "git_change_counts",
167
+ "order_key",
168
+ "prefix_stability",
169
+ # ledger
170
+ "append_event",
171
+ "read_events",
172
+ "active_tags",
173
+ "static_tags",
174
+ "all_active_tags",
175
+ "risk_level",
176
+ "risk_report",
177
+ "default_ledger_path",
178
+ "RISK_TAGS",
179
+ "HIGH_TAGS",
180
+ # checks
181
+ "run_checks",
182
+ "summarize",
183
+ "exit_code",
184
+ "format_text",
185
+ "format_json",
186
+ "format_sarif",
187
+ "CHECK_SCHEMA",
129
188
  # openrag
130
189
  "push_to_openrag",
190
+ # scan
191
+ "scan_content",
192
+ "DEFAULT_RULES",
193
+ "OPTIONAL_RULES",
194
+ "ALL_RULES",
195
+ # plan
196
+ "build_plan",
197
+ "render_ascii",
131
198
  ]
@@ -0,0 +1,60 @@
1
+ """
2
+ dotmd-parser — cache-affine ordering helpers.
3
+
4
+ Estimates per-file change frequency from git history (with a safe fallback)
5
+ and provides an ordering key that puts low-frequency files first, so the
6
+ `dotmd-index.md` body prefix stays stable across regenerations (KV-cache
7
+ friendly). Also a prefix-stability metric. Pure stdlib; git via subprocess.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import shutil
13
+ import subprocess
14
+ from pathlib import Path
15
+
16
+
17
def git_change_counts(root: str | Path) -> dict[str, int]:
    """Count how many commits touched each file below ``root``.

    Runs ``git log --name-only --relative`` inside ``root`` and tallies one hit
    per path line that git prints.

    Args:
        root: Directory passed to ``git -C``.

    Returns:
        ``{path_as_printed_by_git: commit_count}``. An empty dict is returned
        when ``git`` is not on PATH, the process cannot be started, or git
        exits with a non-zero status.
    """
    if shutil.which("git") is None:
        return {}
    try:
        result = subprocess.run(
            ["git", "-C", str(root), "-c", "core.quotepath=false",
             "log", "--format=", "--name-only", "--relative", "--", "."],
            capture_output=True,
            text=True,
            # core.quotepath=false makes git print non-ASCII path bytes
            # verbatim, so decode them as UTF-8 rather than with the locale
            # encoding; errors="replace" keeps an undecodable name from
            # raising UnicodeDecodeError (which is not an OSError) and
            # breaking the "empty dict on failure" fallback.
            encoding="utf-8",
            errors="replace",
        )
    except OSError:
        return {}
    if result.returncode != 0:
        return {}
    counts: dict[str, int] = {}
    for line in result.stdout.splitlines():
        rel = line.strip()
        if rel:  # commits are separated by blank lines; skip them
            counts[rel] = counts.get(rel, 0) + 1
    return counts
38
+
39
+
40
def order_key(rel: str, counts: dict[str, int]) -> tuple[int, str]:
    """Return the sort key for ``rel``: (change count, path).

    Paths absent from ``counts`` are given a count of 0, so they sort ahead of
    files with recorded changes; the path itself breaks ties.
    """
    change_count = counts[rel] if rel in counts else 0
    return change_count, rel
43
+
44
+
45
def prefix_stability(old_text: str, new_text: str) -> dict:
    """Report how many leading lines of ``new_text`` are identical in ``old_text``.

    Both texts are split on ``"\\n"``; lines are compared pairwise from the top
    and counting stops at the first difference (or when either text runs out).

    Returns:
        A dict with ``common_prefix_lines`` (matching leading lines),
        ``new_lines`` (line count of ``new_text``) and ``ratio`` (their
        quotient, rounded to 4 decimals).
    """
    previous = old_text.split("\n")
    current = new_text.split("\n")

    matched = 0
    comparable = min(len(previous), len(current))
    while matched < comparable and previous[matched] == current[matched]:
        matched += 1

    line_total = len(current)
    # Guard the division exactly like max(line_total, 1) would.
    ratio = round(matched / (line_total or 1), 4)
    return {
        "common_prefix_lines": matched,
        "new_lines": line_total,
        "ratio": ratio,
    }
@@ -0,0 +1,233 @@
1
+ """
2
+ dotmd-parser — guidance health checks (deterministic CI gate).
3
+
4
+ Consumes a compact index (from `index.build_index` / `index.load_index`) and
5
+ produces a flat list of Finding dicts, rendered as text / JSON / SARIF. Pure,
6
+ stdlib-only, no LLM. The raw graph / parser are not touched.
7
+
8
+ Finding shape:
9
+ {"rule": str, "severity": "error"|"warning", "path": str,
10
+ "message": str, "line": int | None}
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from pathlib import Path
17
+
18
+ CHECK_SCHEMA = "dotmd-check/v1"
19
+
20
+ _GRAPH_WARNING_RULES = {
21
+ "depth_exceeded": "depth-exceeded",
22
+ "read_error": "read-error",
23
+ }
24
+
25
+
26
+ def _finding(rule: str, severity: str, path: str, message: str,
27
+ line: int | None = None) -> dict:
28
+ return {"rule": rule, "severity": severity, "path": path,
29
+ "message": message, "line": line}
30
+
31
+
32
def _circular_findings(index: dict) -> list[dict]:
    """Turn every message under ``index["cycles"]`` into a ``circular`` error.

    The path of each finding is left empty; only the message is carried over.
    """
    collected: list[dict] = []
    for cycle_message in index.get("cycles", []):
        collected.append(_finding("circular", "error", "", cycle_message))
    return collected
38
+
39
+
40
def _missing_findings(index: dict) -> list[dict]:
    """Emit a ``missing-reference`` error for each path in ``index["missing"]``."""
    collected: list[dict] = []
    for missing_rel in index.get("missing", []):
        collected.append(
            _finding("missing-reference", "error", missing_rel,
                     "referenced file does not exist")
        )
    return collected
47
+
48
+
49
def _graph_warning_findings(index: dict) -> list[dict]:
    """Convert selected entries of ``index["warnings"]`` into error findings.

    Only warnings whose ``type`` is a key of ``_GRAPH_WARNING_RULES`` are kept;
    the mapped value becomes the finding's rule id. Other types are ignored.
    """
    promoted: list[dict] = []
    for entry in index.get("warnings", []):
        kind = entry.get("type", "")
        if kind in _GRAPH_WARNING_RULES:
            promoted.append(
                _finding(
                    _GRAPH_WARNING_RULES[kind],
                    "error",
                    entry.get("path", ""),
                    entry.get("message", ""),
                )
            )
    return promoted
59
+
60
+
61
def _placeholder_findings(index: dict) -> list[dict]:
    """Emit an ``unresolved-placeholder`` warning per placeholder name.

    Files are visited in sorted path order and, within a file, placeholder
    names in sorted order, so the output order is deterministic. A missing or
    empty ``placeholders`` entry contributes nothing.
    """
    file_table = index.get("files", {})
    collected: list[dict] = []
    for rel in sorted(file_table):
        names = file_table[rel].get("placeholders") or []
        collected.extend(
            _finding(
                "unresolved-placeholder", "warning", rel,
                f"unresolved placeholder: {{{{{var}}}}}",
            )
            for var in sorted(names)
        )
    return collected
72
+
73
+
74
+ _EXPLICIT_DIRECTIVE_TYPES = {"include", "ref", "delegate"}
75
+
76
+
77
def _conflicting_directive_findings(index: dict) -> list[dict]:
    """Warn when one file points at the same target with several directive types.

    For every file, its ``deps`` are grouped by ``to`` target, keeping only
    dependency types listed in ``_EXPLICIT_DIRECTIVE_TYPES`` and non-empty
    targets. A target reached through two or more distinct types yields one
    ``conflicting-directive`` warning attributed to the source file.
    """
    file_table = index.get("files", {})
    collected: list[dict] = []
    for rel in sorted(file_table):
        kinds_by_target: dict[str, set[str]] = {}
        for dep in file_table[rel].get("deps", []):
            kind = dep.get("type", "")
            target = dep.get("to", "")
            if kind in _EXPLICIT_DIRECTIVE_TYPES and target:
                if target not in kinds_by_target:
                    kinds_by_target[target] = set()
                kinds_by_target[target].add(kind)

        for target in sorted(kinds_by_target):
            kinds = kinds_by_target[target]
            if len(kinds) > 1:
                joined = ", ".join(sorted(kinds))
                collected.append(_finding(
                    "conflicting-directive", "warning", rel,
                    f"{target} is referenced by multiple directive types ({joined})",
                ))
    return collected
100
+
101
+
102
def _orphan_findings(index: dict, root: str | None) -> list[dict]:
    """Warn about ``.md`` files on disk that are not entries of ``index["files"]``.

    Args:
        index: Compact index; only the keys of its ``files`` mapping are used.
        root: Directory to scan, or a file whose parent directory is scanned.
            ``None`` (or a path that is not a directory) disables the check.

    Returns:
        One ``orphan-file`` warning per markdown file found under the scan
        directory whose POSIX-style relative path is not a key of
        ``index["files"]``. Files with any dot-prefixed path component or
        located inside a ``node_modules`` directory are skipped.
    """
    if root is None:
        return []
    base = Path(root)
    if base.is_file():
        base = base.parent
    if not base.is_dir():
        return []
    node_set = set(index.get("files", {}))
    out: list[dict] = []
    for path in sorted(base.rglob("*.md")):
        rel_path = path.relative_to(base)
        parts = rel_path.parts
        if any(part.startswith(".") for part in parts):
            continue
        # Compare whole path components: a substring test on the joined path
        # would also skip unrelated files such as "node_modules_guide.md".
        if "node_modules" in parts:
            continue
        if not path.is_file():
            continue
        rel = rel_path.as_posix()
        if rel not in node_set:
            out.append(_finding("orphan-file", "warning", rel,
                                "file is not referenced by any node"))
    return out
126
+
127
+
128
def run_checks(index: dict, root: str | None = None,
               enable_orphans: bool = False) -> list[dict]:
    """Run every enabled check against ``index`` and concatenate the findings.

    The always-on checks run in a fixed order (cycles, missing references,
    promoted graph warnings, placeholders, conflicting directives). Orphan
    detection is appended last and only when ``enable_orphans`` is true; it is
    the only check that receives ``root``.
    """
    always_on = (
        _circular_findings,
        _missing_findings,
        _graph_warning_findings,
        _placeholder_findings,
        _conflicting_directive_findings,
    )
    collected: list[dict] = []
    for check in always_on:
        collected.extend(check(index))
    if enable_orphans:
        collected.extend(_orphan_findings(index, root))
    return collected
140
+
141
+
142
def summarize(findings: list[dict]) -> dict:
    """Tally findings into ``{"errors": n, "warnings": m}`` by their severity.

    Findings whose severity is neither ``"error"`` nor ``"warning"`` (or is
    absent) are not counted.
    """
    tally = {"errors": 0, "warnings": 0}
    for item in findings:
        level = item.get("severity")
        if level == "error":
            tally["errors"] += 1
        elif level == "warning":
            tally["warnings"] += 1
    return tally
147
+
148
+
149
def exit_code(findings: list[dict], fail_on: str) -> int:
    """Translate findings into a process exit status for the given threshold.

    ``"never"`` always yields 0. ``"warning"`` yields 1 when any finding is an
    error or a warning. Any other value behaves like ``"error"``: 1 only when
    at least one error is present.
    """
    # Severities are collected up front so every finding is inspected
    # regardless of the threshold.
    levels = [item.get("severity") for item in findings]
    if fail_on == "never":
        return 0
    blocking = ("error", "warning") if fail_on == "warning" else ("error",)
    return 1 if any(level in blocking for level in levels) else 0
158
+
159
+
160
def format_text(findings: list[dict], index: dict) -> str:
    """Render a one-line summary followed by one detail line per finding.

    The summary shows the file and edge counts from ``index["stats"]``
    (defaulting to 0) and the error/warning totals. Findings without a path
    show ``-`` as their location.
    """
    stats = index.get("stats", {})
    totals = summarize(findings)
    header = (
        f"{stats.get('files', 0)} files, {stats.get('edges', 0)} edges — "
        f"errors:{totals['errors']} warnings:{totals['warnings']}"
    )
    # NOTE(review): the leading whitespace of the detail literal is reproduced
    # as it appears in the rendered diff — confirm against the real file.
    details = [
        f" [{f['severity'].upper()}] {f['rule']}: {f.get('path') or '-'} — {f['message']}"
        for f in findings
    ]
    return "\n".join([header, *details])
174
+
175
+
176
def format_json(findings: list[dict], index: dict) -> str:
    """Serialize findings as a JSON document tagged with ``CHECK_SCHEMA``.

    The payload carries the index root, a ``stats`` object (file/edge counts
    from the index plus error/warning totals) and the findings list unchanged.
    Output is indented by 2 and keeps non-ASCII characters unescaped.
    """
    index_stats = index.get("stats", {})
    totals = summarize(findings)
    stats_block = {
        "files": index_stats.get("files", 0),
        "edges": index_stats.get("edges", 0),
        "errors": totals["errors"],
        "warnings": totals["warnings"],
    }
    document = {
        "schema": CHECK_SCHEMA,
        "root": index.get("root", ""),
        "stats": stats_block,
        "findings": findings,
    }
    return json.dumps(document, ensure_ascii=False, indent=2)
192
+
193
+
194
+ def _camel(rule_id: str) -> str:
195
+ parts = rule_id.split("-")
196
+ return parts[0] + "".join(p.capitalize() for p in parts[1:])
197
+
198
+
199
def format_sarif(findings: list[dict], index: dict) -> str:
    """Serialize findings as a SARIF 2.1.0 JSON document with a single run.

    Each finding becomes one result (rule id, severity as ``level``, message).
    A location is attached only when the finding has a non-empty path, and a
    ``startLine`` region only when it also has a truthy line. The driver's
    rule list holds one entry per distinct rule id, in first-seen order.
    ``index`` is accepted for signature parity with the other formatters and
    is not read here.
    """
    # Imported here rather than at module level to avoid an import cycle.
    from dotmd_parser import __version__

    rule_table: dict[str, dict] = {}
    results: list[dict] = []
    for item in findings:
        rule_id = item["rule"]
        if rule_id not in rule_table:
            rule_table[rule_id] = {"id": rule_id, "name": _camel(rule_id)}

        entry: dict = {
            "ruleId": rule_id,
            "level": item["severity"],
            "message": {"text": item["message"]},
        }
        if item.get("path"):
            physical: dict = {"artifactLocation": {"uri": item["path"]}}
            if item.get("line"):
                physical["region"] = {"startLine": item["line"]}
            entry["locations"] = [{"physicalLocation": physical}]
        results.append(entry)

    driver = {
        "name": "dotmd-parser",
        "informationUri": "https://github.com/dotmd-projects/dotmd-parser",
        "version": __version__,
        "rules": list(rule_table.values()),
    }
    document = {
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [{
            "tool": {"driver": driver},
            "results": results,
        }],
    }
    return json.dumps(document, ensure_ascii=False, indent=2)