dotmd-parser 0.6.2__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dotmd_parser-0.6.2/src/dotmd_parser.egg-info → dotmd_parser-0.9.0}/PKG-INFO +108 -2
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/README.md +107 -1
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/pyproject.toml +1 -1
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/__init__.py +68 -1
- dotmd_parser-0.9.0/src/dotmd_parser/cache_order.py +60 -0
- dotmd_parser-0.9.0/src/dotmd_parser/checks.py +233 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/cli.py +206 -13
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/index_md.py +27 -5
- dotmd_parser-0.9.0/src/dotmd_parser/ledger.py +156 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/parser.py +36 -14
- dotmd_parser-0.9.0/src/dotmd_parser/plan.py +256 -0
- dotmd_parser-0.9.0/src/dotmd_parser/scan.py +127 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0/src/dotmd_parser.egg-info}/PKG-INFO +108 -2
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/SOURCES.txt +18 -0
- dotmd_parser-0.9.0/tests/test_cache_order.py +78 -0
- dotmd_parser-0.9.0/tests/test_checks.py +253 -0
- dotmd_parser-0.9.0/tests/test_cli_check.py +98 -0
- dotmd_parser-0.9.0/tests/test_cli_ledger.py +109 -0
- dotmd_parser-0.9.0/tests/test_cli_plan.py +78 -0
- dotmd_parser-0.9.0/tests/test_cli_resolve_scan.py +48 -0
- dotmd_parser-0.9.0/tests/test_cli_stability.py +64 -0
- dotmd_parser-0.9.0/tests/test_index_md_order.py +53 -0
- dotmd_parser-0.9.0/tests/test_ledger.py +164 -0
- dotmd_parser-0.9.0/tests/test_orchestrator_detection.py +72 -0
- dotmd_parser-0.9.0/tests/test_plan.py +352 -0
- dotmd_parser-0.9.0/tests/test_resolve_scan.py +51 -0
- dotmd_parser-0.9.0/tests/test_scan.py +116 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_skill_integration.py +1 -1
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/LICENSE +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/setup.cfg +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/analyze.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/digest.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/index.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/inventory.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/openrag.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/SKILL.md +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/__init__.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/dotmd_index/SKILL.md +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/dotmd_index/__init__.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/prompts/__init__.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser/templates/prompts/analyze-dependencies.md +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/dependency_links.txt +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/entry_points.txt +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/requires.txt +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/src/dotmd_parser.egg-info/top_level.txt +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_aggregate.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_analyze.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_cli_dotmd_index.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_cost_estimate.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_empty_warnings.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_host_agent_plan.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_index_md.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_index_scope.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_inventory.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_openrag_push.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_parser.py +0 -0
- {dotmd_parser-0.6.2 → dotmd_parser-0.9.0}/tests/test_token_savings.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dotmd-parser
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Dependency graph parser, single-file folder index (dotmd-index.md), and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, generate RAG-friendly overviews, and ingest into OpenRAG
|
|
5
5
|
Author: dotmd-projects
|
|
6
6
|
License-Expression: MIT
|
|
@@ -167,6 +167,24 @@ print(result["placeholders"]) # Unresolved {{variable}} names
|
|
|
167
167
|
print(result["warnings"]) # Circular refs, missing files, etc.
|
|
168
168
|
```
|
|
169
169
|
|
|
170
|
+
#### Injection scanning
|
|
171
|
+
|
|
172
|
+
`resolve` scans content pulled in via `@include` for prompt-injection
|
|
173
|
+
patterns (role spoofing like `System:`, instruction overrides like "ignore
|
|
174
|
+
previous instructions"). Findings print to stderr; the expanded content is
|
|
175
|
+
unchanged by default.
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
dotmd-parser resolve ./skill/SKILL.md # scan on, warn (default)
|
|
179
|
+
dotmd-parser resolve ./skill/SKILL.md --no-scan # disable scanning
|
|
180
|
+
dotmd-parser resolve ./skill/SKILL.md --scan-rule tool-exfil # add an opt-in rule
|
|
181
|
+
dotmd-parser resolve ./skill/SKILL.md --block # replace injected includes with a placeholder
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
The root/entry file is trusted and not scanned — only `@include`-pulled
|
|
185
|
+
files are. Matches inside fenced code blocks are ignored, and
|
|
186
|
+
`<!-- dotmd-allow: role-spoof -->` (or `all`) in a file suppresses that rule.
|
|
187
|
+
|
|
170
188
|
### dependents_of — Reverse dependency query
|
|
171
189
|
|
|
172
190
|
```python
|
|
@@ -221,11 +239,12 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
|
|
|
221
239
|
| `dotmd-parser dotmd-index <path> --push-openrag` | After writing, ingest into OpenRAG (`pip install dotmd-parser[openrag]`) |
|
|
222
240
|
| `dotmd-parser index <path>` | Build and save `.claude/dotmd-index.json` |
|
|
223
241
|
| `dotmd-parser index <path> --scope <subdir>` | Incrementally re-index one subfolder, merge into the existing index |
|
|
224
|
-
| `dotmd-parser check <path>` |
|
|
242
|
+
| `dotmd-parser check <path>` | Health gate (CI): cycles, missing refs, unresolved placeholders, conflicts |
|
|
225
243
|
| `dotmd-parser affects <path> <file>` | Reverse dependencies of `<file>` |
|
|
226
244
|
| `dotmd-parser deps <path> <file>` | Direct dependencies of `<file>` |
|
|
227
245
|
| `dotmd-parser digest <path>` | Token-efficient text summary for LLM context |
|
|
228
246
|
| `dotmd-parser tree <path>` | ASCII dependency tree |
|
|
247
|
+
| `dotmd-parser plan <path>` | Parallel delegation plan (JSON) |
|
|
229
248
|
| `dotmd-parser resolve <file> [--var k=v]` | Recursively expand `@include` |
|
|
230
249
|
| `dotmd-parser analyze <path>` | AI dependency detection (requires `ANTHROPIC_API_KEY`) |
|
|
231
250
|
| `dotmd-parser analyze <path> --dry-run` | **API-free**: estimate tokens and USD cost |
|
|
@@ -243,6 +262,71 @@ dotmd-parser digest ./my-skill/ # compact summary for the LLM
|
|
|
243
262
|
dotmd-parser affects ./my-skill/ shared/role.md
|
|
244
263
|
```
|
|
245
264
|
|
|
265
|
+
### `ledger` / `risk` — edit-risk governance
|
|
266
|
+
|
|
267
|
+
Record per-file risk history in an append-only JSONL ledger
|
|
268
|
+
(`.claude/dotmd-ledger.jsonl`) and query it before editing. `risk` combines
|
|
269
|
+
reverse-dependency impact (`affects`) with active risk tags (ledger replay ∪
|
|
270
|
+
frontmatter `risk:`).
|
|
271
|
+
|
|
272
|
+
```bash
|
|
273
|
+
dotmd-parser ledger add . shared/role.md --tag fix-failed --note "retry hung"
|
|
274
|
+
dotmd-parser ledger clear . shared/role.md --tag fix-failed # or --all
|
|
275
|
+
dotmd-parser risk . shared/role.md # text report
|
|
276
|
+
dotmd-parser risk . shared/role.md --json
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
Tags: `fix-failed`, `fragile`, `security-sensitive`, `deprecated` (the first
|
|
280
|
+
two are "high"). `--fail-on high|any|never` controls the exit code, so a
|
|
281
|
+
PreToolUse hook can warn before risky edits:
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
dotmd-parser risk . "$FILE_PATH" --fail-on high \
|
|
285
|
+
|| echo "[dotmd] high-risk file (last fix failed / security-sensitive) — review before editing"
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### `check` — guidance health gate (CI)
|
|
289
|
+
|
|
290
|
+
Deterministic health check over the dependency graph. Detects cycles and
|
|
291
|
+
missing references (errors), plus unresolved `{{placeholders}}` and
|
|
292
|
+
conflicting directives (warnings). Optionally flags orphan files.
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
dotmd-parser check ./my-skill # text, fails on errors
|
|
296
|
+
dotmd-parser check ./my-skill --fail-on warning # also fail on warnings
|
|
297
|
+
dotmd-parser check ./my-skill --format json
|
|
298
|
+
dotmd-parser check ./my-skill --format sarif --out dotmd.sarif
|
|
299
|
+
dotmd-parser check ./my-skill --check orphans # opt-in orphan detection
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
`--fail-on` chooses the exit-code threshold (`error` default, `warning`, or
|
|
303
|
+
`never`). Use `--format sarif` with GitHub's `upload-sarif` action to get
|
|
304
|
+
inline PR annotations:
|
|
305
|
+
|
|
306
|
+
```yaml
|
|
307
|
+
- run: dotmd-parser check . --format sarif --out dotmd.sarif --fail-on never
|
|
308
|
+
- uses: github/codeql-action/upload-sarif@v3
|
|
309
|
+
with: { sarif_file: dotmd.sarif }
|
|
310
|
+
- run: dotmd-parser check . --fail-on warning # gate the PR
|
|
311
|
+
```
|
|
312
|
+
### `plan` — parallel delegation plan
|
|
313
|
+
|
|
314
|
+
Generate a static execution plan from the `@delegate` graph: topological
|
|
315
|
+
batches (parallel levels), per-task subtree context, plus conflict and cycle
|
|
316
|
+
pre-detection. Intended for a parent agent that fans out subagents.
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
dotmd-parser plan ./my-skill # plan(JSON) to stdout
|
|
320
|
+
dotmd-parser plan ./my-skill --ascii # human-readable view
|
|
321
|
+
dotmd-parser plan ./my-skill --out plan.json
|
|
322
|
+
dotmd-parser plan ./my-skill --strict # exit 1 on cycles/conflicts (CI)
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
Each task in the JSON carries a `context` array (the subtree files to hand the
|
|
326
|
+
subagent). Same-batch shared dependencies are reported in `conflicts[]` as
|
|
327
|
+
warnings — the batch stays parallel. Mutual `@delegate` references are reported
|
|
328
|
+
in `cycles[]` and excluded from batches.
|
|
329
|
+
|
|
246
330
|
## `dotmd-index.md` — folder overview in a single file
|
|
247
331
|
|
|
248
332
|
`dotmd-parser dotmd-index <path>` writes `<path>/dotmd-index.md`, a
|
|
@@ -305,6 +389,28 @@ dotmd-parser dotmd-index ./docs/ --push-openrag
|
|
|
305
389
|
**search index** (full-content semantic retrieval). Register OpenRAG's
|
|
306
390
|
MCP server with Claude Code to use both surfaces from the same client.
|
|
307
391
|
|
|
392
|
+
### Cache-affine order (`--order cache`)
|
|
393
|
+
|
|
394
|
+
`dotmd-index --order cache` lists the `## Files` section with the
|
|
395
|
+
least-frequently-changed files first (estimated from git history), so the
|
|
396
|
+
generated `dotmd-index.md` keeps a stable prefix across regenerations — better
|
|
397
|
+
KV-cache reuse for LLMs that read it. Default `--order alpha` is unchanged.
|
|
398
|
+
|
|
399
|
+
```bash
|
|
400
|
+
dotmd-parser dotmd-index ./skill --order cache
|
|
401
|
+
dotmd-parser dotmd-index ./skill --order cache --stdout
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
Measure the effect with `stability` (compare two generations):
|
|
405
|
+
|
|
406
|
+
```bash
|
|
407
|
+
dotmd-parser stability old-index.md new-index.md # prefix stable: 42/50 lines (0.84)
|
|
408
|
+
dotmd-parser stability old-index.md new-index.md --json
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
Outside a git repo (or for untracked files) frequency is treated as 0, so
|
|
412
|
+
`cache` degrades gracefully to alphabetical order.
|
|
413
|
+
|
|
308
414
|
## `analyze` — AI-assisted dependency detection
|
|
309
415
|
|
|
310
416
|
Use when a folder of markdown has **no explicit directives yet**. `analyze`
|
|
@@ -130,6 +130,24 @@ print(result["placeholders"]) # Unresolved {{variable}} names
|
|
|
130
130
|
print(result["warnings"]) # Circular refs, missing files, etc.
|
|
131
131
|
```
|
|
132
132
|
|
|
133
|
+
#### Injection scanning
|
|
134
|
+
|
|
135
|
+
`resolve` scans content pulled in via `@include` for prompt-injection
|
|
136
|
+
patterns (role spoofing like `System:`, instruction overrides like "ignore
|
|
137
|
+
previous instructions"). Findings print to stderr; the expanded content is
|
|
138
|
+
unchanged by default.
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
dotmd-parser resolve ./skill/SKILL.md # scan on, warn (default)
|
|
142
|
+
dotmd-parser resolve ./skill/SKILL.md --no-scan # disable scanning
|
|
143
|
+
dotmd-parser resolve ./skill/SKILL.md --scan-rule tool-exfil # add an opt-in rule
|
|
144
|
+
dotmd-parser resolve ./skill/SKILL.md --block # replace injected includes with a placeholder
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
The root/entry file is trusted and not scanned — only `@include`-pulled
|
|
148
|
+
files are. Matches inside fenced code blocks are ignored, and
|
|
149
|
+
`<!-- dotmd-allow: role-spoof -->` (or `all`) in a file suppresses that rule.
|
|
150
|
+
|
|
133
151
|
### dependents_of — Reverse dependency query
|
|
134
152
|
|
|
135
153
|
```python
|
|
@@ -184,11 +202,12 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
|
|
|
184
202
|
| `dotmd-parser dotmd-index <path> --push-openrag` | After writing, ingest into OpenRAG (`pip install dotmd-parser[openrag]`) |
|
|
185
203
|
| `dotmd-parser index <path>` | Build and save `.claude/dotmd-index.json` |
|
|
186
204
|
| `dotmd-parser index <path> --scope <subdir>` | Incrementally re-index one subfolder, merge into the existing index |
|
|
187
|
-
| `dotmd-parser check <path>` |
|
|
205
|
+
| `dotmd-parser check <path>` | Health gate (CI): cycles, missing refs, unresolved placeholders, conflicts |
|
|
188
206
|
| `dotmd-parser affects <path> <file>` | Reverse dependencies of `<file>` |
|
|
189
207
|
| `dotmd-parser deps <path> <file>` | Direct dependencies of `<file>` |
|
|
190
208
|
| `dotmd-parser digest <path>` | Token-efficient text summary for LLM context |
|
|
191
209
|
| `dotmd-parser tree <path>` | ASCII dependency tree |
|
|
210
|
+
| `dotmd-parser plan <path>` | Parallel delegation plan (JSON) |
|
|
192
211
|
| `dotmd-parser resolve <file> [--var k=v]` | Recursively expand `@include` |
|
|
193
212
|
| `dotmd-parser analyze <path>` | AI dependency detection (requires `ANTHROPIC_API_KEY`) |
|
|
194
213
|
| `dotmd-parser analyze <path> --dry-run` | **API-free**: estimate tokens and USD cost |
|
|
@@ -206,6 +225,71 @@ dotmd-parser digest ./my-skill/ # compact summary for the LLM
|
|
|
206
225
|
dotmd-parser affects ./my-skill/ shared/role.md
|
|
207
226
|
```
|
|
208
227
|
|
|
228
|
+
### `ledger` / `risk` — edit-risk governance
|
|
229
|
+
|
|
230
|
+
Record per-file risk history in an append-only JSONL ledger
|
|
231
|
+
(`.claude/dotmd-ledger.jsonl`) and query it before editing. `risk` combines
|
|
232
|
+
reverse-dependency impact (`affects`) with active risk tags (ledger replay ∪
|
|
233
|
+
frontmatter `risk:`).
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
dotmd-parser ledger add . shared/role.md --tag fix-failed --note "retry hung"
|
|
237
|
+
dotmd-parser ledger clear . shared/role.md --tag fix-failed # or --all
|
|
238
|
+
dotmd-parser risk . shared/role.md # text report
|
|
239
|
+
dotmd-parser risk . shared/role.md --json
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Tags: `fix-failed`, `fragile`, `security-sensitive`, `deprecated` (the first
|
|
243
|
+
two are "high"). `--fail-on high|any|never` controls the exit code, so a
|
|
244
|
+
PreToolUse hook can warn before risky edits:
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
dotmd-parser risk . "$FILE_PATH" --fail-on high \
|
|
248
|
+
|| echo "[dotmd] high-risk file (last fix failed / security-sensitive) — review before editing"
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### `check` — guidance health gate (CI)
|
|
252
|
+
|
|
253
|
+
Deterministic health check over the dependency graph. Detects cycles and
|
|
254
|
+
missing references (errors), plus unresolved `{{placeholders}}` and
|
|
255
|
+
conflicting directives (warnings). Optionally flags orphan files.
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
dotmd-parser check ./my-skill # text, fails on errors
|
|
259
|
+
dotmd-parser check ./my-skill --fail-on warning # also fail on warnings
|
|
260
|
+
dotmd-parser check ./my-skill --format json
|
|
261
|
+
dotmd-parser check ./my-skill --format sarif --out dotmd.sarif
|
|
262
|
+
dotmd-parser check ./my-skill --check orphans # opt-in orphan detection
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
`--fail-on` chooses the exit-code threshold (`error` default, `warning`, or
|
|
266
|
+
`never`). Use `--format sarif` with GitHub's `upload-sarif` action to get
|
|
267
|
+
inline PR annotations:
|
|
268
|
+
|
|
269
|
+
```yaml
|
|
270
|
+
- run: dotmd-parser check . --format sarif --out dotmd.sarif --fail-on never
|
|
271
|
+
- uses: github/codeql-action/upload-sarif@v3
|
|
272
|
+
with: { sarif_file: dotmd.sarif }
|
|
273
|
+
- run: dotmd-parser check . --fail-on warning # gate the PR
|
|
274
|
+
```
|
|
275
|
+
### `plan` — parallel delegation plan
|
|
276
|
+
|
|
277
|
+
Generate a static execution plan from the `@delegate` graph: topological
|
|
278
|
+
batches (parallel levels), per-task subtree context, plus conflict and cycle
|
|
279
|
+
pre-detection. Intended for a parent agent that fans out subagents.
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
dotmd-parser plan ./my-skill # plan(JSON) to stdout
|
|
283
|
+
dotmd-parser plan ./my-skill --ascii # human-readable view
|
|
284
|
+
dotmd-parser plan ./my-skill --out plan.json
|
|
285
|
+
dotmd-parser plan ./my-skill --strict # exit 1 on cycles/conflicts (CI)
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
Each task in the JSON carries a `context` array (the subtree files to hand the
|
|
289
|
+
subagent). Same-batch shared dependencies are reported in `conflicts[]` as
|
|
290
|
+
warnings — the batch stays parallel. Mutual `@delegate` references are reported
|
|
291
|
+
in `cycles[]` and excluded from batches.
|
|
292
|
+
|
|
209
293
|
## `dotmd-index.md` — folder overview in a single file
|
|
210
294
|
|
|
211
295
|
`dotmd-parser dotmd-index <path>` writes `<path>/dotmd-index.md`, a
|
|
@@ -268,6 +352,28 @@ dotmd-parser dotmd-index ./docs/ --push-openrag
|
|
|
268
352
|
**search index** (full-content semantic retrieval). Register OpenRAG's
|
|
269
353
|
MCP server with Claude Code to use both surfaces from the same client.
|
|
270
354
|
|
|
355
|
+
### Cache-affine order (`--order cache`)
|
|
356
|
+
|
|
357
|
+
`dotmd-index --order cache` lists the `## Files` section with the
|
|
358
|
+
least-frequently-changed files first (estimated from git history), so the
|
|
359
|
+
generated `dotmd-index.md` keeps a stable prefix across regenerations — better
|
|
360
|
+
KV-cache reuse for LLMs that read it. Default `--order alpha` is unchanged.
|
|
361
|
+
|
|
362
|
+
```bash
|
|
363
|
+
dotmd-parser dotmd-index ./skill --order cache
|
|
364
|
+
dotmd-parser dotmd-index ./skill --order cache --stdout
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
Measure the effect with `stability` (compare two generations):
|
|
368
|
+
|
|
369
|
+
```bash
|
|
370
|
+
dotmd-parser stability old-index.md new-index.md # prefix stable: 42/50 lines (0.84)
|
|
371
|
+
dotmd-parser stability old-index.md new-index.md --json
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
Outside a git repo (or for untracked files) frequency is treated as 0, so
|
|
375
|
+
`cache` degrades gracefully to alphabetical order.
|
|
376
|
+
|
|
271
377
|
## `analyze` — AI-assisted dependency detection
|
|
272
378
|
|
|
273
379
|
Use when a folder of markdown has **no explicit directives yet**. `analyze`
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "dotmd-parser"
|
|
3
|
-
version = "0.6.2"
|
|
3
|
+
version = "0.9.0"
|
|
4
4
|
description = "Dependency graph parser, single-file folder index (dotmd-index.md), and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, generate RAG-friendly overviews, and ingest into OpenRAG"
|
|
5
5
|
requires-python = ">=3.10"
|
|
6
6
|
license = "MIT"
|
|
@@ -10,7 +10,7 @@ API:
|
|
|
10
10
|
from dotmd_parser import digest, tree, affects
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
-
__version__ = "0.6.2"
|
|
13
|
+
__version__ = "0.9.0"
|
|
14
14
|
|
|
15
15
|
from dotmd_parser.parser import (
|
|
16
16
|
build_graph,
|
|
@@ -70,7 +70,43 @@ from dotmd_parser.index_md import (
|
|
|
70
70
|
DEFAULT_INDEX_FILENAME,
|
|
71
71
|
INDEX_MD_SCHEMA,
|
|
72
72
|
)
|
|
73
|
+
from dotmd_parser.cache_order import (
|
|
74
|
+
git_change_counts,
|
|
75
|
+
order_key,
|
|
76
|
+
prefix_stability,
|
|
77
|
+
)
|
|
78
|
+
from dotmd_parser.ledger import (
|
|
79
|
+
append_event,
|
|
80
|
+
read_events,
|
|
81
|
+
active_tags,
|
|
82
|
+
static_tags,
|
|
83
|
+
all_active_tags,
|
|
84
|
+
risk_level,
|
|
85
|
+
risk_report,
|
|
86
|
+
default_ledger_path,
|
|
87
|
+
RISK_TAGS,
|
|
88
|
+
HIGH_TAGS,
|
|
89
|
+
)
|
|
90
|
+
from dotmd_parser.checks import (
|
|
91
|
+
run_checks,
|
|
92
|
+
summarize,
|
|
93
|
+
exit_code,
|
|
94
|
+
format_text,
|
|
95
|
+
format_json,
|
|
96
|
+
format_sarif,
|
|
97
|
+
CHECK_SCHEMA,
|
|
98
|
+
)
|
|
73
99
|
from dotmd_parser.openrag import push_to_openrag
|
|
100
|
+
from dotmd_parser.scan import (
|
|
101
|
+
scan_content,
|
|
102
|
+
DEFAULT_RULES,
|
|
103
|
+
OPTIONAL_RULES,
|
|
104
|
+
ALL_RULES,
|
|
105
|
+
)
|
|
106
|
+
from dotmd_parser.plan import (
|
|
107
|
+
build_plan,
|
|
108
|
+
render_ascii,
|
|
109
|
+
)
|
|
74
110
|
|
|
75
111
|
__all__ = [
|
|
76
112
|
"__version__",
|
|
@@ -126,6 +162,37 @@ __all__ = [
|
|
|
126
162
|
"extract_frontmatter",
|
|
127
163
|
"DEFAULT_INDEX_FILENAME",
|
|
128
164
|
"INDEX_MD_SCHEMA",
|
|
165
|
+
# cache_order
|
|
166
|
+
"git_change_counts",
|
|
167
|
+
"order_key",
|
|
168
|
+
"prefix_stability",
|
|
169
|
+
# ledger
|
|
170
|
+
"append_event",
|
|
171
|
+
"read_events",
|
|
172
|
+
"active_tags",
|
|
173
|
+
"static_tags",
|
|
174
|
+
"all_active_tags",
|
|
175
|
+
"risk_level",
|
|
176
|
+
"risk_report",
|
|
177
|
+
"default_ledger_path",
|
|
178
|
+
"RISK_TAGS",
|
|
179
|
+
"HIGH_TAGS",
|
|
180
|
+
# checks
|
|
181
|
+
"run_checks",
|
|
182
|
+
"summarize",
|
|
183
|
+
"exit_code",
|
|
184
|
+
"format_text",
|
|
185
|
+
"format_json",
|
|
186
|
+
"format_sarif",
|
|
187
|
+
"CHECK_SCHEMA",
|
|
129
188
|
# openrag
|
|
130
189
|
"push_to_openrag",
|
|
190
|
+
# scan
|
|
191
|
+
"scan_content",
|
|
192
|
+
"DEFAULT_RULES",
|
|
193
|
+
"OPTIONAL_RULES",
|
|
194
|
+
"ALL_RULES",
|
|
195
|
+
# plan
|
|
196
|
+
"build_plan",
|
|
197
|
+
"render_ascii",
|
|
131
198
|
]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""
|
|
2
|
+
dotmd-parser — cache-affine ordering helpers.
|
|
3
|
+
|
|
4
|
+
Estimates per-file change frequency from git history (with a safe fallback)
|
|
5
|
+
and provides an ordering key that puts low-frequency files first, so the
|
|
6
|
+
`dotmd-index.md` body prefix stays stable across regenerations (KV-cache
|
|
7
|
+
friendly). Also a prefix-stability metric. Pure stdlib; git via subprocess.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import shutil
|
|
13
|
+
import subprocess
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def git_change_counts(root: str | Path) -> dict[str, int]:
|
|
18
|
+
"""Return {rel_posix: commit_count} from git history; {} when unavailable."""
|
|
19
|
+
if shutil.which("git") is None:
|
|
20
|
+
return {}
|
|
21
|
+
try:
|
|
22
|
+
result = subprocess.run(
|
|
23
|
+
["git", "-C", str(root), "-c", "core.quotepath=false",
|
|
24
|
+
"log", "--format=", "--name-only", "--relative", "--", "."],
|
|
25
|
+
capture_output=True,
|
|
26
|
+
text=True,
|
|
27
|
+
)
|
|
28
|
+
except OSError:
|
|
29
|
+
return {}
|
|
30
|
+
if result.returncode != 0:
|
|
31
|
+
return {}
|
|
32
|
+
counts: dict[str, int] = {}
|
|
33
|
+
for line in result.stdout.splitlines():
|
|
34
|
+
rel = line.strip()
|
|
35
|
+
if rel:
|
|
36
|
+
counts[rel] = counts.get(rel, 0) + 1
|
|
37
|
+
return counts
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def order_key(rel: str, counts: dict[str, int]) -> tuple[int, str]:
    """Build the sort key for *rel*: ``(change count, path)``.

    A path absent from *counts* gets a count of 0. Sorting by this key puts
    lower counts first and orders equal counts by the path string.
    """
    changes = counts.get(rel, 0)
    return changes, rel
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def prefix_stability(old_text: str, new_text: str) -> dict:
    """Report how many leading lines of `new_text` are identical to `old_text`.

    Both texts are split on newline characters and compared position by
    position; counting stops at the first differing line or when the shorter
    text runs out.

    Returns:
        A dict with ``common_prefix_lines`` (length of the shared prefix),
        ``new_lines`` (line count of `new_text`) and ``ratio`` (the first
        divided by the second, rounded to 4 decimal places).
    """
    before = old_text.split("\n")
    after = new_text.split("\n")
    overlap = min(len(before), len(after))
    # Index of the first mismatch, or the whole overlap when none differs.
    matched = next(
        (pos for pos in range(overlap) if before[pos] != after[pos]),
        overlap,
    )
    line_total = len(after)
    return {
        "common_prefix_lines": matched,
        "new_lines": line_total,
        "ratio": round(matched / max(line_total, 1), 4),
    }
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""
|
|
2
|
+
dotmd-parser — guidance health checks (deterministic CI gate).
|
|
3
|
+
|
|
4
|
+
Consumes a compact index (from `index.build_index` / `index.load_index`) and
|
|
5
|
+
produces a flat list of Finding dicts, rendered as text / JSON / SARIF. Pure,
|
|
6
|
+
stdlib-only, no LLM. The raw graph / parser are not touched.
|
|
7
|
+
|
|
8
|
+
Finding shape:
|
|
9
|
+
{"rule": str, "severity": "error"|"warning", "path": str,
|
|
10
|
+
"message": str, "line": int | None}
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
CHECK_SCHEMA = "dotmd-check/v1"
|
|
19
|
+
|
|
20
|
+
_GRAPH_WARNING_RULES = {
|
|
21
|
+
"depth_exceeded": "depth-exceeded",
|
|
22
|
+
"read_error": "read-error",
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _finding(rule: str, severity: str, path: str, message: str,
|
|
27
|
+
line: int | None = None) -> dict:
|
|
28
|
+
return {"rule": rule, "severity": severity, "path": path,
|
|
29
|
+
"message": message, "line": line}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _circular_findings(index: dict) -> list[dict]:
    """Turn every entry of ``index["cycles"]`` into a "circular" error finding.

    The entry itself becomes the finding message; the path field is left
    empty. A missing "cycles" key yields no findings.
    """
    found: list[dict] = []
    for cycle_msg in index.get("cycles", []):
        found.append(_finding("circular", "error", "", cycle_msg))
    return found
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _missing_findings(index: dict) -> list[dict]:
    """Turn every entry of ``index["missing"]`` into a "missing-reference" error.

    The entry is used as the finding's path. A missing "missing" key yields
    no findings.
    """
    found: list[dict] = []
    for absent in index.get("missing", []):
        found.append(_finding("missing-reference", "error", absent,
                              "referenced file does not exist"))
    return found
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _graph_warning_findings(index: dict) -> list[dict]:
    """Convert recognised graph warnings into error findings.

    Each entry of ``index["warnings"]`` whose "type" is a key of
    ``_GRAPH_WARNING_RULES`` becomes an error finding under the mapped rule
    id, carrying the entry's "path" and "message" (empty string when absent).
    Entries of any other type are skipped.
    """
    return [
        _finding(
            _GRAPH_WARNING_RULES[kind],
            "error",
            entry.get("path", ""),
            entry.get("message", ""),
        )
        for entry in index.get("warnings", [])
        if (kind := entry.get("type", "")) in _GRAPH_WARNING_RULES
    ]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _placeholder_findings(index: dict) -> list[dict]:
    """Emit one warning per name in each file's "placeholders" list.

    Files are visited in sorted path order and names in sorted order, so the
    output order is deterministic. A missing or empty "placeholders" value
    contributes nothing.
    """
    files = index.get("files", {})
    return [
        _finding(
            "unresolved-placeholder", "warning", rel,
            f"unresolved placeholder: {{{{{var}}}}}",
        )
        for rel in sorted(files)
        for var in sorted(files[rel].get("placeholders", []) or [])
    ]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
_EXPLICIT_DIRECTIVE_TYPES = {"include", "ref", "delegate"}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _conflicting_directive_findings(index: dict) -> list[dict]:
    """Warn when one file points at the same target with several directive types.

    For every file (sorted by path) the "deps" entries are grouped by their
    "to" target, keeping only types listed in ``_EXPLICIT_DIRECTIVE_TYPES``
    and non-empty targets. A target reached through two or more distinct
    types produces one "conflicting-directive" warning on the source file.
    """
    files = index.get("files", {})
    findings: list[dict] = []
    for source in sorted(files):
        kinds_per_target: dict[str, set[str]] = {}
        for edge in files[source].get("deps", []):
            kind = edge.get("type", "")
            if kind in _EXPLICIT_DIRECTIVE_TYPES and (dest := edge.get("to", "")):
                kinds_per_target.setdefault(dest, set()).add(kind)
        for dest in sorted(kinds_per_target):
            kinds = kinds_per_target[dest]
            if len(kinds) < 2:
                continue
            listing = ", ".join(sorted(kinds))
            findings.append(_finding(
                "conflicting-directive", "warning", source,
                f"{dest} is referenced by multiple directive types ({listing})",
            ))
    return findings
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _orphan_findings(index: dict, root: str | None) -> list[dict]:
    """Warn about ``*.md`` files under *root* that are not keys of ``index["files"]``.

    When *root* is a file its parent directory is scanned instead. Returns an
    empty list when *root* is None or the directory to scan does not exist.
    Paths with any component starting with "." and paths containing
    "node_modules" are ignored. Results follow the sorted order of the
    discovered paths.
    """
    if root is None:
        return []
    start = Path(root)
    scan_dir = start.parent if start.is_file() else start
    if not scan_dir.is_dir():
        return []
    known = set(index.get("files", {}))
    orphans: list[dict] = []
    for md_file in sorted(scan_dir.rglob("*.md")):
        relative = md_file.relative_to(scan_dir)
        if any(piece.startswith(".") for piece in relative.parts):
            continue  # hidden file or inside a hidden directory
        rel = relative.as_posix()
        # NOTE(review): this is a substring test on the whole relative path,
        # so a name such as "node_modules_notes.md" is skipped too — confirm
        # this matches how the indexer decides what to skip.
        if "node_modules" in rel or not md_file.is_file() or rel in known:
            continue
        orphans.append(_finding("orphan-file", "warning", rel,
                                "file is not referenced by any node"))
    return orphans
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def run_checks(index: dict, root: str | None = None,
               enable_orphans: bool = False) -> list[dict]:
    """Run every enabled check against *index* and concatenate the findings.

    The cycle, missing-reference, graph-warning, placeholder and
    conflicting-directive checks always run, in that order. The orphan check
    runs last and only when *enable_orphans* is true; it is the only check
    that receives *root*.
    """
    always_on = (
        _circular_findings,
        _missing_findings,
        _graph_warning_findings,
        _placeholder_findings,
        _conflicting_directive_findings,
    )
    collected: list[dict] = []
    for check in always_on:
        collected.extend(check(index))
    if enable_orphans:
        collected.extend(_orphan_findings(index, root))
    return collected
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def summarize(findings: list[dict]) -> dict:
    """Tally findings whose "severity" is "error" or "warning".

    Returns ``{"errors": n, "warnings": m}``; findings with any other (or no)
    severity are not counted.
    """
    levels = [item.get("severity") for item in findings]
    return {
        "errors": levels.count("error"),
        "warnings": levels.count("warning"),
    }
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def exit_code(findings: list[dict], fail_on: str) -> int:
    """Translate findings into a process exit status (0 or 1).

    ``fail_on`` selects the threshold: "never" always gives 0, "warning"
    gives 1 when there is any error or warning, and every other value behaves
    like "error" (1 only when there is at least one error).
    """
    tally = summarize(findings)
    if fail_on == "never":
        return 0
    failing = tally["errors"]
    if fail_on == "warning":
        failing = failing or tally["warnings"]
    return 1 if failing else 0
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def format_text(findings: list[dict], index: dict) -> str:
    """Render a one-line summary followed by one detail line per finding.

    The summary shows the file and edge counts from ``index["stats"]``
    (0 when absent) and the error/warning totals. Each detail line shows the
    upper-cased severity, the rule id, the path ("-" when empty) and the
    message. Lines are joined with newlines.
    """
    stats = index.get("stats", {})
    tally = summarize(findings)
    header = (
        f"{stats.get('files', 0)} files, {stats.get('edges', 0)} edges — "
        f"errors:{tally['errors']} warnings:{tally['warnings']}"
    )
    report = [header]
    report.extend(
        f" [{item['severity'].upper()}] {item['rule']}: "
        f"{item.get('path') or '-'} — {item['message']}"
        for item in findings
    )
    return "\n".join(report)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def format_json(findings: list[dict], index: dict) -> str:
    """Serialise findings as an indented JSON document tagged with CHECK_SCHEMA.

    The document carries the schema id, ``index["root"]`` (empty string when
    absent), a stats object (file/edge counts from ``index["stats"]`` plus
    the error/warning totals) and the findings list unchanged.
    """
    source_stats = index.get("stats", {})
    tally = summarize(findings)
    stat_block = {
        "files": source_stats.get("files", 0),
        "edges": source_stats.get("edges", 0),
        "errors": tally["errors"],
        "warnings": tally["warnings"],
    }
    document = {
        "schema": CHECK_SCHEMA,
        "root": index.get("root", ""),
        "stats": stat_block,
        "findings": findings,
    }
    return json.dumps(document, ensure_ascii=False, indent=2)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _camel(rule_id: str) -> str:
|
|
195
|
+
parts = rule_id.split("-")
|
|
196
|
+
return parts[0] + "".join(p.capitalize() for p in parts[1:])
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def format_sarif(findings: list[dict], index: dict) -> str:
    """Render findings as SARIF 2.1.0 JSON (for GitHub code scanning).

    Args:
        findings: Finding dicts; each must have "rule", "severity" and
            "message". "path" and "line" are optional.
        index: Accepted for signature parity with the other formatters; not
            read here.

    Returns:
        An indented JSON string with a single run. The run lists each
        distinct rule once (id plus camelCase name) and one result per
        finding. A result gets a physical location only when the finding has
        a non-empty path, and a region only when it has a truthy line.
    """
    from urllib.parse import quote  # local import: only this renderer needs it

    from dotmd_parser import __version__  # local import avoids cycle at import time

    rules: dict[str, dict] = {}
    results: list[dict] = []
    for f in findings:
        rule_id = f["rule"]
        # setdefault keeps the first descriptor, so each rule is listed once.
        rules.setdefault(rule_id, {"id": rule_id, "name": _camel(rule_id)})
        result: dict = {
            "ruleId": rule_id,
            "level": f["severity"],
            "message": {"text": f["message"]},
        }
        if f.get("path"):
            # artifactLocation.uri must be a valid URI reference, so
            # percent-encode spaces and other reserved or non-ASCII
            # characters while keeping "/" as the path separator.
            physical: dict = {
                "artifactLocation": {"uri": quote(f["path"], safe="/")}
            }
            if f.get("line"):
                physical["region"] = {"startLine": f["line"]}
            result["locations"] = [{"physicalLocation": physical}]
        results.append(result)

    sarif = {
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [{
            "tool": {"driver": {
                "name": "dotmd-parser",
                "informationUri": "https://github.com/dotmd-projects/dotmd-parser",
                "version": __version__,
                "rules": list(rules.values()),
            }},
            "results": results,
        }],
    }
    return json.dumps(sarif, ensure_ascii=False, indent=2)
|