dotmd-parser 0.5.0__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/PKG-INFO +111 -5
- dotmd_parser-0.5.0/src/dotmd_parser.egg-info/PKG-INFO → dotmd_parser-0.6.2/README.md +99 -30
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/pyproject.toml +13 -2
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/__init__.py +17 -1
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/cli.py +159 -9
- dotmd_parser-0.6.2/src/dotmd_parser/index_md.py +716 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/inventory.py +5 -2
- dotmd_parser-0.6.2/src/dotmd_parser/openrag.py +150 -0
- dotmd_parser-0.6.2/src/dotmd_parser/templates/dotmd_index/SKILL.md +254 -0
- dotmd_parser-0.6.2/src/dotmd_parser/templates/dotmd_index/__init__.py +0 -0
- dotmd_parser-0.5.0/README.md → dotmd_parser-0.6.2/src/dotmd_parser.egg-info/PKG-INFO +136 -3
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser.egg-info/SOURCES.txt +11 -1
- dotmd_parser-0.6.2/src/dotmd_parser.egg-info/requires.txt +14 -0
- dotmd_parser-0.6.2/tests/test_aggregate.py +149 -0
- dotmd_parser-0.6.2/tests/test_cli_dotmd_index.py +133 -0
- dotmd_parser-0.6.2/tests/test_index_md.py +233 -0
- dotmd_parser-0.6.2/tests/test_openrag_push.py +199 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/tests/test_skill_integration.py +1 -1
- dotmd_parser-0.6.2/tests/test_token_savings.py +263 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/LICENSE +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/setup.cfg +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/analyze.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/digest.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/index.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/parser.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/templates/SKILL.md +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/templates/__init__.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/templates/prompts/__init__.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser/templates/prompts/analyze-dependencies.md +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser.egg-info/dependency_links.txt +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser.egg-info/entry_points.txt +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/src/dotmd_parser.egg-info/top_level.txt +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/tests/test_analyze.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/tests/test_cost_estimate.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/tests/test_empty_warnings.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/tests/test_host_agent_plan.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/tests/test_index_scope.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/tests/test_inventory.py +0 -0
- {dotmd_parser-0.5.0 → dotmd_parser-0.6.2}/tests/test_parser.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dotmd-parser
|
|
3
|
-
Version: 0.5.0
|
|
4
|
-
Summary: Dependency graph parser and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, and detect implicit dependencies via Claude for AI agent prompt engineering
|
|
3
|
+
Version: 0.6.2
|
|
4
|
+
Summary: Dependency graph parser, single-file folder index (dotmd-index.md), and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, generate RAG-friendly overviews, and ingest into OpenRAG
|
|
5
5
|
Author: dotmd-projects
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/dotmd-projects/dotmd-parser
|
|
@@ -23,6 +23,16 @@ Classifier: Typing :: Typed
|
|
|
23
23
|
Requires-Python: >=3.10
|
|
24
24
|
Description-Content-Type: text/markdown
|
|
25
25
|
License-File: LICENSE
|
|
26
|
+
Provides-Extra: openrag
|
|
27
|
+
Requires-Dist: openrag-sdk>=0.3.1; extra == "openrag"
|
|
28
|
+
Provides-Extra: pdf
|
|
29
|
+
Requires-Dist: pdfplumber>=0.10; extra == "pdf"
|
|
30
|
+
Provides-Extra: docx
|
|
31
|
+
Requires-Dist: python-docx>=1.0; extra == "docx"
|
|
32
|
+
Provides-Extra: all
|
|
33
|
+
Requires-Dist: openrag-sdk>=0.3.1; extra == "all"
|
|
34
|
+
Requires-Dist: pdfplumber>=0.10; extra == "all"
|
|
35
|
+
Requires-Dist: python-docx>=1.0; extra == "all"
|
|
26
36
|
Dynamic: license-file
|
|
27
37
|
|
|
28
38
|
# dotmd-parser
|
|
@@ -47,6 +57,35 @@ As AI agent projects grow, `.md` files start referencing each other via `@includ
|
|
|
47
57
|
|
|
48
58
|
**dotmd-parser** solves this by parsing your `.md` files into a dependency graph — automatically detecting directives, runtime references, and template placeholders. One function call gives you the full picture.
|
|
49
59
|
|
|
60
|
+
## Token savings — measured
|
|
61
|
+
|
|
62
|
+
The single biggest reason to reach for `dotmd-parser` in an agent loop is
|
|
63
|
+
that it lets Claude understand a folder **without reading every file**.
|
|
64
|
+
The numbers below are produced by `tests/test_token_savings.py` (run
|
|
65
|
+
with `DOTMD_TOKEN_REPORT=1 pytest -s`) using `tiktoken`'s `cl100k_base`
|
|
66
|
+
encoding (a close proxy for Claude's tokenizer family):
|
|
67
|
+
|
|
68
|
+
| Folder profile | Files | Naive read of every `.md` | `dotmd-index.md` | `digest` |
|
|
69
|
+
|---|---:|---:|---:|---:|
|
|
70
|
+
| Small skill (each file ~2 KB) | 4 | 1,610 tokens | **605 (0.38×)** | 174 (0.11×) |
|
|
71
|
+
| Medium docs (each file ~2 KB) | 31 | 15,855 tokens | **2,837 (0.18× → 5.6× cheaper)** | 1,285 (0.08×) |
|
|
72
|
+
| Large docs (each file ~2 KB) | 111 | 58,171 tokens | **9,535 (0.16× → 6.3× cheaper)** | 4,606 (0.08×) |
|
|
73
|
+
|
|
74
|
+
**Takeaway**: at ~30 files dotmd-parser already cuts Claude's reading
|
|
75
|
+
cost by **~5.6×**, and the savings *grow* with folder size. At 100+
|
|
76
|
+
files the same context window now fits **6× more conversation**, or
|
|
77
|
+
serves the same prompt at **1/6 the input-token spend**.
|
|
78
|
+
|
|
79
|
+
The persistent `dotmd-index.md` artifact pays a fixed frontmatter
|
|
80
|
+
overhead, so for *very small* folders (a handful of files of a few
|
|
81
|
+
hundred bytes) the naive read can still win. The `digest` output is
|
|
82
|
+
even more compact (~12× cheaper at scale) but isn't persisted on disk —
|
|
83
|
+
use `dotmd-index.md` for stable navigation, `digest` for one-shot
|
|
84
|
+
summaries.
|
|
85
|
+
|
|
86
|
+
The break-even is around **4 files × 1 KB each**: above that,
|
|
87
|
+
`dotmd-index.md` is always cheaper than reading the folder by hand.
|
|
88
|
+
|
|
50
89
|
## Comparison
|
|
51
90
|
|
|
52
91
|
| Capability | Manual / grep | dotmd-parser |
|
|
@@ -177,6 +216,9 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
|
|
|
177
216
|
| Command | Purpose |
|
|
178
217
|
|---|---|
|
|
179
218
|
| `dotmd-parser inventory <path>` | **API-free**: extension counts, markdown/binary ratio, largest files |
|
|
219
|
+
| `dotmd-parser dotmd-index <path>` | **API-free**: generate `<path>/dotmd-index.md` (single-file folder overview) |
|
|
220
|
+
| `dotmd-parser dotmd-index <path> --aggregate` | Roll up nested `dotmd-index.md` files into the parent (Sub-Indexes section) |
|
|
221
|
+
| `dotmd-parser dotmd-index <path> --push-openrag` | After writing, ingest into OpenRAG (`pip install dotmd-parser[openrag]`) |
|
|
180
222
|
| `dotmd-parser index <path>` | Build and save `.claude/dotmd-index.json` |
|
|
181
223
|
| `dotmd-parser index <path> --scope <subdir>` | Incrementally re-index one subfolder, merge into the existing index |
|
|
182
224
|
| `dotmd-parser check <path>` | Exit non-zero on cycles / missing refs (CI-friendly) |
|
|
@@ -189,16 +231,80 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
|
|
|
189
231
|
| `dotmd-parser analyze <path> --dry-run` | **API-free**: estimate tokens and USD cost |
|
|
190
232
|
| `dotmd-parser analyze <path> --plan` | **API-free**: emit a host-agent prompt pack for Claude Code to execute |
|
|
191
233
|
| `dotmd-parser analyze <path> --apply-from <json>` | Apply a pre-computed analysis JSON |
|
|
234
|
+
| `dotmd-parser init [--skill dotmd-index]` | Install a bundled SKILL.md into `.claude/skills/<id>/` |
|
|
192
235
|
| `dotmd-parser show <path>` | Summary + full JSON graph (legacy default) |
|
|
193
236
|
|
|
194
237
|
```bash
|
|
195
238
|
# Typical Claude Code workflow
|
|
196
|
-
dotmd-parser inventory ./my-skill/
|
|
197
|
-
dotmd-parser index ./my-skill/
|
|
198
|
-
dotmd-parser
|
|
239
|
+
dotmd-parser inventory ./my-skill/ # start here if you've never seen the folder
|
|
240
|
+
dotmd-parser dotmd-index ./my-skill/ # write ./my-skill/dotmd-index.md (Claude reads ONLY this file)
|
|
241
|
+
dotmd-parser index ./my-skill/ # one-off; cached until files change
|
|
242
|
+
dotmd-parser digest ./my-skill/ # compact summary for the LLM
|
|
199
243
|
dotmd-parser affects ./my-skill/ shared/role.md
|
|
200
244
|
```
|
|
201
245
|
|
|
246
|
+
## `dotmd-index.md` — folder overview in a single file
|
|
247
|
+
|
|
248
|
+
`dotmd-parser dotmd-index <path>` writes `<path>/dotmd-index.md`, a
|
|
249
|
+
self-contained Markdown file that combines `inventory()` and
|
|
250
|
+
`build_index()` into one artifact Claude can read **instead of**
|
|
251
|
+
grep-scanning every file in the folder.
|
|
252
|
+
|
|
253
|
+
The file contains:
|
|
254
|
+
|
|
255
|
+
- YAML frontmatter (`schema`, `content_hash`, `stats`, RAG `chunks[]`)
|
|
256
|
+
- `## Summary` (file counts, total size, health)
|
|
257
|
+
- `## Folder Map` (depth-limited ASCII tree)
|
|
258
|
+
- `## Files` (markdown: title + desc + deps; other: kind + size)
|
|
259
|
+
- `## Dependency Tree` (`@include` / `@delegate` / `@ref` graph as ASCII)
|
|
260
|
+
- `## Placeholders` (unresolved `{{...}}` variables)
|
|
261
|
+
- `<!-- chunk:id -->` HTML markers so any RAG tool can split deterministically
|
|
262
|
+
|
|
263
|
+
Re-running on an unchanged folder writes nothing (`content_hash` matches).
|
|
264
|
+
The command refuses to overwrite a hand-written `dotmd-index.md` unless
|
|
265
|
+
`--force` is passed.
|
|
266
|
+
|
|
267
|
+
### Aggregating across nested folders
|
|
268
|
+
|
|
269
|
+
Run with `--aggregate` to roll up descendant artifacts:
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
dotmd-parser dotmd-index ./project/ --aggregate
|
|
273
|
+
# project/dotmd-index.md now references project/docs/dotmd-index.md and
|
|
274
|
+
# project/src/dotmd-index.md without duplicating their file listings.
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
Each child is discovered, its frontmatter is read, and a one-line
|
|
278
|
+
summary (file count, edges, health) appears under `## Sub-Indexes`.
|
|
279
|
+
The `aggregates[]` frontmatter array records each child's `content_hash`
|
|
280
|
+
so staleness is easy to detect. User-authored `dotmd-index.md` files
|
|
281
|
+
that lack `generated_by: dotmd-parser` are silently skipped.
|
|
282
|
+
|
|
283
|
+
This is a **reference**, not a merge — Claude reads the parent to learn
|
|
284
|
+
which subfolders exist, then drills into the relevant child file. The
|
|
285
|
+
parent stays token-efficient even with deeply nested trees.
|
|
286
|
+
|
|
287
|
+
### OpenRAG integration
|
|
288
|
+
|
|
289
|
+
[OpenRAG](https://github.com/langflow-ai/openrag) is a self-hosted RAG
|
|
290
|
+
platform built on Langflow + Docling + OpenSearch. dotmd-parser can ship
|
|
291
|
+
the artifact straight into it:
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
pip install dotmd-parser[openrag] # adds openrag-sdk
|
|
295
|
+
export OPENRAG_URL=http://localhost:3000
|
|
296
|
+
export OPENRAG_API_KEY=... # if your instance requires auth
|
|
297
|
+
|
|
298
|
+
dotmd-parser dotmd-index ./docs/ --push-openrag
|
|
299
|
+
# 1. Writes ./docs/dotmd-index.md
|
|
300
|
+
# 2. Calls OpenRAGClient.documents.ingest(file_path=...)
|
|
301
|
+
# 3. Records {document_id, base_url, pushed_at} in exports.openrag
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
`dotmd-index.md` is the **map** (one-shot overview); OpenRAG is the
|
|
305
|
+
**search index** (full-content semantic retrieval). Register OpenRAG's
|
|
306
|
+
MCP server with Claude Code to use both surfaces from the same client.
|
|
307
|
+
|
|
202
308
|
## `analyze` — AI-assisted dependency detection
|
|
203
309
|
|
|
204
310
|
Use when a folder of markdown has **no explicit directives yet**. `analyze`
|
|
@@ -1,30 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: dotmd-parser
|
|
3
|
-
Version: 0.5.0
|
|
4
|
-
Summary: Dependency graph parser and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, and detect implicit dependencies via Claude for AI agent prompt engineering
|
|
5
|
-
Author: dotmd-projects
|
|
6
|
-
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/dotmd-projects/dotmd-parser
|
|
8
|
-
Project-URL: Repository, https://github.com/dotmd-projects/dotmd-parser
|
|
9
|
-
Project-URL: Issues, https://github.com/dotmd-projects/dotmd-parser/issues
|
|
10
|
-
Keywords: claude-code,ai-agent,skill-management,prompt-engineering,dependency-graph,SKILL.md,markdown,parser,dotmd,llm
|
|
11
|
-
Classifier: Development Status :: 3 - Alpha
|
|
12
|
-
Classifier: Intended Audience :: Developers
|
|
13
|
-
Classifier: Operating System :: OS Independent
|
|
14
|
-
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
-
Classifier: Topic :: Software Development :: Code Generators
|
|
21
|
-
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
22
|
-
Classifier: Typing :: Typed
|
|
23
|
-
Requires-Python: >=3.10
|
|
24
|
-
Description-Content-Type: text/markdown
|
|
25
|
-
License-File: LICENSE
|
|
26
|
-
Dynamic: license-file
|
|
27
|
-
|
|
28
1
|
# dotmd-parser
|
|
29
2
|
|
|
30
3
|
[](https://pypi.org/project/dotmd-parser/)
|
|
@@ -47,6 +20,35 @@ As AI agent projects grow, `.md` files start referencing each other via `@includ
|
|
|
47
20
|
|
|
48
21
|
**dotmd-parser** solves this by parsing your `.md` files into a dependency graph — automatically detecting directives, runtime references, and template placeholders. One function call gives you the full picture.
|
|
49
22
|
|
|
23
|
+
## Token savings — measured
|
|
24
|
+
|
|
25
|
+
The single biggest reason to reach for `dotmd-parser` in an agent loop is
|
|
26
|
+
that it lets Claude understand a folder **without reading every file**.
|
|
27
|
+
The numbers below are produced by `tests/test_token_savings.py` (run
|
|
28
|
+
with `DOTMD_TOKEN_REPORT=1 pytest -s`) using `tiktoken`'s `cl100k_base`
|
|
29
|
+
encoding (a close proxy for Claude's tokenizer family):
|
|
30
|
+
|
|
31
|
+
| Folder profile | Files | Naive read of every `.md` | `dotmd-index.md` | `digest` |
|
|
32
|
+
|---|---:|---:|---:|---:|
|
|
33
|
+
| Small skill (each file ~2 KB) | 4 | 1,610 tokens | **605 (0.38×)** | 174 (0.11×) |
|
|
34
|
+
| Medium docs (each file ~2 KB) | 31 | 15,855 tokens | **2,837 (0.18× → 5.6× cheaper)** | 1,285 (0.08×) |
|
|
35
|
+
| Large docs (each file ~2 KB) | 111 | 58,171 tokens | **9,535 (0.16× → 6.3× cheaper)** | 4,606 (0.08×) |
|
|
36
|
+
|
|
37
|
+
**Takeaway**: at ~30 files dotmd-parser already cuts Claude's reading
|
|
38
|
+
cost by **~5.6×**, and the savings *grow* with folder size. At 100+
|
|
39
|
+
files the same context window now fits **6× more conversation**, or
|
|
40
|
+
serves the same prompt at **1/6 the input-token spend**.
|
|
41
|
+
|
|
42
|
+
The persistent `dotmd-index.md` artifact pays a fixed frontmatter
|
|
43
|
+
overhead, so for *very small* folders (a handful of files of a few
|
|
44
|
+
hundred bytes) the naive read can still win. The `digest` output is
|
|
45
|
+
even more compact (~12× cheaper at scale) but isn't persisted on disk —
|
|
46
|
+
use `dotmd-index.md` for stable navigation, `digest` for one-shot
|
|
47
|
+
summaries.
|
|
48
|
+
|
|
49
|
+
The break-even is around **4 files × 1 KB each**: above that,
|
|
50
|
+
`dotmd-index.md` is always cheaper than reading the folder by hand.
|
|
51
|
+
|
|
50
52
|
## Comparison
|
|
51
53
|
|
|
52
54
|
| Capability | Manual / grep | dotmd-parser |
|
|
@@ -177,6 +179,9 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
|
|
|
177
179
|
| Command | Purpose |
|
|
178
180
|
|---|---|
|
|
179
181
|
| `dotmd-parser inventory <path>` | **API-free**: extension counts, markdown/binary ratio, largest files |
|
|
182
|
+
| `dotmd-parser dotmd-index <path>` | **API-free**: generate `<path>/dotmd-index.md` (single-file folder overview) |
|
|
183
|
+
| `dotmd-parser dotmd-index <path> --aggregate` | Roll up nested `dotmd-index.md` files into the parent (Sub-Indexes section) |
|
|
184
|
+
| `dotmd-parser dotmd-index <path> --push-openrag` | After writing, ingest into OpenRAG (`pip install dotmd-parser[openrag]`) |
|
|
180
185
|
| `dotmd-parser index <path>` | Build and save `.claude/dotmd-index.json` |
|
|
181
186
|
| `dotmd-parser index <path> --scope <subdir>` | Incrementally re-index one subfolder, merge into the existing index |
|
|
182
187
|
| `dotmd-parser check <path>` | Exit non-zero on cycles / missing refs (CI-friendly) |
|
|
@@ -189,16 +194,80 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
|
|
|
189
194
|
| `dotmd-parser analyze <path> --dry-run` | **API-free**: estimate tokens and USD cost |
|
|
190
195
|
| `dotmd-parser analyze <path> --plan` | **API-free**: emit a host-agent prompt pack for Claude Code to execute |
|
|
191
196
|
| `dotmd-parser analyze <path> --apply-from <json>` | Apply a pre-computed analysis JSON |
|
|
197
|
+
| `dotmd-parser init [--skill dotmd-index]` | Install a bundled SKILL.md into `.claude/skills/<id>/` |
|
|
192
198
|
| `dotmd-parser show <path>` | Summary + full JSON graph (legacy default) |
|
|
193
199
|
|
|
194
200
|
```bash
|
|
195
201
|
# Typical Claude Code workflow
|
|
196
|
-
dotmd-parser inventory ./my-skill/
|
|
197
|
-
dotmd-parser index ./my-skill/
|
|
198
|
-
dotmd-parser
|
|
202
|
+
dotmd-parser inventory ./my-skill/ # start here if you've never seen the folder
|
|
203
|
+
dotmd-parser dotmd-index ./my-skill/ # write ./my-skill/dotmd-index.md (Claude reads ONLY this file)
|
|
204
|
+
dotmd-parser index ./my-skill/ # one-off; cached until files change
|
|
205
|
+
dotmd-parser digest ./my-skill/ # compact summary for the LLM
|
|
199
206
|
dotmd-parser affects ./my-skill/ shared/role.md
|
|
200
207
|
```
|
|
201
208
|
|
|
209
|
+
## `dotmd-index.md` — folder overview in a single file
|
|
210
|
+
|
|
211
|
+
`dotmd-parser dotmd-index <path>` writes `<path>/dotmd-index.md`, a
|
|
212
|
+
self-contained Markdown file that combines `inventory()` and
|
|
213
|
+
`build_index()` into one artifact Claude can read **instead of**
|
|
214
|
+
grep-scanning every file in the folder.
|
|
215
|
+
|
|
216
|
+
The file contains:
|
|
217
|
+
|
|
218
|
+
- YAML frontmatter (`schema`, `content_hash`, `stats`, RAG `chunks[]`)
|
|
219
|
+
- `## Summary` (file counts, total size, health)
|
|
220
|
+
- `## Folder Map` (depth-limited ASCII tree)
|
|
221
|
+
- `## Files` (markdown: title + desc + deps; other: kind + size)
|
|
222
|
+
- `## Dependency Tree` (`@include` / `@delegate` / `@ref` graph as ASCII)
|
|
223
|
+
- `## Placeholders` (unresolved `{{...}}` variables)
|
|
224
|
+
- `<!-- chunk:id -->` HTML markers so any RAG tool can split deterministically
|
|
225
|
+
|
|
226
|
+
Re-running on an unchanged folder writes nothing (`content_hash` matches).
|
|
227
|
+
The command refuses to overwrite a hand-written `dotmd-index.md` unless
|
|
228
|
+
`--force` is passed.
|
|
229
|
+
|
|
230
|
+
### Aggregating across nested folders
|
|
231
|
+
|
|
232
|
+
Run with `--aggregate` to roll up descendant artifacts:
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
dotmd-parser dotmd-index ./project/ --aggregate
|
|
236
|
+
# project/dotmd-index.md now references project/docs/dotmd-index.md and
|
|
237
|
+
# project/src/dotmd-index.md without duplicating their file listings.
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
Each child is discovered, its frontmatter is read, and a one-line
|
|
241
|
+
summary (file count, edges, health) appears under `## Sub-Indexes`.
|
|
242
|
+
The `aggregates[]` frontmatter array records each child's `content_hash`
|
|
243
|
+
so staleness is easy to detect. User-authored `dotmd-index.md` files
|
|
244
|
+
that lack `generated_by: dotmd-parser` are silently skipped.
|
|
245
|
+
|
|
246
|
+
This is a **reference**, not a merge — Claude reads the parent to learn
|
|
247
|
+
which subfolders exist, then drills into the relevant child file. The
|
|
248
|
+
parent stays token-efficient even with deeply nested trees.
|
|
249
|
+
|
|
250
|
+
### OpenRAG integration
|
|
251
|
+
|
|
252
|
+
[OpenRAG](https://github.com/langflow-ai/openrag) is a self-hosted RAG
|
|
253
|
+
platform built on Langflow + Docling + OpenSearch. dotmd-parser can ship
|
|
254
|
+
the artifact straight into it:
|
|
255
|
+
|
|
256
|
+
```bash
|
|
257
|
+
pip install dotmd-parser[openrag] # adds openrag-sdk
|
|
258
|
+
export OPENRAG_URL=http://localhost:3000
|
|
259
|
+
export OPENRAG_API_KEY=... # if your instance requires auth
|
|
260
|
+
|
|
261
|
+
dotmd-parser dotmd-index ./docs/ --push-openrag
|
|
262
|
+
# 1. Writes ./docs/dotmd-index.md
|
|
263
|
+
# 2. Calls OpenRAGClient.documents.ingest(file_path=...)
|
|
264
|
+
# 3. Records {document_id, base_url, pushed_at} in exports.openrag
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
`dotmd-index.md` is the **map** (one-shot overview); OpenRAG is the
|
|
268
|
+
**search index** (full-content semantic retrieval). Register OpenRAG's
|
|
269
|
+
MCP server with Claude Code to use both surfaces from the same client.
|
|
270
|
+
|
|
202
271
|
## `analyze` — AI-assisted dependency detection
|
|
203
272
|
|
|
204
273
|
Use when a folder of markdown has **no explicit directives yet**. `analyze`
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "dotmd-parser"
|
|
3
|
-
version = "0.5.0"
|
|
4
|
-
description = "Dependency graph parser and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, and detect implicit dependencies via Claude for AI agent prompt engineering"
|
|
3
|
+
version = "0.6.2"
|
|
4
|
+
description = "Dependency graph parser, single-file folder index (dotmd-index.md), and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, generate RAG-friendly overviews, and ingest into OpenRAG"
|
|
5
5
|
requires-python = ">=3.10"
|
|
6
6
|
license = "MIT"
|
|
7
7
|
readme = "README.md"
|
|
@@ -40,6 +40,16 @@ Homepage = "https://github.com/dotmd-projects/dotmd-parser"
|
|
|
40
40
|
Repository = "https://github.com/dotmd-projects/dotmd-parser"
|
|
41
41
|
Issues = "https://github.com/dotmd-projects/dotmd-parser/issues"
|
|
42
42
|
|
|
43
|
+
[project.optional-dependencies]
|
|
44
|
+
openrag = ["openrag-sdk>=0.3.1"]
|
|
45
|
+
pdf = ["pdfplumber>=0.10"]
|
|
46
|
+
docx = ["python-docx>=1.0"]
|
|
47
|
+
all = [
|
|
48
|
+
"openrag-sdk>=0.3.1",
|
|
49
|
+
"pdfplumber>=0.10",
|
|
50
|
+
"python-docx>=1.0",
|
|
51
|
+
]
|
|
52
|
+
|
|
43
53
|
[project.scripts]
|
|
44
54
|
dotmd-parser = "dotmd_parser.parser:main"
|
|
45
55
|
|
|
@@ -53,6 +63,7 @@ where = ["src"]
|
|
|
53
63
|
[tool.setuptools.package-data]
|
|
54
64
|
"dotmd_parser.templates" = ["*.md"]
|
|
55
65
|
"dotmd_parser.templates.prompts" = ["*.md"]
|
|
66
|
+
"dotmd_parser.templates.dotmd_index" = ["*.md"]
|
|
56
67
|
|
|
57
68
|
[tool.pytest.ini_options]
|
|
58
69
|
testpaths = ["tests"]
|
|
@@ -10,7 +10,7 @@ API:
|
|
|
10
10
|
from dotmd_parser import digest, tree, affects
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
-
__version__ = "0.5.0"
|
|
13
|
+
__version__ = "0.6.2"
|
|
14
14
|
|
|
15
15
|
from dotmd_parser.parser import (
|
|
16
16
|
build_graph,
|
|
@@ -63,6 +63,14 @@ from dotmd_parser.inventory import (
|
|
|
63
63
|
BINARY_EXTENSIONS,
|
|
64
64
|
MARKDOWN_EXTENSIONS,
|
|
65
65
|
)
|
|
66
|
+
from dotmd_parser.index_md import (
|
|
67
|
+
generate_index_md,
|
|
68
|
+
write_index_md,
|
|
69
|
+
extract_frontmatter,
|
|
70
|
+
DEFAULT_INDEX_FILENAME,
|
|
71
|
+
INDEX_MD_SCHEMA,
|
|
72
|
+
)
|
|
73
|
+
from dotmd_parser.openrag import push_to_openrag
|
|
66
74
|
|
|
67
75
|
__all__ = [
|
|
68
76
|
"__version__",
|
|
@@ -112,4 +120,12 @@ __all__ = [
|
|
|
112
120
|
"TEXT_EXTENSIONS",
|
|
113
121
|
"BINARY_EXTENSIONS",
|
|
114
122
|
"MARKDOWN_EXTENSIONS",
|
|
123
|
+
# index_md
|
|
124
|
+
"generate_index_md",
|
|
125
|
+
"write_index_md",
|
|
126
|
+
"extract_frontmatter",
|
|
127
|
+
"DEFAULT_INDEX_FILENAME",
|
|
128
|
+
"INDEX_MD_SCHEMA",
|
|
129
|
+
# openrag
|
|
130
|
+
"push_to_openrag",
|
|
115
131
|
]
|
|
@@ -53,6 +53,11 @@ from dotmd_parser.inventory import (
|
|
|
53
53
|
format_inventory as _format_inventory,
|
|
54
54
|
suggest_next_command as _suggest_next_command,
|
|
55
55
|
)
|
|
56
|
+
from dotmd_parser.index_md import (
|
|
57
|
+
DEFAULT_INDEX_FILENAME,
|
|
58
|
+
generate_index_md as _generate_index_md,
|
|
59
|
+
write_index_md as _write_index_md,
|
|
60
|
+
)
|
|
56
61
|
|
|
57
62
|
|
|
58
63
|
def _maybe_warn_empty(path: str) -> None:
|
|
@@ -88,20 +93,35 @@ def _load_or_build_index(path: str, use_cache: bool = True) -> dict:
|
|
|
88
93
|
SKILL_DIR_NAME = "dotmd-parser"
|
|
89
94
|
SKILL_TEMPLATE = "SKILL.md"
|
|
90
95
|
|
|
96
|
+
# Map skill id (user-facing folder name) → package resource path
|
|
97
|
+
_SKILLS = {
|
|
98
|
+
"dotmd-parser": ("dotmd_parser.templates", SKILL_TEMPLATE),
|
|
99
|
+
"dotmd-index": ("dotmd_parser.templates.dotmd_index", SKILL_TEMPLATE),
|
|
100
|
+
}
|
|
101
|
+
|
|
91
102
|
|
|
92
|
-
def _read_bundled_skill() -> str:
|
|
93
|
-
"""Load
|
|
94
|
-
|
|
103
|
+
def _read_bundled_skill(skill_id: str = "dotmd-parser") -> str:
|
|
104
|
+
"""Load a packaged SKILL.md via importlib.resources."""
|
|
105
|
+
pkg, name = _SKILLS[skill_id]
|
|
106
|
+
return resources.files(pkg).joinpath(name).read_text(encoding="utf-8")
|
|
95
107
|
|
|
96
108
|
|
|
97
109
|
def cmd_init(args: argparse.Namespace) -> int:
|
|
98
|
-
"""Install
|
|
110
|
+
"""Install a bundled SKILL.md into `<path>/.claude/skills/<skill-id>/SKILL.md`."""
|
|
99
111
|
project = Path(args.path).resolve()
|
|
100
112
|
if not project.exists():
|
|
101
113
|
print(f"error: path does not exist: {project}", file=sys.stderr)
|
|
102
114
|
return 2
|
|
103
115
|
|
|
104
|
-
|
|
116
|
+
skill_id = args.skill or "dotmd-parser"
|
|
117
|
+
if skill_id not in _SKILLS:
|
|
118
|
+
print(
|
|
119
|
+
f"error: unknown skill {skill_id!r}; choose from {sorted(_SKILLS)}",
|
|
120
|
+
file=sys.stderr,
|
|
121
|
+
)
|
|
122
|
+
return 2
|
|
123
|
+
|
|
124
|
+
target_dir = project / ".claude" / "skills" / skill_id
|
|
105
125
|
target = target_dir / SKILL_TEMPLATE
|
|
106
126
|
|
|
107
127
|
if target.exists() and not args.force:
|
|
@@ -112,9 +132,12 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
112
132
|
return 1
|
|
113
133
|
|
|
114
134
|
target_dir.mkdir(parents=True, exist_ok=True)
|
|
115
|
-
target.write_text(_read_bundled_skill(), encoding="utf-8")
|
|
135
|
+
target.write_text(_read_bundled_skill(skill_id), encoding="utf-8")
|
|
116
136
|
print(f"Installed skill: {target}")
|
|
117
|
-
|
|
137
|
+
if skill_id == "dotmd-parser":
|
|
138
|
+
print("Next: run `dotmd-parser index .` from the project root.")
|
|
139
|
+
elif skill_id == "dotmd-index":
|
|
140
|
+
print("Next: run `dotmd-parser dotmd-index .` to generate the artifact.")
|
|
118
141
|
return 0
|
|
119
142
|
|
|
120
143
|
|
|
@@ -307,6 +330,73 @@ def cmd_inventory(args: argparse.Namespace) -> int:
|
|
|
307
330
|
return 0
|
|
308
331
|
|
|
309
332
|
|
|
333
|
+
def cmd_dotmd_index(args: argparse.Namespace) -> int:
|
|
334
|
+
"""Generate `<root>/dotmd-index.md` (or print to stdout). Optionally push to OpenRAG."""
|
|
335
|
+
gen_kwargs = {
|
|
336
|
+
"include_folder_map": not args.no_folder_map,
|
|
337
|
+
"include_deps_tree": not args.no_deps,
|
|
338
|
+
"max_files": args.max_files,
|
|
339
|
+
"aggregate": args.aggregate,
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
if args.stdout:
|
|
343
|
+
if args.push_openrag:
|
|
344
|
+
print("error: --stdout and --push-openrag are mutually exclusive", file=sys.stderr)
|
|
345
|
+
return 2
|
|
346
|
+
try:
|
|
347
|
+
md = _generate_index_md(args.path, **gen_kwargs)
|
|
348
|
+
except ValueError as e:
|
|
349
|
+
print(f"error: {e}", file=sys.stderr)
|
|
350
|
+
return 2
|
|
351
|
+
print(md, end="" if md.endswith("\n") else "\n")
|
|
352
|
+
return 0
|
|
353
|
+
|
|
354
|
+
try:
|
|
355
|
+
path, written = _write_index_md(args.path, force=args.force, **gen_kwargs)
|
|
356
|
+
except ValueError as e:
|
|
357
|
+
msg = str(e)
|
|
358
|
+
print(f"error: {msg}", file=sys.stderr)
|
|
359
|
+
if "does not exist" in msg or "is not a directory" in msg:
|
|
360
|
+
return 2
|
|
361
|
+
return 1
|
|
362
|
+
if written:
|
|
363
|
+
print(f"Wrote {path}")
|
|
364
|
+
else:
|
|
365
|
+
print(f"{path} unchanged (content_hash matches).")
|
|
366
|
+
|
|
367
|
+
if args.push_openrag:
|
|
368
|
+
from dotmd_parser.openrag import push_to_openrag as _push_to_openrag
|
|
369
|
+
try:
|
|
370
|
+
export = _push_to_openrag(
|
|
371
|
+
str(path),
|
|
372
|
+
base_url=args.openrag_url,
|
|
373
|
+
api_key=args.openrag_api_key,
|
|
374
|
+
)
|
|
375
|
+
except ImportError as e:
|
|
376
|
+
print(f"error: {e}", file=sys.stderr)
|
|
377
|
+
return 2
|
|
378
|
+
except (ValueError, RuntimeError) as e:
|
|
379
|
+
print(f"error: openrag push failed: {e}", file=sys.stderr)
|
|
380
|
+
return 1
|
|
381
|
+
|
|
382
|
+
# Re-emit the file with exports.openrag recorded — bypass idempotency
|
|
383
|
+
# check because we have explicit new metadata to persist.
|
|
384
|
+
md_with_export = _generate_index_md(
|
|
385
|
+
args.path,
|
|
386
|
+
extra_frontmatter={"exports": {"openrag": export}},
|
|
387
|
+
**gen_kwargs,
|
|
388
|
+
)
|
|
389
|
+
path.write_text(md_with_export, encoding="utf-8")
|
|
390
|
+
tid = export.get("task_id") or "<n/a>"
|
|
391
|
+
succ = export.get("successful_files", 0)
|
|
392
|
+
fail = export.get("failed_files", 0)
|
|
393
|
+
print(
|
|
394
|
+
f"Pushed to OpenRAG: {export.get('base_url')} "
|
|
395
|
+
f"(task_id={tid}, successful={succ}, failed={fail})"
|
|
396
|
+
)
|
|
397
|
+
return 0
|
|
398
|
+
|
|
399
|
+
|
|
310
400
|
def cmd_show(args: argparse.Namespace) -> int:
|
|
311
401
|
graph = build_graph(args.path)
|
|
312
402
|
print(summary(graph))
|
|
@@ -325,8 +415,14 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
325
415
|
|
|
326
416
|
sub = parser.add_subparsers(dest="command")
|
|
327
417
|
|
|
328
|
-
p_init = sub.add_parser("init", help="Install bundled SKILL.md into .claude/skills
|
|
418
|
+
p_init = sub.add_parser("init", help="Install a bundled SKILL.md into .claude/skills/<id>/")
|
|
329
419
|
p_init.add_argument("path", nargs="?", default=".", help="Project root (default: current directory)")
|
|
420
|
+
p_init.add_argument(
|
|
421
|
+
"--skill",
|
|
422
|
+
choices=sorted(_SKILLS),
|
|
423
|
+
default="dotmd-parser",
|
|
424
|
+
help="Which bundled skill to install (default: dotmd-parser)",
|
|
425
|
+
)
|
|
330
426
|
p_init.add_argument("--force", action="store_true", help="Overwrite an existing SKILL.md")
|
|
331
427
|
p_init.set_defaults(func=cmd_init)
|
|
332
428
|
|
|
@@ -404,6 +500,60 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
404
500
|
p_inv.add_argument("--json", action="store_true", help="Emit JSON instead of formatted text")
|
|
405
501
|
p_inv.set_defaults(func=cmd_inventory)
|
|
406
502
|
|
|
503
|
+
p_idxmd = sub.add_parser(
|
|
504
|
+
"dotmd-index",
|
|
505
|
+
help=f"Generate {DEFAULT_INDEX_FILENAME} at <path>/ (single-file folder overview)",
|
|
506
|
+
)
|
|
507
|
+
p_idxmd.add_argument("path", help="Directory to summarize")
|
|
508
|
+
p_idxmd.add_argument(
|
|
509
|
+
"--stdout",
|
|
510
|
+
action="store_true",
|
|
511
|
+
help="Print to stdout instead of writing to <path>/dotmd-index.md",
|
|
512
|
+
)
|
|
513
|
+
p_idxmd.add_argument(
|
|
514
|
+
"--force",
|
|
515
|
+
action="store_true",
|
|
516
|
+
help="Overwrite an existing file even if it isn't a dotmd-parser artifact",
|
|
517
|
+
)
|
|
518
|
+
p_idxmd.add_argument(
|
|
519
|
+
"--no-folder-map",
|
|
520
|
+
action="store_true",
|
|
521
|
+
help="Skip the ASCII folder-map section",
|
|
522
|
+
)
|
|
523
|
+
p_idxmd.add_argument(
|
|
524
|
+
"--no-deps",
|
|
525
|
+
action="store_true",
|
|
526
|
+
help="Skip the dependency-tree section",
|
|
527
|
+
)
|
|
528
|
+
p_idxmd.add_argument(
|
|
529
|
+
"--max-files",
|
|
530
|
+
type=int,
|
|
531
|
+
default=200,
|
|
532
|
+
help="Cap on the number of files listed in the body (default: 200)",
|
|
533
|
+
)
|
|
534
|
+
p_idxmd.add_argument(
|
|
535
|
+
"--aggregate",
|
|
536
|
+
action="store_true",
|
|
537
|
+
help="Discover descendant dotmd-index.md artifacts and reference them "
|
|
538
|
+
"(adds a ## Sub-Indexes section + aggregates[] frontmatter)",
|
|
539
|
+
)
|
|
540
|
+
p_idxmd.add_argument(
|
|
541
|
+
"--push-openrag",
|
|
542
|
+
action="store_true",
|
|
543
|
+
help="After writing, ingest the file into OpenRAG (requires `pip install dotmd-parser[openrag]`)",
|
|
544
|
+
)
|
|
545
|
+
p_idxmd.add_argument(
|
|
546
|
+
"--openrag-url",
|
|
547
|
+
metavar="URL",
|
|
548
|
+
help="OpenRAG endpoint (default: $OPENRAG_URL or http://localhost:3000)",
|
|
549
|
+
)
|
|
550
|
+
p_idxmd.add_argument(
|
|
551
|
+
"--openrag-api-key",
|
|
552
|
+
metavar="KEY",
|
|
553
|
+
help="OpenRAG API key (default: $OPENRAG_API_KEY, handled by the SDK)",
|
|
554
|
+
)
|
|
555
|
+
p_idxmd.set_defaults(func=cmd_dotmd_index)
|
|
556
|
+
|
|
407
557
|
p_show = sub.add_parser("show", help="Legacy summary + full JSON graph")
|
|
408
558
|
p_show.add_argument("path", help="Directory or SKILL.md")
|
|
409
559
|
p_show.add_argument("--quiet", action="store_true", help="Suppress JSON dump")
|
|
@@ -417,7 +567,7 @@ def run(argv: list[str] | None = None) -> int:
|
|
|
417
567
|
args_list = list(sys.argv[1:] if argv is None else argv)
|
|
418
568
|
|
|
419
569
|
# Backwards compatibility: `dotmd-parser <path>` with no subcommand → show
|
|
420
|
-
known_cmds = {"init", "index", "check", "affects", "deps", "digest", "tree", "resolve", "analyze", "inventory", "show"}
|
|
570
|
+
known_cmds = {"init", "index", "check", "affects", "deps", "digest", "tree", "resolve", "analyze", "inventory", "dotmd-index", "show"}
|
|
421
571
|
if args_list and args_list[0] not in known_cmds and not args_list[0].startswith("-"):
|
|
422
572
|
args_list = ["show", *args_list]
|
|
423
573
|
if not args_list:
|