dotmd-parser 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/PKG-INFO +111 -5
  2. dotmd_parser-0.5.0/src/dotmd_parser.egg-info/PKG-INFO → dotmd_parser-0.7.0/README.md +99 -30
  3. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/pyproject.toml +13 -2
  4. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/__init__.py +17 -1
  5. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/cli.py +159 -9
  6. dotmd_parser-0.7.0/src/dotmd_parser/index_md.py +716 -0
  7. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/inventory.py +5 -2
  8. dotmd_parser-0.7.0/src/dotmd_parser/openrag.py +150 -0
  9. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/parser.py +11 -3
  10. dotmd_parser-0.7.0/src/dotmd_parser/templates/dotmd_index/SKILL.md +254 -0
  11. dotmd_parser-0.7.0/src/dotmd_parser/templates/dotmd_index/__init__.py +0 -0
  12. dotmd_parser-0.5.0/README.md → dotmd_parser-0.7.0/src/dotmd_parser.egg-info/PKG-INFO +136 -3
  13. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser.egg-info/SOURCES.txt +12 -1
  14. dotmd_parser-0.7.0/src/dotmd_parser.egg-info/requires.txt +14 -0
  15. dotmd_parser-0.7.0/tests/test_aggregate.py +149 -0
  16. dotmd_parser-0.7.0/tests/test_cli_dotmd_index.py +133 -0
  17. dotmd_parser-0.7.0/tests/test_index_md.py +233 -0
  18. dotmd_parser-0.7.0/tests/test_openrag_push.py +199 -0
  19. dotmd_parser-0.7.0/tests/test_orchestrator_detection.py +72 -0
  20. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/tests/test_skill_integration.py +1 -1
  21. dotmd_parser-0.7.0/tests/test_token_savings.py +263 -0
  22. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/LICENSE +0 -0
  23. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/setup.cfg +0 -0
  24. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/analyze.py +0 -0
  25. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/digest.py +0 -0
  26. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/index.py +0 -0
  27. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/templates/SKILL.md +0 -0
  28. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/templates/__init__.py +0 -0
  29. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/templates/prompts/__init__.py +0 -0
  30. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser/templates/prompts/analyze-dependencies.md +0 -0
  31. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser.egg-info/dependency_links.txt +0 -0
  32. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser.egg-info/entry_points.txt +0 -0
  33. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/src/dotmd_parser.egg-info/top_level.txt +0 -0
  34. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/tests/test_analyze.py +0 -0
  35. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/tests/test_cost_estimate.py +0 -0
  36. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/tests/test_empty_warnings.py +0 -0
  37. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/tests/test_host_agent_plan.py +0 -0
  38. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/tests/test_index_scope.py +0 -0
  39. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/tests/test_inventory.py +0 -0
  40. {dotmd_parser-0.5.0 → dotmd_parser-0.7.0}/tests/test_parser.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dotmd-parser
3
- Version: 0.5.0
4
- Summary: Dependency graph parser and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, and detect implicit dependencies via Claude for AI agent prompt engineering
3
+ Version: 0.7.0
4
+ Summary: Dependency graph parser, single-file folder index (dotmd-index.md), and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, generate RAG-friendly overviews, and ingest into OpenRAG
5
5
  Author: dotmd-projects
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/dotmd-projects/dotmd-parser
@@ -23,6 +23,16 @@ Classifier: Typing :: Typed
23
23
  Requires-Python: >=3.10
24
24
  Description-Content-Type: text/markdown
25
25
  License-File: LICENSE
26
+ Provides-Extra: openrag
27
+ Requires-Dist: openrag-sdk>=0.3.1; extra == "openrag"
28
+ Provides-Extra: pdf
29
+ Requires-Dist: pdfplumber>=0.10; extra == "pdf"
30
+ Provides-Extra: docx
31
+ Requires-Dist: python-docx>=1.0; extra == "docx"
32
+ Provides-Extra: all
33
+ Requires-Dist: openrag-sdk>=0.3.1; extra == "all"
34
+ Requires-Dist: pdfplumber>=0.10; extra == "all"
35
+ Requires-Dist: python-docx>=1.0; extra == "all"
26
36
  Dynamic: license-file
27
37
 
28
38
  # dotmd-parser
@@ -47,6 +57,35 @@ As AI agent projects grow, `.md` files start referencing each other via `@includ
47
57
 
48
58
  **dotmd-parser** solves this by parsing your `.md` files into a dependency graph — automatically detecting directives, runtime references, and template placeholders. One function call gives you the full picture.
49
59
 
60
+ ## Token savings — measured
61
+
62
+ The single biggest reason to reach for `dotmd-parser` in an agent loop is
63
+ that it lets Claude understand a folder **without reading every file**.
64
+ The numbers below are produced by `tests/test_token_savings.py` (run
65
+ with `DOTMD_TOKEN_REPORT=1 pytest -s`) using `tiktoken`'s `cl100k_base`
66
+ encoding (OpenAI's tokenizer, used here only as a rough proxy for Claude's):
67
+
68
+ | Folder profile | Files | Naive read of every `.md` | `dotmd-index.md` | `digest` |
69
+ |---|---:|---:|---:|---:|
70
+ | Small skill (each file ~2 KB) | 4 | 1,610 tokens | **605 (0.38×)** | 174 (0.11×) |
71
+ | Medium docs (each file ~2 KB) | 31 | 15,855 tokens | **2,837 (0.18× → 5.6× cheaper)** | 1,285 (0.08×) |
72
+ | Large docs (each file ~2 KB) | 111 | 58,171 tokens | **9,535 (0.16× → 6.3× cheaper)** | 4,606 (0.08×) |
73
+
74
+ **Takeaway**: at ~30 files dotmd-parser already cuts Claude's reading
75
+ cost by **~5.6×**, and the savings *grow* with folder size. At 100+
76
+ files the folder overview costs about **1/6 of the input tokens** a naive
77
+ read would, leaving that much more of the context window for the conversation.
78
+
79
+ The persistent `dotmd-index.md` artifact pays a fixed frontmatter
80
+ overhead, so for *very small* folders (a handful of files of a few
81
+ hundred bytes) the naive read can still win. The `digest` output is
82
+ even more compact (~12× cheaper at scale) but isn't persisted on disk —
83
+ use `dotmd-index.md` for stable navigation, `digest` for one-shot
84
+ summaries.
85
+
86
+ The break-even is around **4 files × 1 KB each**: above that,
87
+ `dotmd-index.md` is always cheaper than reading the folder by hand.
88
+
50
89
  ## Comparison
51
90
 
52
91
  | Capability | Manual / grep | dotmd-parser |
@@ -177,6 +216,9 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
177
216
  | Command | Purpose |
178
217
  |---|---|
179
218
  | `dotmd-parser inventory <path>` | **API-free**: extension counts, markdown/binary ratio, largest files |
219
+ | `dotmd-parser dotmd-index <path>` | **API-free**: generate `<path>/dotmd-index.md` (single-file folder overview) |
220
+ | `dotmd-parser dotmd-index <path> --aggregate` | Roll up nested `dotmd-index.md` files into the parent (Sub-Indexes section) |
221
+ | `dotmd-parser dotmd-index <path> --push-openrag` | After writing, ingest into OpenRAG (`pip install dotmd-parser[openrag]`) |
180
222
  | `dotmd-parser index <path>` | Build and save `.claude/dotmd-index.json` |
181
223
  | `dotmd-parser index <path> --scope <subdir>` | Incrementally re-index one subfolder, merge into the existing index |
182
224
  | `dotmd-parser check <path>` | Exit non-zero on cycles / missing refs (CI-friendly) |
@@ -189,16 +231,80 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
189
231
  | `dotmd-parser analyze <path> --dry-run` | **API-free**: estimate tokens and USD cost |
190
232
  | `dotmd-parser analyze <path> --plan` | **API-free**: emit a host-agent prompt pack for Claude Code to execute |
191
233
  | `dotmd-parser analyze <path> --apply-from <json>` | Apply a pre-computed analysis JSON |
234
+ | `dotmd-parser init [--skill dotmd-index]` | Install a bundled SKILL.md into `.claude/skills/<id>/` |
192
235
  | `dotmd-parser show <path>` | Summary + full JSON graph (legacy default) |
193
236
 
194
237
  ```bash
195
238
  # Typical Claude Code workflow
196
- dotmd-parser inventory ./my-skill/ # start here if you've never seen the folder
197
- dotmd-parser index ./my-skill/ # one-off; cached until files change
198
- dotmd-parser digest ./my-skill/ # compact summary for the LLM
239
+ dotmd-parser inventory ./my-skill/ # start here if you've never seen the folder
240
+ dotmd-parser dotmd-index ./my-skill/ # write ./my-skill/dotmd-index.md (Claude reads ONLY this file)
241
+ dotmd-parser index ./my-skill/ # one-off; cached until files change
242
+ dotmd-parser digest ./my-skill/ # compact summary for the LLM
199
243
  dotmd-parser affects ./my-skill/ shared/role.md
200
244
  ```
201
245
 
246
+ ## `dotmd-index.md` — folder overview in a single file
247
+
248
+ `dotmd-parser dotmd-index <path>` writes `<path>/dotmd-index.md`, a
249
+ self-contained Markdown file that combines `inventory()` and
250
+ `build_index()` into one artifact Claude can read **instead of**
251
+ grep-scanning every file in the folder.
252
+
253
+ The file contains:
254
+
255
+ - YAML frontmatter (`schema`, `content_hash`, `stats`, RAG `chunks[]`)
256
+ - `## Summary` (file counts, total size, health)
257
+ - `## Folder Map` (depth-limited ASCII tree)
258
+ - `## Files` (markdown: title + desc + deps; other: kind + size)
259
+ - `## Dependency Tree` (`@include` / `@delegate` / `@ref` graph as ASCII)
260
+ - `## Placeholders` (unresolved `{{...}}` variables)
261
+ - `<!-- chunk:id -->` HTML markers so any RAG tool can split deterministically
262
+
263
+ Re-running on an unchanged folder writes nothing (`content_hash` matches).
264
+ The command refuses to overwrite a hand-written `dotmd-index.md` unless
265
+ `--force` is passed.
266
+
267
+ ### Aggregating across nested folders
268
+
269
+ Run with `--aggregate` to roll up descendant artifacts:
270
+
271
+ ```bash
272
+ dotmd-parser dotmd-index ./project/ --aggregate
273
+ # project/dotmd-index.md now references project/docs/dotmd-index.md and
274
+ # project/src/dotmd-index.md without duplicating their file listings.
275
+ ```
276
+
277
+ Each child is discovered, its frontmatter is read, and a one-line
278
+ summary (file count, edges, health) appears under `## Sub-Indexes`.
279
+ The `aggregates[]` frontmatter array records each child's `content_hash`
280
+ so staleness is easy to detect. User-authored `dotmd-index.md` files
281
+ that lack `generated_by: dotmd-parser` are silently skipped.
282
+
283
+ This is a **reference**, not a merge — Claude reads the parent to learn
284
+ which subfolders exist, then drills into the relevant child file. The
285
+ parent stays token-efficient even with deeply nested trees.
286
+
287
+ ### OpenRAG integration
288
+
289
+ [OpenRAG](https://github.com/langflow-ai/openrag) is a self-hosted RAG
290
+ platform built on Langflow + Docling + OpenSearch. dotmd-parser can ship
291
+ the artifact straight into it:
292
+
293
+ ```bash
294
+ pip install "dotmd-parser[openrag]" # adds openrag-sdk (quoted so zsh does not glob the brackets)
295
+ export OPENRAG_URL=http://localhost:3000
296
+ export OPENRAG_API_KEY=... # if your instance requires auth
297
+
298
+ dotmd-parser dotmd-index ./docs/ --push-openrag
299
+ # 1. Writes ./docs/dotmd-index.md
300
+ # 2. Calls OpenRAGClient.documents.ingest(file_path=...)
301
+ # 3. Records {document_id, base_url, pushed_at} in exports.openrag
302
+ ```
303
+
304
+ `dotmd-index.md` is the **map** (one-shot overview); OpenRAG is the
305
+ **search index** (full-content semantic retrieval). Register OpenRAG's
306
+ MCP server with Claude Code to use both surfaces from the same client.
307
+
202
308
  ## `analyze` — AI-assisted dependency detection
203
309
 
204
310
  Use when a folder of markdown has **no explicit directives yet**. `analyze`
@@ -1,30 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: dotmd-parser
3
- Version: 0.5.0
4
- Summary: Dependency graph parser and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, and detect implicit dependencies via Claude for AI agent prompt engineering
5
- Author: dotmd-projects
6
- License-Expression: MIT
7
- Project-URL: Homepage, https://github.com/dotmd-projects/dotmd-parser
8
- Project-URL: Repository, https://github.com/dotmd-projects/dotmd-parser
9
- Project-URL: Issues, https://github.com/dotmd-projects/dotmd-parser/issues
10
- Keywords: claude-code,ai-agent,skill-management,prompt-engineering,dependency-graph,SKILL.md,markdown,parser,dotmd,llm
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: Operating System :: OS Independent
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Classifier: Programming Language :: Python :: 3.12
18
- Classifier: Programming Language :: Python :: 3.13
19
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
- Classifier: Topic :: Software Development :: Code Generators
21
- Classifier: Topic :: Text Processing :: Markup :: Markdown
22
- Classifier: Typing :: Typed
23
- Requires-Python: >=3.10
24
- Description-Content-Type: text/markdown
25
- License-File: LICENSE
26
- Dynamic: license-file
27
-
28
1
  # dotmd-parser
29
2
 
30
3
  [![PyPI version](https://img.shields.io/pypi/v/dotmd-parser)](https://pypi.org/project/dotmd-parser/)
@@ -47,6 +20,35 @@ As AI agent projects grow, `.md` files start referencing each other via `@includ
47
20
 
48
21
  **dotmd-parser** solves this by parsing your `.md` files into a dependency graph — automatically detecting directives, runtime references, and template placeholders. One function call gives you the full picture.
49
22
 
23
+ ## Token savings — measured
24
+
25
+ The single biggest reason to reach for `dotmd-parser` in an agent loop is
26
+ that it lets Claude understand a folder **without reading every file**.
27
+ The numbers below are produced by `tests/test_token_savings.py` (run
28
+ with `DOTMD_TOKEN_REPORT=1 pytest -s`) using `tiktoken`'s `cl100k_base`
29
+ encoding (OpenAI's tokenizer, used here only as a rough proxy for Claude's):
30
+
31
+ | Folder profile | Files | Naive read of every `.md` | `dotmd-index.md` | `digest` |
32
+ |---|---:|---:|---:|---:|
33
+ | Small skill (each file ~2 KB) | 4 | 1,610 tokens | **605 (0.38×)** | 174 (0.11×) |
34
+ | Medium docs (each file ~2 KB) | 31 | 15,855 tokens | **2,837 (0.18× → 5.6× cheaper)** | 1,285 (0.08×) |
35
+ | Large docs (each file ~2 KB) | 111 | 58,171 tokens | **9,535 (0.16× → 6.3× cheaper)** | 4,606 (0.08×) |
36
+
37
+ **Takeaway**: at ~30 files dotmd-parser already cuts Claude's reading
38
+ cost by **~5.6×**, and the savings *grow* with folder size. At 100+
39
+ files the folder overview costs about **1/6 of the input tokens** a naive
40
+ read would, leaving that much more of the context window for the conversation.
41
+
42
+ The persistent `dotmd-index.md` artifact pays a fixed frontmatter
43
+ overhead, so for *very small* folders (a handful of files of a few
44
+ hundred bytes) the naive read can still win. The `digest` output is
45
+ even more compact (~12× cheaper at scale) but isn't persisted on disk —
46
+ use `dotmd-index.md` for stable navigation, `digest` for one-shot
47
+ summaries.
48
+
49
+ The break-even is around **4 files × 1 KB each**: above that,
50
+ `dotmd-index.md` is always cheaper than reading the folder by hand.
51
+
50
52
  ## Comparison
51
53
 
52
54
  | Capability | Manual / grep | dotmd-parser |
@@ -177,6 +179,9 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
177
179
  | Command | Purpose |
178
180
  |---|---|
179
181
  | `dotmd-parser inventory <path>` | **API-free**: extension counts, markdown/binary ratio, largest files |
182
+ | `dotmd-parser dotmd-index <path>` | **API-free**: generate `<path>/dotmd-index.md` (single-file folder overview) |
183
+ | `dotmd-parser dotmd-index <path> --aggregate` | Roll up nested `dotmd-index.md` files into the parent (Sub-Indexes section) |
184
+ | `dotmd-parser dotmd-index <path> --push-openrag` | After writing, ingest into OpenRAG (`pip install dotmd-parser[openrag]`) |
180
185
  | `dotmd-parser index <path>` | Build and save `.claude/dotmd-index.json` |
181
186
  | `dotmd-parser index <path> --scope <subdir>` | Incrementally re-index one subfolder, merge into the existing index |
182
187
  | `dotmd-parser check <path>` | Exit non-zero on cycles / missing refs (CI-friendly) |
@@ -189,16 +194,80 @@ from dotmd_parser import parse_directives, parse_read_refs, parse_placeholders,
189
194
  | `dotmd-parser analyze <path> --dry-run` | **API-free**: estimate tokens and USD cost |
190
195
  | `dotmd-parser analyze <path> --plan` | **API-free**: emit a host-agent prompt pack for Claude Code to execute |
191
196
  | `dotmd-parser analyze <path> --apply-from <json>` | Apply a pre-computed analysis JSON |
197
+ | `dotmd-parser init [--skill dotmd-index]` | Install a bundled SKILL.md into `.claude/skills/<id>/` |
192
198
  | `dotmd-parser show <path>` | Summary + full JSON graph (legacy default) |
193
199
 
194
200
  ```bash
195
201
  # Typical Claude Code workflow
196
- dotmd-parser inventory ./my-skill/ # start here if you've never seen the folder
197
- dotmd-parser index ./my-skill/ # one-off; cached until files change
198
- dotmd-parser digest ./my-skill/ # compact summary for the LLM
202
+ dotmd-parser inventory ./my-skill/ # start here if you've never seen the folder
203
+ dotmd-parser dotmd-index ./my-skill/ # write ./my-skill/dotmd-index.md (Claude reads ONLY this file)
204
+ dotmd-parser index ./my-skill/ # one-off; cached until files change
205
+ dotmd-parser digest ./my-skill/ # compact summary for the LLM
199
206
  dotmd-parser affects ./my-skill/ shared/role.md
200
207
  ```
201
208
 
209
+ ## `dotmd-index.md` — folder overview in a single file
210
+
211
+ `dotmd-parser dotmd-index <path>` writes `<path>/dotmd-index.md`, a
212
+ self-contained Markdown file that combines `inventory()` and
213
+ `build_index()` into one artifact Claude can read **instead of**
214
+ grep-scanning every file in the folder.
215
+
216
+ The file contains:
217
+
218
+ - YAML frontmatter (`schema`, `content_hash`, `stats`, RAG `chunks[]`)
219
+ - `## Summary` (file counts, total size, health)
220
+ - `## Folder Map` (depth-limited ASCII tree)
221
+ - `## Files` (markdown: title + desc + deps; other: kind + size)
222
+ - `## Dependency Tree` (`@include` / `@delegate` / `@ref` graph as ASCII)
223
+ - `## Placeholders` (unresolved `{{...}}` variables)
224
+ - `<!-- chunk:id -->` HTML markers so any RAG tool can split deterministically
225
+
226
+ Re-running on an unchanged folder writes nothing (`content_hash` matches).
227
+ The command refuses to overwrite a hand-written `dotmd-index.md` unless
228
+ `--force` is passed.
229
+
230
+ ### Aggregating across nested folders
231
+
232
+ Run with `--aggregate` to roll up descendant artifacts:
233
+
234
+ ```bash
235
+ dotmd-parser dotmd-index ./project/ --aggregate
236
+ # project/dotmd-index.md now references project/docs/dotmd-index.md and
237
+ # project/src/dotmd-index.md without duplicating their file listings.
238
+ ```
239
+
240
+ Each child is discovered, its frontmatter is read, and a one-line
241
+ summary (file count, edges, health) appears under `## Sub-Indexes`.
242
+ The `aggregates[]` frontmatter array records each child's `content_hash`
243
+ so staleness is easy to detect. User-authored `dotmd-index.md` files
244
+ that lack `generated_by: dotmd-parser` are silently skipped.
245
+
246
+ This is a **reference**, not a merge — Claude reads the parent to learn
247
+ which subfolders exist, then drills into the relevant child file. The
248
+ parent stays token-efficient even with deeply nested trees.
249
+
250
+ ### OpenRAG integration
251
+
252
+ [OpenRAG](https://github.com/langflow-ai/openrag) is a self-hosted RAG
253
+ platform built on Langflow + Docling + OpenSearch. dotmd-parser can ship
254
+ the artifact straight into it:
255
+
256
+ ```bash
257
+ pip install "dotmd-parser[openrag]" # adds openrag-sdk (quoted so zsh does not glob the brackets)
258
+ export OPENRAG_URL=http://localhost:3000
259
+ export OPENRAG_API_KEY=... # if your instance requires auth
260
+
261
+ dotmd-parser dotmd-index ./docs/ --push-openrag
262
+ # 1. Writes ./docs/dotmd-index.md
263
+ # 2. Calls OpenRAGClient.documents.ingest(file_path=...)
264
+ # 3. Records {document_id, base_url, pushed_at} in exports.openrag
265
+ ```
266
+
267
+ `dotmd-index.md` is the **map** (one-shot overview); OpenRAG is the
268
+ **search index** (full-content semantic retrieval). Register OpenRAG's
269
+ MCP server with Claude Code to use both surfaces from the same client.
270
+
202
271
  ## `analyze` — AI-assisted dependency detection
203
272
 
204
273
  Use when a folder of markdown has **no explicit directives yet**. `analyze`
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "dotmd-parser"
3
- version = "0.5.0"
4
- description = "Dependency graph parser and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, and detect implicit dependencies via Claude for AI agent prompt engineering"
3
+ version = "0.7.0"
4
+ description = "Dependency graph parser, single-file folder index (dotmd-index.md), and AI analyzer for .md skill files — parse @include/@delegate/@ref directives, build graphs, resolve templates, generate RAG-friendly overviews, and ingest into OpenRAG"
5
5
  requires-python = ">=3.10"
6
6
  license = "MIT"
7
7
  readme = "README.md"
@@ -40,6 +40,16 @@ Homepage = "https://github.com/dotmd-projects/dotmd-parser"
40
40
  Repository = "https://github.com/dotmd-projects/dotmd-parser"
41
41
  Issues = "https://github.com/dotmd-projects/dotmd-parser/issues"
42
42
 
43
+ [project.optional-dependencies]
44
+ openrag = ["openrag-sdk>=0.3.1"]
45
+ pdf = ["pdfplumber>=0.10"]
46
+ docx = ["python-docx>=1.0"]
47
+ all = [
48
+ "openrag-sdk>=0.3.1",
49
+ "pdfplumber>=0.10",
50
+ "python-docx>=1.0",
51
+ ]
52
+
43
53
  [project.scripts]
44
54
  dotmd-parser = "dotmd_parser.parser:main"
45
55
 
@@ -53,6 +63,7 @@ where = ["src"]
53
63
  [tool.setuptools.package-data]
54
64
  "dotmd_parser.templates" = ["*.md"]
55
65
  "dotmd_parser.templates.prompts" = ["*.md"]
66
+ "dotmd_parser.templates.dotmd_index" = ["*.md"]
56
67
 
57
68
  [tool.pytest.ini_options]
58
69
  testpaths = ["tests"]
@@ -10,7 +10,7 @@ API:
10
10
  from dotmd_parser import digest, tree, affects
11
11
  """
12
12
 
13
- __version__ = "0.5.0"
13
+ __version__ = "0.7.0"
14
14
 
15
15
  from dotmd_parser.parser import (
16
16
  build_graph,
@@ -63,6 +63,14 @@ from dotmd_parser.inventory import (
63
63
  BINARY_EXTENSIONS,
64
64
  MARKDOWN_EXTENSIONS,
65
65
  )
66
+ from dotmd_parser.index_md import (
67
+ generate_index_md,
68
+ write_index_md,
69
+ extract_frontmatter,
70
+ DEFAULT_INDEX_FILENAME,
71
+ INDEX_MD_SCHEMA,
72
+ )
73
+ from dotmd_parser.openrag import push_to_openrag
66
74
 
67
75
  __all__ = [
68
76
  "__version__",
@@ -112,4 +120,12 @@ __all__ = [
112
120
  "TEXT_EXTENSIONS",
113
121
  "BINARY_EXTENSIONS",
114
122
  "MARKDOWN_EXTENSIONS",
123
+ # index_md
124
+ "generate_index_md",
125
+ "write_index_md",
126
+ "extract_frontmatter",
127
+ "DEFAULT_INDEX_FILENAME",
128
+ "INDEX_MD_SCHEMA",
129
+ # openrag
130
+ "push_to_openrag",
115
131
  ]
@@ -53,6 +53,11 @@ from dotmd_parser.inventory import (
53
53
  format_inventory as _format_inventory,
54
54
  suggest_next_command as _suggest_next_command,
55
55
  )
56
+ from dotmd_parser.index_md import (
57
+ DEFAULT_INDEX_FILENAME,
58
+ generate_index_md as _generate_index_md,
59
+ write_index_md as _write_index_md,
60
+ )
56
61
 
57
62
 
58
63
  def _maybe_warn_empty(path: str) -> None:
@@ -88,20 +93,35 @@ def _load_or_build_index(path: str, use_cache: bool = True) -> dict:
88
93
  SKILL_DIR_NAME = "dotmd-parser"
89
94
  SKILL_TEMPLATE = "SKILL.md"
90
95
 
96
+ # Map skill id (user-facing folder name) → package resource path
97
+ _SKILLS = {
98
+ "dotmd-parser": ("dotmd_parser.templates", SKILL_TEMPLATE),
99
+ "dotmd-index": ("dotmd_parser.templates.dotmd_index", SKILL_TEMPLATE),
100
+ }
101
+
91
102
 
92
- def _read_bundled_skill() -> str:
93
- """Load the packaged SKILL.md via importlib.resources."""
94
- return resources.files("dotmd_parser.templates").joinpath(SKILL_TEMPLATE).read_text(encoding="utf-8")
103
+ def _read_bundled_skill(skill_id: str = "dotmd-parser") -> str:
104
+ """Load a packaged SKILL.md via importlib.resources."""
105
+ pkg, name = _SKILLS[skill_id]
106
+ return resources.files(pkg).joinpath(name).read_text(encoding="utf-8")
95
107
 
96
108
 
97
109
  def cmd_init(args: argparse.Namespace) -> int:
98
- """Install the bundled SKILL.md into `<path>/.claude/skills/dotmd-parser/SKILL.md`."""
110
+ """Install a bundled SKILL.md into `<path>/.claude/skills/<skill-id>/SKILL.md`."""
99
111
  project = Path(args.path).resolve()
100
112
  if not project.exists():
101
113
  print(f"error: path does not exist: {project}", file=sys.stderr)
102
114
  return 2
103
115
 
104
- target_dir = project / ".claude" / "skills" / SKILL_DIR_NAME
116
+ skill_id = args.skill or "dotmd-parser"
117
+ if skill_id not in _SKILLS:
118
+ print(
119
+ f"error: unknown skill {skill_id!r}; choose from {sorted(_SKILLS)}",
120
+ file=sys.stderr,
121
+ )
122
+ return 2
123
+
124
+ target_dir = project / ".claude" / "skills" / skill_id
105
125
  target = target_dir / SKILL_TEMPLATE
106
126
 
107
127
  if target.exists() and not args.force:
@@ -112,9 +132,12 @@ def cmd_init(args: argparse.Namespace) -> int:
112
132
  return 1
113
133
 
114
134
  target_dir.mkdir(parents=True, exist_ok=True)
115
- target.write_text(_read_bundled_skill(), encoding="utf-8")
135
+ target.write_text(_read_bundled_skill(skill_id), encoding="utf-8")
116
136
  print(f"Installed skill: {target}")
117
- print("Next: run `dotmd-parser index .` from the project root.")
137
+ if skill_id == "dotmd-parser":
138
+ print("Next: run `dotmd-parser index .` from the project root.")
139
+ elif skill_id == "dotmd-index":
140
+ print("Next: run `dotmd-parser dotmd-index .` to generate the artifact.")
118
141
  return 0
119
142
 
120
143
 
@@ -307,6 +330,73 @@ def cmd_inventory(args: argparse.Namespace) -> int:
307
330
  return 0
308
331
 
309
332
 
333
+ def cmd_dotmd_index(args: argparse.Namespace) -> int:
334
+ """Generate `<root>/dotmd-index.md` (or print to stdout). Optionally push to OpenRAG."""
335
+ gen_kwargs = {
336
+ "include_folder_map": not args.no_folder_map,
337
+ "include_deps_tree": not args.no_deps,
338
+ "max_files": args.max_files,
339
+ "aggregate": args.aggregate,
340
+ }
341
+
342
+ if args.stdout:
343
+ if args.push_openrag:
344
+ print("error: --stdout and --push-openrag are mutually exclusive", file=sys.stderr)
345
+ return 2
346
+ try:
347
+ md = _generate_index_md(args.path, **gen_kwargs)
348
+ except ValueError as e:
349
+ print(f"error: {e}", file=sys.stderr)
350
+ return 2
351
+ print(md, end="" if md.endswith("\n") else "\n")
352
+ return 0
353
+
354
+ try:
355
+ path, written = _write_index_md(args.path, force=args.force, **gen_kwargs)
356
+ except ValueError as e:
357
+ msg = str(e)
358
+ print(f"error: {msg}", file=sys.stderr)
359
+ if "does not exist" in msg or "is not a directory" in msg:
360
+ return 2
361
+ return 1
362
+ if written:
363
+ print(f"Wrote {path}")
364
+ else:
365
+ print(f"{path} unchanged (content_hash matches).")
366
+
367
+ if args.push_openrag:
368
+ from dotmd_parser.openrag import push_to_openrag as _push_to_openrag
369
+ try:
370
+ export = _push_to_openrag(
371
+ str(path),
372
+ base_url=args.openrag_url,
373
+ api_key=args.openrag_api_key,
374
+ )
375
+ except ImportError as e:
376
+ print(f"error: {e}", file=sys.stderr)
377
+ return 2
378
+ except (ValueError, RuntimeError) as e:
379
+ print(f"error: openrag push failed: {e}", file=sys.stderr)
380
+ return 1
381
+
382
+ # Re-emit the file with exports.openrag recorded — bypass idempotency
383
+ # check because we have explicit new metadata to persist.
384
+ md_with_export = _generate_index_md(
385
+ args.path,
386
+ extra_frontmatter={"exports": {"openrag": export}},
387
+ **gen_kwargs,
388
+ )
389
+ path.write_text(md_with_export, encoding="utf-8")
390
+ tid = export.get("task_id") or "<n/a>"
391
+ succ = export.get("successful_files", 0)
392
+ fail = export.get("failed_files", 0)
393
+ print(
394
+ f"Pushed to OpenRAG: {export.get('base_url')} "
395
+ f"(task_id={tid}, successful={succ}, failed={fail})"
396
+ )
397
+ return 0
398
+
399
+
310
400
  def cmd_show(args: argparse.Namespace) -> int:
311
401
  graph = build_graph(args.path)
312
402
  print(summary(graph))
@@ -325,8 +415,14 @@ def _build_parser() -> argparse.ArgumentParser:
325
415
 
326
416
  sub = parser.add_subparsers(dest="command")
327
417
 
328
- p_init = sub.add_parser("init", help="Install bundled SKILL.md into .claude/skills/dotmd-parser/")
418
+ p_init = sub.add_parser("init", help="Install a bundled SKILL.md into .claude/skills/<id>/")
329
419
  p_init.add_argument("path", nargs="?", default=".", help="Project root (default: current directory)")
420
+ p_init.add_argument(
421
+ "--skill",
422
+ choices=sorted(_SKILLS),
423
+ default="dotmd-parser",
424
+ help="Which bundled skill to install (default: dotmd-parser)",
425
+ )
330
426
  p_init.add_argument("--force", action="store_true", help="Overwrite an existing SKILL.md")
331
427
  p_init.set_defaults(func=cmd_init)
332
428
 
@@ -404,6 +500,60 @@ def _build_parser() -> argparse.ArgumentParser:
404
500
  p_inv.add_argument("--json", action="store_true", help="Emit JSON instead of formatted text")
405
501
  p_inv.set_defaults(func=cmd_inventory)
406
502
 
503
+ p_idxmd = sub.add_parser(
504
+ "dotmd-index",
505
+ help=f"Generate {DEFAULT_INDEX_FILENAME} at <path>/ (single-file folder overview)",
506
+ )
507
+ p_idxmd.add_argument("path", help="Directory to summarize")
508
+ p_idxmd.add_argument(
509
+ "--stdout",
510
+ action="store_true",
511
+ help="Print to stdout instead of writing to <path>/dotmd-index.md",
512
+ )
513
+ p_idxmd.add_argument(
514
+ "--force",
515
+ action="store_true",
516
+ help="Overwrite an existing file even if it isn't a dotmd-parser artifact",
517
+ )
518
+ p_idxmd.add_argument(
519
+ "--no-folder-map",
520
+ action="store_true",
521
+ help="Skip the ASCII folder-map section",
522
+ )
523
+ p_idxmd.add_argument(
524
+ "--no-deps",
525
+ action="store_true",
526
+ help="Skip the dependency-tree section",
527
+ )
528
+ p_idxmd.add_argument(
529
+ "--max-files",
530
+ type=int,
531
+ default=200,
532
+ help="Cap on the number of files listed in the body (default: 200)",
533
+ )
534
+ p_idxmd.add_argument(
535
+ "--aggregate",
536
+ action="store_true",
537
+ help="Discover descendant dotmd-index.md artifacts and reference them "
538
+ "(adds a ## Sub-Indexes section + aggregates[] frontmatter)",
539
+ )
540
+ p_idxmd.add_argument(
541
+ "--push-openrag",
542
+ action="store_true",
543
+ help="After writing, ingest the file into OpenRAG (requires `pip install dotmd-parser[openrag]`)",
544
+ )
545
+ p_idxmd.add_argument(
546
+ "--openrag-url",
547
+ metavar="URL",
548
+ help="OpenRAG endpoint (default: $OPENRAG_URL or http://localhost:3000)",
549
+ )
550
+ p_idxmd.add_argument(
551
+ "--openrag-api-key",
552
+ metavar="KEY",
553
+ help="OpenRAG API key (default: $OPENRAG_API_KEY, handled by the SDK)",
554
+ )
555
+ p_idxmd.set_defaults(func=cmd_dotmd_index)
556
+
407
557
  p_show = sub.add_parser("show", help="Legacy summary + full JSON graph")
408
558
  p_show.add_argument("path", help="Directory or SKILL.md")
409
559
  p_show.add_argument("--quiet", action="store_true", help="Suppress JSON dump")
@@ -417,7 +567,7 @@ def run(argv: list[str] | None = None) -> int:
417
567
  args_list = list(sys.argv[1:] if argv is None else argv)
418
568
 
419
569
  # Backwards compatibility: `dotmd-parser <path>` with no subcommand → show
420
- known_cmds = {"init", "index", "check", "affects", "deps", "digest", "tree", "resolve", "analyze", "inventory", "show"}
570
+ known_cmds = {"init", "index", "check", "affects", "deps", "digest", "tree", "resolve", "analyze", "inventory", "dotmd-index", "show"}
421
571
  if args_list and args_list[0] not in known_cmds and not args_list[0].startswith("-"):
422
572
  args_list = ["show", *args_list]
423
573
  if not args_list: