embedm 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- embedm-0.9.0/LICENSE.txt +9 -0
- embedm-0.9.0/PKG-INFO +248 -0
- embedm-0.9.0/README.md +217 -0
- embedm-0.9.0/pyproject.toml +195 -0
- embedm-0.9.0/setup.cfg +4 -0
- embedm-0.9.0/src/embedm/__init__.py +0 -0
- embedm-0.9.0/src/embedm/__main__.py +3 -0
- embedm-0.9.0/src/embedm/application/__init__.py +0 -0
- embedm-0.9.0/src/embedm/application/application_resources.py +24 -0
- embedm-0.9.0/src/embedm/application/cli.py +124 -0
- embedm-0.9.0/src/embedm/application/config_loader.py +186 -0
- embedm-0.9.0/src/embedm/application/configuration.py +114 -0
- embedm-0.9.0/src/embedm/application/console.py +220 -0
- embedm-0.9.0/src/embedm/application/embedm_context.py +14 -0
- embedm-0.9.0/src/embedm/application/orchestration.py +444 -0
- embedm-0.9.0/src/embedm/application/plan_tree.py +39 -0
- embedm-0.9.0/src/embedm/application/planner.py +182 -0
- embedm-0.9.0/src/embedm/application/verification.py +28 -0
- embedm-0.9.0/src/embedm/domain/__init__.py +0 -0
- embedm-0.9.0/src/embedm/domain/directive.py +12 -0
- embedm-0.9.0/src/embedm/domain/document.py +13 -0
- embedm-0.9.0/src/embedm/domain/domain_resources.py +5 -0
- embedm-0.9.0/src/embedm/domain/plan_node.py +18 -0
- embedm-0.9.0/src/embedm/domain/span.py +6 -0
- embedm-0.9.0/src/embedm/domain/status_level.py +15 -0
- embedm-0.9.0/src/embedm/infrastructure/__init__.py +1 -0
- embedm-0.9.0/src/embedm/infrastructure/file_cache.py +226 -0
- embedm-0.9.0/src/embedm/infrastructure/infrastructure_resources.py +3 -0
- embedm-0.9.0/src/embedm/parsing/__init__.py +0 -0
- embedm-0.9.0/src/embedm/parsing/directive_parser.py +142 -0
- embedm-0.9.0/src/embedm/parsing/extraction.py +107 -0
- embedm-0.9.0/src/embedm/parsing/parsing_resources.py +3 -0
- embedm-0.9.0/src/embedm/parsing/symbol_parser.py +679 -0
- embedm-0.9.0/src/embedm/plugins/__init__.py +0 -0
- embedm-0.9.0/src/embedm/plugins/directive_options.py +59 -0
- embedm-0.9.0/src/embedm/plugins/plugin_base.py +61 -0
- embedm-0.9.0/src/embedm/plugins/plugin_configuration.py +13 -0
- embedm-0.9.0/src/embedm/plugins/plugin_registry.py +74 -0
- embedm-0.9.0/src/embedm/plugins/plugin_resources.py +5 -0
- embedm-0.9.0/src/embedm/plugins/transformer_base.py +19 -0
- embedm-0.9.0/src/embedm/plugins/validation_base.py +32 -0
- embedm-0.9.0/src/embedm.egg-info/PKG-INFO +248 -0
- embedm-0.9.0/src/embedm.egg-info/SOURCES.txt +77 -0
- embedm-0.9.0/src/embedm.egg-info/dependency_links.txt +1 -0
- embedm-0.9.0/src/embedm.egg-info/entry_points.txt +11 -0
- embedm-0.9.0/src/embedm.egg-info/requires.txt +5 -0
- embedm-0.9.0/src/embedm.egg-info/top_level.txt +2 -0
- embedm-0.9.0/src/embedm_plugins/__init__.py +0 -0
- embedm-0.9.0/src/embedm_plugins/file_plugin.py +228 -0
- embedm-0.9.0/src/embedm_plugins/file_resources.py +16 -0
- embedm-0.9.0/src/embedm_plugins/file_transformer.py +152 -0
- embedm-0.9.0/src/embedm_plugins/hello_world_plugin.py +46 -0
- embedm-0.9.0/src/embedm_plugins/hello_world_transformer.py +8 -0
- embedm-0.9.0/src/embedm_plugins/line_transformer.py +24 -0
- embedm-0.9.0/src/embedm_plugins/query_path_engine.py +53 -0
- embedm-0.9.0/src/embedm_plugins/query_path_normalize_json.py +9 -0
- embedm-0.9.0/src/embedm_plugins/query_path_normalize_toml.py +9 -0
- embedm-0.9.0/src/embedm_plugins/query_path_normalize_xml.py +41 -0
- embedm-0.9.0/src/embedm_plugins/query_path_normalize_yaml.py +10 -0
- embedm-0.9.0/src/embedm_plugins/query_path_plugin.py +161 -0
- embedm-0.9.0/src/embedm_plugins/query_path_resources.py +14 -0
- embedm-0.9.0/src/embedm_plugins/query_path_transformer.py +48 -0
- embedm-0.9.0/src/embedm_plugins/recall_plugin.py +108 -0
- embedm-0.9.0/src/embedm_plugins/recall_resources.py +10 -0
- embedm-0.9.0/src/embedm_plugins/recall_transformer.py +80 -0
- embedm-0.9.0/src/embedm_plugins/region_transformer.py +31 -0
- embedm-0.9.0/src/embedm_plugins/symbol_transformer.py +25 -0
- embedm-0.9.0/src/embedm_plugins/synopsis_plugin.py +105 -0
- embedm-0.9.0/src/embedm_plugins/synopsis_resources.py +9 -0
- embedm-0.9.0/src/embedm_plugins/synopsis_stopwords.py +201 -0
- embedm-0.9.0/src/embedm_plugins/synopsis_transformer.py +103 -0
- embedm-0.9.0/src/embedm_plugins/table_plugin.py +153 -0
- embedm-0.9.0/src/embedm_plugins/table_resources.py +14 -0
- embedm-0.9.0/src/embedm_plugins/table_transformer.py +216 -0
- embedm-0.9.0/src/embedm_plugins/table_validation.py +141 -0
- embedm-0.9.0/src/embedm_plugins/text_processing.py +92 -0
- embedm-0.9.0/src/embedm_plugins/toc_plugin.py +69 -0
- embedm-0.9.0/src/embedm_plugins/toc_resources.py +5 -0
- embedm-0.9.0/src/embedm_plugins/toc_transformer.py +135 -0
embedm-0.9.0/LICENSE.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
Copyright 2026 Fultslop
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
embedm-0.9.0/PKG-INFO
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: embedm
|
|
3
|
+
Version: 0.9.0
|
|
4
|
+
Summary: A Python tool for embedding files, code snippets, and generating tables of contents in Markdown documents with built-in safety limits and validation
|
|
5
|
+
Author: Fultslop
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Fultslop/embedm
|
|
8
|
+
Project-URL: Repository, https://github.com/Fultslop/embedm
|
|
9
|
+
Project-URL: Issues, https://github.com/Fultslop/embedm/issues
|
|
10
|
+
Keywords: markdown,embedding,documentation,code-snippets,table-of-contents
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Documentation
|
|
21
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
22
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE.txt
|
|
26
|
+
Requires-Dist: PyYAML>=6.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# EmbedM
|
|
33
|
+
|
|
34
|
+
version 0.9.0
|
|
35
|
+
|
|
36
|
+
A Markdown compiler driven by source files.
|
|
37
|
+
|
|
38
|
+
- [How It Works](#how-it-works)
|
|
39
|
+
- [Use Cases](#use-cases)
|
|
40
|
+
- [Keeping code documentation in sync](#keeping-code-documentation-in-sync)
|
|
41
|
+
- [Live metadata in a README or changelog](#live-metadata-in-a-readme-or-changelog)
|
|
42
|
+
- [Data tables without copy-paste](#data-tables-without-copy-paste)
|
|
43
|
+
- [CI drift detection](#ci-drift-detection)
|
|
44
|
+
- [AI agent context documents](#ai-agent-context-documents)
|
|
45
|
+
- [Directives](#directives)
|
|
46
|
+
- [Quick Start](#quick-start)
|
|
47
|
+
- [Features](#features)
|
|
48
|
+
- [Documentation](#documentation)
|
|
49
|
+
- [Project Background](#project-background)
|
|
50
|
+
- [License](#license)
|
|
51
|
+
- [Contributing](#contributing)
|
|
52
|
+
|
|
53
|
+
## How It Works
|
|
54
|
+
|
|
55
|
+
EmbedM compiles Markdown documents from directive blocks. Each directive references a source — a code file, a data query, a CSV table, or another document — and is replaced with the extracted, formatted content on compile. Change the source; recompile; the document is current.
|
|
56
|
+
|
|
57
|
+
## Use Cases
|
|
58
|
+
|
|
59
|
+
### Keeping code documentation in sync
|
|
60
|
+
|
|
61
|
+
Embed a function directly from the source file, scoped by a named region or by symbol name. When the implementation changes, the docs regenerate on the next compile — no copy-paste, no drift.
|
|
62
|
+
|
|
63
|
+
````yaml
|
|
64
|
+
type: file
|
|
65
|
+
source: src/api/handlers.java
|
|
66
|
+
symbol: UserHandler.createUser
|
|
67
|
+
title: "POST /users"
|
|
68
|
+
link: true
|
|
69
|
+
````
|
|
70
|
+
|
|
71
|
+
### Live metadata in a README or changelog
|
|
72
|
+
|
|
73
|
+
Pull version numbers, project names, and other values from `pyproject.toml`, `package.json`, or any JSON/YAML/TOML/XML file. The version at the top of this page is a live example — it is compiled from `pyproject.toml` at build time.
|
|
74
|
+
|
|
75
|
+
````yaml
|
|
76
|
+
type: query-path
|
|
77
|
+
source: pyproject.toml
|
|
78
|
+
path: project.version
|
|
79
|
+
format: "Released: **v{value}**"
|
|
80
|
+
````
|
|
81
|
+
|
|
82
|
+
### Data tables without copy-paste
|
|
83
|
+
|
|
84
|
+
Embed CSV or TSV data as formatted Markdown tables. Apply column selection, filtering, and sorting inline — the source file is the single source of truth.
|
|
85
|
+
|
|
86
|
+
````yaml
|
|
87
|
+
type: table
|
|
88
|
+
source: reports/q4-summary.csv
|
|
89
|
+
select: "Region as Region, Revenue as Revenue_USD"
|
|
90
|
+
order_by: "Revenue_USD desc"
|
|
91
|
+
limit: 10
|
|
92
|
+
````
|
|
93
|
+
|
|
94
|
+
### CI drift detection
|
|
95
|
+
|
|
96
|
+
Use `--verify` in your pipeline to catch documentation that has fallen behind its sources. Exit code 1 if any compiled file is stale.
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
embedm ./docs/src --verify -d ./docs/compiled
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### AI agent context documents
|
|
103
|
+
|
|
104
|
+
Use `recall` to query a large document — a devlog, a decision log, an ADR set — and extract the sentences most relevant to a given topic. Compose multiple queries into a single compiled context file that an AI assistant reads at session start.
|
|
105
|
+
|
|
106
|
+
````yaml
|
|
107
|
+
type: recall
|
|
108
|
+
source: ./devlog.md
|
|
109
|
+
query: "validation transform boundary error handling"
|
|
110
|
+
max_sentences: 5
|
|
111
|
+
````
|
|
112
|
+
|
|
113
|
+
EmbedM itself uses this: its agent context file is compiled from the project devlog using four targeted recall queries — plugin conventions, architectural rules, common mistakes, and the active spec. The context window stays focused without manual curation.
|
|
114
|
+
|
|
115
|
+
## Directives
|
|
116
|
+
|
|
117
|
+
Directives are fenced YAML blocks tagged `` ```yaml embedm ``. On compile, each is replaced in-place with the extracted content:
|
|
118
|
+
|
|
119
|
+
````yaml
|
|
120
|
+
type: file
|
|
121
|
+
source: src/config/defaults.py
|
|
122
|
+
region: connection_defaults
|
|
123
|
+
````
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
# connection_defaults
|
|
127
|
+
HOST = "localhost"
|
|
128
|
+
PORT = 5432
|
|
129
|
+
TIMEOUT = 30
|
|
130
|
+
POOL_SIZE = 10
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Structured data queries render inline:
|
|
134
|
+
|
|
135
|
+
````yaml
|
|
136
|
+
type: query-path
|
|
137
|
+
source: config/app.yaml
|
|
138
|
+
path: database.pool_size
|
|
139
|
+
format: "Default pool size: **{value}**"
|
|
140
|
+
````
|
|
141
|
+
|
|
142
|
+
> Default pool size: **10**
|
|
143
|
+
|
|
144
|
+
## Quick Start
|
|
145
|
+
|
|
146
|
+
**Install**
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
(coming soon) pip install embedm
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Or from source:
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
git clone https://github.com/Fultslop/embedm.git
|
|
156
|
+
cd embedm
|
|
157
|
+
pip install -e .
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
**Compile a single file**
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
embedm content.md -o compiled/content.md
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Compile a directory**
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
embedm ./docs/src -d ./docs/compiled
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**Preview without writing**
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
embedm content.md -n
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
**Check that compiled files are up to date**
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
embedm ./docs/src --verify -d ./docs/compiled
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Generate a default config file**
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
embedm --init
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Features
|
|
191
|
+
|
|
192
|
+
**File embedding**
|
|
193
|
+
- Embed entire files, line ranges (`5..10`), or named regions (`md.start:name` / `md.end:name`)
|
|
194
|
+
- Markdown sources are merged inline; all other types are wrapped in a fenced code block
|
|
195
|
+
- Optional title, source link, and line-number annotation
|
|
196
|
+
|
|
197
|
+
**Symbol extraction**
|
|
198
|
+
- Extract classes and methods by name from C/C++, C#, and Java source files
|
|
199
|
+
- Dot-notation for nested symbols: `OuterClass.InnerClass.methodName`
|
|
200
|
+
- Overload disambiguation: `add(int, int)` vs `add(int, int, int)`
|
|
201
|
+
|
|
202
|
+
**Structured data**
|
|
203
|
+
- Query any value from JSON, YAML, TOML, or XML using dot-notation paths
|
|
204
|
+
- Scalars render inline; dicts and lists render as YAML code blocks
|
|
205
|
+
- Format strings for inline interpolation: `"version {value}"`
|
|
206
|
+
|
|
207
|
+
**Data tables**
|
|
208
|
+
- Render CSV and TSV files as Markdown tables
|
|
209
|
+
- Column selection, row filtering (exact match and comparison operators), sorting, pagination
|
|
210
|
+
|
|
211
|
+
**Table of contents**
|
|
212
|
+
- Auto-generated from document headings, including headings in embedded files
|
|
213
|
+
- GitHub-compatible anchor links
|
|
214
|
+
|
|
215
|
+
**AI context**
|
|
216
|
+
- `synopsis` — generate a condensed summary of a document
|
|
217
|
+
- `recall` — build structured retrieval blocks for AI agent context files
|
|
218
|
+
|
|
219
|
+
**Recursive embedding**
|
|
220
|
+
- Markdown files that embed other Markdown files, up to a configurable depth
|
|
221
|
+
|
|
222
|
+
**Safety**
|
|
223
|
+
- Configurable limits on file size, memory, recursion depth, and embed output size
|
|
224
|
+
- `--verify` mode for CI drift detection
|
|
225
|
+
|
|
226
|
+
## Documentation
|
|
227
|
+
|
|
228
|
+
| Document | Description |
|
|
229
|
+
|----------|-------------|
|
|
230
|
+
| [CLI Reference](doc/manual/compiled/cli.md) | All flags, input modes, and exit codes |
|
|
231
|
+
| [Configuration Reference](doc/manual/compiled/configuration.md) | `embedm-config.yaml` properties and defaults |
|
|
232
|
+
| [File Plugin](doc/manual/compiled/file_plugin.md) | File embedding, regions, lines, symbol extraction |
|
|
233
|
+
| [Query-Path Plugin](doc/manual/compiled/query_path_plugin.md) | Structured data extraction from JSON/YAML/TOML/XML |
|
|
234
|
+
| [Table Plugin](doc/manual/compiled/table_plugin.md) | CSV/TSV tables with filtering and sorting |
|
|
235
|
+
| [Toc Plugin](doc/manual/compiled/toc_plugin.md) | Table-of-contents generation |
|
|
236
|
+
| [Architecture](doc/manual/compiled/architecture.md) | System design, plugin model, plan/compile pipeline |
|
|
237
|
+
|
|
238
|
+
## Project Background
|
|
239
|
+
|
|
240
|
+
EmbedM is part of an exploration into how far AI-assisted development can go when building a non-trivial tool that could be used in a production CI/CD chain. This project has been built based on a _human_-defined architecture, functional spec, and a series of interface contracts, then implemented using [Claude](https://claude.ai/) and, to a lesser extent, [Google Gemini](https://gemini.google.com/app).
|
|
241
|
+
|
|
242
|
+
## License
|
|
243
|
+
|
|
244
|
+
MIT License — see the LICENSE.txt file for details.
|
|
245
|
+
|
|
246
|
+
## Contributing
|
|
247
|
+
|
|
248
|
+
Contributions are welcome. Please open an issue to discuss proposed changes before submitting a pull request.
|
embedm-0.9.0/README.md
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# EmbedM
|
|
2
|
+
|
|
3
|
+
version 0.9.0
|
|
4
|
+
|
|
5
|
+
A Markdown compiler driven by source files.
|
|
6
|
+
|
|
7
|
+
- [How It Works](#how-it-works)
|
|
8
|
+
- [Use Cases](#use-cases)
|
|
9
|
+
- [Keeping code documentation in sync](#keeping-code-documentation-in-sync)
|
|
10
|
+
- [Live metadata in a README or changelog](#live-metadata-in-a-readme-or-changelog)
|
|
11
|
+
- [Data tables without copy-paste](#data-tables-without-copy-paste)
|
|
12
|
+
- [CI drift detection](#ci-drift-detection)
|
|
13
|
+
- [AI agent context documents](#ai-agent-context-documents)
|
|
14
|
+
- [Directives](#directives)
|
|
15
|
+
- [Quick Start](#quick-start)
|
|
16
|
+
- [Features](#features)
|
|
17
|
+
- [Documentation](#documentation)
|
|
18
|
+
- [Project Background](#project-background)
|
|
19
|
+
- [License](#license)
|
|
20
|
+
- [Contributing](#contributing)
|
|
21
|
+
|
|
22
|
+
## How It Works
|
|
23
|
+
|
|
24
|
+
EmbedM compiles Markdown documents from directive blocks. Each directive references a source — a code file, a data query, a CSV table, or another document — and is replaced with the extracted, formatted content on compile. Change the source; recompile; the document is current.
|
|
25
|
+
|
|
26
|
+
## Use Cases
|
|
27
|
+
|
|
28
|
+
### Keeping code documentation in sync
|
|
29
|
+
|
|
30
|
+
Embed a function directly from the source file, scoped by a named region or by symbol name. When the implementation changes, the docs regenerate on the next compile — no copy-paste, no drift.
|
|
31
|
+
|
|
32
|
+
````yaml
|
|
33
|
+
type: file
|
|
34
|
+
source: src/api/handlers.java
|
|
35
|
+
symbol: UserHandler.createUser
|
|
36
|
+
title: "POST /users"
|
|
37
|
+
link: true
|
|
38
|
+
````
|
|
39
|
+
|
|
40
|
+
### Live metadata in a README or changelog
|
|
41
|
+
|
|
42
|
+
Pull version numbers, project names, and other values from `pyproject.toml`, `package.json`, or any JSON/YAML/TOML/XML file. The version at the top of this page is a live example — it is compiled from `pyproject.toml` at build time.
|
|
43
|
+
|
|
44
|
+
````yaml
|
|
45
|
+
type: query-path
|
|
46
|
+
source: pyproject.toml
|
|
47
|
+
path: project.version
|
|
48
|
+
format: "Released: **v{value}**"
|
|
49
|
+
````
|
|
50
|
+
|
|
51
|
+
### Data tables without copy-paste
|
|
52
|
+
|
|
53
|
+
Embed CSV or TSV data as formatted Markdown tables. Apply column selection, filtering, and sorting inline — the source file is the single source of truth.
|
|
54
|
+
|
|
55
|
+
````yaml
|
|
56
|
+
type: table
|
|
57
|
+
source: reports/q4-summary.csv
|
|
58
|
+
select: "Region as Region, Revenue as Revenue_USD"
|
|
59
|
+
order_by: "Revenue_USD desc"
|
|
60
|
+
limit: 10
|
|
61
|
+
````
|
|
62
|
+
|
|
63
|
+
### CI drift detection
|
|
64
|
+
|
|
65
|
+
Use `--verify` in your pipeline to catch documentation that has fallen behind its sources. Exit code 1 if any compiled file is stale.
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
embedm ./docs/src --verify -d ./docs/compiled
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### AI agent context documents
|
|
72
|
+
|
|
73
|
+
Use `recall` to query a large document — a devlog, a decision log, an ADR set — and extract the sentences most relevant to a given topic. Compose multiple queries into a single compiled context file that an AI assistant reads at session start.
|
|
74
|
+
|
|
75
|
+
````yaml
|
|
76
|
+
type: recall
|
|
77
|
+
source: ./devlog.md
|
|
78
|
+
query: "validation transform boundary error handling"
|
|
79
|
+
max_sentences: 5
|
|
80
|
+
````
|
|
81
|
+
|
|
82
|
+
EmbedM itself uses this: its agent context file is compiled from the project devlog using four targeted recall queries — plugin conventions, architectural rules, common mistakes, and the active spec. The context window stays focused without manual curation.
|
|
83
|
+
|
|
84
|
+
## Directives
|
|
85
|
+
|
|
86
|
+
Directives are fenced YAML blocks tagged `` ```yaml embedm ``. On compile, each is replaced in-place with the extracted content:
|
|
87
|
+
|
|
88
|
+
````yaml
|
|
89
|
+
type: file
|
|
90
|
+
source: src/config/defaults.py
|
|
91
|
+
region: connection_defaults
|
|
92
|
+
````
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
# connection_defaults
|
|
96
|
+
HOST = "localhost"
|
|
97
|
+
PORT = 5432
|
|
98
|
+
TIMEOUT = 30
|
|
99
|
+
POOL_SIZE = 10
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Structured data queries render inline:
|
|
103
|
+
|
|
104
|
+
````yaml
|
|
105
|
+
type: query-path
|
|
106
|
+
source: config/app.yaml
|
|
107
|
+
path: database.pool_size
|
|
108
|
+
format: "Default pool size: **{value}**"
|
|
109
|
+
````
|
|
110
|
+
|
|
111
|
+
> Default pool size: **10**
|
|
112
|
+
|
|
113
|
+
## Quick Start
|
|
114
|
+
|
|
115
|
+
**Install**
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
(coming soon) pip install embedm
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Or from source:
|
|
122
|
+
|
|
123
|
+
```
|
|
124
|
+
git clone https://github.com/Fultslop/embedm.git
|
|
125
|
+
cd embedm
|
|
126
|
+
pip install -e .
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Compile a single file**
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
embedm content.md -o compiled/content.md
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**Compile a directory**
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
embedm ./docs/src -d ./docs/compiled
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Preview without writing**
|
|
142
|
+
|
|
143
|
+
```
|
|
144
|
+
embedm content.md -n
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**Check that compiled files are up to date**
|
|
148
|
+
|
|
149
|
+
```
|
|
150
|
+
embedm ./docs/src --verify -d ./docs/compiled
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**Generate a default config file**
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
embedm --init
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Features
|
|
160
|
+
|
|
161
|
+
**File embedding**
|
|
162
|
+
- Embed entire files, line ranges (`5..10`), or named regions (`md.start:name` / `md.end:name`)
|
|
163
|
+
- Markdown sources are merged inline; all other types are wrapped in a fenced code block
|
|
164
|
+
- Optional title, source link, and line-number annotation
|
|
165
|
+
|
|
166
|
+
**Symbol extraction**
|
|
167
|
+
- Extract classes and methods by name from C/C++, C#, and Java source files
|
|
168
|
+
- Dot-notation for nested symbols: `OuterClass.InnerClass.methodName`
|
|
169
|
+
- Overload disambiguation: `add(int, int)` vs `add(int, int, int)`
|
|
170
|
+
|
|
171
|
+
**Structured data**
|
|
172
|
+
- Query any value from JSON, YAML, TOML, or XML using dot-notation paths
|
|
173
|
+
- Scalars render inline; dicts and lists render as YAML code blocks
|
|
174
|
+
- Format strings for inline interpolation: `"version {value}"`
|
|
175
|
+
|
|
176
|
+
**Data tables**
|
|
177
|
+
- Render CSV and TSV files as Markdown tables
|
|
178
|
+
- Column selection, row filtering (exact match and comparison operators), sorting, pagination
|
|
179
|
+
|
|
180
|
+
**Table of contents**
|
|
181
|
+
- Auto-generated from document headings, including headings in embedded files
|
|
182
|
+
- GitHub-compatible anchor links
|
|
183
|
+
|
|
184
|
+
**AI context**
|
|
185
|
+
- `synopsis` — generate a condensed summary of a document
|
|
186
|
+
- `recall` — build structured retrieval blocks for AI agent context files
|
|
187
|
+
|
|
188
|
+
**Recursive embedding**
|
|
189
|
+
- Markdown files that embed other Markdown files, up to a configurable depth
|
|
190
|
+
|
|
191
|
+
**Safety**
|
|
192
|
+
- Configurable limits on file size, memory, recursion depth, and embed output size
|
|
193
|
+
- `--verify` mode for CI drift detection
|
|
194
|
+
|
|
195
|
+
## Documentation
|
|
196
|
+
|
|
197
|
+
| Document | Description |
|
|
198
|
+
|----------|-------------|
|
|
199
|
+
| [CLI Reference](doc/manual/compiled/cli.md) | All flags, input modes, and exit codes |
|
|
200
|
+
| [Configuration Reference](doc/manual/compiled/configuration.md) | `embedm-config.yaml` properties and defaults |
|
|
201
|
+
| [File Plugin](doc/manual/compiled/file_plugin.md) | File embedding, regions, lines, symbol extraction |
|
|
202
|
+
| [Query-Path Plugin](doc/manual/compiled/query_path_plugin.md) | Structured data extraction from JSON/YAML/TOML/XML |
|
|
203
|
+
| [Table Plugin](doc/manual/compiled/table_plugin.md) | CSV/TSV tables with filtering and sorting |
|
|
204
|
+
| [Toc Plugin](doc/manual/compiled/toc_plugin.md) | Table-of-contents generation |
|
|
205
|
+
| [Architecture](doc/manual/compiled/architecture.md) | System design, plugin model, plan/compile pipeline |
|
|
206
|
+
|
|
207
|
+
## Project Background
|
|
208
|
+
|
|
209
|
+
EmbedM is part of an exploration into how far AI-assisted development can go when building a non-trivial tool that could be used in a production CI/CD chain. This project has been built based on a _human_-defined architecture, functional spec, and a series of interface contracts, then implemented using [Claude](https://claude.ai/) and, to a lesser extent, [Google Gemini](https://gemini.google.com/app).
|
|
210
|
+
|
|
211
|
+
## License
|
|
212
|
+
|
|
213
|
+
MIT License — see the LICENSE.txt file for details.
|
|
214
|
+
|
|
215
|
+
## Contributing
|
|
216
|
+
|
|
217
|
+
Contributions are welcome. Please open an issue to discuss proposed changes before submitting a pull request.
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "embedm"
|
|
3
|
+
version = "0.9.0"
|
|
4
|
+
description = "A Python tool for embedding files, code snippets, and generating tables of contents in Markdown documents with built-in safety limits and validation"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.8"
|
|
7
|
+
license = {text = "MIT"}
|
|
8
|
+
authors = [
|
|
9
|
+
{name = "Fultslop"}
|
|
10
|
+
]
|
|
11
|
+
keywords = ["markdown", "embedding", "documentation", "code-snippets", "table-of-contents"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.8",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Documentation",
|
|
23
|
+
"Topic :: Software Development :: Documentation",
|
|
24
|
+
"Topic :: Text Processing :: Markup :: Markdown",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
dependencies = [
|
|
28
|
+
"PyYAML>=6.0",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
dev = [
|
|
33
|
+
"pytest>=7.0",
|
|
34
|
+
"pytest-cov>=4.0",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/Fultslop/embedm"
|
|
39
|
+
Repository = "https://github.com/Fultslop/embedm"
|
|
40
|
+
Issues = "https://github.com/Fultslop/embedm/issues"
|
|
41
|
+
|
|
42
|
+
[project.scripts]
|
|
43
|
+
embedm = "embedm.application.orchestration:main"
|
|
44
|
+
|
|
45
|
+
# md.start:entry_points
|
|
46
|
+
[project.entry-points."embedm.plugins"]
|
|
47
|
+
embedm_file = "embedm_plugins.file_plugin:FilePlugin"
|
|
48
|
+
table_of_contents = "embedm_plugins.toc_plugin:ToCPlugin"
|
|
49
|
+
hello_world = "embedm_plugins.hello_world_plugin:HelloWorldPlugin"
|
|
50
|
+
table = "embedm_plugins.table_plugin:TablePlugin"
|
|
51
|
+
synopsis = "embedm_plugins.synopsis_plugin:SynopsisPlugin"
|
|
52
|
+
recall = "embedm_plugins.recall_plugin:RecallPlugin"
|
|
53
|
+
query_path = "embedm_plugins.query_path_plugin:QueryPathPlugin"
|
|
54
|
+
# md.end:entry_points
|
|
55
|
+
|
|
56
|
+
[build-system]
|
|
57
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
58
|
+
build-backend = "setuptools.build_meta"
|
|
59
|
+
|
|
60
|
+
[tool.setuptools]
|
|
61
|
+
package-dir = {"" = "src"}
|
|
62
|
+
|
|
63
|
+
[tool.setuptools.packages.find]
|
|
64
|
+
where = ["src"]
|
|
65
|
+
|
|
66
|
+
[tool.pytest.ini_options]
|
|
67
|
+
pythonpath = ["src"]
|
|
68
|
+
testpaths = ["tests"]
|
|
69
|
+
norecursedirs = ["archive", ".*", "dist", "build"]
|
|
70
|
+
python_files = ["test_*.py", "*_test.py"]
|
|
71
|
+
python_functions = ["test_*", "*_test"]
|
|
72
|
+
|
|
73
|
+
addopts = [
|
|
74
|
+
"--verbose",
|
|
75
|
+
"--color=yes",
|
|
76
|
+
"--cov=embedm",
|
|
77
|
+
"--cov=embedm_plugins",
|
|
78
|
+
"--cov-report=term-missing:skip-covered",
|
|
79
|
+
"--cov-report=html",
|
|
80
|
+
"--cov-report=json",
|
|
81
|
+
]
|
|
82
|
+
|
|
83
|
+
[tool.coverage.run]
|
|
84
|
+
source = ["embedm"]
|
|
85
|
+
omit = [
|
|
86
|
+
"*/tests/*",
|
|
87
|
+
"*/test_*.py",
|
|
88
|
+
"src/embedm/__main__.py",
|
|
89
|
+
"src/embedm/application/console.py",
|
|
90
|
+
"src/embedm/application/orchestration.py"
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
[tool.coverage.report]
|
|
94
|
+
fail_under = 70
|
|
95
|
+
show_missing = true
|
|
96
|
+
exclude_lines = [
|
|
97
|
+
"pragma: no cover",
|
|
98
|
+
"def __repr__",
|
|
99
|
+
"raise AssertionError",
|
|
100
|
+
"raise NotImplementedError",
|
|
101
|
+
"if __name__ == .__main__.:",
|
|
102
|
+
"if TYPE_CHECKING:",
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# -------------------
|
|
107
|
+
# IMPORT LINTER
|
|
108
|
+
# -------------------
|
|
109
|
+
[tool.importlinter]
|
|
110
|
+
root_packages = ["embedm", "embedm_plugins"]
|
|
111
|
+
include_external_packages = false
|
|
112
|
+
|
|
113
|
+
[[tool.importlinter.contracts]]
|
|
114
|
+
name = "Domain must not import from infrastructure, cli, or application"
|
|
115
|
+
type = "forbidden"
|
|
116
|
+
source_modules = ["embedm.domain"]
|
|
117
|
+
forbidden_modules = ["embedm.infrastructure", "embedm.application"]
|
|
118
|
+
|
|
119
|
+
[[tool.importlinter.contracts]]
|
|
120
|
+
name = "Parsing must not import from infrastructure or application"
|
|
121
|
+
type = "forbidden"
|
|
122
|
+
source_modules = ["embedm.parsing"]
|
|
123
|
+
forbidden_modules = ["embedm.infrastructure", "embedm.application"]
|
|
124
|
+
|
|
125
|
+
[[tool.importlinter.contracts]]
|
|
126
|
+
name = "Plugins must not import from application"
|
|
127
|
+
type = "forbidden"
|
|
128
|
+
source_modules = ["embedm.plugins"]
|
|
129
|
+
forbidden_modules = ["embedm.application"]
|
|
130
|
+
|
|
131
|
+
[[tool.importlinter.contracts]]
|
|
132
|
+
name = "Infrastructure must not import from application, parsing, or plugins"
|
|
133
|
+
type = "forbidden"
|
|
134
|
+
source_modules = ["embedm.infrastructure"]
|
|
135
|
+
forbidden_modules = ["embedm.application", "embedm.parsing", "embedm.plugins"]
|
|
136
|
+
|
|
137
|
+
[[tool.importlinter.contracts]]
|
|
138
|
+
name = "Embedm plugins must not import from application"
|
|
139
|
+
type = "forbidden"
|
|
140
|
+
source_modules = ["embedm_plugins"]
|
|
141
|
+
forbidden_modules = ["embedm.application" ]
|
|
142
|
+
|
|
143
|
+
# -------------------
|
|
144
|
+
# VULTURE
|
|
145
|
+
# -------------------
|
|
146
|
+
[tool.vulture]
|
|
147
|
+
paths = ["src/"]
|
|
148
|
+
min_confidence = 80
|
|
149
|
+
exclude = ["__pycache__"]
|
|
150
|
+
|
|
151
|
+
# -------------------
|
|
152
|
+
# RADON / XENON
|
|
153
|
+
# -------------------
|
|
154
|
+
# Xenon thresholds (run via CLI):
|
|
155
|
+
# xenon src/ --max-absolute B --max-modules A --max-average A
|
|
156
|
+
|
|
157
|
+
# -------------------
|
|
158
|
+
# RUFF
|
|
159
|
+
# -------------------
|
|
160
|
+
[tool.ruff]
|
|
161
|
+
line-length = 120
|
|
162
|
+
target-version = "py311"
|
|
163
|
+
|
|
164
|
+
[tool.ruff.lint]
|
|
165
|
+
select = [
|
|
166
|
+
"E", # pycodestyle
|
|
167
|
+
"F", # pyflakes
|
|
168
|
+
"W",
|
|
169
|
+
"I", # import sorting
|
|
170
|
+
"B", # bugbear
|
|
171
|
+
"C4", # comprehensions
|
|
172
|
+
"UP", # pyupgrade
|
|
173
|
+
"ARG", # unused args
|
|
174
|
+
"SIM", # simplifications
|
|
175
|
+
]
|
|
176
|
+
ignore = []
|
|
177
|
+
|
|
178
|
+
[tool.ruff.format]
|
|
179
|
+
quote-style = "double"
|
|
180
|
+
indent-style = "space"
|
|
181
|
+
|
|
182
|
+
# -------------------
|
|
183
|
+
# MYPY
|
|
184
|
+
# -------------------
|
|
185
|
+
[tool.mypy]
|
|
186
|
+
python_version = "3.11"
|
|
187
|
+
strict = true
|
|
188
|
+
warn_unused_configs = true
|
|
189
|
+
warn_return_any = true
|
|
190
|
+
warn_unreachable = true
|
|
191
|
+
no_implicit_optional = true
|
|
192
|
+
disallow_untyped_defs = true
|
|
193
|
+
disallow_any_generics = true
|
|
194
|
+
check_untyped_defs = true
|
|
195
|
+
exclude = ["tests/"]
|
embedm-0.9.0/setup.cfg
ADDED
|
File without changes
|
|
File without changes
|