frontmatter-format 0.0.0.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frontmatter_format-0.0.0.dev0/LICENSE +21 -0
- frontmatter_format-0.0.0.dev0/PKG-INFO +289 -0
- frontmatter_format-0.0.0.dev0/README.md +269 -0
- frontmatter_format-0.0.0.dev0/frontmatter_format/__init__.py +41 -0
- frontmatter_format-0.0.0.dev0/frontmatter_format/frontmatter_format.py +262 -0
- frontmatter_format-0.0.0.dev0/frontmatter_format/key_sort.py +19 -0
- frontmatter_format-0.0.0.dev0/frontmatter_format/yaml_util.py +132 -0
- frontmatter_format-0.0.0.dev0/pyproject.toml +73 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Joshua Levy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: frontmatter-format
|
|
3
|
+
Version: 0.0.0.dev0
|
|
4
|
+
Summary: A format for YAML frontmatter on any file.
|
|
5
|
+
Home-page: https://github.com/jlevy/frontmatter-format
|
|
6
|
+
License: MIT
|
|
7
|
+
Author: Joshua Levy
|
|
8
|
+
Author-email: joshua@cal.berkeley.edu
|
|
9
|
+
Requires-Python: >=3.10,<4.0
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: ruamel-yaml (>=0.18.6,<0.19.0)
|
|
17
|
+
Project-URL: Repository, https://github.com/jlevy/frontmatter-format
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Frontmatter Format
|
|
21
|
+
|
|
22
|
+
## Motivation
|
|
23
|
+
|
|
24
|
+
Simple, readable metadata attached to files can be useful in numerous situations, such as
|
|
25
|
+
recording title, author, source, copyright, or the provenance of a file.
|
|
26
|
+
|
|
27
|
+
Unfortunately, it's often unclear how to format such metadata consistently across different
|
|
28
|
+
file types while also not breaking interoperability with existing tools.
|
|
29
|
+
|
|
30
|
+
**Frontmatter format** is a way to add metadata as frontmatter on any file.
|
|
31
|
+
It is a simple set of conventions to put structured metadata as YAML at the top of a file in
|
|
32
|
+
a syntax that is broadly compatible with programming languages, browsers, editors, and other
|
|
33
|
+
tools.
|
|
34
|
+
|
|
35
|
+
Frontmatter format specifies a syntax for the metadata as a comment block at the top of a
|
|
36
|
+
file.
|
|
37
|
+
This approach works while ensuring the file remains valid Markdown, HTML, CSS, Python,
|
|
38
|
+
C/C++, Rust, SQL, or most other text formats.
|
|
39
|
+
|
|
40
|
+
Frontmatter format is a generalization of the common format for frontmatter used by Jekyll
|
|
41
|
+
and other CMSs for Markdown files.
|
|
42
|
+
In that format, frontmatter is enclosed in lines containing `---` delimiters.
|
|
43
|
+
|
|
44
|
+
In this generalized format, we allow several styles of frontmatter demarcation, with the
|
|
45
|
+
first line of the file indicating the format and style.
|
|
46
|
+
|
|
47
|
+
This is a description of the format and a simple reference implementation.
|
|
48
|
+
The implementation is in Python but the format is very simple and easy to implement in any
|
|
49
|
+
language.
|
|
50
|
+
|
|
51
|
+
The purpose of this repository is to explain the idea of the format so anyone can use it,
|
|
52
|
+
and encourage the adoption of the format, especially for workflows around text documents that
|
|
53
|
+
are becoming increasingly common in AI tools and pipelines.
|
|
54
|
+
|
|
55
|
+
## Examples
|
|
56
|
+
|
|
57
|
+
```markdown
|
|
58
|
+
---
|
|
59
|
+
title: Sample Markdown File
|
|
60
|
+
state: draft
|
|
61
|
+
created_at: 2022-08-07 00:00:00
|
|
62
|
+
tags:
|
|
63
|
+
- yaml
|
|
64
|
+
- examples
|
|
65
|
+
# This is a YAML comment, so ignored.
|
|
66
|
+
---
|
|
67
|
+
Hello, *World*!
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
```html
|
|
71
|
+
<!---
|
|
72
|
+
title: Sample HTML File
|
|
73
|
+
--->
|
|
74
|
+
Hello, <i>World</i>!
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
#---
|
|
79
|
+
# author: Jane Doe
|
|
80
|
+
# description: A sample Python script
|
|
81
|
+
#---
|
|
82
|
+
print("Hello, World!")
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
```css
|
|
86
|
+
/*---
|
|
87
|
+
filename: styles.css
|
|
88
|
+
---*/
|
|
89
|
+
.hello {
|
|
90
|
+
color: green;
|
|
91
|
+
}
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
```sql
|
|
95
|
+
----
|
|
96
|
+
-- title: Sample SQL Script
|
|
97
|
+
----
|
|
98
|
+
SELECT * FROM world;
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Advantages of this Approach
|
|
102
|
+
|
|
103
|
+
- **Compatible with existing syntax:** By choosing a style for the metadata consistent with
|
|
104
|
+
any given file, it generally doesn't break existing tools.
|
|
105
|
+
Almost every language has a style for which frontmatter works as a comment.
|
|
106
|
+
|
|
107
|
+
- **Auto-detectable format:** Frontmatter and its format can be recognized by the first few
|
|
108
|
+
bytes of the file.
|
|
109
|
+
That means it's possible to detect metadata and parse it automatically.
|
|
110
|
+
|
|
111
|
+
- **Metadata is optional:** Files with or without metadata can be read with the same tools.
|
|
112
|
+
So it's easy to roll out metadata into files gracefully, as needed file by file.
|
|
113
|
+
|
|
114
|
+
- **YAML syntax:** JSON, YAML, XML, and TOML are all used for metadata in some situations.
|
|
115
|
+
YAML is the best choice here because it is already in widespread use with Markdown, is a
|
|
116
|
+
superset of JSON (in case an application wishes to use pure JSON), and is easy to read and
|
|
117
|
+
edit manually.
|
|
118
|
+
|
|
119
|
+
## Format Definition
|
|
120
|
+
|
|
121
|
+
A file is in frontmatter format if the first characters are one of the following:
|
|
122
|
+
|
|
123
|
+
- `---`
|
|
124
|
+
|
|
125
|
+
- `<!---`
|
|
126
|
+
|
|
127
|
+
- `#---`
|
|
128
|
+
|
|
129
|
+
- `//---`
|
|
130
|
+
|
|
131
|
+
- `/*---`
|
|
132
|
+
|
|
133
|
+
and if this prefix is followed by a newline (`\n`).
|
|
134
|
+
|
|
135
|
+
The prefix determines the *style* of the frontmatter.
|
|
136
|
+
The style specifies the matching terminating delimiter for the end of the frontmatter as
|
|
137
|
+
well as an optional prefix (which is typically a comment character in some language).
|
|
138
|
+
|
|
139
|
+
The supported frontmatter styles are:
|
|
140
|
+
|
|
141
|
+
1. *YAML style*: delimiters `---` and `---` with no prefix on each line.
|
|
142
|
+
Useful for text or Markdown content.
|
|
143
|
+
|
|
144
|
+
2. *HTML style*: delimiters `<!---` and `--->` with no prefix on each line.
|
|
145
|
+
Useful for HTML or XML or similar content.
|
|
146
|
+
|
|
147
|
+
3. *Hash style*: delimiters `#---` and `#---` with `# ` prefix on each line.
|
|
148
|
+
Useful for Python or similar code content.
|
|
149
|
+
Also works for CSV files with many tools.
|
|
150
|
+
|
|
151
|
+
4. *Rust style*: delimiters `//---` and `//---` with `// ` prefix on each line.
|
|
152
|
+
Useful for Rust or C++ or similar code content.
|
|
153
|
+
|
|
154
|
+
5. *C style*: delimiters `/*---` and `---*/` with no prefix on each line.
|
|
155
|
+
Useful for JavaScript, TypeScript, CSS or C or similar code content.
|
|
156
|
+
|
|
157
|
+
6. *Dash style*: delimiters `----` and `----` with `-- ` prefix on each line.
|
|
158
|
+
Useful for SQL or similar code content.
|
|
159
|
+
|
|
160
|
+
The delimiters must be alone on their own lines, terminated with a newline.
|
|
161
|
+
|
|
162
|
+
Any style is acceptable on any file as it can be automatically detected.
|
|
163
|
+
When writing, you can specify the style.
|
|
164
|
+
|
|
165
|
+
For all frontmatter styles, the content between the delimiters is YAML text in UTF-8
|
|
166
|
+
encoding, with an optional prefix on each line that depends on the style.
|
|
167
|
+
|
|
168
|
+
For some of the formats, each frontmatter line is prefixed with a prefix to make sure the
|
|
169
|
+
entire file remains valid in a given syntax (Python, Rust, SQL, etc.). This prefix is
|
|
170
|
+
stripped during parsing.
|
|
171
|
+
|
|
172
|
+
It is recommended to use a prefix with a trailing space (such as `# ` or `// `) but a bare
|
|
173
|
+
prefix without the trailing space (`#` or `//`) is also allowed.
|
|
174
|
+
|
|
175
|
+
Other whitespace is preserved (before parsing with YAML).
|
|
176
|
+
|
|
177
|
+
Note that YAML comments, which are lines beginning with `#` in the metadata, are allowed.
|
|
178
|
+
For example, for hash style, this means there must be two hashes (`# #` or `##`) at the
|
|
179
|
+
start of a comment line.
|
|
180
|
+
|
|
181
|
+
There is no restriction on the content of the file after the frontmatter.
|
|
182
|
+
It may even contain other content in frontmatter format, but this will not be parsed as
|
|
183
|
+
frontmatter.
|
|
184
|
+
Typically, it is text, but it could be binary as well.
|
|
185
|
+
|
|
186
|
+
Frontmatter is optional.
|
|
187
|
+
This means almost any text file can be read as frontmatter format.
|
|
188
|
+
|
|
189
|
+
## Reference Implementation
|
|
190
|
+
|
|
191
|
+
This is a simple Python reference implementation.
|
|
192
|
+
It auto-detects all the frontmatter styles above.
|
|
193
|
+
It supports reading small files easily into memory, but also allows extracting or changing
|
|
194
|
+
frontmatter without reading an entire file.
|
|
195
|
+
|
|
196
|
+
Both raw (string) and parsed YAML frontmatter (using ruamel.yaml) are supported.
|
|
197
|
+
For readability, there is also support for preferred sorting of YAML keys.
|
|
198
|
+
|
|
199
|
+
## Installation
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
# Use pip
|
|
203
|
+
pip install frontmatter-format
|
|
204
|
+
# Or poetry
|
|
205
|
+
poetry add frontmatter-format
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Usage
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from frontmatter_format import fmf_read, fmf_read_raw, fmf_write, FmStyle
|
|
212
|
+
|
|
213
|
+
# Write some content:
|
|
214
|
+
content = "Hello, World!"
|
|
215
|
+
metadata = {"title": "Test Title", "author": "Test Author"}
|
|
216
|
+
fmf_write("example.md", content, metadata, style=FmStyle.yaml)
|
|
217
|
+
|
|
218
|
+
# Or any other desired style:
|
|
219
|
+
html_content = "<p>Hello, World!</p>"
|
|
220
|
+
fmf_write("example.html", html_content, metadata, style=FmStyle.html)
|
|
221
|
+
|
|
222
|
+
# Read it back. Style is auto-detected:
|
|
223
|
+
content, metadata = fmf_read("example.md")
|
|
224
|
+
print(content) # Outputs: Hello, World!
|
|
225
|
+
print(metadata) # Outputs: {'title': 'Test Title', 'author': 'Test Author'}
|
|
226
|
+
|
|
227
|
+
# Read metadata without parsing:
|
|
228
|
+
content, raw_metadata = fmf_read_raw("example.md")
|
|
229
|
+
print(content) # Outputs: Hello, World!
|
|
230
|
+
print(raw_metadata) # Outputs: 'title: Test Title\nauthor: Test Author\n'
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
The above is easiest for small files, but you can also operate more efficiently directly on
|
|
234
|
+
files, without reading the file contents into memory.
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
from frontmatter_format import fmf_strip_frontmatter, fmf_insert_frontmatter, fmf_read_frontmatter_raw
|
|
238
|
+
|
|
239
|
+
# Strip and discard the metadata from a file:
|
|
240
|
+
fmf_strip_frontmatter("example.md")
|
|
241
|
+
|
|
242
|
+
# Insert the metadata at the top of an existing file:
|
|
243
|
+
new_metadata = {"title": "New Title", "author": "New Author"}
|
|
244
|
+
fmf_insert_frontmatter("example.md", new_metadata, fm_style=FmStyle.yaml)
|
|
245
|
+
|
|
246
|
+
# Read the raw frontmatter metadata and get the offset for the rest of the content:
|
|
247
|
+
raw_metadata, offset = fmf_read_frontmatter_raw("example.md")
|
|
248
|
+
print(raw_metadata) # Outputs: 'title: Test Title\nauthor: Test Author\n'
|
|
249
|
+
print(offset) # Outputs the byte offset where the content starts
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## FAQ
|
|
253
|
+
|
|
254
|
+
- **Is this mature?** This is the first draft of this format.
|
|
255
|
+
But I've been using this on my own projects for a couple months.
|
|
256
|
+
The flexibility of just having metadata on all your text files is great for workflows,
|
|
257
|
+
pipelines, etc.
|
|
258
|
+
|
|
259
|
+
- **When should we use it?** All the time if you can!
|
|
260
|
+
It's especially important for command-line tools, AI agents, LLM workflows, since you
|
|
261
|
+
often want to store extra metadata in a consistent way on text inputs of various formats
|
|
262
|
+
like Markdown, HTML, CSS, and Python.
|
|
263
|
+
|
|
264
|
+
- **Does this specify the format of the YAML itself?** No.
|
|
265
|
+
This is simply a format for attaching metadata.
|
|
266
|
+
What metadata you attach is up to your use case.
|
|
267
|
+
Standardizing headings like title, author, description, let alone other more
|
|
268
|
+
application-specific information is beyond the scope of this frontmatter format.
|
|
269
|
+
|
|
270
|
+
- **Can this work with Pydantic?** Yes, definitely.
|
|
271
|
+
In fact, I think it's probably a good practice to define self-identifiable Pydantic (or
|
|
272
|
+
Zod) schemas for all your metadata, and then just serialize and deserialize them to
|
|
273
|
+
frontmatter everywhere.
|
|
274
|
+
|
|
275
|
+
- **Isn't this the same as what some CMSs use, Markdown files and YAML at the top?** Yes!
|
|
276
|
+
But this generalizes that format, and removes the direct tie-in to Markdown or any CMS.
|
|
277
|
+
This can work with any tool.
|
|
278
|
+
For HTML and code, it works basically with no changes at all since the frontmatter is
|
|
279
|
+
considered a comment.
|
|
280
|
+
|
|
281
|
+
- **Can this work with binary files?** No reason why not, if it makes sense for you!
|
|
282
|
+
You can use `fmf_insert_frontmatter()` to add metadata of any style to any file.
|
|
283
|
+
Whether this works for your application depends on the file format.
|
|
284
|
+
|
|
285
|
+
- **Does this work for CSV files?** Sort of.
|
|
286
|
+
Some tools do properly honor hash style comments when parsing CSV files.
|
|
287
|
+
A few do not. Our recommendation is to go ahead and use it, and find ways to strip the
|
|
288
|
+
metadata at the last minute if you really can't get a tool to work with the metadata.
|
|
289
|
+
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# Frontmatter Format
|
|
2
|
+
|
|
3
|
+
## Motivation
|
|
4
|
+
|
|
5
|
+
Simple, readable metadata attached to files can be useful in numerous situations, such as
|
|
6
|
+
recording title, author, source, copyright, or the provenance of a file.
|
|
7
|
+
|
|
8
|
+
Unfortunately, it's often unclear how to format such metadata consistently across different
|
|
9
|
+
file types while also not breaking interoperability with existing tools.
|
|
10
|
+
|
|
11
|
+
**Frontmatter format** is a way to add metadata as frontmatter on any file.
|
|
12
|
+
It is a simple set of conventions to put structured metadata as YAML at the top of a file in
|
|
13
|
+
a syntax that is broadly compatible with programming languages, browsers, editors, and other
|
|
14
|
+
tools.
|
|
15
|
+
|
|
16
|
+
Frontmatter format specifies a syntax for the metadata as a comment block at the top of a
|
|
17
|
+
file.
|
|
18
|
+
This approach works while ensuring the file remains valid Markdown, HTML, CSS, Python,
|
|
19
|
+
C/C++, Rust, SQL, or most other text formats.
|
|
20
|
+
|
|
21
|
+
Frontmatter format is a generalization of the common format for frontmatter used by Jekyll
|
|
22
|
+
and other CMSs for Markdown files.
|
|
23
|
+
In that format, frontmatter is enclosed in lines containing `---` delimiters.
|
|
24
|
+
|
|
25
|
+
In this generalized format, we allow several styles of frontmatter demarcation, with the
|
|
26
|
+
first line of the file indicating the format and style.
|
|
27
|
+
|
|
28
|
+
This is a description of the format and a simple reference implementation.
|
|
29
|
+
The implementation is in Python but the format is very simple and easy to implement in any
|
|
30
|
+
language.
|
|
31
|
+
|
|
32
|
+
The purpose of this repository is to explain the idea of the format so anyone can use it,
|
|
33
|
+
and encourage the adoption of the format, especially for workflows around text documents that
|
|
34
|
+
are becoming increasingly common in AI tools and pipelines.
|
|
35
|
+
|
|
36
|
+
## Examples
|
|
37
|
+
|
|
38
|
+
```markdown
|
|
39
|
+
---
|
|
40
|
+
title: Sample Markdown File
|
|
41
|
+
state: draft
|
|
42
|
+
created_at: 2022-08-07 00:00:00
|
|
43
|
+
tags:
|
|
44
|
+
- yaml
|
|
45
|
+
- examples
|
|
46
|
+
# This is a YAML comment, so ignored.
|
|
47
|
+
---
|
|
48
|
+
Hello, *World*!
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
```html
|
|
52
|
+
<!---
|
|
53
|
+
title: Sample HTML File
|
|
54
|
+
--->
|
|
55
|
+
Hello, <i>World</i>!
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
#---
|
|
60
|
+
# author: Jane Doe
|
|
61
|
+
# description: A sample Python script
|
|
62
|
+
#---
|
|
63
|
+
print("Hello, World!")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
```css
|
|
67
|
+
/*---
|
|
68
|
+
filename: styles.css
|
|
69
|
+
---*/
|
|
70
|
+
.hello {
|
|
71
|
+
color: green;
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
```sql
|
|
76
|
+
----
|
|
77
|
+
-- title: Sample SQL Script
|
|
78
|
+
----
|
|
79
|
+
SELECT * FROM world;
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Advantages of this Approach
|
|
83
|
+
|
|
84
|
+
- **Compatible with existing syntax:** By choosing a style for the metadata consistent with
|
|
85
|
+
any given file, it generally doesn't break existing tools.
|
|
86
|
+
Almost every language has a style for which frontmatter works as a comment.
|
|
87
|
+
|
|
88
|
+
- **Auto-detectable format:** Frontmatter and its format can be recognized by the first few
|
|
89
|
+
bytes of the file.
|
|
90
|
+
That means it's possible to detect metadata and parse it automatically.
|
|
91
|
+
|
|
92
|
+
- **Metadata is optional:** Files with or without metadata can be read with the same tools.
|
|
93
|
+
So it's easy to roll out metadata into files gracefully, as needed file by file.
|
|
94
|
+
|
|
95
|
+
- **YAML syntax:** JSON, YAML, XML, and TOML are all used for metadata in some situations.
|
|
96
|
+
YAML is the best choice here because it is already in widespread use with Markdown, is a
|
|
97
|
+
superset of JSON (in case an application wishes to use pure JSON), and is easy to read and
|
|
98
|
+
edit manually.
|
|
99
|
+
|
|
100
|
+
## Format Definition
|
|
101
|
+
|
|
102
|
+
A file is in frontmatter format if the first characters are one of the following:
|
|
103
|
+
|
|
104
|
+
- `---`
|
|
105
|
+
|
|
106
|
+
- `<!---`
|
|
107
|
+
|
|
108
|
+
- `#---`
|
|
109
|
+
|
|
110
|
+
- `//---`
|
|
111
|
+
|
|
112
|
+
- `/*---`
|
|
113
|
+
|
|
114
|
+
and if this prefix is followed by a newline (`\n`).
|
|
115
|
+
|
|
116
|
+
The prefix determines the *style* of the frontmatter.
|
|
117
|
+
The style specifies the matching terminating delimiter for the end of the frontmatter as
|
|
118
|
+
well as an optional prefix (which is typically a comment character in some language).
|
|
119
|
+
|
|
120
|
+
The supported frontmatter styles are:
|
|
121
|
+
|
|
122
|
+
1. *YAML style*: delimiters `---` and `---` with no prefix on each line.
|
|
123
|
+
Useful for text or Markdown content.
|
|
124
|
+
|
|
125
|
+
2. *HTML style*: delimiters `<!---` and `--->` with no prefix on each line.
|
|
126
|
+
Useful for HTML or XML or similar content.
|
|
127
|
+
|
|
128
|
+
3. *Hash style*: delimiters `#---` and `#---` with `# ` prefix on each line.
|
|
129
|
+
Useful for Python or similar code content.
|
|
130
|
+
Also works for CSV files with many tools.
|
|
131
|
+
|
|
132
|
+
4. *Rust style*: delimiters `//---` and `//---` with `// ` prefix on each line.
|
|
133
|
+
Useful for Rust or C++ or similar code content.
|
|
134
|
+
|
|
135
|
+
5. *C style*: delimiters `/*---` and `---*/` with no prefix on each line.
|
|
136
|
+
Useful for JavaScript, TypeScript, CSS or C or similar code content.
|
|
137
|
+
|
|
138
|
+
6. *Dash style*: delimiters `----` and `----` with `-- ` prefix on each line.
|
|
139
|
+
Useful for SQL or similar code content.
|
|
140
|
+
|
|
141
|
+
The delimiters must be alone on their own lines, terminated with a newline.
|
|
142
|
+
|
|
143
|
+
Any style is acceptable on any file as it can be automatically detected.
|
|
144
|
+
When writing, you can specify the style.
|
|
145
|
+
|
|
146
|
+
For all frontmatter styles, the content between the delimiters is YAML text in UTF-8
|
|
147
|
+
encoding, with an optional prefix on each line that depends on the style.
|
|
148
|
+
|
|
149
|
+
For some of the formats, each frontmatter line is prefixed with a prefix to make sure the
|
|
150
|
+
entire file remains valid in a given syntax (Python, Rust, SQL, etc.). This prefix is
|
|
151
|
+
stripped during parsing.
|
|
152
|
+
|
|
153
|
+
It is recommended to use a prefix with a trailing space (such as `# ` or `// `) but a bare
|
|
154
|
+
prefix without the trailing space (`#` or `//`) is also allowed.
|
|
155
|
+
|
|
156
|
+
Other whitespace is preserved (before parsing with YAML).
|
|
157
|
+
|
|
158
|
+
Note that YAML comments, which are lines beginning with `#` in the metadata, are allowed.
|
|
159
|
+
For example, for hash style, this means there must be two hashes (`# #` or `##`) at the
|
|
160
|
+
start of a comment line.
|
|
161
|
+
|
|
162
|
+
There is no restriction on the content of the file after the frontmatter.
|
|
163
|
+
It may even contain other content in frontmatter format, but this will not be parsed as
|
|
164
|
+
frontmatter.
|
|
165
|
+
Typically, it is text, but it could be binary as well.
|
|
166
|
+
|
|
167
|
+
Frontmatter is optional.
|
|
168
|
+
This means almost any text file can be read as frontmatter format.
|
|
169
|
+
|
|
170
|
+
## Reference Implementation
|
|
171
|
+
|
|
172
|
+
This is a simple Python reference implementation.
|
|
173
|
+
It auto-detects all the frontmatter styles above.
|
|
174
|
+
It supports reading small files easily into memory, but also allows extracting or changing
|
|
175
|
+
frontmatter without reading an entire file.
|
|
176
|
+
|
|
177
|
+
Both raw (string) and parsed YAML frontmatter (using ruamel.yaml) are supported.
|
|
178
|
+
For readability, there is also support for preferred sorting of YAML keys.
|
|
179
|
+
|
|
180
|
+
## Installation
|
|
181
|
+
|
|
182
|
+
```
|
|
183
|
+
# Use pip
|
|
184
|
+
pip install frontmatter-format
|
|
185
|
+
# Or poetry
|
|
186
|
+
poetry add frontmatter-format
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Usage
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
from frontmatter_format import fmf_read, fmf_read_raw, fmf_write, FmStyle
|
|
193
|
+
|
|
194
|
+
# Write some content:
|
|
195
|
+
content = "Hello, World!"
|
|
196
|
+
metadata = {"title": "Test Title", "author": "Test Author"}
|
|
197
|
+
fmf_write("example.md", content, metadata, style=FmStyle.yaml)
|
|
198
|
+
|
|
199
|
+
# Or any other desired style:
|
|
200
|
+
html_content = "<p>Hello, World!</p>"
|
|
201
|
+
fmf_write("example.html", html_content, metadata, style=FmStyle.html)
|
|
202
|
+
|
|
203
|
+
# Read it back. Style is auto-detected:
|
|
204
|
+
content, metadata = fmf_read("example.md")
|
|
205
|
+
print(content) # Outputs: Hello, World!
|
|
206
|
+
print(metadata) # Outputs: {'title': 'Test Title', 'author': 'Test Author'}
|
|
207
|
+
|
|
208
|
+
# Read metadata without parsing:
|
|
209
|
+
content, raw_metadata = fmf_read_raw("example.md")
|
|
210
|
+
print(content) # Outputs: Hello, World!
|
|
211
|
+
print(raw_metadata) # Outputs: 'title: Test Title\nauthor: Test Author\n'
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
The above is easiest for small files, but you can also operate more efficiently directly on
|
|
215
|
+
files, without reading the file contents into memory.
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
from frontmatter_format import fmf_strip_frontmatter, fmf_insert_frontmatter, fmf_read_frontmatter_raw
|
|
219
|
+
|
|
220
|
+
# Strip and discard the metadata from a file:
|
|
221
|
+
fmf_strip_frontmatter("example.md")
|
|
222
|
+
|
|
223
|
+
# Insert the metadata at the top of an existing file:
|
|
224
|
+
new_metadata = {"title": "New Title", "author": "New Author"}
|
|
225
|
+
fmf_insert_frontmatter("example.md", new_metadata, fm_style=FmStyle.yaml)
|
|
226
|
+
|
|
227
|
+
# Read the raw frontmatter metadata and get the offset for the rest of the content:
|
|
228
|
+
raw_metadata, offset = fmf_read_frontmatter_raw("example.md")
|
|
229
|
+
print(raw_metadata) # Outputs: 'title: Test Title\nauthor: Test Author\n'
|
|
230
|
+
print(offset) # Outputs the byte offset where the content starts
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## FAQ
|
|
234
|
+
|
|
235
|
+
- **Is this mature?** This is the first draft of this format.
|
|
236
|
+
But I've been using this on my own projects for a couple months.
|
|
237
|
+
The flexibility of just having metadata on all your text files is great for workflows,
|
|
238
|
+
pipelines, etc.
|
|
239
|
+
|
|
240
|
+
- **When should we use it?** All the time if you can!
|
|
241
|
+
It's especially important for command-line tools, AI agents, LLM workflows, since you
|
|
242
|
+
often want to store extra metadata in a consistent way on text inputs of various formats
|
|
243
|
+
like Markdown, HTML, CSS, and Python.
|
|
244
|
+
|
|
245
|
+
- **Does this specify the format of the YAML itself?** No.
|
|
246
|
+
This is simply a format for attaching metadata.
|
|
247
|
+
What metadata you attach is up to your use case.
|
|
248
|
+
Standardizing headings like title, author, description, let alone other more
|
|
249
|
+
application-specific information is beyond the scope of this frontmatter format.
|
|
250
|
+
|
|
251
|
+
- **Can this work with Pydantic?** Yes, definitely.
|
|
252
|
+
In fact, I think it's probably a good practice to define self-identifiable Pydantic (or
|
|
253
|
+
Zod) schemas for all your metadata, and then just serialize and deserialize them to
|
|
254
|
+
frontmatter everywhere.
|
|
255
|
+
|
|
256
|
+
- **Isn't this the same as what some CMSs use, Markdown files and YAML at the top?** Yes!
|
|
257
|
+
But this generalizes that format, and removes the direct tie-in to Markdown or any CMS.
|
|
258
|
+
This can work with any tool.
|
|
259
|
+
For HTML and code, it works basically with no changes at all since the frontmatter is
|
|
260
|
+
considered a comment.
|
|
261
|
+
|
|
262
|
+
- **Can this work with binary files?** No reason why not, if it makes sense for you!
|
|
263
|
+
You can use `fmf_insert_frontmatter()` to add metadata of any style to any file.
|
|
264
|
+
Whether this works for your application depends on the file format.
|
|
265
|
+
|
|
266
|
+
- **Does this work for CSV files?** Sort of.
|
|
267
|
+
Some tools do properly honor hash style comments when parsing CSV files.
|
|
268
|
+
A few do not. Our recommendation is to go ahead and use it, and find ways to strip the
|
|
269
|
+
metadata at the last minute if you really can't get a tool to work with the metadata.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from .frontmatter_format import (
|
|
2
|
+
fmf_insert_frontmatter,
|
|
3
|
+
fmf_read,
|
|
4
|
+
fmf_read_frontmatter_raw,
|
|
5
|
+
fmf_read_raw,
|
|
6
|
+
fmf_strip_frontmatter,
|
|
7
|
+
fmf_write,
|
|
8
|
+
FmFormatError,
|
|
9
|
+
FmStyle,
|
|
10
|
+
Metadata,
|
|
11
|
+
)
|
|
12
|
+
from .key_sort import custom_key_sort
|
|
13
|
+
from .yaml_util import (
|
|
14
|
+
add_default_yaml_representer,
|
|
15
|
+
dump_yaml,
|
|
16
|
+
from_yaml_string,
|
|
17
|
+
new_yaml,
|
|
18
|
+
read_yaml_file,
|
|
19
|
+
to_yaml_string,
|
|
20
|
+
write_yaml_file,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"FmStyle",
|
|
25
|
+
"FmFormatError",
|
|
26
|
+
"fmf_write",
|
|
27
|
+
"fmf_read",
|
|
28
|
+
"fmf_read_raw",
|
|
29
|
+
"fmf_read_frontmatter_raw",
|
|
30
|
+
"fmf_strip_frontmatter",
|
|
31
|
+
"fmf_insert_frontmatter",
|
|
32
|
+
"Metadata",
|
|
33
|
+
"add_default_yaml_representer",
|
|
34
|
+
"dump_yaml",
|
|
35
|
+
"from_yaml_string",
|
|
36
|
+
"new_yaml",
|
|
37
|
+
"read_yaml_file",
|
|
38
|
+
"to_yaml_string",
|
|
39
|
+
"write_yaml_file",
|
|
40
|
+
"custom_key_sort",
|
|
41
|
+
]
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Python implementation of frontmatter format.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, cast, Dict, List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
from ruamel.yaml.error import YAMLError
|
|
13
|
+
|
|
14
|
+
from .yaml_util import from_yaml_string, KeySort, to_yaml_string
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FmFormatError(ValueError):
    """
    Signals a problem with the frontmatter format of a file.

    Derives from `ValueError`, so handlers that catch `ValueError` also
    catch this error.
    """
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(frozen=True)
class FmDelimiters:
    """
    Immutable description of one frontmatter style: its delimiter lines and
    the per-line prefix handling.
    """

    # Line that opens the frontmatter block.
    start: str
    # Line that closes the frontmatter block.
    end: str
    # Text put in front of every metadata line when writing ("" for none).
    prefix: str
    # Prefixes that may be removed from a metadata line when reading; they are
    # tried in list order, so longer variants should come first.
    strip_prefixes: List[str]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FmStyle(Enum):
    """
    The available ways of demarcating frontmatter in a file.

    Each member's value is an `FmDelimiters` holding the start delimiter, the
    end delimiter, the prefix written before each metadata line, and the
    prefixes that may be stripped from a metadata line.
    """

    # Plain `---` delimiters, no line prefix.
    yaml = FmDelimiters("---", "---", "", [])
    # Wrapped in an HTML comment, no line prefix.
    html = FmDelimiters("<!---", "--->", "", [])
    # `#` comment lines.
    hash = FmDelimiters("#---", "#---", "# ", ["# ", "#"])
    # `//` comment lines.
    slash = FmDelimiters("//---", "//---", "// ", ["// ", "//"])
    # Wrapped in a `/* ... */` comment, no line prefix.
    slash_star = FmDelimiters("/*---", "---*/", "", [])
    # `--` comment lines.
    dash = FmDelimiters("----", "----", "-- ", ["-- ", "--"])

    @property
    def start(self) -> str:
        """Delimiter line that opens the frontmatter."""
        return self.value.start

    @property
    def end(self) -> str:
        """Delimiter line that closes the frontmatter."""
        return self.value.end

    @property
    def prefix(self) -> str:
        """Prefix written before each metadata line (may be empty)."""
        return self.value.prefix

    @property
    def strip_prefixes(self) -> List[str]:
        """Prefixes that `strip_prefix` will remove, in the order tried."""
        return self.value.strip_prefixes

    def strip_prefix(self, line: str) -> str:
        """
        Return `line` without the first of this style's strip prefixes that
        it starts with, or `line` unchanged if none match.
        """
        matched = next(
            (candidate for candidate in self.strip_prefixes if line.startswith(candidate)),
            None,
        )
        if matched is None:
            return line
        return line[len(matched) :]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
Metadata = Dict[str, Any]
"""
Type alias for metadata parsed from frontmatter: a dict with string keys and
values of any type.
"""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def fmf_write(
    path: Path | str,
    content: str,
    metadata: Optional[Metadata | str],
    style: FmStyle = FmStyle.yaml,
    key_sort: Optional[KeySort] = None,
    make_parents: bool = True,
) -> None:
    """
    Write `content` to `path`, preceded by a frontmatter block when `metadata` is
    non-empty. `metadata` may be a raw string, written as-is, or a dict that is
    serialized to YAML (optionally ordering keys with `key_sort`).

    The frontmatter is wrapped in the delimiters of `style`, and each metadata line
    is prefixed with the style's prefix. When `make_parents` is set, missing parent
    directories are created first.

    The data is written to a temporary sibling file and then moved over `path` with
    `os.replace`; on failure the temporary file is removed and the error re-raised.
    """
    # Serialize before touching the filesystem.
    frontmatter_str = (
        metadata if isinstance(metadata, str) else to_yaml_string(metadata, key_sort=key_sort)
    )

    path = Path(path)
    if make_parents and path.parent:
        path.parent.mkdir(parents=True, exist_ok=True)

    # Assemble the whole output up front, then emit it in one go.
    pieces: List[str] = []
    if metadata:
        pieces.append(style.start + "\n")
        pieces.extend(style.prefix + line + "\n" for line in frontmatter_str.splitlines())
        pieces.append(style.end + "\n")
    pieces.append(content)

    tmp_path = f"{path}.fmf.write.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as out:
            out.writelines(pieces)
        os.replace(tmp_path, path)
    except Exception as e:
        # Best-effort cleanup of the partial temp file.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        raise e
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def fmf_read(path: Path | str) -> Tuple[str, Optional[Metadata]]:
    """
    Read UTF-8 text content (typically Markdown) and optional YAML frontmatter
    metadata from a file. The frontmatter delimiter style is auto-detected from the
    first line of the file. The whole file is read into memory.

    Returns `(content, metadata)`; `metadata` is None when the file has no
    frontmatter or the frontmatter is empty.

    Raises `FmFormatError` if the metadata is not valid YAML or does not parse to
    a dict.
    """
    content, metadata_str = fmf_read_raw(path)
    if not metadata_str:
        return content, None

    try:
        parsed = from_yaml_string(metadata_str)
    except YAMLError as e:
        raise FmFormatError(f"Error parsing YAML metadata: `{path}`: {e}") from e

    # Frontmatter must be a mapping; scalars and lists are rejected.
    if not isinstance(parsed, dict):
        raise FmFormatError(f"Invalid metadata type: {type(parsed)}")

    return content, cast(Metadata, parsed)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def fmf_read_raw(path: Path | str) -> Tuple[str, Optional[str]]:
    """
    Read a file and return `(content, metadata_str)`, where `content` is everything
    after the frontmatter and `metadata_str` is the unparsed frontmatter text
    (None when the file has no frontmatter).
    """
    raw_metadata, body_offset = fmf_read_frontmatter_raw(path)

    with open(path, "r", encoding="utf-8") as body_file:
        # Skip past the frontmatter; the offset is 0 when there is none.
        body_file.seek(body_offset)
        return body_file.read(), raw_metadata
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def fmf_read_frontmatter_raw(path: Path | str) -> Tuple[Optional[str], int]:
    """
    Read only the frontmatter of a file, without parsing it and without reading the
    body content into memory.

    The delimiter style is detected from the first line, which must equal (ignoring
    trailing whitespace) the start delimiter of one of the `FmStyle` members. All
    styles defined by `FmStyle` are recognized, so anything the write functions can
    produce can be read back.

    Returns `(metadata_str, offset)`. `metadata_str` is the frontmatter text with the
    style's line prefixes stripped, and `offset` is the position (as reported by
    `tell()`) at which the body content begins. Returns `(None, 0)` for an empty
    file or one that does not start with a recognized delimiter.

    Raises `FmFormatError` if a start delimiter is found but the matching end
    delimiter never appears.
    """
    with open(path, "r", encoding="utf-8") as f:
        first_line = f.readline().rstrip()

        # The start delimiters are all distinct, so at most one style matches.
        style = next((s for s in FmStyle if first_line == s.start), None)
        if style is None:
            # Empty file or no recognized frontmatter.
            return None, 0

        metadata_lines: List[str] = []
        # Use readline() rather than iterating over the file object: tell() is
        # not available on a text file while it is being iterated.
        while True:
            line = f.readline()
            if not line:
                break

            if line.rstrip() == style.end:
                metadata_str = "".join(style.strip_prefix(mline) for mline in metadata_lines)
                return metadata_str, f.tell()

            metadata_lines.append(line)

    # Reached end of file inside the frontmatter block.
    raise FmFormatError(
        f"Delimiter `{style.end}` for end of frontmatter not found: `{(path)}`"
    )
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def fmf_strip_frontmatter(path: Path | str) -> None:
    """
    Remove the frontmatter from the file at `path`, in place. The body is streamed
    to a temporary sibling file which then replaces the original, so the content is
    only read in order to copy it. A file without frontmatter is left untouched.
    """
    _, body_offset = fmf_read_frontmatter_raw(path)
    if body_offset <= 0:
        # No frontmatter to strip.
        return

    tmp_path = f"{path}.fmf.strip.tmp"
    try:
        with open(path, "r", encoding="utf-8") as src:
            with open(tmp_path, "w", encoding="utf-8") as dst:
                src.seek(body_offset)
                shutil.copyfileobj(src, dst)
        os.replace(tmp_path, path)
    except Exception as e:
        # Best-effort cleanup of the partial temp file.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        raise e
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def fmf_insert_frontmatter(
    path: Path | str,
    metadata: Optional[Metadata | str],
    fm_style: FmStyle = FmStyle.yaml,
    key_sort: Optional[KeySort] = None,
) -> None:
    """
    Insert metadata as frontmatter at the top of the given file, replacing any
    existing frontmatter. Does nothing when `metadata` is None.

    `metadata` may be a raw string, written as-is, or a dict that is serialized to
    YAML (optionally ordering keys with `key_sort`). The block is wrapped in the
    delimiters of `fm_style`, with the style's prefix on each metadata line.

    The result is written to a temporary sibling file that then replaces the
    original via `os.replace`; on failure the temporary file is removed and the
    original error is re-raised. Errors from reading the existing file (including
    `FmFormatError` for an unterminated frontmatter block) propagate to the caller.
    """
    if metadata is None:
        return

    if isinstance(metadata, str):
        frontmatter_str = metadata
    else:
        frontmatter_str = to_yaml_string(metadata, key_sort=key_sort)

    # Prepare the new frontmatter.
    frontmatter_lines = [fm_style.start + "\n"]
    if frontmatter_str:
        frontmatter_lines.extend(
            fm_style.prefix + line + "\n" for line in frontmatter_str.splitlines()
        )
    frontmatter_lines.append(fm_style.end + "\n")

    tmp_path = f"{path}.fmf.insert.tmp"

    try:
        # Determine where any existing frontmatter ends (offset is 0 if there is none).
        _, offset = fmf_read_frontmatter_raw(path)

        with open(tmp_path, "w", encoding="utf-8") as temp_file:
            temp_file.writelines(frontmatter_lines)

            # Append the original body, skipping the old frontmatter.
            with open(path, "r", encoding="utf-8") as original_file:
                original_file.seek(offset)
                shutil.copyfileobj(original_file, temp_file)

        os.replace(tmp_path, path)
    except Exception:
        # Best-effort cleanup of the partial temp file.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        # Bare raise keeps the original traceback intact.
        raise
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from typing import Any, Callable, List, Tuple, TypeVar
|
|
2
|
+
|
|
3
|
+
T = TypeVar("T")


def custom_key_sort(priority_keys: List[T]) -> Callable[[T], Any]:
    """
    Build a sort key function that places the keys in `priority_keys` first, in the
    order given, followed by every other key in its natural order.
    """
    unranked = float("inf")

    def sort_func(key: T) -> Tuple[float, T]:
        # Priority keys rank by their position in the list; all others rank last
        # and are then ordered by the key itself (the second tuple element).
        rank = float(priority_keys.index(key)) if key in priority_keys else unranked
        return (rank, key)

    return sort_func
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""
|
|
2
|
+
YAML file storage. Wraps ruamel.yaml with a few extra features.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from io import StringIO
|
|
7
|
+
from typing import Any, Callable, Dict, Optional, TextIO, Type
|
|
8
|
+
|
|
9
|
+
from ruamel.yaml import Representer, YAML
|
|
10
|
+
|
|
11
|
+
KeySort = Callable[[str], Any]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def none_or_empty_dict(val: Any) -> bool:
    """
    Tell whether `val` is None or compares equal to an empty dict.
    """
    if val is None:
        return True
    return val == {}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Registry of extra representers, keyed by type. new_yaml() installs each entry on
# the YAML instances it creates; entries are added via add_default_yaml_representer().
_default_representers: Dict[Type[Any], Callable[[Representer, Any], Any]] = {}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def add_default_yaml_representer(type: Type[Any], represent: Callable[[Representer, Any], Any]):
    """
    Register `represent` as the representer for `type`. The registration is stored
    in the module-level registry that `new_yaml()` reads, replacing any representer
    previously registered for the same type.
    """
    # The registry is mutated in place, not rebound, so no `global` is required.
    _default_representers.update({type: represent})
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def new_yaml(
    key_sort: Optional[KeySort] = None,
    suppress_vals: Optional[Callable[[Any], bool]] = none_or_empty_dict,
    stringify_unknown: bool = False,
    typ: str = "safe",
) -> YAML:
    """
    Configure a new YAML instance with custom settings.

    If just using this for pretty-printing values, can set `stringify_unknown` to avoid
    RepresenterError for unexpected types.

    For input, `typ="safe"` is safest. For output, consider using `typ="rt"` for better
    control of string formatting (e.g. style of long strings).

    Args:
        key_sort: Optional sort key applied to dict keys on output; when omitted,
            no reordering is done here.
        suppress_vals: Predicate selecting dict values to omit from output. Defaults
            to dropping None and empty dicts; pass None to keep every value.
        stringify_unknown: When true, registers a fallback representer that dumps
            values as `str(value)`.
        typ: Passed through to `YAML(typ=...)`.

    Returns:
        The configured YAML instance.
    """
    yaml = YAML(typ=typ)
    yaml.default_flow_style = False  # Block style dictionaries.

    # With suppress_vals=None, fall back to a predicate that suppresses nothing.
    suppr = suppress_vals or (lambda v: False)

    # NOTE(review): ruamel.yaml's add_representer looks like a classmethod that registers
    # on the representer class rather than on this instance. If so, the registrations
    # below (and the key_sort/suppr they capture) are shared by all YAML instances of
    # the same typ, and the most recent new_yaml() call wins. Confirm.

    # Ignore None values in output. Sort keys if key_sort is provided.
    def represent_dict(dumper, data):
        if key_sort:
            # Rebuild the dict in sorted key order; dicts preserve insertion order.
            data = {k: data[k] for k in sorted(data.keys(), key=key_sort)}
        return dumper.represent_dict({k: v for k, v in data.items() if not suppr(v)})

    yaml.representer.add_representer(dict, represent_dict)

    # Use YAML block style for strings with newlines.
    def represent_str(dumper, data):
        style = "|" if "\n" in data else None
        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)

    yaml.representer.add_representer(str, represent_str)

    # Add other default representers. These are registered after the dict and str
    # representers above, so a registry entry for dict or str is added later.
    for type, representer in _default_representers.items():
        yaml.representer.add_representer(type, representer)

    if stringify_unknown:

        # Fallback for types without a representer, registered under the None key.
        def represent_unknown(dumper, data):
            return dumper.represent_str(str(data))

        yaml.representer.add_representer(None, represent_unknown)

    if key_sort:
        # Presumably stops the representer from re-sorting keys itself, so the
        # key_sort order applied in represent_dict is kept -- TODO confirm.
        yaml.representer.sort_base_mapping_type_on_output = False

    return yaml
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def from_yaml_string(yaml_string: str) -> Any:
    """
    Parse a YAML string into a Python object, using a YAML instance from
    `new_yaml()` with its default settings.
    """
    loader = new_yaml()
    return loader.load(yaml_string)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def read_yaml_file(filename: str) -> Any:
    """
    Read a YAML file into a Python object, using a YAML instance from `new_yaml()`
    with its default settings.

    The file is opened as UTF-8, matching what `write_yaml_file()` writes, so the
    result does not depend on the platform's default encoding.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return new_yaml().load(f)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def to_yaml_string(
    value: Any, key_sort: Optional[KeySort] = None, stringify_unknown: bool = False
) -> str:
    """
    Serialize a Python object to a YAML string. `key_sort` and `stringify_unknown`
    are passed through to `new_yaml()`, which is used with `typ="rt"`.
    """
    dumper = new_yaml(key_sort=key_sort, stringify_unknown=stringify_unknown, typ="rt")
    buffer = StringIO()
    dumper.dump(value, buffer)
    return buffer.getvalue()
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def dump_yaml(
    value: Any, stream: TextIO, key_sort: Optional[KeySort] = None, stringify_unknown: bool = False
):
    """
    Serialize a Python object as YAML onto `stream`. `key_sort` and
    `stringify_unknown` are passed through to `new_yaml()`, which is used with
    `typ="rt"`.
    """
    dumper = new_yaml(key_sort=key_sort, stringify_unknown=stringify_unknown, typ="rt")
    dumper.dump(value, stream)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def write_yaml_file(
    value: Any, filename: str, key_sort: Optional[KeySort] = None, stringify_unknown: bool = False
):
    """
    Write `value` as YAML to `filename`. The data goes to a temporary file next to
    the target, which is then moved into place with `os.replace`. If anything
    fails, the temporary file is removed and the error is re-raised.
    """
    # Same directory as the target, with a temporary suffix.
    staging_name = f"{filename}.yml.tmp"
    try:
        with open(staging_name, "w", encoding="utf-8") as out:
            dump_yaml(value, out, key_sort, stringify_unknown=stringify_unknown)
        os.replace(staging_name, filename)
    except Exception as e:
        # Best-effort cleanup of the partial temp file.
        try:
            os.remove(staging_name)
        except FileNotFoundError:
            pass
        raise e
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "frontmatter-format"
|
|
3
|
+
# Keep this a dev version, as the dynamic versioning plugin is used for actual release versions:
|
|
4
|
+
version = "0.0.0.dev"
|
|
5
|
+
description = "A format for YAML frontmatter on any file."
|
|
6
|
+
authors = ["Joshua Levy <joshua@cal.berkeley.edu>"]
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
license = "MIT"
|
|
9
|
+
repository = "https://github.com/jlevy/frontmatter-format"
|
|
10
|
+
|
|
11
|
+
[tool.poetry.dependencies]
|
|
12
|
+
python = "^3.10"
|
|
13
|
+
ruamel-yaml = "^0.18.6"
|
|
14
|
+
|
|
15
|
+
[tool.poetry.group.dev.dependencies]
|
|
16
|
+
black = "^24.10.0"
|
|
17
|
+
pytest = "^8.3.3"
|
|
18
|
+
ruff = "^0.4.10"
|
|
19
|
+
usort = "^1.0.8.post1"
|
|
20
|
+
mypy = "^1.13.0"
|
|
21
|
+
codespell = "^2.3.0"
|
|
22
|
+
rich = "^13.9.3"
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["poetry-core"]
|
|
26
|
+
build-backend = "poetry.core.masonry.api"
|
|
27
|
+
|
|
28
|
+
# Auto-generated version in build, based on tag or commit.
|
|
29
|
+
# https://sam.hooke.me/note/2023/08/poetry-automatically-generated-package-version-from-git-commit/
|
|
30
|
+
[tool.poetry-dynamic-versioning]
|
|
31
|
+
enable = true
|
|
32
|
+
vcs = "git"
|
|
33
|
+
pattern = "^v?(?P<base>\\d+\\.\\d+\\.\\d+)(-?((?P<stage>[a-zA-Z]+)\\.?(?P<revision>\\d+)?))?"
|
|
34
|
+
format-jinja = """
|
|
35
|
+
{%- if distance == 0 -%}
|
|
36
|
+
{{- base -}}
|
|
37
|
+
{%- else -%}
|
|
38
|
+
{{- base }}.dev{{ distance }}+{{commit}}
|
|
39
|
+
{%- endif -%}
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
[tool.poetry.scripts]
|
|
43
|
+
lint = "devtools.lint:main"
|
|
44
|
+
test = "pytest:main"
|
|
45
|
+
|
|
46
|
+
[tool.black]
|
|
47
|
+
line-length = 100
|
|
48
|
+
|
|
49
|
+
[tool.ruff]
|
|
50
|
+
line-length = 100
|
|
51
|
+
|
|
52
|
+
[tool.ruff.lint]
|
|
53
|
+
ignore = ["E402", "E731", "E712"]
|
|
54
|
+
|
|
55
|
+
[tool.mypy]
|
|
56
|
+
disable_error_code = [
|
|
57
|
+
"import-untyped",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
[tool.codespell]
|
|
61
|
+
# ignore-words-list = "foo,bar"
|
|
62
|
+
# skip = "foo.py,bar.py"
|
|
63
|
+
|
|
64
|
+
[tool.pytest.ini_options]
|
|
65
|
+
python_files = ["*.py"]
|
|
66
|
+
python_classes = ["Test*"]
|
|
67
|
+
python_functions = ["test_*"]
|
|
68
|
+
testpaths = [
|
|
69
|
+
"frontmatter_format",
|
|
70
|
+
"tests",
|
|
71
|
+
]
|
|
72
|
+
norecursedirs = []
|
|
73
|
+
filterwarnings = []
|