frontmatter-format 0.0.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Joshua Levy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,289 @@
1
+ Metadata-Version: 2.1
2
+ Name: frontmatter-format
3
+ Version: 0.0.0.dev0
4
+ Summary: A format for YAML frontmatter on any file.
5
+ Home-page: https://github.com/jlevy/frontmatter-format
6
+ License: MIT
7
+ Author: Joshua Levy
8
+ Author-email: joshua@cal.berkeley.edu
9
+ Requires-Python: >=3.10,<4.0
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: ruamel-yaml (>=0.18.6,<0.19.0)
17
+ Project-URL: Repository, https://github.com/jlevy/frontmatter-format
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Frontmatter Format
21
+
22
+ ## Motivation
23
+
24
+ Simple, readable metadata attached to files can be useful in numerous situations, such as
25
+ recording title, author, source, copyright, or the provenance of a file.
26
+
27
+ Unfortunately, it's often unclear how to format such metadata consistently across different
28
+ file types while also not breaking interoperability with existing tools.
29
+
30
+ **Frontmatter format** is a way to add metadata as frontmatter on any file.
31
+ It is a simple set of conventions to put structured metadata as YAML at the top of a file in
32
+ a syntax that is broadly compatible with programming languages, browsers, editors, and other
33
+ tools.
34
+
35
+ Frontmatter format specifies a syntax for the metadata as a comment block at the top of a
36
+ file.
37
+ This approach works while ensuring the file remains valid Markdown, HTML, CSS, Python,
38
+ C/C++, Rust, SQL, or most other text formats.
39
+
40
+ Frontmatter format is a generalization of the common format for frontmatter used by Jekyll
41
+ and other CMSs for Markdown files.
42
+ In that format, frontmatter is enclosed in lines containing `---` delimiters.
43
+
44
+ In this generalized format, we allow several styles of frontmatter demarcation, with the
45
+ first line of the file indicating the format and style.
46
+
47
+ This is a description of the format and a simple reference implementation.
48
+ The implementation is in Python but the format is very simple and easy to implement in any
49
+ language.
50
+
51
+ The purpose of this repository is to explain the idea of the format so anyone can use it,
52
+ and encourage the adoption of the format, especially for workflows around text documents that
53
+ are becoming increasingly common in AI tools and pipelines.
54
+
55
+ ## Examples
56
+
57
+ ```markdown
58
+ ---
59
+ title: Sample Markdown File
60
+ state: draft
61
+ created_at: 2022-08-07 00:00:00
62
+ tags:
63
+ - yaml
64
+ - examples
65
+ # This is a YAML comment, so ignored.
66
+ ---
67
+ Hello, *World*!
68
+ ```
69
+
70
+ ```html
71
+ <!---
72
+ title: Sample HTML File
73
+ --->
74
+ Hello, <i>World</i>!
75
+ ```
76
+
77
+ ```python
78
+ #---
79
+ # author: Jane Doe
80
+ # description: A sample Python script
81
+ #---
82
+ print("Hello, World!")
83
+ ```
84
+
85
+ ```css
86
+ /*---
87
+ filename: styles.css
88
+ ---*/
89
+ .hello {
90
+ color: green;
91
+ }
92
+ ```
93
+
94
+ ```sql
95
+ ----
96
+ -- title: Sample SQL Script
97
+ ----
98
+ SELECT * FROM world;
99
+ ```
100
+
101
+ ## Advantages of this Approach
102
+
103
+ - **Compatible with existing syntax:** By choosing a style for the metadata consistent with
104
+ any given file, it generally doesn't break existing tools.
105
+ Almost every language has a style for which frontmatter works as a comment.
106
+
107
+ - **Auto-detectable format:** Frontmatter and its format can be recognized by the first few
108
+ bytes of the file.
109
+ That means it's possible to detect metadata and parse it automatically.
110
+
111
+ - **Metadata is optional:** Files with or without metadata can be read with the same tools.
112
+ So it's easy to roll out metadata into files gracefully, as needed file by file.
113
+
114
+ - **YAML syntax:** JSON, YAML, XML, and TOML are all used for metadata in some situations.
115
+ YAML is the best choice here because it is already in widespread use with Markdown, is a
116
+ superset of JSON (in case an application wishes to use pure JSON), and is easy to read and
117
+ edit manually.
118
+
119
+ ## Format Definition
120
+
121
+ A file is in frontmatter format if the first characters are one of the following:
122
+
123
+ - `---`
124
+
125
+ - `<!---`
126
+
127
+ - `#---`
128
+
129
+ - `//---`
130
+
131
+ - `/*---`
132
+
133
+ and if this prefix is followed by a newline (`\n`).
134
+
135
+ The prefix determines the *style* of the frontmatter.
136
+ The style specifies the matching terminating delimiter for the end of the frontmatter as
137
+ well as an optional prefix (which is typically a comment character in some language).
138
+
139
+ The supported frontmatter styles are:
140
+
141
+ 1. *YAML style*: delimiters `---` and `---` with no prefix on each line.
142
+ Useful for text or Markdown content.
143
+
144
+ 2. *HTML style*: delimiters `<!---` and `--->` with no prefix on each line.
145
+ Useful for HTML or XML or similar content.
146
+
147
+ 3. *Hash style*: delimiters `#---` and `#---` with `# ` prefix on each line.
148
+ Useful for Python or similar code content.
149
+ Also works for CSV files with many tools.
150
+
151
+ 4. *Rust style*: delimiters `//---` and `//---` with `// ` prefix on each line.
152
+ Useful for Rust or C++ or similar code content.
153
+
154
+ 5. *C style*: delimiters `/*---` and `---*/` with no prefix on each line.
155
+ Useful for JavaScript, TypeScript, CSS or C or similar code content.
156
+
157
+ 6. *Dash style*: delimiters `----` and `----` with `-- ` prefix on each line.
158
+ Useful for SQL or similar code content.
159
+
160
+ The delimiters must be alone on their own lines, terminated with a newline.
161
+
162
+ Any style is acceptable on any file as it can be automatically detected.
163
+ When writing, you can specify the style.
164
+
165
+ For all frontmatter styles, the content between the delimiters is YAML text in UTF-8
166
+ encoding, with an optional prefix on each line that depends on the style.
167
+
168
+ For some of the formats, each frontmatter line is prefixed with a prefix to make sure the
169
+ entire file remains valid in a given syntax (Python, Rust, SQL, etc.). This prefix is
170
+ stripped during parsing.
171
+
172
+ It is recommended to use a prefix with a trailing space (such as `# ` or `// `) but a bare
173
+ prefix without the trailing space (`#` or `//`) is also allowed.
174
+
175
+ Other whitespace is preserved (before parsing with YAML).
176
+
177
+ Note that YAML comments, which are lines beginning with `#` in the metadata, are allowed.
178
+ For example, for hash style, this means there must be two hashes (`# #` or `##`) at the
179
+ start of a comment line.
180
+
181
+ There is no restriction on the content of the file after the frontmatter.
182
+ It may even contain other content in frontmatter format, but this will not be parsed as
183
+ frontmatter.
184
+ Typically, it is text, but it could be binary as well.
185
+
186
+ Frontmatter is optional.
187
+ This means almost any text file can be read as frontmatter format.
188
+
189
+ ## Reference Implementation
190
+
191
+ This is a simple Python reference implementation.
192
+ It auto-detects all the frontmatter styles above.
193
+ It supports reading small files easily into memory, but also allows extracting or changing
194
+ frontmatter without reading an entire file.
195
+
196
+ Both raw (string) and parsed YAML frontmatter (using ruamel.yaml) are supported.
197
+ For readability, there is also support for preferred sorting of YAML keys.
198
+
199
+ ## Installation
200
+
201
+ ```
202
+ # Use pip
203
+ pip install frontmatter-format
204
+ # Or poetry
205
+ poetry add frontmatter-format
206
+ ```
207
+
208
+ ## Usage
209
+
210
+ ```python
211
+ from frontmatter_format import fmf_read, fmf_read_raw, fmf_write, FmStyle
212
+
213
+ # Write some content:
214
+ content = "Hello, World!"
215
+ metadata = {"title": "Test Title", "author": "Test Author"}
216
+ fmf_write("example.md", content, metadata, style=FmStyle.yaml)
217
+
218
+ # Or any other desired style:
219
+ html_content = "<p>Hello, World!</p>"
220
+ fmf_write("example.html", html_content, metadata, style=FmStyle.html)
221
+
222
+ # Read it back. Style is auto-detected:
223
+ content, metadata = fmf_read("example.md")
224
+ print(content) # Outputs: Hello, World!
225
+ print(metadata) # Outputs: {'title': 'Test Title', 'author': 'Test Author'}
226
+
227
+ # Read metadata without parsing:
228
+ content, raw_metadata = fmf_read_raw("example.md")
229
+ print(content) # Outputs: Hello, World!
230
+ print(raw_metadata) # Outputs: 'title: Test Title\nauthor: Test Author\n'
231
+ ```
232
+
233
+ The above is easiest for small files, but you can also operate more efficiently directly on
234
+ files, without reading the file contents into memory.
235
+
236
+ ```python
237
+ from frontmatter_format import fmf_strip_frontmatter, fmf_insert_frontmatter, fmf_read_frontmatter_raw, FmStyle
238
+
239
+ # Strip and discard the metadata from a file:
240
+ fmf_strip_frontmatter("example.md")
241
+
242
+ # Insert the metadata at the top of an existing file:
243
+ new_metadata = {"title": "New Title", "author": "New Author"}
244
+ fmf_insert_frontmatter("example.md", new_metadata, fm_style=FmStyle.yaml)
245
+
246
+ # Read the raw frontmatter metadata and get the offset for the rest of the content:
247
+ raw_metadata, offset = fmf_read_frontmatter_raw("example.md")
248
+ print(raw_metadata) # Outputs: 'title: Test Title\nauthor: Test Author\n'
249
+ print(offset) # Outputs the byte offset where the content starts
250
+ ```
251
+
252
+ ## FAQ
253
+
254
+ - **Is this mature?** This is the first draft of this format.
255
+ But I've been using this on my own projects for a couple months.
256
+ The flexibility of just having metadata on all your text files is great for workflows,
257
+ pipelines, etc.
258
+
259
+ - **When should we use it?** All the time if you can!
260
+ It's especially important for command-line tools, AI agents, LLM workflows, since you
261
+ often want to store extra metadata in a consistent way on text inputs of various formats
262
+ like Markdown, HTML, CSS, and Python.
263
+
264
+ - **Does this specify the format of the YAML itself?** No.
265
+ This is simply a format for attaching metadata.
266
+ What metadata you attach is up to your use case.
267
+ Standardizing headings like title, author, description, let alone other more
268
+ application-specific information is beyond the scope of this frontmatter format.
269
+
270
+ - **Can this work with Pydantic?** Yes, definitely.
271
+ In fact, I think it's probably a good practice to define self-identifiable Pydantic (or
272
+ Zod) schemas for all your metadata, and then just serialize and deserialize them to
273
+ frontmatter everywhere.
274
+
275
+ - **Isn't this the same as what some CMSs use, Markdown files and YAML at the top?** Yes!
276
+ But this generalizes that format, and removes the direct tie-in to Markdown or any CMS.
277
+ This can work with any tool.
278
+ For HTML and code, it works basically with no changes at all since the frontmatter is
279
+ considered a comment.
280
+
281
+ - **Can this work with binary files?** No reason why not, if it makes sense for you!
282
+ You can use `fmf_insert_frontmatter()` to add metadata of any style to any file.
283
+ Whether this works for your application depends on the file format.
284
+
285
+ - **Does this work for CSV files?** Sort of.
286
+ Some tools do properly honor hash style comments when parsing CSV files.
287
+ A few do not. Our recommendation is to go ahead and use it, and find ways to strip the
288
+ metadata at the last minute if you really can't get a tool to work with the metadata.
289
+
@@ -0,0 +1,269 @@
1
+ # Frontmatter Format
2
+
3
+ ## Motivation
4
+
5
+ Simple, readable metadata attached to files can be useful in numerous situations, such as
6
+ recording title, author, source, copyright, or the provenance of a file.
7
+
8
+ Unfortunately, it's often unclear how to format such metadata consistently across different
9
+ file types while also not breaking interoperability with existing tools.
10
+
11
+ **Frontmatter format** is a way to add metadata as frontmatter on any file.
12
+ It is a simple set of conventions to put structured metadata as YAML at the top of a file in
13
+ a syntax that is broadly compatible with programming languages, browsers, editors, and other
14
+ tools.
15
+
16
+ Frontmatter format specifies a syntax for the metadata as a comment block at the top of a
17
+ file.
18
+ This approach works while ensuring the file remains valid Markdown, HTML, CSS, Python,
19
+ C/C++, Rust, SQL, or most other text formats.
20
+
21
+ Frontmatter format is a generalization of the common format for frontmatter used by Jekyll
22
+ and other CMSs for Markdown files.
23
+ In that format, frontmatter is enclosed in lines containing `---` delimiters.
24
+
25
+ In this generalized format, we allow several styles of frontmatter demarcation, with the
26
+ first line of the file indicating the format and style.
27
+
28
+ This is a description of the format and a simple reference implementation.
29
+ The implementation is in Python but the format is very simple and easy to implement in any
30
+ language.
31
+
32
+ The purpose of this repository is to explain the idea of the format so anyone can use it,
33
+ and encourage the adoption of the format, especially for workflows around text documents that
34
+ are becoming increasingly common in AI tools and pipelines.
35
+
36
+ ## Examples
37
+
38
+ ```markdown
39
+ ---
40
+ title: Sample Markdown File
41
+ state: draft
42
+ created_at: 2022-08-07 00:00:00
43
+ tags:
44
+ - yaml
45
+ - examples
46
+ # This is a YAML comment, so ignored.
47
+ ---
48
+ Hello, *World*!
49
+ ```
50
+
51
+ ```html
52
+ <!---
53
+ title: Sample HTML File
54
+ --->
55
+ Hello, <i>World</i>!
56
+ ```
57
+
58
+ ```python
59
+ #---
60
+ # author: Jane Doe
61
+ # description: A sample Python script
62
+ #---
63
+ print("Hello, World!")
64
+ ```
65
+
66
+ ```css
67
+ /*---
68
+ filename: styles.css
69
+ ---*/
70
+ .hello {
71
+ color: green;
72
+ }
73
+ ```
74
+
75
+ ```sql
76
+ ----
77
+ -- title: Sample SQL Script
78
+ ----
79
+ SELECT * FROM world;
80
+ ```
81
+
82
+ ## Advantages of this Approach
83
+
84
+ - **Compatible with existing syntax:** By choosing a style for the metadata consistent with
85
+ any given file, it generally doesn't break existing tools.
86
+ Almost every language has a style for which frontmatter works as a comment.
87
+
88
+ - **Auto-detectable format:** Frontmatter and its format can be recognized by the first few
89
+ bytes of the file.
90
+ That means it's possible to detect metadata and parse it automatically.
91
+
92
+ - **Metadata is optional:** Files with or without metadata can be read with the same tools.
93
+ So it's easy to roll out metadata into files gracefully, as needed file by file.
94
+
95
+ - **YAML syntax:** JSON, YAML, XML, and TOML are all used for metadata in some situations.
96
+ YAML is the best choice here because it is already in widespread use with Markdown, is a
97
+ superset of JSON (in case an application wishes to use pure JSON), and is easy to read and
98
+ edit manually.
99
+
100
+ ## Format Definition
101
+
102
+ A file is in frontmatter format if the first characters are one of the following:
103
+
104
+ - `---`
105
+
106
+ - `<!---`
107
+
108
+ - `#---`
109
+
110
+ - `//---`
111
+
112
+ - `/*---`
113
+
114
+ and if this prefix is followed by a newline (`\n`).
115
+
116
+ The prefix determines the *style* of the frontmatter.
117
+ The style specifies the matching terminating delimiter for the end of the frontmatter as
118
+ well as an optional prefix (which is typically a comment character in some language).
119
+
120
+ The supported frontmatter styles are:
121
+
122
+ 1. *YAML style*: delimiters `---` and `---` with no prefix on each line.
123
+ Useful for text or Markdown content.
124
+
125
+ 2. *HTML style*: delimiters `<!---` and `--->` with no prefix on each line.
126
+ Useful for HTML or XML or similar content.
127
+
128
+ 3. *Hash style*: delimiters `#---` and `#---` with `# ` prefix on each line.
129
+ Useful for Python or similar code content.
130
+ Also works for CSV files with many tools.
131
+
132
+ 4. *Rust style*: delimiters `//---` and `//---` with `// ` prefix on each line.
133
+ Useful for Rust or C++ or similar code content.
134
+
135
+ 5. *C style*: delimiters `/*---` and `---*/` with no prefix on each line.
136
+ Useful for JavaScript, TypeScript, CSS or C or similar code content.
137
+
138
+ 6. *Dash style*: delimiters `----` and `----` with `-- ` prefix on each line.
139
+ Useful for SQL or similar code content.
140
+
141
+ The delimiters must be alone on their own lines, terminated with a newline.
142
+
143
+ Any style is acceptable on any file as it can be automatically detected.
144
+ When writing, you can specify the style.
145
+
146
+ For all frontmatter styles, the content between the delimiters is YAML text in UTF-8
147
+ encoding, with an optional prefix on each line that depends on the style.
148
+
149
+ For some of the formats, each frontmatter line is prefixed with a prefix to make sure the
150
+ entire file remains valid in a given syntax (Python, Rust, SQL, etc.). This prefix is
151
+ stripped during parsing.
152
+
153
+ It is recommended to use a prefix with a trailing space (such as `# ` or `// `) but a bare
154
+ prefix without the trailing space (`#` or `//`) is also allowed.
155
+
156
+ Other whitespace is preserved (before parsing with YAML).
157
+
158
+ Note that YAML comments, which are lines beginning with `#` in the metadata, are allowed.
159
+ For example, for hash style, this means there must be two hashes (`# #` or `##`) at the
160
+ start of a comment line.
161
+
162
+ There is no restriction on the content of the file after the frontmatter.
163
+ It may even contain other content in frontmatter format, but this will not be parsed as
164
+ frontmatter.
165
+ Typically, it is text, but it could be binary as well.
166
+
167
+ Frontmatter is optional.
168
+ This means almost any text file can be read as frontmatter format.
169
+
170
+ ## Reference Implementation
171
+
172
+ This is a simple Python reference implementation.
173
+ It auto-detects all the frontmatter styles above.
174
+ It supports reading small files easily into memory, but also allows extracting or changing
175
+ frontmatter without reading an entire file.
176
+
177
+ Both raw (string) and parsed YAML frontmatter (using ruamel.yaml) are supported.
178
+ For readability, there is also support for preferred sorting of YAML keys.
179
+
180
+ ## Installation
181
+
182
+ ```
183
+ # Use pip
184
+ pip install frontmatter-format
185
+ # Or poetry
186
+ poetry add frontmatter-format
187
+ ```
188
+
189
+ ## Usage
190
+
191
+ ```python
192
+ from frontmatter_format import fmf_read, fmf_read_raw, fmf_write, FmStyle
193
+
194
+ # Write some content:
195
+ content = "Hello, World!"
196
+ metadata = {"title": "Test Title", "author": "Test Author"}
197
+ fmf_write("example.md", content, metadata, style=FmStyle.yaml)
198
+
199
+ # Or any other desired style:
200
+ html_content = "<p>Hello, World!</p>"
201
+ fmf_write("example.html", html_content, metadata, style=FmStyle.html)
202
+
203
+ # Read it back. Style is auto-detected:
204
+ content, metadata = fmf_read("example.md")
205
+ print(content) # Outputs: Hello, World!
206
+ print(metadata) # Outputs: {'title': 'Test Title', 'author': 'Test Author'}
207
+
208
+ # Read metadata without parsing:
209
+ content, raw_metadata = fmf_read_raw("example.md")
210
+ print(content) # Outputs: Hello, World!
211
+ print(raw_metadata) # Outputs: 'title: Test Title\nauthor: Test Author\n'
212
+ ```
213
+
214
+ The above is easiest for small files, but you can also operate more efficiently directly on
215
+ files, without reading the file contents into memory.
216
+
217
+ ```python
218
+ from frontmatter_format import fmf_strip_frontmatter, fmf_insert_frontmatter, fmf_read_frontmatter_raw, FmStyle
219
+
220
+ # Strip and discard the metadata from a file:
221
+ fmf_strip_frontmatter("example.md")
222
+
223
+ # Insert the metadata at the top of an existing file:
224
+ new_metadata = {"title": "New Title", "author": "New Author"}
225
+ fmf_insert_frontmatter("example.md", new_metadata, fm_style=FmStyle.yaml)
226
+
227
+ # Read the raw frontmatter metadata and get the offset for the rest of the content:
228
+ raw_metadata, offset = fmf_read_frontmatter_raw("example.md")
229
+ print(raw_metadata) # Outputs: 'title: Test Title\nauthor: Test Author\n'
230
+ print(offset) # Outputs the byte offset where the content starts
231
+ ```
232
+
233
+ ## FAQ
234
+
235
+ - **Is this mature?** This is the first draft of this format.
236
+ But I've been using this on my own projects for a couple months.
237
+ The flexibility of just having metadata on all your text files is great for workflows,
238
+ pipelines, etc.
239
+
240
+ - **When should we use it?** All the time if you can!
241
+ It's especially important for command-line tools, AI agents, LLM workflows, since you
242
+ often want to store extra metadata in a consistent way on text inputs of various formats
243
+ like Markdown, HTML, CSS, and Python.
244
+
245
+ - **Does this specify the format of the YAML itself?** No.
246
+ This is simply a format for attaching metadata.
247
+ What metadata you attach is up to your use case.
248
+ Standardizing headings like title, author, description, let alone other more
249
+ application-specific information is beyond the scope of this frontmatter format.
250
+
251
+ - **Can this work with Pydantic?** Yes, definitely.
252
+ In fact, I think it's probably a good practice to define self-identifiable Pydantic (or
253
+ Zod) schemas for all your metadata, and then just serialize and deserialize them to
254
+ frontmatter everywhere.
255
+
256
+ - **Isn't this the same as what some CMSs use, Markdown files and YAML at the top?** Yes!
257
+ But this generalizes that format, and removes the direct tie-in to Markdown or any CMS.
258
+ This can work with any tool.
259
+ For HTML and code, it works basically with no changes at all since the frontmatter is
260
+ considered a comment.
261
+
262
+ - **Can this work with binary files?** No reason why not, if it makes sense for you!
263
+ You can use `fmf_insert_frontmatter()` to add metadata of any style to any file.
264
+ Whether this works for your application depends on the file format.
265
+
266
+ - **Does this work for CSV files?** Sort of.
267
+ Some tools do properly honor hash style comments when parsing CSV files.
268
+ A few do not. Our recommendation is to go ahead and use it, and find ways to strip the
269
+ metadata at the last minute if you really can't get a tool to work with the metadata.
@@ -0,0 +1,41 @@
1
+ from .frontmatter_format import (
2
+ fmf_insert_frontmatter,
3
+ fmf_read,
4
+ fmf_read_frontmatter_raw,
5
+ fmf_read_raw,
6
+ fmf_strip_frontmatter,
7
+ fmf_write,
8
+ FmFormatError,
9
+ FmStyle,
10
+ Metadata,
11
+ )
12
+ from .key_sort import custom_key_sort
13
+ from .yaml_util import (
14
+ add_default_yaml_representer,
15
+ dump_yaml,
16
+ from_yaml_string,
17
+ new_yaml,
18
+ read_yaml_file,
19
+ to_yaml_string,
20
+ write_yaml_file,
21
+ )
22
+
23
+ __all__ = [
24
+ "FmStyle",
25
+ "FmFormatError",
26
+ "fmf_write",
27
+ "fmf_read",
28
+ "fmf_read_raw",
29
+ "fmf_read_frontmatter_raw",
30
+ "fmf_strip_frontmatter",
31
+ "fmf_insert_frontmatter",
32
+ "Metadata",
33
+ "add_default_yaml_representer",
34
+ "dump_yaml",
35
+ "from_yaml_string",
36
+ "new_yaml",
37
+ "read_yaml_file",
38
+ "to_yaml_string",
39
+ "write_yaml_file",
40
+ "custom_key_sort",
41
+ ]
@@ -0,0 +1,262 @@
1
+ """
2
+ Python implementation of frontmatter format.
3
+ """
4
+
5
+ import os
6
+ import shutil
7
+ from dataclasses import dataclass
8
+ from enum import Enum
9
+ from pathlib import Path
10
+ from typing import Any, cast, Dict, List, Optional, Tuple
11
+
12
+ from ruamel.yaml.error import YAMLError
13
+
14
+ from .yaml_util import from_yaml_string, KeySort, to_yaml_string
15
+
16
+
17
class FmFormatError(ValueError):
    """
    Raised when a file's frontmatter cannot be used: the closing delimiter is
    missing, the metadata is not valid YAML, or the parsed YAML is not a mapping.

    Subclasses `ValueError`, so callers that already catch `ValueError` also
    catch this.
    """
21
+
22
+
23
@dataclass(frozen=True)
class FmDelimiters:
    """
    Immutable description of how one frontmatter style is delimited.
    """

    # Line that opens the frontmatter block (written and matched without the newline).
    start: str
    # Line that closes the frontmatter block.
    end: str
    # Text written in front of every metadata line for this style ("" for none).
    prefix: str
    # Prefixes removed from metadata lines when reading, tried in list order.
    strip_prefixes: List[str]
29
+
30
+
31
class FmStyle(Enum):
    """
    The available frontmatter styles.

    Each member's value is the `FmDelimiters` record describing the opening line,
    the closing line, and the per-line prefix used by that style.
    """

    yaml = FmDelimiters("---", "---", "", [])
    html = FmDelimiters("<!---", "--->", "", [])
    hash = FmDelimiters("#---", "#---", "# ", ["# ", "#"])
    slash = FmDelimiters("//---", "//---", "// ", ["// ", "//"])
    slash_star = FmDelimiters("/*---", "---*/", "", [])
    dash = FmDelimiters("----", "----", "-- ", ["-- ", "--"])

    @property
    def start(self) -> str:
        """Opening delimiter line of this style."""
        return self.value.start

    @property
    def end(self) -> str:
        """Closing delimiter line of this style."""
        return self.value.end

    @property
    def prefix(self) -> str:
        """Prefix placed before each metadata line when writing."""
        return self.value.prefix

    @property
    def strip_prefixes(self) -> List[str]:
        """Prefixes that may be removed from a metadata line when reading."""
        return self.value.strip_prefixes

    def strip_prefix(self, line: str) -> str:
        """
        Return `line` without this style's line prefix.

        The candidates in `strip_prefixes` are checked in order and only the first
        one that matches is removed. A line that starts with none of them is
        returned unchanged.
        """
        matched = next(
            (candidate for candidate in self.strip_prefixes if line.startswith(candidate)),
            None,
        )
        if matched is None:
            return line
        return line[len(matched) :]
64
+
65
+
66
# Type alias: frontmatter metadata after YAML parsing is a mapping with string keys.
Metadata = Dict[str, Any]
"""
Parsed metadata from frontmatter.
"""
70
+
71
+
72
def fmf_write(
    path: Path | str,
    content: str,
    metadata: Optional[Metadata | str],
    style: FmStyle = FmStyle.yaml,
    key_sort: Optional[KeySort] = None,
    make_parents: bool = True,
) -> None:
    """
    Write text content to a file, preceded by optional frontmatter metadata.

    The file is first written to a temporary sibling (`<path>.fmf.write.tmp`) and
    then moved over `path` with `os.replace`, so a failed write does not leave a
    partially written target behind.

    Args:
        path: Destination file.
        content: Body text written after the frontmatter. It can be any text,
            not only Markdown.
        metadata: A dict that is serialized to YAML, or a string that is used
            verbatim as the YAML text. When it is `None` or empty, no
            frontmatter block is written at all.
        style: Delimiters and per-line prefix to use for the frontmatter block.
        key_sort: Optional key-ordering function passed to `to_yaml_string`.
        make_parents: Create missing parent directories of `path` first.

    Raises:
        Any exception from serialization or file I/O is re-raised after the
        temporary file has been removed.
    """
    # Serialize only when there is something to write. An absent or empty
    # metadata value produces no frontmatter block, so it is never serialized.
    frontmatter_str = ""
    if metadata:
        if isinstance(metadata, str):
            frontmatter_str = metadata
        else:
            frontmatter_str = to_yaml_string(metadata, key_sort=key_sort)

    path = Path(path)
    if make_parents:
        path.parent.mkdir(parents=True, exist_ok=True)

    tmp_path = f"{path}.fmf.write.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            if metadata:
                f.write(style.start + "\n")
                for line in frontmatter_str.splitlines():
                    f.write(style.prefix + line + "\n")
                f.write(style.end + "\n")

            f.write(content)
        os.replace(tmp_path, path)
    except Exception:
        # Best-effort cleanup of the temporary file; it may not exist yet.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        raise
114
+
115
+
116
def fmf_read(path: Path | str) -> Tuple[str, Optional[Metadata]]:
    """
    Load a UTF-8 text file and return `(content, metadata)`.

    The file is split into body and raw frontmatter by `fmf_read_raw`, which
    reads the whole file into memory. The raw frontmatter is then parsed as YAML.
    `metadata` is `None` when the file has no frontmatter or the frontmatter is
    empty.

    Raises:
        FmFormatError: If the frontmatter is not valid YAML, or if it parses to
            something other than a dict.
    """
    body, raw_metadata = fmf_read_raw(path)
    if not raw_metadata:
        return body, None

    try:
        parsed = from_yaml_string(raw_metadata)
    except YAMLError as e:
        raise FmFormatError(f"Error parsing YAML metadata: `{path}`: {e}") from e

    if not isinstance(parsed, dict):
        raise FmFormatError(f"Invalid metadata type: {type(parsed)}")

    return body, cast(Metadata, parsed)
134
+
135
+
136
def fmf_read_raw(path: Path | str) -> Tuple[str, Optional[str]]:
    """
    Return `(content, raw_metadata)` for the file, both as strings.

    `raw_metadata` is the unparsed frontmatter text, or `None` when the file has
    no frontmatter. `content` is everything after the frontmatter block.
    """
    raw_metadata, body_start = fmf_read_frontmatter_raw(path)

    with open(path, "r", encoding="utf-8") as src:
        # Skip the frontmatter block; the offset is 0 when there is none.
        src.seek(body_start)
        body = src.read()

    return body, raw_metadata
147
+
148
+
149
def fmf_read_frontmatter_raw(path: Path | str) -> Tuple[Optional[str], int]:
    """
    Read only the frontmatter block of a file, without parsing it.

    The first line of the file is compared against the opening delimiter of
    every `FmStyle` member, so all supported styles are auto-detected. Metadata
    lines are then collected until the closing delimiter of that style, and the
    style's line prefix is stripped from each of them. The body of the file is
    not read into memory.

    Returns:
        `(metadata_str, offset)`, where `offset` is the position (as reported by
        `tell()` on a UTF-8 text handle) at which the body content starts.
        Returns `(None, 0)` when the file is empty or does not start with a
        recognized opening delimiter.

    Raises:
        FmFormatError: If an opening delimiter is found but the matching closing
            delimiter never appears.
    """
    with open(path, "r", encoding="utf-8") as f:
        first_line = f.readline().rstrip()

        # Exact match against each style's opening line. The opening lines are
        # all distinct (e.g. `---` vs `----`), so at most one style matches.
        style = next((s for s in FmStyle if first_line == s.start), None)
        if style is None:
            # Empty file or no recognized frontmatter.
            return None, 0

        metadata_lines: List[str] = []
        # Use readline() rather than iterating the file object: iterating a text
        # file disables tell(), which is needed for the returned offset.
        while True:
            line = f.readline()
            if not line:
                break

            if line.rstrip() == style.end:
                metadata_str = "".join(style.strip_prefix(mline) for mline in metadata_lines)
                return metadata_str, f.tell()

            metadata_lines.append(line)

    # End of file was reached without seeing the closing delimiter.
    raise FmFormatError(
        f"Delimiter `{style.end}` for end of frontmatter not found: `{(path)}`"
    )
192
+
193
+
194
def fmf_strip_frontmatter(path: Path | str) -> None:
    """
    Remove the frontmatter block from a file, rewriting the file in place.

    The body is streamed into a temporary sibling file
    (`<path>.fmf.strip.tmp`), which then replaces the original, so the body is
    never held in memory as a whole. A file without frontmatter is left
    untouched.
    """
    _, body_start = fmf_read_frontmatter_raw(path)
    if body_start <= 0:
        # No frontmatter was found; nothing to strip.
        return

    tmp_path = f"{path}.fmf.strip.tmp"
    try:
        with open(path, "r", encoding="utf-8") as src:
            with open(tmp_path, "w", encoding="utf-8") as dst:
                src.seek(body_start)
                shutil.copyfileobj(src, dst)
        os.replace(tmp_path, path)
    except Exception as exc:
        # Best-effort removal of the temporary file before re-raising.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        raise exc
216
+
217
+
218
def fmf_insert_frontmatter(
    path: Path | str,
    metadata: Optional[Metadata | str],
    fm_style: FmStyle = FmStyle.yaml,
    key_sort: Optional[KeySort] = None,
) -> None:
    """
    Insert metadata as frontmatter at the top of an existing file, replacing
    any frontmatter the file already has.

    The new file is assembled in a temporary sibling (`<path>.fmf.insert.tmp`)
    and then moved over `path`. The body is streamed across rather than loaded
    into memory.

    Args:
        path: File to modify in place.
        metadata: A dict that is serialized to YAML, or a string that is used
            verbatim as the YAML text. When it is `None`, the file is left
            untouched.
        fm_style: Delimiters and per-line prefix for the new frontmatter block.
        key_sort: Optional key-ordering function passed to `to_yaml_string`.

    Raises:
        Any exception from reading the existing frontmatter or from file I/O is
        re-raised after the temporary file has been removed.
    """
    if metadata is None:
        return

    if isinstance(metadata, str):
        frontmatter_str = metadata
    else:
        frontmatter_str = to_yaml_string(metadata, key_sort=key_sort)

    # Prepare the new frontmatter block: start line, prefixed metadata, end line.
    frontmatter_lines = [fm_style.start + "\n"]
    if frontmatter_str:
        frontmatter_lines.extend(
            fm_style.prefix + line + "\n" for line in frontmatter_str.splitlines()
        )
    frontmatter_lines.append(fm_style.end + "\n")

    tmp_path = f"{path}.fmf.insert.tmp"

    try:
        # Determine where any existing frontmatter ends (0 if there is none).
        _, offset = fmf_read_frontmatter_raw(path)

        with open(tmp_path, "w", encoding="utf-8") as temp_file:
            temp_file.writelines(frontmatter_lines)

            with open(path, "r", encoding="utf-8") as original_file:
                original_file.seek(offset)
                shutil.copyfileobj(original_file, temp_file)

        os.replace(tmp_path, path)
    except Exception:
        # Best-effort cleanup of the temporary file; it may not exist yet.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        raise
@@ -0,0 +1,19 @@
1
+ from typing import Any, Callable, List, Tuple, TypeVar
2
+
3
# Type of the keys being ordered.
T = TypeVar("T")


def custom_key_sort(priority_keys: List[T]) -> Callable[[T], Any]:
    """
    Build a sort-key function that places the keys listed in `priority_keys` first, in
    the order given there, and every other key after them in natural order.
    """

    def sort_func(key: T) -> Tuple[float, T]:
        # Listed keys rank by their position in the list; all others share an infinite
        # rank, so the key itself decides their relative order.
        try:
            rank = float(priority_keys.index(key))
        except ValueError:
            rank = float("inf")
        return (rank, key)

    return sort_func
@@ -0,0 +1,132 @@
1
+ """
2
+ YAML file storage. Wraps ruamel.yaml with a few extra features.
3
+ """
4
+
5
+ import os
6
+ from io import StringIO
7
+ from typing import Any, Callable, Dict, Optional, TextIO, Type
8
+
9
+ from ruamel.yaml import Representer, YAML
10
+
11
# A key function applied to mapping keys to decide their output order.
KeySort = Callable[[str], Any]


def none_or_empty_dict(val: Any) -> bool:
    """
    True for `None` and for any value that compares equal to an empty dict.
    """
    if val is None:
        return True
    return val == {}
16
+
17
+
18
# Representers registered here are installed on every instance built by `new_yaml()`.
_default_representers: Dict[Type[Any], Callable[[Representer, Any], Any]] = {}


def add_default_yaml_representer(type: Type[Any], represent: Callable[[Representer, Any], Any]):
    """
    Register `represent` as the default representer for values of the given type,
    replacing any representer previously registered for that type.
    """
    # The registry is mutated in place, never rebound, so no `global` statement is needed.
    _default_representers[type] = represent
27
+
28
+
29
def new_yaml(
    key_sort: Optional[KeySort] = None,
    suppress_vals: Optional[Callable[[Any], bool]] = none_or_empty_dict,
    stringify_unknown: bool = False,
    typ: str = "safe",
) -> YAML:
    """
    Build a `YAML` instance with this module's output conventions: block-style
    mappings, `|` block scalars for strings containing newlines, mapping values for
    which `suppress_vals` returns true omitted, and keys ordered by `key_sort` when
    one is given.

    Set `stringify_unknown` when only pretty-printing, so that values of unexpected
    types are written as `str(value)` instead of raising RepresenterError.

    For input, `typ="safe"` is safest. For output, consider `typ="rt"` for better
    control of string formatting (e.g. style of long strings).
    """
    yaml = YAML(typ=typ)
    yaml.default_flow_style = False  # Block style dictionaries.

    should_drop = suppress_vals if suppress_vals else (lambda _val: False)

    # Omit suppressed values (None and {} by default) and apply key_sort, if given.
    def represent_dict(dumper, data):
        entries = data.items()
        if key_sort:
            entries = [(k, data[k]) for k in sorted(data.keys(), key=key_sort)]
        return dumper.represent_dict({k: v for k, v in entries if not should_drop(v)})

    # Strings spanning several lines are written as `|` block scalars.
    def represent_str(dumper, data):
        block_style = "|" if "\n" in data else None
        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=block_style)

    representer = yaml.representer
    representer.add_representer(dict, represent_dict)
    representer.add_representer(str, represent_str)

    # Registered after dict/str, so a default representer for those types takes over.
    for py_type, default_repr in _default_representers.items():
        representer.add_representer(py_type, default_repr)

    if stringify_unknown:

        def represent_unknown(dumper, data):
            return dumper.represent_str(str(data))

        # NOTE(review): registering under None is presumably the catch-all for types
        # with no representer of their own; confirm against ruamel.yaml.
        representer.add_representer(None, represent_unknown)

    if key_sort:
        # NOTE(review): presumably stops ruamel.yaml from re-sorting the keys that
        # represent_dict has already ordered; confirm against ruamel.yaml.
        representer.sort_base_mapping_type_on_output = False

    return yaml
79
+
80
+
81
def from_yaml_string(yaml_string: str) -> Any:
    """
    Parse YAML text and return the resulting Python object, using the default
    `new_yaml()` configuration.
    """
    parser = new_yaml()
    return parser.load(yaml_string)
86
+
87
+
88
def read_yaml_file(filename: str) -> Any:
    """
    Load the YAML file at `filename` and return the resulting Python object, using
    the default `new_yaml()` configuration.

    The file is decoded as UTF-8, the same encoding `write_yaml_file` writes.
    """
    # An explicit encoding keeps reads independent of the platform's default encoding;
    # without it, a file written as UTF-8 could be misdecoded on some systems.
    with open(filename, "r", encoding="utf-8") as f:
        return new_yaml().load(f)
94
+
95
+
96
def to_yaml_string(
    value: Any, key_sort: Optional[KeySort] = None, stringify_unknown: bool = False
) -> str:
    """
    Serialize `value` to YAML text, using a round-trip (`typ="rt"`) instance from
    `new_yaml` configured with the given `key_sort` and `stringify_unknown`.
    """
    buffer = StringIO()
    emitter = new_yaml(key_sort=key_sort, stringify_unknown=stringify_unknown, typ="rt")
    emitter.dump(value, buffer)
    return buffer.getvalue()
105
+
106
+
107
def dump_yaml(
    value: Any, stream: TextIO, key_sort: Optional[KeySort] = None, stringify_unknown: bool = False
):
    """
    Serialize `value` as YAML onto the open text `stream`, using a round-trip
    (`typ="rt"`) instance from `new_yaml` configured with the given `key_sort` and
    `stringify_unknown`.
    """
    emitter = new_yaml(key_sort=key_sort, stringify_unknown=stringify_unknown, typ="rt")
    emitter.dump(value, stream)
114
+
115
+
116
def write_yaml_file(
    value: Any, filename: str, key_sort: Optional[KeySort] = None, stringify_unknown: bool = False
):
    """
    Serialize `value` as YAML into `filename`. The output is first written to a
    temporary file next to the target and then moved over it with `os.replace`; the
    temporary file is removed if anything fails, and the error is re-raised.
    """
    scratch = f"{filename}.yml.tmp"  # Same directory with a temporary suffix.
    try:
        with open(scratch, "w", encoding="utf-8") as out:
            dump_yaml(value, out, key_sort, stringify_unknown=stringify_unknown)
        os.replace(scratch, filename)
    except Exception as e:
        # Best-effort cleanup of the scratch file before re-raising.
        try:
            os.remove(scratch)
        except FileNotFoundError:
            pass
        raise e
@@ -0,0 +1,73 @@
1
+ [tool.poetry]
2
+ name = "frontmatter-format"
3
+ # Keep this a dev version, as the dynamic versioning plugin is used for actual release versions:
4
+ version = "0.0.0.dev"
5
+ description = "A format for YAML frontmatter on any file."
6
+ authors = ["Joshua Levy <joshua@cal.berkeley.edu>"]
7
+ readme = "README.md"
8
+ license = "MIT"
9
+ repository = "https://github.com/jlevy/frontmatter-format"
10
+
11
+ [tool.poetry.dependencies]
12
+ python = "^3.10"
13
+ ruamel-yaml = "^0.18.6"
14
+
15
+ [tool.poetry.group.dev.dependencies]
16
+ black = "^24.10.0"
17
+ pytest = "^8.3.3"
18
+ ruff = "^0.4.10"
19
+ usort = "^1.0.8.post1"
20
+ mypy = "^1.13.0"
21
+ codespell = "^2.3.0"
22
+ rich = "^13.9.3"
23
+
24
# Build through the dynamic-versioning backend so that PEP 517 builds (pip,
# `python -m build`) apply the [tool.poetry-dynamic-versioning] settings as well,
# not only `poetry build` run with the plugin installed.
[build-system]
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
build-backend = "poetry_dynamic_versioning.backend"
27
+
28
+ # Auto-generated version in build, based on tag or commit.
29
+ # https://sam.hooke.me/note/2023/08/poetry-automatically-generated-package-version-from-git-commit/
30
+ [tool.poetry-dynamic-versioning]
31
+ enable = true
32
+ vcs = "git"
33
pattern = '^v?(?P<base>\d+\.\d+\.\d+)(-?((?P<stage>[a-zA-Z]+)\.?(?P<revision>\d+)?))?'
34
+ format-jinja = """
35
+ {%- if distance == 0 -%}
36
+ {{- base -}}
37
+ {%- else -%}
38
+ {{- base }}.dev{{ distance }}+{{commit}}
39
+ {%- endif -%}
40
+ """
41
+
42
+ [tool.poetry.scripts]
43
+ lint = "devtools.lint:main"
44
+ test = "pytest:main"
45
+
46
+ [tool.black]
47
+ line-length = 100
48
+
49
+ [tool.ruff]
50
+ line-length = 100
51
+
52
+ [tool.ruff.lint]
53
+ ignore = ["E402", "E731", "E712"]
54
+
55
+ [tool.mypy]
56
+ disable_error_code = [
57
+ "import-untyped",
58
+ ]
59
+
60
+ [tool.codespell]
61
+ # ignore-words-list = "foo,bar"
62
+ # skip = "foo.py,bar.py"
63
+
64
+ [tool.pytest.ini_options]
65
+ python_files = ["*.py"]
66
+ python_classes = ["Test*"]
67
+ python_functions = ["test_*"]
68
+ testpaths = [
69
+ "frontmatter_format",
70
+ "tests",
71
+ ]
72
+ norecursedirs = []
73
+ filterwarnings = []