jmd-format 0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. jmd_format-0.4/LICENSE +28 -0
  2. jmd_format-0.4/PKG-INFO +166 -0
  3. jmd_format-0.4/README.md +113 -0
  4. jmd_format-0.4/jmd/__init__.py +213 -0
  5. jmd_format-0.4/jmd/__main__.py +6 -0
  6. jmd_format-0.4/jmd/_cli.py +374 -0
  7. jmd_format-0.4/jmd/_cparser.c +1526 -0
  8. jmd_format-0.4/jmd/_cparser.pyi +6 -0
  9. jmd_format-0.4/jmd/_cserializer.c +687 -0
  10. jmd_format-0.4/jmd/_cserializer.pyi +4 -0
  11. jmd_format-0.4/jmd/_delete.py +85 -0
  12. jmd_format-0.4/jmd/_error.py +116 -0
  13. jmd_format-0.4/jmd/_html.py +142 -0
  14. jmd_format-0.4/jmd/_parser.py +589 -0
  15. jmd_format-0.4/jmd/_query.py +436 -0
  16. jmd_format-0.4/jmd/_scalars.py +145 -0
  17. jmd_format-0.4/jmd/_schema.py +514 -0
  18. jmd_format-0.4/jmd/_serializer.py +189 -0
  19. jmd_format-0.4/jmd/_streaming.py +357 -0
  20. jmd_format-0.4/jmd/_tokenizer.py +109 -0
  21. jmd_format-0.4/jmd/py.typed +0 -0
  22. jmd_format-0.4/jmd/xml.py +453 -0
  23. jmd_format-0.4/jmd_format.egg-info/PKG-INFO +166 -0
  24. jmd_format-0.4/jmd_format.egg-info/SOURCES.txt +39 -0
  25. jmd_format-0.4/jmd_format.egg-info/dependency_links.txt +1 -0
  26. jmd_format-0.4/jmd_format.egg-info/entry_points.txt +2 -0
  27. jmd_format-0.4/jmd_format.egg-info/requires.txt +1 -0
  28. jmd_format-0.4/jmd_format.egg-info/top_level.txt +1 -0
  29. jmd_format-0.4/pyproject.toml +62 -0
  30. jmd_format-0.4/setup.cfg +4 -0
  31. jmd_format-0.4/setup.py +31 -0
  32. jmd_format-0.4/tests/test_delete.py +59 -0
  33. jmd_format-0.4/tests/test_error.py +92 -0
  34. jmd_format-0.4/tests/test_modes.py +56 -0
  35. jmd_format-0.4/tests/test_parser.py +266 -0
  36. jmd_format-0.4/tests/test_query.py +198 -0
  37. jmd_format-0.4/tests/test_roundtrip.py +94 -0
  38. jmd_format-0.4/tests/test_schema.py +243 -0
  39. jmd_format-0.4/tests/test_serializer.py +125 -0
  40. jmd_format-0.4/tests/test_streaming.py +99 -0
  41. jmd_format-0.4/tests/test_xml.py +347 -0
jmd_format-0.4/LICENSE ADDED
@@ -0,0 +1,28 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Andreas Ostermeyer <andreas@ostermeyer.de>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ---
24
+
25
+ This license applies to the benchmark code only (the `benchmark/` and
26
+ `benchmark_results/` directories). It does not grant any rights to the JMD
27
+ format specification, which is licensed separately under CC BY-NC-SA 4.0.
28
+ See LICENSE for details.
@@ -0,0 +1,166 @@
1
+ Metadata-Version: 2.4
2
+ Name: jmd-format
3
+ Version: 0.4
4
+ Summary: JMD (JSON Markdown) — structured data format for LLM-driven infrastructure
5
+ Author-email: Andreas Ostermeyer <andreas@ostermeyer.de>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Andreas Ostermeyer <andreas@ostermeyer.de>
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ ---
29
+
30
+ This license applies to the benchmark code only (the `benchmark/` and
31
+ `benchmark_results/` directories). It does not grant any rights to the JMD
32
+ format specification, which is licensed separately under CC BY-NC-SA 4.0.
33
+ See LICENSE for details.
34
+
35
+ Project-URL: Homepage, https://github.com/ostermeyer/jmd-spec
36
+ Project-URL: Repository, https://github.com/ostermeyer/jmd-impl
37
+ Keywords: jmd,llm,structured-data,json,markdown,mcp
38
+ Classifier: Development Status :: 4 - Beta
39
+ Classifier: Intended Audience :: Developers
40
+ Classifier: License :: OSI Approved :: MIT License
41
+ Classifier: Programming Language :: Python :: 3
42
+ Classifier: Programming Language :: Python :: 3.10
43
+ Classifier: Programming Language :: Python :: 3.11
44
+ Classifier: Programming Language :: Python :: 3.12
45
+ Classifier: Programming Language :: Python :: 3.13
46
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
47
+ Classifier: Topic :: Text Processing :: Markup
48
+ Requires-Python: >=3.10
49
+ Description-Content-Type: text/markdown
50
+ License-File: LICENSE
51
+ Requires-Dist: lxml>=5.0
52
+ Dynamic: license-file
53
+
54
+ # jmd-format — Python Reference Implementation
55
+
56
+ Python reference implementation of the [JMD specification](https://github.com/ostermeyer/jmd-spec) (v0.4). Includes a C-accelerated parser and serializer, plus lossless XML↔JMD conversion.
57
+
58
+ ## Installation
59
+
60
+ Install the latest version directly from GitHub:
61
+
62
+ ```bash
63
+ pip install git+https://github.com/ostermeyer/jmd-impl.git
64
+ ```
65
+
66
+ Or pin a specific release:
67
+
68
+ ```bash
69
+ pip install git+https://github.com/ostermeyer/jmd-impl.git@v0.4
70
+ ```
71
+
72
+ Pre-built wheels for Linux, macOS, and Windows are attached to each
73
+ [GitHub Release](https://github.com/ostermeyer/jmd-impl/releases) and can be
74
+ installed directly:
75
+
76
+ ```bash
77
+ pip install https://github.com/ostermeyer/jmd-impl/releases/download/v0.4/jmd_format-0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
78
+ ```
79
+
80
+ The C extensions are built automatically during installation if a C compiler is available. If not, the pure-Python fallback is used transparently.
81
+
82
+ ## Quick Start
83
+
84
+ ```python
85
+ from jmd import parse, serialize
86
+
87
+ data = parse("""
88
+ # Order
89
+ id: 42
90
+ status: pending
91
+
92
+ ## customer
93
+ name: Anna Müller
94
+ email: anna@example.com
95
+ """)
96
+
97
+ print(data)
98
+ # {'id': 42, 'status': 'pending', 'customer': {'name': 'Anna Müller', 'email': 'anna@example.com'}}
99
+
100
+ print(serialize(data, label="Order"))
101
+ ```
102
+
103
+ ## Document Modes
104
+
105
+ ```python
106
+ from jmd import jmd_mode, JMDQueryParser, JMDSchemaParser, JMDDeleteParser, parse_error
107
+
108
+ # Detect mode without full parse
109
+ mode = jmd_mode(source) # 'data' | 'query' | 'schema' | 'delete'
110
+
111
+ # Query by Example (#?)
112
+ query = JMDQueryParser().parse("#? Order\nstatus: pending")
113
+
114
+ # Schema (#!)
115
+ schema = JMDSchemaParser().parse("#! Order\nid: integer readonly\nstatus: string")
116
+
117
+ # Delete (#-)
118
+ delete = JMDDeleteParser().parse("#- Order\nid: 42")
119
+
120
+ # Error (# Error)
121
+ error = parse_error("# Error\nstatus: 404\ncode: not_found\nmessage: Not found")
122
+ ```
123
+
124
+ ## Streaming
125
+
126
+ ```python
127
+ from jmd import jmd_stream
128
+
129
+ for event in jmd_stream(source):
130
+ print(event)
131
+ ```
132
+
133
+ ## XML Mapping
134
+
135
+ Lossless conversion between data XML and JMD (requires `lxml`):
136
+
137
+ ```python
138
+ from jmd.xml import xml_to_jmd, jmd_to_xml
139
+
140
+ jmd_source = xml_to_jmd(xml_bytes_or_str) # XML → JMD string
141
+ xml_output = jmd_to_xml(jmd_source) # JMD → XML bytes
142
+ ```
143
+
144
+ Targets data XML — OOXML (WordprocessingML, DrawingML, SpreadsheetML),
145
+ SOAP, XBRL, XRechnung, and similar formats. Mixed-content XML (ODF, XHTML)
146
+ is out of scope.
147
+
148
+ See the [JMD over XML companion specification](https://github.com/ostermeyer/jmd-spec/blob/main/jmd-over-xml.md) for the full mapping rules.
149
+
150
+ ## C Extensions
151
+
152
+ Build manually if needed:
153
+
154
+ ```bash
155
+ python setup.py build_ext --inplace
156
+ ```
157
+
158
+ ## Specification
159
+
160
+ See [jmd-spec](https://github.com/ostermeyer/jmd-spec) for the full format specification, benchmark results, and design documentation.
161
+
162
+ ## License
163
+
164
+ Licensed under the MIT License. See [LICENSE](LICENSE).
165
+
166
+ The JMD format specification is licensed separately under CC BY-NC-SA 4.0.
@@ -0,0 +1,113 @@
1
+ # jmd-format — Python Reference Implementation
2
+
3
+ Python reference implementation of the [JMD specification](https://github.com/ostermeyer/jmd-spec) (v0.4). Includes a C-accelerated parser and serializer, plus lossless XML↔JMD conversion.
4
+
5
+ ## Installation
6
+
7
+ Install the latest version directly from GitHub:
8
+
9
+ ```bash
10
+ pip install git+https://github.com/ostermeyer/jmd-impl.git
11
+ ```
12
+
13
+ Or pin a specific release:
14
+
15
+ ```bash
16
+ pip install git+https://github.com/ostermeyer/jmd-impl.git@v0.4
17
+ ```
18
+
19
+ Pre-built wheels for Linux, macOS, and Windows are attached to each
20
+ [GitHub Release](https://github.com/ostermeyer/jmd-impl/releases) and can be
21
+ installed directly:
22
+
23
+ ```bash
24
+ pip install https://github.com/ostermeyer/jmd-impl/releases/download/v0.4/jmd_format-0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
25
+ ```
26
+
27
+ The C extensions are built automatically during installation if a C compiler is available. If not, the pure-Python fallback is used transparently.
28
+
29
+ ## Quick Start
30
+
31
+ ```python
32
+ from jmd import parse, serialize
33
+
34
+ data = parse("""
35
+ # Order
36
+ id: 42
37
+ status: pending
38
+
39
+ ## customer
40
+ name: Anna Müller
41
+ email: anna@example.com
42
+ """)
43
+
44
+ print(data)
45
+ # {'id': 42, 'status': 'pending', 'customer': {'name': 'Anna Müller', 'email': 'anna@example.com'}}
46
+
47
+ print(serialize(data, label="Order"))
48
+ ```
49
+
50
+ ## Document Modes
51
+
52
+ ```python
53
+ from jmd import jmd_mode, JMDQueryParser, JMDSchemaParser, JMDDeleteParser, parse_error
54
+
55
+ # Detect mode without full parse
56
+ mode = jmd_mode(source) # 'data' | 'query' | 'schema' | 'delete'
57
+
58
+ # Query by Example (#?)
59
+ query = JMDQueryParser().parse("#? Order\nstatus: pending")
60
+
61
+ # Schema (#!)
62
+ schema = JMDSchemaParser().parse("#! Order\nid: integer readonly\nstatus: string")
63
+
64
+ # Delete (#-)
65
+ delete = JMDDeleteParser().parse("#- Order\nid: 42")
66
+
67
+ # Error (# Error)
68
+ error = parse_error("# Error\nstatus: 404\ncode: not_found\nmessage: Not found")
69
+ ```
70
+
71
+ ## Streaming
72
+
73
+ ```python
74
+ from jmd import jmd_stream
75
+
76
+ for event in jmd_stream(source):
77
+ print(event)
78
+ ```
79
+
80
+ ## XML Mapping
81
+
82
+ Lossless conversion between data XML and JMD (requires `lxml`):
83
+
84
+ ```python
85
+ from jmd.xml import xml_to_jmd, jmd_to_xml
86
+
87
+ jmd_source = xml_to_jmd(xml_bytes_or_str) # XML → JMD string
88
+ xml_output = jmd_to_xml(jmd_source) # JMD → XML bytes
89
+ ```
90
+
91
+ Targets data XML — OOXML (WordprocessingML, DrawingML, SpreadsheetML),
92
+ SOAP, XBRL, XRechnung, and similar formats. Mixed-content XML (ODF, XHTML)
93
+ is out of scope.
94
+
95
+ See the [JMD over XML companion specification](https://github.com/ostermeyer/jmd-spec/blob/main/jmd-over-xml.md) for the full mapping rules.
96
+
97
+ ## C Extensions
98
+
99
+ Build manually if needed:
100
+
101
+ ```bash
102
+ python setup.py build_ext --inplace
103
+ ```
104
+
105
+ ## Specification
106
+
107
+ See [jmd-spec](https://github.com/ostermeyer/jmd-spec) for the full format specification, benchmark results, and design documentation.
108
+
109
+ ## License
110
+
111
+ Licensed under the MIT License. See [LICENSE](LICENSE).
112
+
113
+ The JMD format specification is licensed separately under CC BY-NC-SA 4.0.
@@ -0,0 +1,213 @@
1
+ """JMD (JSON Markdown) — Parser, Serializer, and Tooling.
2
+
3
+ Implements JMD Specification v0.4 — heading-scope model with blockquotes
4
+ and indentation continuation.
5
+
6
+ Usage:
7
+ python -m jmd # demo + roundtrip test
8
+ python -m jmd to-json input.jmd # pretty-print JSON
9
+ python -m jmd from-json input.json # convert JSON to JMD
10
+ python -m jmd render input.jmd # render HTML to stdout
11
+ python -m jmd roundtrip input.jmd # JMD -> JSON -> JMD, assert lossless
12
+
13
+ As a library:
14
+ from jmd import parse, serialize
15
+ data = parse(text)
16
+ jmd = serialize(data, label="Order")
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import Any
22
+
23
+ from ._cli import (
24
+ SAMPLE_JMD,
25
+ SAMPLE_QUERY,
26
+ SAMPLE_RECORDS,
27
+ SAMPLE_SCHEMA,
28
+ dict_to_jmd,
29
+ jmd_parse_schema,
30
+ jmd_query,
31
+ jmd_schema_to_json_schema,
32
+ jmd_to_dict,
33
+ jmd_to_json,
34
+ json_schema_to_jmd_schema,
35
+ json_to_jmd,
36
+ )
37
+ from ._delete import JMDDelete, JMDDeleteParser
38
+ from ._error import JMDError, JMDErrorItem, is_error_document, parse_error
39
+ from ._html import JMDHTMLRenderer
40
+ from ._parser import JMDParser
41
+ from ._query import (
42
+ Condition,
43
+ JMDQuery,
44
+ JMDQueryExecutor,
45
+ JMDQueryParser,
46
+ QueryArray,
47
+ QueryField,
48
+ QueryObject,
49
+ )
50
+ from ._scalars import parse_key, parse_scalar, quote_key, serialize_scalar
51
+ from ._schema import (
52
+ JMDSchema,
53
+ JMDSchemaParser,
54
+ SchemaArray,
55
+ SchemaField,
56
+ SchemaObject,
57
+ SchemaRef,
58
+ )
59
+ from ._serializer import JMDSerializer
60
+ from ._streaming import StreamEvent, jmd_stream
61
+ from ._tokenizer import Line, tokenize
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # C extension detection — done once at import time, not on every call
65
+ # ---------------------------------------------------------------------------
66
+
67
+ try:
68
+ from ._cparser import parse as _c_parse
69
+ _HAS_CPARSER: bool = True
70
+ except ImportError:
71
+ _HAS_CPARSER = False
72
+
73
+ try:
74
+ from ._cserializer import serialize as _c_serialize
75
+ _HAS_CSERIALIZER: bool = True
76
+ except ImportError:
77
+ _HAS_CSERIALIZER = False
78
+
79
+
80
+ # ---------------------------------------------------------------------------
81
+ # Public API — C-accelerated by default, Python fallback
82
+ # ---------------------------------------------------------------------------
83
+
84
def parse(source: str) -> Any:
    """Turn JMD document text into the Python value it describes.

    Dispatches to the C extension parser when it was importable at module
    load time (``_HAS_CPARSER``); otherwise a fresh pure-Python
    :class:`JMDParser` handles the document.

    Args:
        source: Complete JMD document text.

    Returns:
        A Python ``dict``, ``list``, or scalar value.
    """
    # Guard clause: no C extension available, use the Python implementation.
    if not _HAS_CPARSER:
        return JMDParser().parse(source)
    return _c_parse(source)
99
+
100
+
101
def serialize(data: Any, label: str = "Document") -> str:
    """Render a Python value as a JMD document string.

    Dispatches to the C extension serializer when it was importable at
    module load time (``_HAS_CSERIALIZER``); otherwise a fresh pure-Python
    :class:`JMDSerializer` produces the output.

    Args:
        data: Python ``dict``, ``list``, or scalar value.
        label: Root heading label.

    Returns:
        A JMD document string.
    """
    # Guard clause: no C extension available, use the Python implementation.
    if not _HAS_CSERIALIZER:
        return JMDSerializer().serialize(data, label=label)
    # The C result is passed through str() before being returned.
    rendered = _c_serialize(data, label)
    return str(rendered)
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Document mode detection
121
+ # ---------------------------------------------------------------------------
122
+
123
+ _MODE_PREFIXES = {
124
+ "? ": "query",
125
+ "! ": "schema",
126
+ "- ": "delete",
127
+ }
128
+
129
+
130
def jmd_mode(source: str) -> str:
    """Report which document mode a JMD source string is written in.

    The source is tokenized and the first line whose ``heading_depth`` is 1
    decides the mode: its content is checked against the markers in
    ``_MODE_PREFIXES``. The document body is not parsed into a value.

    Args:
        source: JMD document text.

    Returns:
        One of ``'data'``, ``'query'``, ``'schema'``, or ``'delete'``.
        ``'data'`` is returned when no depth-1 heading exists or when the
        heading carries none of the mode markers (which includes
        ``# Error`` documents — they are data documents with a reserved
        label).
    """
    # Only the first depth-1 heading matters; later headings are ignored.
    root_heading = next(
        (tok for tok in tokenize(source) if tok.heading_depth == 1),
        None,
    )
    if root_heading is None:
        return "data"

    for marker, mode_name in _MODE_PREFIXES.items():
        if root_heading.content.startswith(marker):
            return mode_name
    return "data"
152
+
153
+
154
+ __all__ = [
155
+ # Tokenizer
156
+ "Line",
157
+ "tokenize",
158
+ # Scalars
159
+ "parse_scalar",
160
+ "parse_key",
161
+ "serialize_scalar",
162
+ "quote_key",
163
+ # Parser & Serializer (Python classes)
164
+ "JMDParser",
165
+ "JMDSerializer",
166
+ # Top-level API (C-accelerated by default)
167
+ "parse",
168
+ "serialize",
169
+ # Mode detection
170
+ "jmd_mode",
171
+ # HTML
172
+ "JMDHTMLRenderer",
173
+ # Streaming
174
+ "StreamEvent",
175
+ "jmd_stream",
176
+ # QBE
177
+ "Condition",
178
+ "QueryField",
179
+ "QueryObject",
180
+ "QueryArray",
181
+ "JMDQuery",
182
+ "JMDQueryParser",
183
+ "JMDQueryExecutor",
184
+ # Schema
185
+ "SchemaField",
186
+ "SchemaObject",
187
+ "SchemaArray",
188
+ "SchemaRef",
189
+ "JMDSchema",
190
+ "JMDSchemaParser",
191
+ # Delete
192
+ "JMDDelete",
193
+ "JMDDeleteParser",
194
+ # Error
195
+ "JMDError",
196
+ "JMDErrorItem",
197
+ "is_error_document",
198
+ "parse_error",
199
+ # Convenience functions
200
+ "jmd_to_json",
201
+ "json_to_jmd",
202
+ "jmd_to_dict",
203
+ "dict_to_jmd",
204
+ "jmd_query",
205
+ "jmd_parse_schema",
206
+ "jmd_schema_to_json_schema",
207
+ "json_schema_to_jmd_schema",
208
+ # Sample data
209
+ "SAMPLE_JMD",
210
+ "SAMPLE_QUERY",
211
+ "SAMPLE_SCHEMA",
212
+ "SAMPLE_RECORDS",
213
+ ]
@@ -0,0 +1,6 @@
1
+ """Allow running JMD as ``python -m jmd``."""
2
+
3
+ from ._cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()