org-parser 0.23.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- org_parser-0.23.5/LICENSE +21 -0
- org_parser-0.23.5/PKG-INFO +202 -0
- org_parser-0.23.5/docs/index.md +169 -0
- org_parser-0.23.5/pyproject.toml +160 -0
- org_parser-0.23.5/src/org_parser/__init__.py +116 -0
- org_parser-0.23.5/src/org_parser/_from_source.py +78 -0
- org_parser-0.23.5/src/org_parser/_lang.py +12 -0
- org_parser-0.23.5/src/org_parser/_node.py +81 -0
- org_parser-0.23.5/src/org_parser/_nodes.py +231 -0
- org_parser-0.23.5/src/org_parser/document/__init__.py +16 -0
- org_parser-0.23.5/src/org_parser/document/_body.py +156 -0
- org_parser-0.23.5/src/org_parser/document/_document.py +1133 -0
- org_parser-0.23.5/src/org_parser/document/_heading.py +1653 -0
- org_parser-0.23.5/src/org_parser/document/_loader.py +41 -0
- org_parser-0.23.5/src/org_parser/element/__init__.py +83 -0
- org_parser-0.23.5/src/org_parser/element/_babel.py +172 -0
- org_parser-0.23.5/src/org_parser/element/_block.py +1144 -0
- org_parser-0.23.5/src/org_parser/element/_dirty_list.py +60 -0
- org_parser-0.23.5/src/org_parser/element/_dispatch.py +131 -0
- org_parser-0.23.5/src/org_parser/element/_drawer.py +565 -0
- org_parser-0.23.5/src/org_parser/element/_element.py +460 -0
- org_parser-0.23.5/src/org_parser/element/_keyword.py +397 -0
- org_parser-0.23.5/src/org_parser/element/_list.py +787 -0
- org_parser-0.23.5/src/org_parser/element/_paragraph.py +103 -0
- org_parser-0.23.5/src/org_parser/element/_structure.py +324 -0
- org_parser-0.23.5/src/org_parser/element/_structure_recovery.py +70 -0
- org_parser-0.23.5/src/org_parser/element/_table.py +445 -0
- org_parser-0.23.5/src/org_parser/py.typed +0 -0
- org_parser-0.23.5/src/org_parser/text/__init__.py +63 -0
- org_parser-0.23.5/src/org_parser/text/_inline.py +392 -0
- org_parser-0.23.5/src/org_parser/text/_rich_text.py +659 -0
- org_parser-0.23.5/src/org_parser/time/__init__.py +6 -0
- org_parser-0.23.5/src/org_parser/time/_clock.py +190 -0
- org_parser-0.23.5/src/org_parser/time/_timestamp.py +1037 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kajetan Rzepecki <k@spartan.works>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: org-parser
|
|
3
|
+
Version: 0.23.5
|
|
4
|
+
Summary: A parser for Org Mode documents
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: parsing,tree-sitter,org,org-mode
|
|
8
|
+
Author: Kajetan Rzepecki
|
|
9
|
+
Author-email: k@spartan.works
|
|
10
|
+
Maintainer: Kajetan Rzepecki
|
|
11
|
+
Maintainer-email: k@spartan.works
|
|
12
|
+
Requires-Python: >=3.12,<4.0
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
21
|
+
Classifier: Topic :: Software Development :: Compilers
|
|
22
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Dist: tree-sitter (>=0.25,<1.0)
|
|
25
|
+
Requires-Dist: tree-sitter-org (>=0.23.6)
|
|
26
|
+
Project-URL: Documentation, https://idorobots.github.io/org-parser/
|
|
27
|
+
Project-URL: Homepage, https://github.com/Idorobots/org-parser
|
|
28
|
+
Project-URL: Issue Tracker, https://github.com/Idorobots/org-parser/issues
|
|
29
|
+
Project-URL: Repository, https://github.com/Idorobots/org-parser
|
|
30
|
+
Project-URL: Releases, https://github.com/Idorobots/org-parser/releases
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# Overview
|
|
34
|
+
|
|
35
|
+
This project implements a Python parser for Emacs Org-Mode files
|
|
36
|
+
providing nice semantic nodes in a hierarchical document tree. It's
|
|
37
|
+
based on [this Tree-Sitter
|
|
38
|
+
grammar](https://github.com/Idorobots/tree-sitter-org).
|
|
39
|
+
|
|
40
|
+
`org-parser` was created as an alternative to the wonderful, but
|
|
41
|
+
admittedly incomplete and unmaintained
|
|
42
|
+
[`orgparse`](https://github.com/karlicoss/orgparse/). The main
|
|
43
|
+
improvements on top of `orgparse` are:
|
|
44
|
+
|
|
45
|
+
- *Almost* complete feature set, including markup, blocks, tables and
|
|
46
|
+
more niche constructs such as Babel calls.
|
|
47
|
+
- Fully mutable tree for easy edits that preserves original document
|
|
48
|
+
whitespace & formatting.
|
|
49
|
+
- Better document error handling and reporting, useful for validation.
|
|
50
|
+
|
|
51
|
+
[See the documentation here.](https://idorobots.github.io/org-parser/)
|
|
52
|
+
|
|
53
|
+
# Installation
|
|
54
|
+
|
|
55
|
+
``` bash
|
|
56
|
+
pip install org-parser
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
# Features
|
|
60
|
+
|
|
61
|
+
- Semantic nodes with all the expected attributes
|
|
62
|
+
|
|
63
|
+
``` python
|
|
64
|
+
>>> from org_parser import loads
|
|
65
|
+
>>> doc = loads('''
|
|
66
|
+
... #+TITLE: Document
|
|
67
|
+
... #+AUTHOR: Idorobots
|
|
68
|
+
... :PROPERTIES:
|
|
69
|
+
... :name: value
|
|
70
|
+
... :END:
|
|
71
|
+
... An example document.
|
|
72
|
+
... * Heading 1
|
|
73
|
+
... ** Heading 2 :tag:
|
|
74
|
+
... *** Heading 3
|
|
75
|
+
... ''')
|
|
76
|
+
>>> doc.title
|
|
77
|
+
RichText('Document')
|
|
78
|
+
>>> doc.body
|
|
79
|
+
[BlankLine(), Paragraph(body=RichText('An example document.\n'))]
|
|
80
|
+
>>> doc.body_text
|
|
81
|
+
'\nAn example document.\n'
|
|
82
|
+
>>> doc.properties["name"]
|
|
83
|
+
RichText('value')
|
|
84
|
+
>>> len(doc.all_headings)
|
|
85
|
+
3
|
|
86
|
+
>>> doc.all_headings[1].tags
|
|
87
|
+
['tag']
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
- Structured traversal
|
|
91
|
+
|
|
92
|
+
``` python
|
|
93
|
+
>>> from org_parser import loads
|
|
94
|
+
>>> doc = loads('''
|
|
95
|
+
... * Heading 1
|
|
96
|
+
... ** Heading 2
|
|
97
|
+
... *** Heading 3
|
|
98
|
+
... ''')
|
|
99
|
+
>>> doc.children[0].title
|
|
100
|
+
RichText('Heading 1')
|
|
101
|
+
>>> doc.children[0].children[0].title
|
|
102
|
+
RichText('Heading 2')
|
|
103
|
+
>>> doc.children[0].children[0].parent.title == "Heading 1"
|
|
104
|
+
True
|
|
105
|
+
>>> doc.children[0].children[0].siblings
|
|
106
|
+
[]
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
- Original source whitespace and formatting preserved by default
|
|
110
|
+
|
|
111
|
+
``` python
|
|
112
|
+
>>> from org_parser import loads
|
|
113
|
+
>>> doc = loads('''
|
|
114
|
+
... #+TITLE: Original formatting
|
|
115
|
+
... * Heading 1
|
|
116
|
+
... Indented section
|
|
117
|
+
... remains indented
|
|
118
|
+
... in-tree.
|
|
119
|
+
... ''')
|
|
120
|
+
>>> print(str(doc.children[0]))
|
|
121
|
+
* Heading 1
|
|
122
|
+
Indented section
|
|
123
|
+
remains indented
|
|
124
|
+
in-tree.
|
|
125
|
+
|
|
126
|
+
>>> print(doc.render())
|
|
127
|
+
|
|
128
|
+
#+TITLE: Original formatting
|
|
129
|
+
* Heading 1
|
|
130
|
+
Indented section
|
|
131
|
+
remains indented
|
|
132
|
+
in-tree.
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
- Fully mutable tree allowing dynamic edits
|
|
136
|
+
|
|
137
|
+
``` python
|
|
138
|
+
>>> from org_parser import loads
|
|
139
|
+
>>> from org_parser.element import Paragraph
|
|
140
|
+
>>> doc = loads('''
|
|
141
|
+
... #+TITLE: Document title
|
|
142
|
+
... * Heading 1
|
|
143
|
+
... Body text.
|
|
144
|
+
... ''')
|
|
145
|
+
>>> doc.title
|
|
146
|
+
RichText('Document title')
|
|
147
|
+
>>> doc.title.text = "Another title"
|
|
148
|
+
>>> doc[0].body = [Paragraph.from_source("New *and* improved!")]
|
|
149
|
+
>>> doc[0].title.text = "Improved heading"
|
|
150
|
+
>>> print(doc.render())
|
|
151
|
+
#+TITLE: Another title
|
|
152
|
+
|
|
153
|
+
* Improved heading
|
|
154
|
+
New *and* improved!
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
- Org Table support
|
|
158
|
+
|
|
159
|
+
``` python
|
|
160
|
+
>>> from org_parser import loads
|
|
161
|
+
>>> doc = loads('''
|
|
162
|
+
... |Value|Double|
|
|
163
|
+
... |1| |
|
|
164
|
+
... |2| |
|
|
165
|
+
... |3| |
|
|
166
|
+
... ''')
|
|
167
|
+
>>> for r in doc.body[1].rows[1:]:
|
|
168
|
+
... r[1].text = str(2 * int(r[0].text))
|
|
169
|
+
...
|
|
170
|
+
>>> print(doc.render())
|
|
171
|
+
|
|
172
|
+
| Value | Double |
|
|
173
|
+
| 1 | 2 |
|
|
174
|
+
| 2 | 4 |
|
|
175
|
+
| 3 | 6 |
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
- Rich Text support
|
|
179
|
+
|
|
180
|
+
``` python
|
|
181
|
+
>>> from org_parser.text import RichText
|
|
182
|
+
>>> text = RichText.from_source("Supports *org-mode* /markup/ and inline_{objects}: <2026-03-29>")
|
|
183
|
+
>>> text
|
|
184
|
+
RichText('Supports *org-mode* /markup/ and inline_{objects}: <2026-03-29>')
|
|
185
|
+
>>> text.parts
|
|
186
|
+
[PlainText(text='Supports '), Bold(body=[PlainText(text='org-mode')]), PlainText(text=' '), Italic(body=[PlainText(text='markup')]), PlainText(text=' and inline'), Subscript(body=[PlainText(text='objects')], form='{}'), PlainText(text=': '), Timestamp(is_active=True, start_year=2026, start_month=3, start_day=29), PlainText(text='')]
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
- Error recovery
|
|
190
|
+
|
|
191
|
+
``` python
|
|
192
|
+
>>> from org_parser import loads
|
|
193
|
+
>>> doc = loads('''
|
|
194
|
+
... * Heading
|
|
195
|
+
... SCHEDULED: yesterday
|
|
196
|
+
... ''')
|
|
197
|
+
>>> doc[0].scheduled is None
|
|
198
|
+
True
|
|
199
|
+
>>> doc.errors
|
|
200
|
+
[ParseError(start_point=Point(row=2, column=0), end_point=Point(row=2, column=20), text='SCHEDULED: yesterday')]
|
|
201
|
+
```
|
|
202
|
+
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Overview
|
|
2
|
+
|
|
3
|
+
This project implements a Python parser for Emacs Org-Mode files
|
|
4
|
+
providing nice semantic nodes in a hierarchical document tree. It's
|
|
5
|
+
based on [this Tree-Sitter
|
|
6
|
+
grammar](https://github.com/Idorobots/tree-sitter-org).
|
|
7
|
+
|
|
8
|
+
`org-parser` was created as an alternative to the wonderful, but
|
|
9
|
+
admittedly incomplete and unmaintained
|
|
10
|
+
[`orgparse`](https://github.com/karlicoss/orgparse/). The main
|
|
11
|
+
improvements on top of `orgparse` are:
|
|
12
|
+
|
|
13
|
+
- *Almost* complete feature set, including markup, blocks, tables and
|
|
14
|
+
more niche constructs such as Babel calls.
|
|
15
|
+
- Fully mutable tree for easy edits that preserves original document
|
|
16
|
+
whitespace & formatting.
|
|
17
|
+
- Better document error handling and reporting, useful for validation.
|
|
18
|
+
|
|
19
|
+
[See the documentation here.](https://idorobots.github.io/org-parser/)
|
|
20
|
+
|
|
21
|
+
# Installation
|
|
22
|
+
|
|
23
|
+
``` bash
|
|
24
|
+
pip install org-parser
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
# Features
|
|
28
|
+
|
|
29
|
+
- Semantic nodes with all the expected attributes
|
|
30
|
+
|
|
31
|
+
``` python
|
|
32
|
+
>>> from org_parser import loads
|
|
33
|
+
>>> doc = loads('''
|
|
34
|
+
... #+TITLE: Document
|
|
35
|
+
... #+AUTHOR: Idorobots
|
|
36
|
+
... :PROPERTIES:
|
|
37
|
+
... :name: value
|
|
38
|
+
... :END:
|
|
39
|
+
... An example document.
|
|
40
|
+
... * Heading 1
|
|
41
|
+
... ** Heading 2 :tag:
|
|
42
|
+
... *** Heading 3
|
|
43
|
+
... ''')
|
|
44
|
+
>>> doc.title
|
|
45
|
+
RichText('Document')
|
|
46
|
+
>>> doc.body
|
|
47
|
+
[BlankLine(), Paragraph(body=RichText('An example document.\n'))]
|
|
48
|
+
>>> doc.body_text
|
|
49
|
+
'\nAn example document.\n'
|
|
50
|
+
>>> doc.properties["name"]
|
|
51
|
+
RichText('value')
|
|
52
|
+
>>> len(doc.all_headings)
|
|
53
|
+
3
|
|
54
|
+
>>> doc.all_headings[1].tags
|
|
55
|
+
['tag']
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
- Structured traversal
|
|
59
|
+
|
|
60
|
+
``` python
|
|
61
|
+
>>> from org_parser import loads
|
|
62
|
+
>>> doc = loads('''
|
|
63
|
+
... * Heading 1
|
|
64
|
+
... ** Heading 2
|
|
65
|
+
... *** Heading 3
|
|
66
|
+
... ''')
|
|
67
|
+
>>> doc.children[0].title
|
|
68
|
+
RichText('Heading 1')
|
|
69
|
+
>>> doc.children[0].children[0].title
|
|
70
|
+
RichText('Heading 2')
|
|
71
|
+
>>> doc.children[0].children[0].parent.title == "Heading 1"
|
|
72
|
+
True
|
|
73
|
+
>>> doc.children[0].children[0].siblings
|
|
74
|
+
[]
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
- Original source whitespace and formatting preserved by default
|
|
78
|
+
|
|
79
|
+
``` python
|
|
80
|
+
>>> from org_parser import loads
|
|
81
|
+
>>> doc = loads('''
|
|
82
|
+
... #+TITLE: Original formatting
|
|
83
|
+
... * Heading 1
|
|
84
|
+
... Indented section
|
|
85
|
+
... remains indented
|
|
86
|
+
... in-tree.
|
|
87
|
+
... ''')
|
|
88
|
+
>>> print(str(doc.children[0]))
|
|
89
|
+
* Heading 1
|
|
90
|
+
Indented section
|
|
91
|
+
remains indented
|
|
92
|
+
in-tree.
|
|
93
|
+
|
|
94
|
+
>>> print(doc.render())
|
|
95
|
+
|
|
96
|
+
#+TITLE: Original formatting
|
|
97
|
+
* Heading 1
|
|
98
|
+
Indented section
|
|
99
|
+
remains indented
|
|
100
|
+
in-tree.
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
- Fully mutable tree allowing dynamic edits
|
|
104
|
+
|
|
105
|
+
``` python
|
|
106
|
+
>>> from org_parser import loads
|
|
107
|
+
>>> from org_parser.element import Paragraph
|
|
108
|
+
>>> doc = loads('''
|
|
109
|
+
... #+TITLE: Document title
|
|
110
|
+
... * Heading 1
|
|
111
|
+
... Body text.
|
|
112
|
+
... ''')
|
|
113
|
+
>>> doc.title
|
|
114
|
+
RichText('Document title')
|
|
115
|
+
>>> doc.title.text = "Another title"
|
|
116
|
+
>>> doc[0].body = [Paragraph.from_source("New *and* improved!")]
|
|
117
|
+
>>> doc[0].title.text = "Improved heading"
|
|
118
|
+
>>> print(doc.render())
|
|
119
|
+
#+TITLE: Another title
|
|
120
|
+
|
|
121
|
+
* Improved heading
|
|
122
|
+
New *and* improved!
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
- Org Table support
|
|
126
|
+
|
|
127
|
+
``` python
|
|
128
|
+
>>> from org_parser import loads
|
|
129
|
+
>>> doc = loads('''
|
|
130
|
+
... |Value|Double|
|
|
131
|
+
... |1| |
|
|
132
|
+
... |2| |
|
|
133
|
+
... |3| |
|
|
134
|
+
... ''')
|
|
135
|
+
>>> for r in doc.body[1].rows[1:]:
|
|
136
|
+
... r[1].text = str(2 * int(r[0].text))
|
|
137
|
+
...
|
|
138
|
+
>>> print(doc.render())
|
|
139
|
+
|
|
140
|
+
| Value | Double |
|
|
141
|
+
| 1 | 2 |
|
|
142
|
+
| 2 | 4 |
|
|
143
|
+
| 3 | 6 |
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
- Rich Text support
|
|
147
|
+
|
|
148
|
+
``` python
|
|
149
|
+
>>> from org_parser.text import RichText
|
|
150
|
+
>>> text = RichText.from_source("Supports *org-mode* /markup/ and inline_{objects}: <2026-03-29>")
|
|
151
|
+
>>> text
|
|
152
|
+
RichText('Supports *org-mode* /markup/ and inline_{objects}: <2026-03-29>')
|
|
153
|
+
>>> text.parts
|
|
154
|
+
[PlainText(text='Supports '), Bold(body=[PlainText(text='org-mode')]), PlainText(text=' '), Italic(body=[PlainText(text='markup')]), PlainText(text=' and inline'), Subscript(body=[PlainText(text='objects')], form='{}'), PlainText(text=': '), Timestamp(is_active=True, start_year=2026, start_month=3, start_day=29), PlainText(text='')]
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
- Error recovery
|
|
158
|
+
|
|
159
|
+
``` python
|
|
160
|
+
>>> from org_parser import loads
|
|
161
|
+
>>> doc = loads('''
|
|
162
|
+
... * Heading
|
|
163
|
+
... SCHEDULED: yesterday
|
|
164
|
+
... ''')
|
|
165
|
+
>>> doc[0].scheduled is None
|
|
166
|
+
True
|
|
167
|
+
>>> doc.errors
|
|
168
|
+
[ParseError(start_point=Point(row=2, column=0), end_point=Point(row=2, column=20), text='SCHEDULED: yesterday')]
|
|
169
|
+
```
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "org-parser"
|
|
3
|
+
version = "0.23.5"
|
|
4
|
+
description = "A parser for Org Mode documents"
|
|
5
|
+
authors = ["Kajetan Rzepecki <k@spartan.works>"]
|
|
6
|
+
maintainers = ["Kajetan Rzepecki <k@spartan.works>"]
|
|
7
|
+
homepage = "https://github.com/Idorobots/org-parser"
|
|
8
|
+
repository = "https://github.com/Idorobots/org-parser"
|
|
9
|
+
documentation = "https://idorobots.github.io/org-parser/"
|
|
10
|
+
keywords = ["parsing", "tree-sitter", "org", "org-mode"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Operating System :: OS Independent",
|
|
15
|
+
"Programming Language :: Python",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Topic :: Software Development :: Compilers",
|
|
19
|
+
"Topic :: Text Processing :: Linguistic",
|
|
20
|
+
"Typing :: Typed",
|
|
21
|
+
]
|
|
22
|
+
license = "MIT"
|
|
23
|
+
readme = "docs/index.md"
|
|
24
|
+
packages = [{ include = "org_parser", from = "src" }]
|
|
25
|
+
|
|
26
|
+
[tool.poetry.urls]
|
|
27
|
+
"Issue Tracker" = "https://github.com/Idorobots/org-parser/issues"
|
|
28
|
+
"Releases" = "https://github.com/Idorobots/org-parser/releases"
|
|
29
|
+
|
|
30
|
+
[tool.poetry.dependencies]
|
|
31
|
+
python = "^3.12"
|
|
32
|
+
tree-sitter = ">=0.25,<1.0"
|
|
33
|
+
tree-sitter-org = ">=0.23.6"
|
|
34
|
+
|
|
35
|
+
[tool.poetry.group.dev.dependencies]
|
|
36
|
+
mypy = "^1.11"
|
|
37
|
+
ruff = "^0.8"
|
|
38
|
+
taskipy = "^1.14.1"
|
|
39
|
+
pytest = "^8.3"
|
|
40
|
+
pytest-cov = "^6.0"
|
|
41
|
+
pyright = "^1.1"
|
|
42
|
+
mkdocs = "^1.6"
|
|
43
|
+
mkdocs-section-index = "^0.3"
|
|
44
|
+
mkdocs-autorefs = "^1.4"
|
|
45
|
+
mkdocs-material = "^9.7.6"
|
|
46
|
+
mkdocstrings = { version = "^0.30", extras = ["python"] }
|
|
47
|
+
|
|
48
|
+
[build-system]
|
|
49
|
+
requires = ["poetry-core"]
|
|
50
|
+
build-backend = "poetry.core.masonry.api"
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# taskipy — useful tasks
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
[tool.taskipy.tasks]
|
|
57
|
+
test = "pytest --cov=src/ --cov-report=term-missing"
|
|
58
|
+
lint = "ruff check src/ tests/"
|
|
59
|
+
lint-fix = "ruff check --fix src/ tests/"
|
|
60
|
+
format-check = "ruff format --check src/ tests/"
|
|
61
|
+
format = "ruff format src/ tests/"
|
|
62
|
+
type = "mypy src/ tests"
|
|
63
|
+
docs-build = "mkdocs build --strict"
|
|
64
|
+
docs-serve = "mkdocs serve"
|
|
65
|
+
check = "task format-check && task lint && task type && task test"
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# ruff — linting & formatting
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
[tool.ruff]
|
|
72
|
+
line-length = 100
|
|
73
|
+
target-version = "py312"
|
|
74
|
+
src = ["src", "tests"]
|
|
75
|
+
|
|
76
|
+
[tool.ruff.lint]
|
|
77
|
+
select = ["ALL"]
|
|
78
|
+
ignore = [
|
|
79
|
+
# Docstring style: prefer D211 (no blank line before class docstring) and
|
|
80
|
+
# D212 (summary on first line); disable the conflicting alternatives.
|
|
81
|
+
"D203", # one-blank-line-before-class (conflicts with D211)
|
|
82
|
+
"D213", # multi-line-summary-second-line (conflicts with D212)
|
|
83
|
+
# Formatter compatibility and readability expectations in this codebase.
|
|
84
|
+
"COM812", # missing-trailing-comma (handled by formatter)
|
|
85
|
+
"ISC001", # single-line-implicit-string-concatenation (handled by formatter)
|
|
86
|
+
# Rules that are currently too noisy or conflict with project patterns.
|
|
87
|
+
"DTZ001", # call-datetime-without-tzinfo
|
|
88
|
+
"EM101", # raw-string-in-exception
|
|
89
|
+
"EM102", # f-string-in-exception
|
|
90
|
+
"PLR0913", # too-many-arguments
|
|
91
|
+
"PLR2004", # magic-value-comparison (too many false positives in tests)
|
|
92
|
+
"SLF001", # private-member-access
|
|
93
|
+
"TRY003", # raise-vanilla-args
|
|
94
|
+
]
|
|
95
|
+
|
|
96
|
+
[tool.ruff.lint.per-file-ignores]
|
|
97
|
+
"tests/**/*.py" = [
|
|
98
|
+
"D", # docstrings not required in test files
|
|
99
|
+
"PT011", # existing tests intentionally assert broad ValueError paths
|
|
100
|
+
"PT015", # legacy assertion style retained in negative-path tests
|
|
101
|
+
"PT018", # compound assertions retained for concise readability
|
|
102
|
+
"S101", # assert is expected in tests
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
[tool.ruff.lint.pydocstyle]
|
|
106
|
+
convention = "google"
|
|
107
|
+
|
|
108
|
+
[tool.ruff.lint.isort]
|
|
109
|
+
known-first-party = ["org_parser"]
|
|
110
|
+
force-sort-within-sections = true
|
|
111
|
+
|
|
112
|
+
[tool.ruff.format]
|
|
113
|
+
quote-style = "double"
|
|
114
|
+
indent-style = "space"
|
|
115
|
+
docstring-code-format = true
|
|
116
|
+
|
|
117
|
+
# ---------------------------------------------------------------------------
|
|
118
|
+
# mypy — static type checking
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
[tool.mypy]
|
|
122
|
+
python_version = "3.12"
|
|
123
|
+
strict = true
|
|
124
|
+
warn_return_any = true
|
|
125
|
+
warn_unused_configs = true
|
|
126
|
+
warn_redundant_casts = true
|
|
127
|
+
warn_unused_ignores = true
|
|
128
|
+
no_implicit_reexport = true
|
|
129
|
+
show_error_codes = true
|
|
130
|
+
pretty = true
|
|
131
|
+
# Use the src layout
|
|
132
|
+
mypy_path = "src"
|
|
133
|
+
|
|
134
|
+
[[tool.mypy.overrides]]
|
|
135
|
+
# tree-sitter ships incomplete type stubs; suppress import errors for it.
|
|
136
|
+
module = ["tree_sitter", "tree_sitter.*"]
|
|
137
|
+
ignore_missing_imports = true
|
|
138
|
+
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
# pytest
|
|
141
|
+
# ---------------------------------------------------------------------------
|
|
142
|
+
|
|
143
|
+
[tool.pytest.ini_options]
|
|
144
|
+
testpaths = ["tests"]
|
|
145
|
+
addopts = "--tb=short --strict-markers -q"
|
|
146
|
+
markers = ["integration: tests that require the compiled org.so shared library"]
|
|
147
|
+
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
# coverage
|
|
150
|
+
# ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
[tool.coverage.run]
|
|
153
|
+
branch = true
|
|
154
|
+
source = ["src"]
|
|
155
|
+
omit = ["*/__init__.py"]
|
|
156
|
+
|
|
157
|
+
[tool.coverage.report]
|
|
158
|
+
show_missing = true
|
|
159
|
+
skip_covered = false
|
|
160
|
+
fail_under = 0
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""org_parser — Python bindings for the tree-sitter org-mode parser.
|
|
2
|
+
|
|
3
|
+
This package provides convenience helpers for loading and dumping Org Mode
|
|
4
|
+
documents as [org_parser.document.Document][] instances.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from org_parser._lang import PARSER
|
|
12
|
+
from org_parser.document import Document
|
|
13
|
+
|
|
14
|
+
# Names exported as the package's public API: the Document class plus the
# load/loads/dump/dumps convenience helpers defined below.
__all__ = ["Document", "dump", "dumps", "load", "loads"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def load(filename: str) -> Document:
    """Parse the Org Mode file at *filename* into a document.

    The file is read as raw bytes, parsed with the shared ``PARSER``, and the
    resulting tree is handed to ``Document.from_tree`` together with the
    filename and the raw bytes.

    Args:
        filename: Path to the Org Mode file.

    Returns:
        Parsed [org_parser.document.Document][] instance.

    Example:
    ```python
    >>> from org_parser import load
    >>> document = load('path/to/file.org')
    >>> document.children[0].title_text
    'Some heading'
    ```
    """
    raw = Path(filename).read_bytes()
    return Document.from_tree(PARSER.parse(raw), filename, raw)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def loads(source: str, filename: str | None = None) -> Document:
    """Parse Org Mode text held in a string into a document.

    The text is encoded to bytes before parsing. When no *filename* is given
    the document is created with an empty-string filename.

    Args:
        source: Org Mode text to parse.
        filename: Optional filename to assign to the parsed document.

    Returns:
        Parsed [org_parser.document.Document][] instance.

    Example:
    ```python
    >>> from org_parser import loads
    >>> document = loads("* TODO Heading 1")
    >>> document.children[0].todo
    'TODO'
    ```
    """
    encoded = source.encode()
    parsed = PARSER.parse(encoded)
    # An absent filename is represented as "" rather than None.
    return Document.from_tree(parsed, "" if filename is None else filename, encoded)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def dumps(document: Document) -> str:
    """Render a parsed document back to Org Mode text.

    This delegates to ``document.render()``, which yields the complete
    document text including all headings. Clean (unmodified) parse-backed
    documents come back as the original source verbatim, while dirty
    documents have every section rebuilt from its semantic fields.

    Args:
        document: Parsed document instance.

    Returns:
        Full Org Mode source text.

    Example:
    ```python
    >>> from org_parser import dumps, loads
    >>> document = loads("* TODO Heading 1")
    >>> dumps(document).startswith("* TODO")
    True
    ```
    """
    rendered = document.render()
    return rendered
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def dump(document: Document, filename: str | None = None) -> None:
    """Write a parsed document to disk as UTF-8.

    The output path is *filename* when provided; otherwise
    [document.filename][org_parser.document.Document.filename].

    The rendered text is written as UTF-8 bytes with no newline translation,
    mirroring how ``load`` reads raw bytes and ``loads`` encodes its input, so
    the output does not depend on the platform's locale or line-ending
    conventions.

    Args:
        document: Parsed document instance.
        filename: Optional output path.

    Raises:
        ValueError: If neither *filename* nor ``document.filename`` is set.

    Example:
    ```python
    >>> from pathlib import Path
    >>> from org_parser import dump, loads
    >>> document = loads("* TODO Heading 1")
    >>> dump(document, 'path/to/file.org')
    >>> out = Path('path/to/file.org')
    >>> out.read_text().startswith("* TODO")
    True
    ```
    """
    target = filename if filename is not None else document.filename
    if target == "":
        raise ValueError("No output filename provided")
    # Path.write_text() without an explicit encoding uses the locale default
    # and rewrites "\n" on Windows; writing encoded bytes keeps the output
    # byte-compatible with what load()/loads() consume.
    Path(target).write_bytes(dumps(document).encode())
|