org-dex-parse 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- org_dex_parse/__init__.py +29 -0
- org_dex_parse/__main__.py +294 -0
- org_dex_parse/config.py +108 -0
- org_dex_parse/evaluator.py +116 -0
- org_dex_parse/parser.py +954 -0
- org_dex_parse/types.py +165 -0
- org_dex_parse-0.1.0.dist-info/METADATA +846 -0
- org_dex_parse-0.1.0.dist-info/RECORD +10 -0
- org_dex_parse-0.1.0.dist-info/WHEEL +4 -0
- org_dex_parse-0.1.0.dist-info/licenses/LICENSE +674 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""org-dex-parse: parse org-mode files into structured data."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from org_dex_parse.config import Config
|
|
6
|
+
from org_dex_parse.evaluator import compile_predicate
|
|
7
|
+
from org_dex_parse.parser import parse_file
|
|
8
|
+
from org_dex_parse.types import (
|
|
9
|
+
ClockEntry,
|
|
10
|
+
Item,
|
|
11
|
+
Link,
|
|
12
|
+
ParseResult,
|
|
13
|
+
Range,
|
|
14
|
+
StateChange,
|
|
15
|
+
Timestamp,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"compile_predicate",
|
|
20
|
+
"Config",
|
|
21
|
+
"ClockEntry",
|
|
22
|
+
"Item",
|
|
23
|
+
"Link",
|
|
24
|
+
"ParseResult",
|
|
25
|
+
"Range",
|
|
26
|
+
"StateChange",
|
|
27
|
+
"Timestamp",
|
|
28
|
+
"parse_file",
|
|
29
|
+
]
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""CLI for org-dex-parse: python -m org_dex_parse FILE [FILE ...]
|
|
2
|
+
|
|
3
|
+
Parses org files and prints each item with its populated fields.
|
|
4
|
+
Uses bare configuration by default (any heading with :ID: is an item).
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python -m org_dex_parse file.org
|
|
8
|
+
python -m org_dex_parse --json file.org
|
|
9
|
+
python -m org_dex_parse --config myconfig.json file.org
|
|
10
|
+
python -m org_dex_parse --predicate '["property", "Type"]' file.org
|
|
11
|
+
python -m org_dex_parse --todos TODO,NEXT --dones DONE file.org
|
|
12
|
+
python -m org_dex_parse --json -vv file.org # full output with raw_text
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import dataclasses
|
|
18
|
+
import datetime
|
|
19
|
+
import json
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from .config import Config
|
|
24
|
+
from .parser import parse_file
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# -- Config construction -------------------------------------------------------
|
|
28
|
+
# Builds a Config from CLI flags and optional JSON config file.
|
|
29
|
+
# Precedence: CLI flags > config file > Config defaults.
|
|
30
|
+
|
|
31
|
+
# Valid keys in the JSON config file — must match Config fields.
|
|
32
|
+
# Used to reject typos early (AC11).
|
|
33
|
+
_VALID_CONFIG_KEYS = frozenset({
|
|
34
|
+
"predicate", "todos", "dones", "tags_exclude_from_inheritance",
|
|
35
|
+
"exclude_drawers", "exclude_blocks", "exclude_properties",
|
|
36
|
+
"created_property", "extra_tag_chars",
|
|
37
|
+
})
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _load_config_file(path: str) -> dict:
    """Load and validate a JSON config file.

    :arg path: Filesystem path of the JSON config file.
    :returns: The decoded JSON object. Every key is a member of
        ``_VALID_CONFIG_KEYS``; values are returned as decoded (not
        type-checked here).
    :raises SystemExit: With status 1, after printing a one-line message
        to stderr, when the file is missing or unreadable, is not valid
        JSON, is not a JSON object, or contains unknown keys.
    """
    try:
        # JSON text is UTF-8 by specification; do not depend on the
        # platform's locale encoding.
        text = Path(path).read_text(encoding="utf-8")
    except FileNotFoundError:
        print(f"error: config file not found: {path}", file=sys.stderr)
        raise SystemExit(1) from None
    except (OSError, UnicodeDecodeError) as exc:
        # Permission denied, path is a directory, undecodable bytes, ...
        print(f"error: cannot read config file {path}: {exc}",
              file=sys.stderr)
        raise SystemExit(1) from None

    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        # Report malformed JSON like every other config error instead of
        # letting a traceback escape to the user.
        print(f"error: config file is not valid JSON: {path}: {exc}",
              file=sys.stderr)
        raise SystemExit(1) from None

    if not isinstance(data, dict):
        print(f"error: config file must be a JSON object, got {type(data).__name__}",
              file=sys.stderr)
        raise SystemExit(1)

    # Reject typos early rather than silently ignoring them.
    unknown = set(data) - _VALID_CONFIG_KEYS
    if unknown:
        print(f"error: unknown fields in config file: {', '.join(sorted(unknown))}",
              file=sys.stderr)
        raise SystemExit(1)

    return data
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _build_config(args: argparse.Namespace) -> Config:
    """Build a Config from CLI args, merging the config file if present.

    Precedence: CLI flags > config file > Config defaults. A CLI flag
    counts as "set" when its value is not None (the argparse default for
    every optional flag handled here).

    :arg args: Parsed command-line namespace. ``args.config`` must exist;
        every other flag is looked up with a None fallback.
    :returns: A ``Config`` built from the merged values.
    :raises SystemExit: With status 1 when ``--predicate`` is not valid
        JSON, or when loading the config file fails.
    """
    # Start with config file values (if any).
    file_cfg: dict = {}
    if args.config is not None:
        file_cfg = _load_config_file(args.config)

    def _parse_predicate(raw: str):
        # "null" decodes to None; malformed input gets a clean error
        # message instead of a JSONDecodeError traceback.
        try:
            return json.loads(raw)
        except json.JSONDecodeError as exc:
            print(f"error: --predicate is not valid JSON: {exc}",
                  file=sys.stderr)
            raise SystemExit(1) from None

    def _split(raw: str) -> tuple:
        # An empty string means "explicitly empty", not one empty keyword.
        return tuple(raw.split(",")) if raw else ()

    def _split_frozen(raw: str) -> frozenset:
        return frozenset(raw.split(",")) if raw else frozenset()

    def _identity(raw: str) -> str:
        return raw

    # Each entry: (argparse dest, config key, transform). The transform
    # converts the CLI string to the value type Config expects.
    cli_mappings = [
        ("predicate", "predicate", _parse_predicate),
        ("todos", "todos", _split),
        ("dones", "dones", _split),
        ("tags_exclude", "tags_exclude_from_inheritance", _split_frozen),
        ("exclude_drawers", "exclude_drawers", _split_frozen),
        ("exclude_blocks", "exclude_blocks", _split_frozen),
        ("exclude_properties", "exclude_properties", _split_frozen),
        ("created_property", "created_property", _identity),
        ("extra_tag_chars", "extra_tag_chars", _identity),
    ]

    # Merge: CLI flags override config file values; keys absent from both
    # are left out so the Config defaults apply.
    merged: dict = {}
    for arg_name, cfg_key, transform in cli_mappings:
        cli_val = getattr(args, arg_name, None)
        if cli_val is not None:
            merged[cfg_key] = transform(cli_val)
        elif cfg_key in file_cfg:
            merged[cfg_key] = file_cfg[cfg_key]

    # Values that came from the config file are JSON arrays (lists);
    # convert them to the tuple/frozenset types Config declares.
    for tuple_key in ("todos", "dones"):
        if isinstance(merged.get(tuple_key), list):
            merged[tuple_key] = tuple(merged[tuple_key])
    for set_key in ("tags_exclude_from_inheritance", "exclude_drawers",
                    "exclude_blocks", "exclude_properties"):
        if isinstance(merged.get(set_key), list):
            merged[set_key] = frozenset(merged[set_key])

    # The Config field is named item_predicate; a list or None value is
    # handed over as-is.
    if "predicate" in merged:
        merged["item_predicate"] = merged.pop("predicate")

    return Config(**merged)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# -- JSON serialization --------------------------------------------------------
|
|
129
|
+
# Custom encoder for Item dataclasses and org-dex-parse types.
|
|
130
|
+
|
|
131
|
+
class _ItemEncoder(json.JSONEncoder):
    """JSON encoder that understands dates and frozensets.

    Anything else unsupported falls through to the base class, which
    raises ``TypeError``.
    """

    def default(self, obj):
        # datetime.datetime is a subclass of datetime.date, so a single
        # check covers both; each renders via its own isoformat().
        if isinstance(obj, datetime.date):
            return obj.isoformat()
        # frozenset → sorted list, for stable output.
        if isinstance(obj, frozenset):
            return sorted(obj)
        return super().default(obj)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _item_to_dict(item, verbosity: int) -> dict:
    """Turn an Item dataclass into a JSON-friendly dict.

    The ``properties`` sequence of pairs becomes a plain dict. Bulky
    text fields are dropped unless the verbosity is high enough:

    - 0 (default): neither ``body`` nor ``raw_text``
    - 1 (-v): ``body`` is kept
    - 2 (-vv): ``body`` and ``raw_text`` are kept

    :arg item: A dataclass instance with a ``properties`` field.
    :arg verbosity: Number of ``-v`` flags given.
    :returns: The filtered dict.
    """
    result = dataclasses.asdict(item)
    result["properties"] = dict(result["properties"])

    # (field, minimum verbosity at which the field is kept)
    for field_name, min_level in (("raw_text", 2), ("body", 1)):
        if verbosity < min_level:
            result.pop(field_name, None)

    return result
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# -- Text output ---------------------------------------------------------------
|
|
171
|
+
|
|
172
|
+
def _print_item(item, verbosity: int) -> None:
    """Write one item to stdout in the human-readable text format.

    Optional fields are printed only when truthy. ``body`` appears from
    verbosity 1, ``raw_text`` from verbosity 2; both are shown as a
    one-line preview.

    :arg item: Object exposing the Item attributes read below.
    :arg verbosity: Number of ``-v`` flags given.
    """

    def _preview(text: str) -> str:
        # First line cut to 80 chars, plus a line count when multi-line.
        rows = text.split("\n")
        head = rows[0][:80]
        if len(rows) > 1:
            head += f" ... ({len(rows)} lines)"
        return head

    print(f" {item.title}")
    print(f" id={item.item_id} level={item.level} line={item.linenumber}")

    if item.parent_item_id:
        print(f" parent={item.parent_item_id}")

    # todo and priority share a line when both are present.
    if item.todo and item.priority:
        print(f" todo={item.todo}", end="")
        print(f" priority={item.priority}", end="")
        print()
    elif item.todo:
        print(f" todo={item.todo}", end="")
        print()
    elif item.priority:
        print(f" priority={item.priority}")

    if item.local_tags:
        print(f" local_tags={sorted(item.local_tags)}")
    if item.inherited_tags:
        print(f" inherited_tags={sorted(item.inherited_tags)}")
    if item.properties:
        print(f" properties={dict(item.properties)}")

    # -v: show body.
    if verbosity >= 1 and item.body:
        print(f" body: {_preview(item.body)}")

    # -vv: show raw_text.
    if verbosity >= 2 and item.raw_text:
        print(f" raw_text: {_preview(item.raw_text)}")
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# -- Main ----------------------------------------------------------------------
|
|
211
|
+
|
|
212
|
+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for ``python -m org_dex_parse``."""
    parser = argparse.ArgumentParser(
        prog="python -m org_dex_parse",
        description="Parse org files and show items (bare config by default).",
    )
    parser.add_argument("files", nargs="+", help="Org files to parse")

    # Configuration flags — all optional, override config file values.
    parser.add_argument(
        "--config", type=str, default=None, metavar="FILE",
        help="JSON config file (all fields optional)",
    )
    # (flag, dest, help) — every one is an optional string defaulting to
    # None, so "not given" can be told apart from any explicit value.
    string_flags = [
        ("--predicate", "predicate",
         'JSON s-expression predicate, e.g. \'["property", "Type"]\''),
        ("--todos", "todos", "Comma-separated active TODO keywords"),
        ("--dones", "dones", "Comma-separated done TODO keywords"),
        ("--tags-exclude", "tags_exclude",
         "Comma-separated tags excluded from inheritance"),
        ("--exclude-drawers", "exclude_drawers",
         "Comma-separated drawer names to exclude from body"),
        ("--exclude-blocks", "exclude_blocks",
         "Comma-separated block names to exclude from body"),
        ("--exclude-properties", "exclude_properties",
         "Comma-separated property names to exclude"),
        ("--created-property", "created_property",
         "Property name for creation date (default: CREATED)"),
        ("--extra-tag-chars", "extra_tag_chars",
         "Additional characters allowed in tag names"),
    ]
    for flag, dest, help_text in string_flags:
        parser.add_argument(flag, type=str, default=None, dest=dest,
                            help=help_text)

    # Output flags.
    parser.add_argument(
        "--json", action="store_true", dest="json_output",
        help="Output items as JSON",
    )
    parser.add_argument(
        "-v", "--verbose", action="count", default=0,
        help="Increase verbosity: -v adds body, -vv adds raw_text",
    )
    return parser


def main(argv: list[str] | None = None) -> None:
    """Run the command-line interface.

    :arg argv: Argument list to parse, excluding the program name.
        ``None`` (the default) makes argparse read ``sys.argv[1:]``, so
        calling ``main()`` with no arguments behaves as before.
    :raises SystemExit: From argparse on ``--help`` or usage errors, and
        from config construction on invalid configuration.
    """
    args = _build_arg_parser().parse_args(argv)
    config = _build_config(args)

    if args.json_output:
        # JSON mode: collect all items across files, output as one array.
        all_items = []
        for path in args.files:
            result = parse_file(path, config)
            all_items.extend(
                _item_to_dict(item, args.verbose) for item in result.items
            )
        print(json.dumps(all_items, cls=_ItemEncoder, indent=2,
                         ensure_ascii=False))
    else:
        # Text mode: print per-file summary.
        for path in args.files:
            result = parse_file(path, config)
            print(f"\n{path}: {len(result.items)} items")
            for item in result.items:
                _print_item(item, args.verbose)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
if __name__ == "__main__":
|
|
294
|
+
main()
|
org_dex_parse/config.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Parser configuration — predicate, keywords, exclusion lists."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Callable, Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Fallback predicate: accepts every heading it is given. The :ID: check
# is described as a structural invariant applied before the predicate
# runs, so nothing needs to be tested here.
_DEFAULT_PREDICATE: Callable[[Any], bool] = lambda h: True


@dataclass(frozen=True)
class Config:
    """Parser settings: item predicate, TODO keywords, exclusion lists.

    Callers build one of these to mirror their org-mode environment.

    :arg item_predicate: Decides which headings (already carrying
        ``:ID:``) count as items. Three input forms are accepted:

        - a callable ``(node) -> bool``, stored unchanged;
        - a ``list`` s-expression such as ``["property", "Type"]``,
          compiled through ``evaluator.compile_predicate``;
        - ``None``, also passed to ``compile_predicate``.

        Whatever the input, the stored value is a callable once
        ``__post_init__`` has run.
    :arg todos: Active (unfinished) TODO keywords.
    :arg dones: Terminal (finished) TODO keywords.
    :arg tags_exclude_from_inheritance: Tags that do not propagate to
        children (cf. ``org-tags-exclude-from-inheritance``).
    :arg exclude_drawers: Drawer names to leave out of body text.
        Lowercased in ``__post_init__`` for case-insensitive matching.
    :arg exclude_blocks: Block names to leave out of body text.
        Lowercased in ``__post_init__``.
    :arg exclude_properties: Property names to omit from the properties
        tuple. Lowercased in ``__post_init__``.
    :arg created_property: Name of the org property holding the creation
        date, which the parser uses for ``Item.created``. Uppercased in
        ``__post_init__``. Default: ``"CREATED"``.
    :arg extra_tag_chars: Extra characters to permit in tag names beyond
        the default ``[a-zA-Z0-9_@#%]``. Default: ``""``.
    """

    item_predicate: Callable[[Any], bool] = field(
        default=_DEFAULT_PREDICATE
    )
    todos: tuple[str, ...] = ()
    dones: tuple[str, ...] = ()
    tags_exclude_from_inheritance: frozenset[str] = frozenset()
    exclude_drawers: frozenset[str] = frozenset()
    exclude_blocks: frozenset[str] = frozenset()
    exclude_properties: frozenset[str] = frozenset()
    created_property: str = "CREATED"
    extra_tag_chars: str = ""

    def __post_init__(self) -> None:
        """Normalize field values after construction.

        The dataclass is frozen, so every write goes through
        ``object.__setattr__``.

        :raises ValueError: If ``item_predicate`` is neither callable,
            a list, nor None.
        """
        # -- Predicate ------------------------------------------------------
        predicate = self.item_predicate
        if predicate is None or isinstance(predicate, list):
            # Imported lazily, only when compilation is actually needed.
            from .evaluator import compile_predicate
            object.__setattr__(
                self, "item_predicate", compile_predicate(predicate)
            )
        elif not callable(predicate):
            raise ValueError(
                f"item_predicate must be callable, list, or None,"
                f" got {type(predicate).__name__}: {predicate!r}"
            )

        # -- Case normalization ---------------------------------------------
        for name in ("exclude_drawers", "exclude_blocks",
                     "exclude_properties"):
            lowered = frozenset(entry.lower() for entry in getattr(self, name))
            object.__setattr__(self, name, lowered)
        object.__setattr__(
            self, "created_property", self.created_property.upper()
        )
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""S-expression predicate compiler for item predicates.
|
|
2
|
+
|
|
3
|
+
Compiles a JSON-like s-expression (Python list) into a callable predicate
|
|
4
|
+
``(node) -> bool``. The expression format mirrors org-ql, serialized as
|
|
5
|
+
JSON arrays for cross-process transport (Elisp -> JSON-RPC -> Python).
|
|
6
|
+
|
|
7
|
+
Example::
|
|
8
|
+
|
|
9
|
+
>>> pred = compile_predicate(["and", ["property", "Type"],
|
|
10
|
+
... ["not", ["property", "ARCHIVE_TIME"]]])
|
|
11
|
+
>>> pred(some_orgparse_node)
|
|
12
|
+
True
|
|
13
|
+
|
|
14
|
+
Supported operators (extensible via ``_OPERATORS`` dispatch table):
|
|
15
|
+
|
|
16
|
+
- ``["property", "Name"]`` — ``node.get_property("Name") is not None``
|
|
17
|
+
- ``["not", expr]`` — negation
|
|
18
|
+
- ``["and", expr, ...]`` — conjunction (n-ary, short-circuits)
|
|
19
|
+
- ``["or", expr, ...]`` — disjunction (n-ary, short-circuits)
|
|
20
|
+
- ``None`` — default predicate (always True)
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from typing import Any, Callable
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def compile_predicate(
    expr: list | None,
) -> Callable[[Any], bool]:
    """Compile a JSON-like s-expression into a predicate callable.

    :arg expr: A list ``[operator, *args]`` or None. None returns the
        default predicate (always True).
    :returns: A callable ``(node) -> bool``.
    :raises ValueError: If ``expr`` is not a list or None, is empty, has
        a non-string or unknown operator, or an operator handler rejects
        its arguments.
    """
    if expr is None:
        return _DEFAULT_PREDICATE

    if not isinstance(expr, list):
        raise ValueError(
            f"expected list or None, got {type(expr).__name__}: {expr!r}"
        )

    if not expr:
        raise ValueError("empty expression — expected [operator, ...args]")

    operator, *args = expr

    # Check the type before the table lookup: an unhashable head (e.g. a
    # nested list such as [["property", "X"]]) would otherwise raise
    # TypeError from the dict membership test instead of ValueError.
    if not isinstance(operator, str):
        raise ValueError(
            f"operator must be a string,"
            f" got {type(operator).__name__}: {operator!r}"
        )

    if operator not in _OPERATORS:
        raise ValueError(
            f"unknown operator {operator!r}"
            f" — supported: {', '.join(sorted(_OPERATORS))}"
        )

    return _OPERATORS[operator](operator, args)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# -- Default predicate -------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
_DEFAULT_PREDICATE: Callable[[Any], bool] = lambda _node: True
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# -- Operator handlers -------------------------------------------------------
|
|
66
|
+
# Each handler takes (operator_name, args) and returns a callable.
|
|
67
|
+
# operator_name is passed for error messages.
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _compile_property(op: str, args: list) -> Callable[[Any], bool]:
    """["property", "Name"] → node.get_property("Name") is not None.

    :arg op: Operator name, used only in error messages.
    :arg args: Operator arguments; must be exactly one property name.
    :returns: A predicate that is True when the node's ``get_property``
        returns something other than None for that name.
    :raises ValueError: On wrong arity or a non-string property name.
    """
    if len(args) != 1:
        raise ValueError(
            f"{op!r} expects exactly 1 argument (property name),"
            f" got {len(args)}: {args!r}"
        )
    prop_name = args[0]
    # Fail at compile time: a non-string name (e.g. a nested expression)
    # would otherwise only surface later as a lookup that never matches.
    if not isinstance(prop_name, str):
        raise ValueError(
            f"{op!r} expects a string property name,"
            f" got {type(prop_name).__name__}: {prop_name!r}"
        )
    return lambda node: node.get_property(prop_name) is not None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _compile_not(op: str, args: list) -> Callable[[Any], bool]:
    """["not", expr] → logical negation of the compiled sub-expression.

    :arg op: Operator name, used only in error messages.
    :arg args: Operator arguments; must be exactly one sub-expression.
    :raises ValueError: On wrong arity, or from compiling the operand.
    """
    arity = len(args)
    if arity != 1:
        raise ValueError(
            f"{op!r} expects exactly 1 argument (sub-expression),"
            f" got {arity}: {args!r}"
        )
    operand = compile_predicate(args[0])

    def negated(node: Any) -> bool:
        return not operand(node)

    return negated
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _compile_and(op: str, args: list) -> Callable[[Any], bool]:
    """["and", expr, ...] → True only when every operand is truthy.

    Operands are compiled once, in order; evaluation stops at the first
    falsy one.

    :arg op: Operator name, used only in error messages.
    :arg args: One or more sub-expressions.
    :raises ValueError: On an empty operand list, or from compiling an
        operand.
    """
    if not args:
        raise ValueError(f"{op!r} expects at least 1 operand, got 0")
    checks = [compile_predicate(sub) for sub in args]

    def conjunction(node: Any) -> bool:
        for check in checks:
            if not check(node):
                return False
        return True

    return conjunction
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _compile_or(op: str, args: list) -> Callable[[Any], bool]:
    """["or", expr, ...] → True as soon as any operand is truthy.

    Operands are compiled once, in order; evaluation stops at the first
    truthy one.

    :arg op: Operator name, used only in error messages.
    :arg args: One or more sub-expressions.
    :raises ValueError: On an empty operand list, or from compiling an
        operand.
    """
    if not args:
        raise ValueError(f"{op!r} expects at least 1 operand, got 0")
    checks = [compile_predicate(sub) for sub in args]

    def disjunction(node: Any) -> bool:
        for check in checks:
            if check(node):
                return True
        return False

    return disjunction
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# -- Dispatch table -----------------------------------------------------------
|
|
109
|
+
# Adding a new operator: one entry here + one handler above.
|
|
110
|
+
|
|
111
|
+
_OPERATORS: dict[str, Callable[[str, list], Callable[[Any], bool]]] = {
|
|
112
|
+
"property": _compile_property,
|
|
113
|
+
"not": _compile_not,
|
|
114
|
+
"and": _compile_and,
|
|
115
|
+
"or": _compile_or,
|
|
116
|
+
}
|