org-dex-parse 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ """org-dex-parse: parse org-mode files into structured data."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from org_dex_parse.config import Config
6
+ from org_dex_parse.evaluator import compile_predicate
7
+ from org_dex_parse.parser import parse_file
8
+ from org_dex_parse.types import (
9
+ ClockEntry,
10
+ Item,
11
+ Link,
12
+ ParseResult,
13
+ Range,
14
+ StateChange,
15
+ Timestamp,
16
+ )
17
+
18
+ __all__ = [
19
+ "compile_predicate",
20
+ "Config",
21
+ "ClockEntry",
22
+ "Item",
23
+ "Link",
24
+ "ParseResult",
25
+ "Range",
26
+ "StateChange",
27
+ "Timestamp",
28
+ "parse_file",
29
+ ]
@@ -0,0 +1,294 @@
1
+ """CLI for org-dex-parse: python -m org_dex_parse FILE [FILE ...]
2
+
3
+ Parses org files and prints each item with its populated fields.
4
+ Uses bare configuration by default (any heading with :ID: is an item).
5
+
6
+ Usage:
7
+ python -m org_dex_parse file.org
8
+ python -m org_dex_parse --json file.org
9
+ python -m org_dex_parse --config myconfig.json file.org
10
+ python -m org_dex_parse --predicate '["property", "Type"]' file.org
11
+ python -m org_dex_parse --todos TODO,NEXT --dones DONE file.org
12
+ python -m org_dex_parse --json -vv file.org # full output with raw_text
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import dataclasses
18
+ import datetime
19
+ import json
20
+ import sys
21
+ from pathlib import Path
22
+
23
+ from .config import Config
24
+ from .parser import parse_file
25
+
26
+
27
+ # -- Config construction -------------------------------------------------------
28
+ # Builds a Config from CLI flags and optional JSON config file.
29
+ # Precedence: CLI flags > config file > Config defaults.
30
+
31
+ # Valid keys in the JSON config file — must match Config fields.
32
+ # Used to reject typos early (AC11).
33
# Whitelist of top-level keys accepted in the JSON config file.
# _load_config_file() aborts with an error when the file contains any
# key outside this set.
_VALID_CONFIG_KEYS = frozenset({
    "predicate", "todos", "dones", "tags_exclude_from_inheritance",
    "exclude_drawers", "exclude_blocks", "exclude_properties",
    "created_property", "extra_tag_chars",
})
38
+
39
+
40
def _load_config_file(path: str) -> dict:
    """Load and validate a JSON config file.

    :arg path: Filesystem path of the JSON config file.
    :returns: The decoded JSON object.  Every key is a member of
        ``_VALID_CONFIG_KEYS``; values are returned as decoded, without
        type conversion.
    :raises SystemExit: With status 1, after printing an ``error: ...``
        line to stderr, when the file is missing or unreadable, is not
        valid UTF-8 JSON, is not a JSON object, or has unknown keys.
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the locale
        # default, so non-ASCII values decode the same on every platform.
        text = Path(path).read_text(encoding="utf-8")
    except FileNotFoundError:
        print(f"error: config file not found: {path}", file=sys.stderr)
        raise SystemExit(1)
    except (OSError, UnicodeDecodeError) as exc:
        # Directory, permission problem, or bytes that are not UTF-8.
        print(f"error: cannot read config file {path}: {exc}",
              file=sys.stderr)
        raise SystemExit(1)

    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        # Report malformed JSON like every other config problem rather
        # than letting a traceback reach the user.
        print(f"error: invalid JSON in config file {path}: {exc}",
              file=sys.stderr)
        raise SystemExit(1)

    if not isinstance(data, dict):
        print(f"error: config file must be a JSON object, got {type(data).__name__}",
              file=sys.stderr)
        raise SystemExit(1)

    unknown = set(data) - _VALID_CONFIG_KEYS
    if unknown:
        print(f"error: unknown fields in config file: {', '.join(sorted(unknown))}",
              file=sys.stderr)
        raise SystemExit(1)

    return data
65
+
66
+
67
def _build_config(args: argparse.Namespace) -> Config:
    """Build a Config from CLI args, merging the config file if present.

    Precedence: CLI flags > config file > Config defaults.  A CLI flag
    counts as "set" when its value is not None (the argparse default
    for every optional flag).

    Comma-separated flags are split on ",", each entry is stripped of
    surrounding whitespace and empty entries are dropped, so
    ``--todos "TODO, NEXT"`` and ``--todos TODO,NEXT`` are equivalent.

    :arg args: Parsed command-line namespace.
    :returns: A Config built from the merged values.
    :raises SystemExit: With status 1, after printing an ``error: ...``
        line to stderr, when ``--predicate`` is not valid JSON or when
        Config rejects the merged values with a ValueError.  Also
        propagated from ``_load_config_file``.
    """
    # Start with config file values (if any).
    file_cfg: dict = {}
    if args.config is not None:
        file_cfg = _load_config_file(args.config)

    # -- CLI string -> config value transforms -------------------------------

    def _entries(raw: str) -> list[str]:
        # Tolerate "A, B" and stray commas ("A,,B", "A,").
        return [part.strip() for part in raw.split(",") if part.strip()]

    def _split(raw: str) -> tuple[str, ...]:
        return tuple(_entries(raw))

    def _split_frozen(raw: str) -> frozenset[str]:
        return frozenset(_entries(raw))

    def _parse_predicate(raw: str):
        try:
            return json.loads(raw)
        except json.JSONDecodeError as exc:
            print(f"error: --predicate is not valid JSON: {exc}",
                  file=sys.stderr)
            raise SystemExit(1)

    def _identity(raw: str) -> str:
        return raw

    # Each entry: (argparse dest, config key, transform).
    cli_mappings = [
        ("predicate", "predicate", _parse_predicate),
        ("todos", "todos", _split),
        ("dones", "dones", _split),
        ("tags_exclude", "tags_exclude_from_inheritance", _split_frozen),
        ("exclude_drawers", "exclude_drawers", _split_frozen),
        ("exclude_blocks", "exclude_blocks", _split_frozen),
        ("exclude_properties", "exclude_properties", _split_frozen),
        ("created_property", "created_property", _identity),
        ("extra_tag_chars", "extra_tag_chars", _identity),
    ]

    # Merge: CLI flags override config file; keys present in neither
    # are left out so the Config defaults apply.
    merged: dict = {}
    for arg_name, cfg_key, transform in cli_mappings:
        cli_val = getattr(args, arg_name, None)
        if cli_val is not None:
            merged[cfg_key] = transform(cli_val)
        elif cfg_key in file_cfg:
            merged[cfg_key] = file_cfg[cfg_key]

    # Config file values arrive as JSON arrays (Python lists); convert
    # them to the tuple/frozenset types the Config fields declare.
    for tuple_key in ("todos", "dones"):
        if isinstance(merged.get(tuple_key), list):
            merged[tuple_key] = tuple(merged[tuple_key])
    for set_key in ("tags_exclude_from_inheritance", "exclude_drawers",
                    "exclude_blocks", "exclude_properties"):
        if isinstance(merged.get(set_key), list):
            merged[set_key] = frozenset(merged[set_key])

    # The CLI/config-file key is "predicate" but the Config field is
    # "item_predicate".  A list or None is passed through unchanged;
    # "null" on the CLI becomes None via json.loads.
    if "predicate" in merged:
        merged["item_predicate"] = merged.pop("predicate")

    try:
        return Config(**merged)
    except ValueError as exc:
        # e.g. a predicate that is neither a list nor null.
        print(f"error: invalid configuration: {exc}", file=sys.stderr)
        raise SystemExit(1)
126
+
127
+
128
+ # -- JSON serialization --------------------------------------------------------
129
+ # Custom encoder for Item dataclasses and org-dex-parse types.
130
+
131
class _ItemEncoder(json.JSONEncoder):
    """JSON encoder that understands the non-JSON value types of an Item.

    Dates and datetimes are emitted as ISO-8601 strings and frozensets
    as sorted lists; anything else is deferred to the base encoder.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*."""
        if isinstance(obj, (datetime.datetime, datetime.date)):
            encoded = obj.isoformat()
        elif isinstance(obj, frozenset):
            # Sorted so the output is stable between runs.
            encoded = sorted(obj)
        else:
            encoded = super().default(obj)
        return encoded
144
+
145
+
146
def _item_to_dict(item, verbosity: int) -> dict:
    """Turn an Item dataclass into a dict ready for JSON output.

    The ``properties`` sequence of (name, value) pairs becomes a plain
    dict.  Bulky text fields are dropped unless verbosity asks for them:

    - 0 (default): neither ``body`` nor ``raw_text``
    - 1 (-v): ``body`` is kept
    - 2 (-vv): ``body`` and ``raw_text`` are kept
    """
    record = dataclasses.asdict(item)
    record["properties"] = dict(record["properties"])

    # (field name, minimum verbosity at which the field is kept)
    for field_name, min_level in (("raw_text", 2), ("body", 1)):
        if verbosity < min_level:
            record.pop(field_name, None)

    return record
168
+
169
+
170
+ # -- Text output ---------------------------------------------------------------
171
+
172
def _print_item(item, verbosity: int) -> None:
    """Write one item to stdout in the human-readable text format.

    The title and the id/level/line header are always printed.
    Parent, todo/priority, tags and properties are printed only when
    set.  ``-v`` adds a one-line preview of the body and ``-vv`` also
    adds one of the raw text.
    """

    def _preview(text: str) -> str:
        # First line cut to 80 characters, plus a line count when the
        # text spans several lines.
        first, *rest = text.split("\n")
        summary = first[:80]
        if rest:
            summary += f" ... ({len(rest) + 1} lines)"
        return summary

    print(f" {item.title}")
    print(f" id={item.item_id} level={item.level} line={item.linenumber}")

    if item.parent_item_id:
        print(f" parent={item.parent_item_id}")

    # todo and priority share a line when both are present.
    if item.todo:
        status = f" todo={item.todo}"
        if item.priority:
            status += f" priority={item.priority}"
        print(status)
    elif item.priority:
        print(f" priority={item.priority}")

    if item.local_tags:
        print(f" local_tags={sorted(item.local_tags)}")
    if item.inherited_tags:
        print(f" inherited_tags={sorted(item.inherited_tags)}")
    if item.properties:
        print(f" properties={dict(item.properties)}")

    # (attribute, minimum verbosity): -v shows body, -vv shows raw_text.
    for attr, min_level in (("body", 1), ("raw_text", 2)):
        if verbosity >= min_level and getattr(item, attr):
            print(f" {attr}: {_preview(getattr(item, attr))}")
208
+
209
+
210
+ # -- Main ----------------------------------------------------------------------
211
+
212
def main() -> None:
    """Command-line entry point.

    Parses the arguments, builds the Config, parses every org file
    given, and prints the items either as one JSON array (``--json``)
    or as a per-file text listing.
    """
    parser = argparse.ArgumentParser(
        prog="python -m org_dex_parse",
        description="Parse org files and show items (bare config by default).",
    )
    parser.add_argument("files", nargs="+", help="Org files to parse")

    # Configuration flags: all optional strings defaulting to None, so
    # that _build_config can tell "not given" from an explicit value.
    # Each row: (flag, dest, metavar, help).
    string_flags = (
        ("--config", "config", "FILE",
         "JSON config file (all fields optional)"),
        ("--predicate", "predicate", None,
         'JSON s-expression predicate, e.g. \'["property", "Type"]\''),
        ("--todos", "todos", None,
         "Comma-separated active TODO keywords"),
        ("--dones", "dones", None,
         "Comma-separated done TODO keywords"),
        ("--tags-exclude", "tags_exclude", None,
         "Comma-separated tags excluded from inheritance"),
        ("--exclude-drawers", "exclude_drawers", None,
         "Comma-separated drawer names to exclude from body"),
        ("--exclude-blocks", "exclude_blocks", None,
         "Comma-separated block names to exclude from body"),
        ("--exclude-properties", "exclude_properties", None,
         "Comma-separated property names to exclude"),
        ("--created-property", "created_property", None,
         "Property name for creation date (default: CREATED)"),
        ("--extra-tag-chars", "extra_tag_chars", None,
         "Additional characters allowed in tag names"),
    )
    for flag, dest, metavar, help_text in string_flags:
        parser.add_argument(
            flag, type=str, default=None, dest=dest, metavar=metavar,
            help=help_text,
        )

    # Output flags.
    parser.add_argument(
        "--json", action="store_true", dest="json_output",
        help="Output items as JSON",
    )
    parser.add_argument(
        "-v", "--verbose", action="count", default=0,
        help="Increase verbosity: -v adds body, -vv adds raw_text",
    )

    args = parser.parse_args()
    config = _build_config(args)

    if not args.json_output:
        # Text mode: a summary line per file followed by its items.
        for path in args.files:
            result = parse_file(path, config)
            print(f"\n{path}: {len(result.items)} items")
            for item in result.items:
                _print_item(item, args.verbose)
        return

    # JSON mode: items from all files are emitted as a single array.
    all_items = []
    for path in args.files:
        parsed = parse_file(path, config)
        all_items += [_item_to_dict(item, args.verbose) for item in parsed.items]
    print(json.dumps(all_items, cls=_ItemEncoder, indent=2,
                     ensure_ascii=False))
291
+
292
+
293
+ if __name__ == "__main__":
294
+ main()
@@ -0,0 +1,108 @@
1
+ """Parser configuration — predicate, keywords, exclusion lists."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass, field
5
+ from typing import Callable, Any
6
+
7
+
8
# Default predicate: accepts every heading it is given.  The :ID:
# requirement is a structural invariant checked before the predicate
# runs, so nothing needs to be tested here.
_DEFAULT_PREDICATE: Callable[[Any], bool] = lambda _heading: True


@dataclass(frozen=True)
class Config:
    """Configuration for org-dex-parse.

    Callers build one of these with the TODO keywords, tag rules and
    exclusion lists of their org-mode setup.

    :arg item_predicate: Selects which headings (that already carry
        ``:ID:``) count as items.  Three forms are accepted:
        - ``Callable[[Any], bool]`` — a Python function (backward compat)
        - ``list`` — a JSON-like s-expression compiled via the evaluator
          (e.g. ``["property", "Type"]``)
        - ``None`` — the default predicate (any heading with ``:ID:``).
        Once ``__post_init__`` has run the field always holds a callable.
    :arg todos: Active (unfinished) TODO keywords.
    :arg dones: Terminal (finished) TODO keywords.
    :arg tags_exclude_from_inheritance: Tags that do not propagate to
        children (corresponds to ``org-tags-exclude-from-inheritance``).
    :arg exclude_drawers: Drawer names left out of the body text.
        Case-insensitive — lowercased in ``__post_init__``.
    :arg exclude_blocks: Block names left out of the body text.
        Case-insensitive — lowercased in ``__post_init__``.
    :arg exclude_properties: Property names omitted from the properties
        tuple.  Case-insensitive — lowercased in ``__post_init__``.
    :arg created_property: Name of the org property holding the creation
        date (e.g. ``"CREATED"``).  The parser reads it on each item to
        fill ``Item.created``.  Case-insensitive — uppercased in
        ``__post_init__`` (org-mode convention for property names).
        Default: ``"CREATED"``.  This property is automatically excluded
        from ``Item.properties`` (like ``ID`` and ``ARCHIVE_TIME``).
    :arg extra_tag_chars: Extra characters permitted in org-mode tag
        names beyond the default ``[a-zA-Z0-9_@#%]``.  The parser uses
        this to build a monkey-patch regex for orgparse (applied in S04).
        Default: ``""`` (no extra characters).
    """

    item_predicate: Callable[[Any], bool] = field(default=_DEFAULT_PREDICATE)
    todos: tuple[str, ...] = ()
    dones: tuple[str, ...] = ()
    tags_exclude_from_inheritance: frozenset[str] = frozenset()
    exclude_drawers: frozenset[str] = frozenset()
    exclude_blocks: frozenset[str] = frozenset()
    exclude_properties: frozenset[str] = frozenset()
    created_property: str = "CREATED"
    extra_tag_chars: str = ""

    def __post_init__(self) -> None:
        """Normalize field values after construction.

        - ``item_predicate``: a list or None is compiled to a callable
          by the evaluator, a callable is kept as is, and any other
          value raises ValueError.
        - The three exclusion sets are lowercased.
        - ``created_property`` is uppercased.

        The dataclass is frozen, so assignments go through
        ``object.__setattr__``.
        """
        assign = object.__setattr__

        # -- Predicate --------------------------------------------------------
        predicate = self.item_predicate
        if predicate is None or isinstance(predicate, list):
            from .evaluator import compile_predicate
            assign(self, "item_predicate", compile_predicate(predicate))
        elif not callable(predicate):
            raise ValueError(
                f"item_predicate must be callable, list, or None,"
                f" got {type(predicate).__name__}: {predicate!r}"
            )

        # -- Exclusion sets ---------------------------------------------------
        for name in ("exclude_drawers", "exclude_blocks", "exclude_properties"):
            lowered = frozenset(entry.lower() for entry in getattr(self, name))
            assign(self, name, lowered)

        # -- Creation-date property -------------------------------------------
        assign(self, "created_property", self.created_property.upper())
@@ -0,0 +1,116 @@
1
+ """S-expression predicate compiler for item predicates.
2
+
3
+ Compiles a JSON-like s-expression (Python list) into a callable predicate
4
+ ``(node) -> bool``. The expression format mirrors org-ql, serialized as
5
+ JSON arrays for cross-process transport (Elisp -> JSON-RPC -> Python).
6
+
7
+ Example::
8
+
9
+ >>> pred = compile_predicate(["and", ["property", "Type"],
10
+ ... ["not", ["property", "ARCHIVE_TIME"]]])
11
+ >>> pred(some_orgparse_node)
12
+ True
13
+
14
+ Supported operators (extensible via ``_OPERATORS`` dispatch table):
15
+
16
+ - ``["property", "Name"]`` — ``node.get_property("Name") is not None``
17
+ - ``["not", expr]`` — negation
18
+ - ``["and", expr, ...]`` — conjunction (n-ary, short-circuits)
19
+ - ``["or", expr, ...]`` — disjunction (n-ary, short-circuits)
20
+ - ``None`` — default predicate (always True)
21
+ """
22
+ from __future__ import annotations
23
+
24
+ from typing import Any, Callable
25
+
26
+
27
def compile_predicate(
    expr: list | None,
) -> Callable[[Any], bool]:
    """Compile a JSON-like s-expression into a predicate callable.

    :arg expr: A list of the form ``[operator, *args]`` or None.  None
        returns the default predicate (always True).
    :returns: A callable ``(node) -> bool``.
    :raises ValueError: When *expr* is neither a list nor None, is an
        empty list, has a non-string or unknown operator, or when the
        operator's handler rejects its arguments.
    """
    if expr is None:
        return _DEFAULT_PREDICATE

    if not isinstance(expr, list):
        raise ValueError(
            f"expected list or None, got {type(expr).__name__}: {expr!r}"
        )

    if not expr:
        raise ValueError("empty expression — expected [operator, ...args]")

    operator, *args = expr

    # Check the operator type before touching the dispatch table: an
    # unhashable head (e.g. a nested list, as in ``[["and"]]``) would
    # make the dict lookup raise TypeError instead of the documented
    # ValueError.
    if not isinstance(operator, str):
        raise ValueError(
            f"operator must be a string,"
            f" got {type(operator).__name__}: {operator!r}"
        )

    handler = _OPERATORS.get(operator)
    if handler is None:
        raise ValueError(
            f"unknown operator {operator!r}"
            f" — supported: {', '.join(sorted(_OPERATORS))}"
        )

    # The operator name is passed along so handlers can use it in their
    # error messages.
    return handler(operator, args)
58
+
59
+
60
+ # -- Default predicate -------------------------------------------------------
61
+
62
# Returned by compile_predicate() when the expression is None; accepts
# every node it is given.
_DEFAULT_PREDICATE: Callable[[Any], bool] = lambda _node: True
63
+
64
+
65
+ # -- Operator handlers -------------------------------------------------------
66
+ # Each handler takes (operator_name, args) and returns a callable.
67
+ # operator_name is passed for error messages.
68
+
69
+
70
def _compile_property(op: str, args: list) -> Callable[[Any], bool]:
    """["property", "Name"] → node.get_property("Name") is not None.

    :arg op: Operator name, used only in error messages.
    :arg args: Operator arguments; must be exactly one property name.
    :returns: A predicate that is True when the node has the property.
    :raises ValueError: On wrong arity or when the property name is not
        a string.
    """
    if len(args) != 1:
        raise ValueError(
            f"{op!r} expects exactly 1 argument (property name),"
            f" got {len(args)}: {args!r}"
        )
    prop_name = args[0]
    # Reject a malformed name at compile time so the mistake is reported
    # once, up front, instead of yielding a predicate that misbehaves on
    # every node.
    if not isinstance(prop_name, str):
        raise ValueError(
            f"{op!r} expects a string property name,"
            f" got {type(prop_name).__name__}: {prop_name!r}"
        )
    return lambda node: node.get_property(prop_name) is not None
79
+
80
+
81
def _compile_not(op: str, args: list) -> Callable[[Any], bool]:
    """["not", expr] → logical negation of the compiled sub-expression.

    Raises ValueError unless exactly one sub-expression is given.
    """
    if len(args) != 1:
        raise ValueError(
            f"{op!r} expects exactly 1 argument (sub-expression),"
            f" got {len(args)}: {args!r}"
        )

    (sub_expr,) = args
    operand = compile_predicate(sub_expr)

    def negated(node: Any) -> bool:
        return not operand(node)

    return negated
90
+
91
+
92
def _compile_and(op: str, args: list) -> Callable[[Any], bool]:
    """["and", expr, ...] → True only when every sub-expression holds.

    Sub-expressions are compiled once, up front.  Evaluation stops at
    the first one that fails.  Raises ValueError when no operand is
    given.
    """
    if not args:
        raise ValueError(f"{op!r} expects at least 1 operand, got 0")

    operands = [compile_predicate(sub_expr) for sub_expr in args]

    def conjunction(node: Any) -> bool:
        for operand in operands:
            if not operand(node):
                return False
        return True

    return conjunction
98
+
99
+
100
def _compile_or(op: str, args: list) -> Callable[[Any], bool]:
    """["or", expr, ...] → True as soon as any sub-expression holds.

    Sub-expressions are compiled once, up front.  Evaluation stops at
    the first one that succeeds.  Raises ValueError when no operand is
    given.
    """
    if not args:
        raise ValueError(f"{op!r} expects at least 1 operand, got 0")

    operands = [compile_predicate(sub_expr) for sub_expr in args]

    def disjunction(node: Any) -> bool:
        for operand in operands:
            if operand(node):
                return True
        return False

    return disjunction
106
+
107
+
108
+ # -- Dispatch table -----------------------------------------------------------
109
+ # Adding a new operator: one entry here + one handler above.
110
+
111
# Maps an operator name (the first element of an s-expression) to its
# handler.  compile_predicate() looks the operator up here and calls the
# handler as handler(operator_name, args); the handler returns the
# compiled predicate.
_OPERATORS: dict[str, Callable[[str, list], Callable[[Any], bool]]] = {
    "property": _compile_property,
    "not": _compile_not,
    "and": _compile_and,
    "or": _compile_or,
}