ledgercore 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ledgercore/__init__.py +109 -0
- ledgercore/atomic.py +124 -0
- ledgercore/errors.py +40 -0
- ledgercore/frontmatter.py +151 -0
- ledgercore/ids.py +208 -0
- ledgercore/io.py +64 -0
- ledgercore/jsonio.py +110 -0
- ledgercore/paths.py +167 -0
- ledgercore/py.typed +0 -0
- ledgercore/refs.py +289 -0
- ledgercore/time.py +12 -0
- ledgercore/yamlio.py +85 -0
- ledgercore-0.1.0.dist-info/METADATA +310 -0
- ledgercore-0.1.0.dist-info/RECORD +16 -0
- ledgercore-0.1.0.dist-info/WHEEL +4 -0
- ledgercore-0.1.0.dist-info/licenses/LICENSE +201 -0
ledgercore/__init__.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""ledgercore: generic ledger and storage primitives."""
|
|
2
|
+
|
|
3
|
+
from ledgercore.atomic import atomic_create_text, atomic_write_text
|
|
4
|
+
from ledgercore.errors import (
|
|
5
|
+
AtomicWriteError,
|
|
6
|
+
FrontMatterError,
|
|
7
|
+
IdFormatError,
|
|
8
|
+
JsonStoreError,
|
|
9
|
+
LedgerCoreError,
|
|
10
|
+
PathValidationError,
|
|
11
|
+
StorageError,
|
|
12
|
+
YamlStoreError,
|
|
13
|
+
)
|
|
14
|
+
from ledgercore.frontmatter import (
|
|
15
|
+
iter_markdown_files,
|
|
16
|
+
iter_source_files,
|
|
17
|
+
read_front_matter_document,
|
|
18
|
+
write_front_matter_document,
|
|
19
|
+
)
|
|
20
|
+
from ledgercore.ids import (
|
|
21
|
+
LedgerIdFormat,
|
|
22
|
+
LedgerIdParts,
|
|
23
|
+
NumericIdFormat,
|
|
24
|
+
next_prefixed_id,
|
|
25
|
+
parse_prefixed_number,
|
|
26
|
+
slugify_ref,
|
|
27
|
+
)
|
|
28
|
+
from ledgercore.refs import (
|
|
29
|
+
LedgerResourceRef,
|
|
30
|
+
RefStyle,
|
|
31
|
+
is_resource_ref,
|
|
32
|
+
normalize_kind,
|
|
33
|
+
normalize_ref_token,
|
|
34
|
+
parse_global_ref,
|
|
35
|
+
parse_local_ref,
|
|
36
|
+
parse_resource_ref,
|
|
37
|
+
)
|
|
38
|
+
from ledgercore.io import (
|
|
39
|
+
content_hash,
|
|
40
|
+
ensure_dir,
|
|
41
|
+
merge_text,
|
|
42
|
+
normalize_newlines,
|
|
43
|
+
read_text,
|
|
44
|
+
summarize_text,
|
|
45
|
+
write_text,
|
|
46
|
+
)
|
|
47
|
+
from ledgercore.jsonio import load_json_array, load_json_object, write_json
|
|
48
|
+
from ledgercore.paths import (
|
|
49
|
+
ConfigLocator,
|
|
50
|
+
find_config_upwards,
|
|
51
|
+
is_relative_to,
|
|
52
|
+
locate_config,
|
|
53
|
+
resolve_config_relative_path,
|
|
54
|
+
resolve_relative_child,
|
|
55
|
+
validate_relative_posix_path,
|
|
56
|
+
)
|
|
57
|
+
from ledgercore.time import utc_now_iso
|
|
58
|
+
from ledgercore.yamlio import load_yaml_object, write_yaml
|
|
59
|
+
|
|
60
|
+
__all__ = [
|
|
61
|
+
"atomic_create_text",
|
|
62
|
+
"atomic_write_text",
|
|
63
|
+
"AtomicWriteError",
|
|
64
|
+
"FrontMatterError",
|
|
65
|
+
"IdFormatError",
|
|
66
|
+
"JsonStoreError",
|
|
67
|
+
"LedgerCoreError",
|
|
68
|
+
"PathValidationError",
|
|
69
|
+
"StorageError",
|
|
70
|
+
"YamlStoreError",
|
|
71
|
+
"iter_markdown_files",
|
|
72
|
+
"iter_source_files",
|
|
73
|
+
"read_front_matter_document",
|
|
74
|
+
"write_front_matter_document",
|
|
75
|
+
"LedgerIdFormat",
|
|
76
|
+
"LedgerIdParts",
|
|
77
|
+
"NumericIdFormat",
|
|
78
|
+
"next_prefixed_id",
|
|
79
|
+
"parse_prefixed_number",
|
|
80
|
+
"slugify_ref",
|
|
81
|
+
"LedgerResourceRef",
|
|
82
|
+
"RefStyle",
|
|
83
|
+
"is_resource_ref",
|
|
84
|
+
"normalize_kind",
|
|
85
|
+
"normalize_ref_token",
|
|
86
|
+
"parse_global_ref",
|
|
87
|
+
"parse_local_ref",
|
|
88
|
+
"parse_resource_ref",
|
|
89
|
+
"content_hash",
|
|
90
|
+
"ensure_dir",
|
|
91
|
+
"merge_text",
|
|
92
|
+
"normalize_newlines",
|
|
93
|
+
"read_text",
|
|
94
|
+
"summarize_text",
|
|
95
|
+
"write_text",
|
|
96
|
+
"load_json_array",
|
|
97
|
+
"load_json_object",
|
|
98
|
+
"write_json",
|
|
99
|
+
"ConfigLocator",
|
|
100
|
+
"find_config_upwards",
|
|
101
|
+
"is_relative_to",
|
|
102
|
+
"locate_config",
|
|
103
|
+
"resolve_config_relative_path",
|
|
104
|
+
"resolve_relative_child",
|
|
105
|
+
"validate_relative_posix_path",
|
|
106
|
+
"utc_now_iso",
|
|
107
|
+
"load_yaml_object",
|
|
108
|
+
"write_yaml",
|
|
109
|
+
]
|
ledgercore/atomic.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Atomic file write utilities for ledgercore."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from ledgercore.errors import AtomicWriteError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _should_fsync(fast_io_env_var: str | None) -> bool:
|
|
13
|
+
if fast_io_env_var is None:
|
|
14
|
+
return True
|
|
15
|
+
return not os.environ.get(fast_io_env_var, "")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _fsync_dir(path: Path) -> None:
|
|
19
|
+
try:
|
|
20
|
+
fd = os.open(path, os.O_RDONLY)
|
|
21
|
+
except OSError:
|
|
22
|
+
return
|
|
23
|
+
try:
|
|
24
|
+
os.fsync(fd)
|
|
25
|
+
finally:
|
|
26
|
+
os.close(fd)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _cleanup_tmp(tmp_fd: int | None, tmp_path: Path | None) -> None:
|
|
30
|
+
if tmp_fd is not None:
|
|
31
|
+
try:
|
|
32
|
+
os.close(tmp_fd)
|
|
33
|
+
except OSError:
|
|
34
|
+
pass
|
|
35
|
+
if tmp_path is not None:
|
|
36
|
+
try:
|
|
37
|
+
tmp_path.unlink()
|
|
38
|
+
except OSError:
|
|
39
|
+
pass
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def atomic_write_text(
    path: Path,
    contents: str,
    *,
    normalize: bool = False,
    fsync: bool = True,
    fast_io_env_var: str | None = None,
) -> None:
    """Write text to a file atomically using a temp file and os.replace.

    The text is encoded as UTF-8, written to a temporary file created in the
    target's directory, and then moved over *path* with ``os.replace``.

    Args:
        path: Destination file; missing parent directories are created.
        contents: Text to write.
        normalize: When True, convert CRLF and lone CR to LF before writing.
        fsync: When True (and not disabled through *fast_io_env_var*), fsync
            the temp file before the replace and the parent directory after.
        fast_io_env_var: Optional name of an environment variable whose
            non-empty value disables fsync.

    Raises:
        AtomicWriteError: If creating, writing, syncing or replacing fails
            with an OSError. The temp file is removed on a best-effort basis.
    """
    if normalize:
        contents = contents.replace("\r\n", "\n").replace("\r", "\n")

    # Encode before creating the temp file: an encoding failure is not an
    # OSError, so it would otherwise escape the handler below and leave the
    # temp file behind.
    payload = contents.encode("utf-8")

    path.parent.mkdir(parents=True, exist_ok=True)
    do_fsync = fsync and _should_fsync(fast_io_env_var)

    tmp_fd: int | None = None
    tmp_path: Path | None = None
    try:
        # NOTE(review): mkstemp creates the file readable/writable by the
        # owner only, so the replaced target ends up with that mode - confirm
        # this is acceptable for callers.
        tmp_fd, tmp_name = tempfile.mkstemp(
            dir=str(path.parent),
            prefix=".ledgercore-tmp-",
        )
        tmp_path = Path(tmp_name)
        stream = os.fdopen(tmp_fd, "wb")
        # The file object now owns the descriptor. Forget it immediately so
        # that a failure inside the ``with`` block cannot close it twice.
        tmp_fd = None
        with stream:
            stream.write(payload)
            if do_fsync:
                stream.flush()
                os.fsync(stream.fileno())
        os.replace(tmp_name, str(path))
        # The temp name no longer exists after a successful replace.
        tmp_path = None
        if do_fsync:
            _fsync_dir(path.parent)
    except OSError as exc:
        _cleanup_tmp(tmp_fd, tmp_path)
        raise AtomicWriteError(f"Atomic write failed for {path}") from exc
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def atomic_create_text(
    path: Path,
    contents: str,
    *,
    fsync: bool = True,
    fast_io_env_var: str | None = None,
) -> None:
    """Create a new file using O_CREAT|O_EXCL for race safety.

    Args:
        path: File to create; missing parent directories are created.
        contents: Text to write, encoded as UTF-8.
        fsync: When True (and not disabled through *fast_io_env_var*), fsync
            the file after writing and the parent directory after closing.
        fast_io_env_var: Optional name of an environment variable whose
            non-empty value disables fsync.

    Raises:
        AtomicWriteError: If the target already exists (including when a
            concurrent process created it first), or if opening, writing,
            syncing or closing fails. After a failed write the partially
            written file is removed on a best-effort basis.
    """
    # Encode before creating the file so an encoding error cannot leave an
    # empty file and an open descriptor behind.
    encoded = contents.encode("utf-8")

    path.parent.mkdir(parents=True, exist_ok=True)
    do_fsync = fsync and _should_fsync(fast_io_env_var)

    try:
        fd = os.open(str(path), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
    except FileExistsError as exc:
        raise AtomicWriteError(f"Target already exists: {path}") from exc
    except OSError as exc:
        raise AtomicWriteError(f"Atomic create failed for {path}") from exc

    try:
        # os.write may write fewer bytes than requested; keep going until the
        # whole payload is on its way to disk.
        remaining = memoryview(encoded)
        while remaining:
            written = os.write(fd, remaining)
            remaining = remaining[written:]
        if do_fsync:
            os.fsync(fd)
    except OSError as exc:
        try:
            os.close(fd)
        except OSError:
            pass
        try:
            path.unlink()
        except OSError:
            pass
        raise AtomicWriteError(f"Atomic create failed for {path}") from exc

    try:
        os.close(fd)
    except OSError as exc:
        raise AtomicWriteError(f"Atomic create failed for {path}") from exc

    if do_fsync:
        try:
            _fsync_dir(path.parent)
        except OSError as exc:
            # Report directory-sync failures the same way atomic_write_text
            # does instead of leaking a raw OSError.
            raise AtomicWriteError(f"Atomic create failed for {path}") from exc
|
ledgercore/errors.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Generic error hierarchy for ledgercore."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LedgerCoreError(Exception):
    """Root of the ledgercore exception hierarchy.

    Every error exposes a string ``code``. The class-level default can be
    overridden for a single instance through the ``code`` keyword argument.
    """

    code: str = "LEDGERCORE_ERROR"

    def __init__(self, message: str, *, code: str | None = None) -> None:
        super().__init__(message)
        if code is None:
            # Keep the class-level default.
            return
        self.code = code


class StorageError(LedgerCoreError):
    """Common parent of all storage-related failures."""


class AtomicWriteError(StorageError):
    """An atomic write operation did not complete."""


class FrontMatterError(StorageError):
    """Front matter could not be parsed or written."""


class JsonStoreError(StorageError):
    """A JSON store operation failed."""


class YamlStoreError(StorageError):
    """A YAML store operation failed."""


class PathValidationError(StorageError):
    """A path did not pass validation."""


class IdFormatError(LedgerCoreError):
    """An ID does not match the expected format."""
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""YAML front matter read/write and file iteration for ledgercore."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from ledgercore.errors import FrontMatterError
|
|
12
|
+
|
|
13
|
+
BodyMode = Literal["preserve", "ensure-single-final-newline"]
|
|
14
|
+
|
|
15
|
+
_FRONT_MATTER_DELIM = "---"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def read_front_matter_document(path: Path) -> tuple[dict[str, object], str]:
    """Read a YAML front matter document, returning (metadata, body).

    The file must start with a ``---`` line. The front matter ends at the
    first following line that consists of exactly ``---``; everything after
    that line is the body. CRLF and lone CR are converted to LF before
    parsing, so the returned body always uses LF line endings.

    Args:
        path: File to read as UTF-8.

    Returns:
        A ``(metadata, body)`` tuple. ``metadata`` is an empty dict when the
        front matter block is empty or loads as ``None``.

    Raises:
        FrontMatterError: If the file cannot be read, the opening or closing
            delimiter is missing, the YAML is invalid, or the YAML is not a
            mapping.
    """
    try:
        raw = path.read_text(encoding="utf-8")
    except OSError as exc:
        raise FrontMatterError(f"Cannot read {path}: {exc}") from exc

    raw = raw.replace("\r\n", "\n").replace("\r", "\n")

    opening = _FRONT_MATTER_DELIM + "\n"
    if not raw.startswith(opening):
        raise FrontMatterError(
            f"Document must start with '---' followed by a newline: {path}"
        )

    rest = raw[len(opening) :]
    closing_mid = "\n" + _FRONT_MATTER_DELIM + "\n"
    closing_end = "\n" + _FRONT_MATTER_DELIM

    # An immediately closing delimiter (empty YAML block) must be checked
    # first. Otherwise a later '---' line inside the body, such as a Markdown
    # horizontal rule, would be mistaken for the end of the front matter.
    if rest == _FRONT_MATTER_DELIM or rest.startswith(opening):
        yaml_block = ""
        body = rest[len(opening) :]
    else:
        close_at = rest.find(closing_mid)
        if close_at >= 0:
            yaml_block = rest[:close_at]
            body = rest[close_at + len(closing_mid) :]
        elif rest.endswith(closing_end):
            # Closing delimiter is the last line and has no trailing newline.
            yaml_block = rest[: len(rest) - len(closing_end)]
            body = ""
        else:
            raise FrontMatterError(f"No closing '---' delimiter found in {path}")

    if not yaml_block.strip():
        return {}, body

    try:
        loaded = yaml.safe_load(yaml_block)
    except yaml.YAMLError as exc:
        raise FrontMatterError(f"Invalid YAML in {path}: {exc}") from exc

    if loaded is None:
        # For example, a block that contains only comments.
        return {}, body
    if not isinstance(loaded, dict):
        raise FrontMatterError(
            f"YAML front matter must be a mapping,"
            f" got {type(loaded).__name__}: {path}"
        )
    return loaded, body
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def write_front_matter_document(
    path: Path,
    metadata: Mapping[str, object],
    body: str,
    *,
    body_mode: BodyMode = "preserve",
    atomic: bool = True,
) -> None:
    """Write a YAML front matter document.

    The output is ``---``, the metadata dumped with ``yaml.safe_dump``
    (insertion order kept, unicode allowed), ``---``, then the body.

    Args:
        path: Destination file; missing parent directories are created.
        metadata: Mapping dumped as the front matter block.
        body: Text placed after the closing delimiter.
        body_mode: ``"preserve"`` writes *body* unchanged.
            ``"ensure-single-final-newline"`` appends a newline to a
            non-empty body that lacks one, and collapses two or more
            trailing newlines into one.
        atomic: When True, write through ``atomic_write_text``; otherwise
            write the file directly.
    """
    yaml_block = yaml.safe_dump(
        dict(metadata),
        allow_unicode=True,
        sort_keys=False,
    )
    if not yaml_block.endswith("\n"):
        yaml_block += "\n"

    if body_mode == "ensure-single-final-newline":
        if body and not body.endswith("\n"):
            body = body + "\n"
        elif body.endswith("\n\n"):
            body = body.rstrip("\n") + "\n"

    content = f"{_FRONT_MATTER_DELIM}\n{yaml_block}{_FRONT_MATTER_DELIM}\n{body}"

    if atomic:
        from ledgercore.atomic import atomic_write_text

        atomic_write_text(path, content)
    else:
        path.parent.mkdir(parents=True, exist_ok=True)
        # Write bytes, not text: text mode translates "\n" to the platform
        # line separator, which would alter a "preserve" body and make this
        # path differ from the atomic one.
        path.write_bytes(content.encode("utf-8"))
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def iter_source_files(
    directory: Path,
    extensions: tuple[str, ...],
    *,
    recursive: bool = True,
) -> list[Path]:
    """Return the files under *directory* with one of the given suffixes.

    Suffix comparison is case-insensitive and uses ``Path.suffix``. The
    result is sorted. A path that is not a directory yields an empty list.
    """
    if not directory.is_dir():
        return []

    wanted = {suffix.lower() for suffix in extensions}
    candidates = directory.rglob("*") if recursive else directory.iterdir()

    matches: list[Path] = []
    for entry in candidates:
        if entry.is_file() and entry.suffix.lower() in wanted:
            matches.append(entry)
    matches.sort()
    return matches
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def iter_markdown_files(
    directory: Path,
    *,
    recursive: bool = False,
) -> list[Path]:
    """Return the ``.md`` files in *directory*, sorted.

    Delegates to ``iter_source_files``. Unlike that function, the search is
    non-recursive by default.
    """
    markdown_suffixes = (".md",)
    return iter_source_files(directory, markdown_suffixes, recursive=recursive)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# Compatibility aliases
|
|
150
|
+
read_markdown_front_matter = read_front_matter_document
|
|
151
|
+
write_markdown_front_matter = write_front_matter_document
|
ledgercore/ids.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Prefixed numeric ID formatting and slug utilities for ledgercore."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class LedgerIdParts:
    """Parsed components of a ledger ID."""

    # Prefix of the ID.
    prefix: str
    # Numeric portion of the ID as an integer.
    number: int
    # Segment portion of the ID, or None when the ID has no segment.
    segment: str | None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class LedgerIdFormat:
    """Prefixed, zero-padded numeric ID format with an optional segment.

    Example IDs:
        task-0001        (prefix="task")
        plan-0001        (prefix="plan")
        al_0013          (prefix="al", separator="_")
        al_content_0013  (prefix="al", separator="_", segment_separator="_")
    """

    prefix: str
    separator: str = "-"
    width: int = 4
    segment_separator: str | None = None
    segment_required: bool = False

    def format(self, number: int, *, segment: str | None = None) -> str:
        """Render *number*, and *segment* when given, as an ID string."""
        _validate_number(number)
        digits = f"{number:0{self.width}d}"
        if segment is None:
            return f"{self.prefix}{self.separator}{digits}"
        # Fall back to the main separator when no segment separator is set.
        joiner = self.segment_separator or self.separator
        return f"{self.prefix}{joiner}{segment}{self.separator}{digits}"

    def parse(self, value: str) -> int:
        """Return only the numeric part of an ID string."""
        parts = self.parse_parts(value)
        return parts.number

    def parse_parts(self, value: str) -> LedgerIdParts:
        """Split an ID string into prefix, number and optional segment.

        Raises ValueError when *value* matches neither the plain form nor,
        if a segment separator is configured, the segmented form.
        """
        # The plain "prefix + separator + digits" form takes priority.
        plain = self._build_simple_pattern().fullmatch(value)
        if plain is not None:
            return LedgerIdParts(
                prefix=self.prefix,
                number=int(plain.group("number")),
                segment=None,
            )

        if self.segment_separator is not None:
            segmented = self._build_segmented_pattern().fullmatch(value)
            if segmented is not None:
                return LedgerIdParts(
                    prefix=self.prefix,
                    number=int(segmented.group("number")),
                    segment=segmented.group("segment"),
                )

        raise ValueError(
            f"ID '{value}' does not match format "
            f"prefix='{self.prefix}' separator='{self.separator}'"
        )

    def next(
        self,
        existing_ids: Iterable[str],
        *,
        segment: str | None = None,
    ) -> str:
        """Return the ID after the highest matching one in *existing_ids*.

        IDs that do not parse, or whose segment differs from *segment*
        (including segmented IDs when *segment* is None), are ignored.
        """
        highest = 0
        for candidate in existing_ids:
            try:
                parsed = self.parse_parts(candidate)
            except ValueError:
                continue
            if parsed.segment != segment:
                continue
            highest = max(highest, parsed.number)
        return self.format(highest + 1, segment=segment)

    def is_valid(self, value: object) -> bool:
        """Report whether *value* is a string ID accepted by this format."""
        if not isinstance(value, str):
            return False
        try:
            parsed = self.parse_parts(value)
        except ValueError:
            return False
        # The only parseable ID that is rejected is a segment-less one when
        # a segment is mandatory.
        return not (parsed.segment is None and self.segment_required)

    def filename(self, value: str, *, extension: str) -> str:
        """Append *extension* to an ID to form a filename."""
        return f"{value}{extension}"

    def _build_simple_pattern(self) -> re.Pattern[str]:
        head = re.escape(self.prefix)
        sep = re.escape(self.separator)
        return re.compile(rf"^{head}{sep}(?P<number>\d+)$")

    def _build_segmented_pattern(self) -> re.Pattern[str]:
        assert self.segment_separator is not None
        head = re.escape(self.prefix)
        seg_sep = re.escape(self.segment_separator)
        sep = re.escape(self.separator)
        return re.compile(
            rf"^{head}"
            rf"{seg_sep}(?P<segment>[a-zA-Z0-9_-]+)"
            rf"{sep}(?P<number>\d+)$"
        )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass(frozen=True)
class NumericIdFormat:
    """Configurable prefixed numeric ID format (legacy compatibility).

    IDs look like ``<prefix><separator><zero-padded number>``, for example
    ``task-0001`` with the defaults.
    """

    prefix: str
    separator: str = "-"
    width: int = 4

    def format(self, number: int) -> str:
        """Format a number as a prefixed, zero-padded ID string."""
        return f"{self.prefix}{self.separator}{number:0{self.width}d}"

    def parse(self, value: str) -> int:
        """Parse a prefixed ID string and return the numeric part.

        Raises:
            ValueError: If *value* does not start with the expected prefix
                and separator, or the remainder is not a decimal number.
        """
        expected_prefix = f"{self.prefix}{self.separator}"
        if not value.startswith(expected_prefix):
            raise ValueError(f"ID '{value}' does not match prefix '{expected_prefix}'")
        num_str = value[len(expected_prefix) :]
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() cannot convert.
        if not num_str.isdecimal():
            raise ValueError(f"Numeric part of ID '{value}' is not a valid number")
        return int(num_str)

    def next(self, existing_ids: Iterable[str]) -> str:
        """Return the next ID not present in existing_ids.

        IDs with a different prefix or a non-decimal numeric part are
        skipped. With no usable IDs the result is number 1.
        """
        max_num = 0
        expected_prefix = f"{self.prefix}{self.separator}"
        for eid in existing_ids:
            if not eid.startswith(expected_prefix):
                continue
            num_str = eid[len(expected_prefix) :]
            # Same check as parse(), so a malformed ID is skipped here
            # instead of making int() raise.
            if not num_str.isdecimal():
                continue
            max_num = max(max_num, int(num_str))
        return self.format(max_num + 1)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _validate_number(number: int) -> None:
|
|
169
|
+
"""Validate that number is a positive integer and not a boolean."""
|
|
170
|
+
if isinstance(number, bool):
|
|
171
|
+
raise ValueError("Number must not be a boolean")
|
|
172
|
+
if number <= 0:
|
|
173
|
+
raise ValueError(f"Number must be positive, got {number}")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def parse_prefixed_number(
    value: str,
    *,
    prefix: str,
    separator: str = "-",
    width: int = 4,
) -> int:
    """Return the numeric part of a prefixed ID string.

    Convenience wrapper that builds a ``NumericIdFormat`` from the keyword
    arguments and calls its ``parse`` method.
    """
    return NumericIdFormat(
        prefix=prefix,
        separator=separator,
        width=width,
    ).parse(value)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def next_prefixed_id(
    prefix: str,
    existing_ids: Iterable[str],
    *,
    separator: str = "-",
    width: int = 4,
) -> str:
    """Return the next prefixed ID after those in *existing_ids*.

    Convenience wrapper that builds a ``NumericIdFormat`` from the arguments
    and calls its ``next`` method.
    """
    return NumericIdFormat(
        prefix=prefix,
        separator=separator,
        width=width,
    ).next(existing_ids)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
_slug_non_alpha = re.compile(r"[^a-z0-9]+")


def slugify_ref(value: str, *, empty: str = "item") -> str:
    """Turn *value* into a lowercase, dash-separated slug.

    Surrounding whitespace is stripped, the text is lowercased, every run of
    characters outside ``a-z0-9`` becomes a single dash, and leading/trailing
    dashes are removed. If nothing is left, *empty* is returned.
    """
    lowered = value.strip().lower()
    dashed = _slug_non_alpha.sub("-", lowered)
    trimmed = dashed.strip("-")
    return trimmed if trimmed else empty
|
ledgercore/io.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Text I/O utilities for ledgercore."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def normalize_newlines(text: str) -> str:
    """Return *text* with every CRLF and lone CR replaced by LF."""
    without_crlf = text.replace("\r\n", "\n")
    # Any CR still present is a lone one; splitting on it and re-joining
    # with LF replaces each occurrence.
    return "\n".join(without_crlf.split("\r"))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def ensure_dir(path: Path) -> None:
    """Create the directory *path*, along with any missing parents.

    An already existing directory is not an error.
    """
    path.mkdir(parents=True, exist_ok=True)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def read_text(path: Path, *, normalize: bool = True) -> str:
    """Read a file as UTF-8 text.

    With ``normalize=True`` the result is passed through
    ``normalize_newlines``. With ``normalize=False`` the raw bytes are
    decoded, so line endings are returned exactly as stored.
    """
    if not normalize:
        return path.read_bytes().decode("utf-8")
    decoded = path.read_text(encoding="utf-8")
    return normalize_newlines(decoded)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def write_text(path: Path, text: str, *, normalize: bool = True) -> None:
    """Write UTF-8 text to a file, creating parent directories.

    Args:
        path: Destination file.
        text: Text to write.
        normalize: When True, convert CRLF and lone CR to LF before writing.
    """
    if normalize:
        text = normalize_newlines(text)
    ensure_dir(path.parent)
    # Write bytes, not text: text mode translates "\n" to the platform line
    # separator, which would undo the normalization and alter the line
    # endings of un-normalized text.
    path.write_bytes(text.encode("utf-8"))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def content_hash(text: str) -> str:
    """Return the SHA-256 hex digest of *text* encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def summarize_text(text: str, max_chars: int = 80) -> str:
    """Collapse whitespace runs to single spaces and shorten long text.

    Text of at most *max_chars* characters (after collapsing) is returned
    as is. Longer text is cut at *max_chars*, moved back to the last space
    when that space lies past the halfway point, and suffixed with "...".
    """
    collapsed = re.sub(r"\s+", " ", text).strip()
    if len(collapsed) <= max_chars:
        return collapsed

    head = collapsed[:max_chars]
    last_space = head.rfind(" ")
    if last_space > max_chars // 2:
        # Prefer cutting on a word boundary when it does not lose too much.
        head = head[:last_space]
    return head.rstrip() + "..."
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def merge_text(current: str, incoming: str, *, prepend: bool = False) -> str:
    """Join two text blocks with exactly one blank line between them.

    Both blocks are stripped first. If either is empty after stripping, the
    other is returned alone. With ``prepend=True`` *incoming* goes first.
    """
    existing = current.strip()
    addition = incoming.strip()

    if not existing:
        return addition
    if not addition:
        return existing

    first, second = (addition, existing) if prepend else (existing, addition)
    return f"{first}\n\n{second}"
|