ledgerkit 1.0.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ledgerkit/__init__.py +51 -0
- ledgerkit/__main__.py +12 -0
- ledgerkit/_pandas_compat.py +21 -0
- ledgerkit/checks.py +617 -0
- ledgerkit/cli.py +448 -0
- ledgerkit/commodity_style.py +274 -0
- ledgerkit/editor_model.py +193 -0
- ledgerkit/loader.py +311 -0
- ledgerkit/models.py +459 -0
- ledgerkit/parser.py +1547 -0
- ledgerkit/reports.py +573 -0
- ledgerkit/writer.py +97 -0
- ledgerkit-1.0.0.dev1.dist-info/METADATA +203 -0
- ledgerkit-1.0.0.dev1.dist-info/RECORD +18 -0
- ledgerkit-1.0.0.dev1.dist-info/WHEEL +5 -0
- ledgerkit-1.0.0.dev1.dist-info/entry_points.txt +2 -0
- ledgerkit-1.0.0.dev1.dist-info/licenses/LICENSE +21 -0
- ledgerkit-1.0.0.dev1.dist-info/top_level.txt +1 -0
ledgerkit/loader.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""File loader for ledgerkit.
|
|
2
|
+
|
|
3
|
+
Handles file I/O, include directive expansion, path resolution, glob
|
|
4
|
+
matching, and circular include detection. Calls parse_string() for
|
|
5
|
+
text-to-Journal conversion.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import glob as _glob_module
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from ledgerkit.models import Journal
|
|
16
|
+
from ledgerkit.parser import ParseError, parse_string
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# File suffixes (lower-case, with leading dot) that may be loaded or included.
_SUPPORTED_EXTENSIONS: frozenset[str] = frozenset((".journal", ".ledger"))

# hledger format type prefixes that can precede a path (e.g. "timedot:",
# "csv:"). ledgerkit v1 rejects include paths that carry any of them.
_FORMAT_PREFIXES: frozenset[str] = frozenset(
    (
        "journal",
        "ledger",
        "timeclock",
        "timedot",
        "csv",
        "ssv",
        "tsv",
        "rules",
    )
)

# Recognises an hledger `include` directive line and captures its target.
#
# Group (1) is everything after the whitespace that follows the keyword. It
# may hold glob characters, tildes, slashes, or spaces; the caller strips
# surrounding whitespace before using it.
#
# Behaviour on edge cases:
#   - "include" alone does not match: \s+ needs at least one whitespace
#     character after the keyword.
#   - "include" plus exactly one trailing whitespace character does not match
#     either, because (.+) still needs one more character to capture.
#   - "include" plus two or more whitespace characters and nothing else does
#     match, with a whitespace-only group (1); the caller raises ParseError
#     once the capture strips down to an empty string.
#   - Indented lines do not match because ^ anchors at the start of the line.
#   - "included" / "includes" do not match because whitespace must directly
#     follow the exact word "include".
_INCLUDE_LINE = re.compile(r"^include\s+(.+)$")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _validate_extension(
    path: Path,
    *,
    lineno: int | None = None,
    source: Path | None = None,
) -> None:
    """Reject paths whose suffix is not one of the supported extensions.

    The suffix comparison is case-insensitive.

    Args:
        path: File whose suffix is checked.
        lineno: Line number passed through to the raised ParseError.
        source: File containing the reference; when given, it is appended
            to the error message.

    Raises:
        ParseError: if the lower-cased suffix is not in
            ``_SUPPORTED_EXTENSIONS``.
    """
    suffix = path.suffix.lower()
    if suffix in _SUPPORTED_EXTENSIONS:
        return

    accepted = ", ".join(sorted(_SUPPORTED_EXTENSIONS))
    origin = "" if source is None else f" in {source}"
    raise ParseError(
        f"unsupported file format {suffix!r} — ledgerkit accepts: {accepted}{origin}",
        lineno,
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _check_format_prefix(raw: str, lineno: int, source: Path) -> None:
    """Reject include paths that start with an hledger format type prefix.

    The text before the first colon is compared case-insensitively against
    ``_FORMAT_PREFIXES``. A prefix shorter than two characters is never
    flagged, so Windows drive letters such as "C:" pass through.

    Args:
        raw: The stripped path string from the include directive.
        lineno: Line number passed through to the raised ParseError.
        source: File containing the directive (currently unused).

    Raises:
        ParseError: if the text before the first colon is a known format
            prefix (e.g. "timedot:notes.md").
    """
    head, colon, _ = raw.partition(":")
    # No colon at all, or fewer than two characters before it: nothing to flag.
    if not colon or len(head) < 2:
        return

    prefix = head.lower()
    if prefix not in _FORMAT_PREFIXES:
        return

    raise ParseError(
        f"format prefixes not supported in ledgerkit v1 — "
        f"remove the '{prefix}:' prefix from the include path",
        lineno,
    )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _resolve_include_path(raw: str, containing_dir: Path) -> list[Path]:
    """Resolve a raw include path string to a sorted list of absolute Paths.

    Handles tilde expansion, absolute paths, relative paths (relative to
    containing_dir), and glob patterns. Does NOT filter out the calling file.

    Whether the include is a glob is decided from ``raw`` alone. For relative
    patterns the ``containing_dir`` part is escaped before globbing, so glob
    metacharacters in the directory name (e.g. ``books [2024]``) are matched
    literally instead of silently turning a plain include into a pattern
    that matches nothing.

    Args:
        raw: The stripped path string from the include directive.
        containing_dir: Absolute directory of the file containing the include.

    Returns:
        For a non-glob ``raw``: a one-element list holding the resolved path
        (the file is not required to exist). For a glob ``raw``: the sorted
        resolved matches, which is an empty list when nothing matches.
    """
    is_relative = False
    if raw.startswith("~"):
        base = Path(raw).expanduser()
    elif Path(raw).is_absolute():
        base = Path(raw)
    else:
        base = containing_dir / raw
        is_relative = True

    # Only the user-written part decides glob-ness; the directory prefix is
    # not something the journal author wrote as a pattern.
    if not any(c in raw for c in ("*", "?", "[")):
        return [base.resolve()]

    if is_relative:
        literal_dir = Path(_glob_module.escape(str(containing_dir)))
        pattern = str(literal_dir / raw)
    else:
        # Tilde and absolute patterns are entirely user-written; use as-is.
        pattern = str(base)

    matches = _glob_module.glob(pattern, recursive=True)
    return sorted(Path(m).resolve() for m in matches)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _expand_includes(
    file_path: Path,
    visited: set[Path],
    line_map: list[tuple[Path, int]],
) -> str:
    """Recursively read file_path and expand all include directives.

    Appends one (file_path, lineno) entry to line_map for every line that
    appears in the returned text, so callers can map expanded-text line
    numbers back to their originating source file and line.

    The include directive lines themselves are consumed and produce no output
    or line_map entries; they are replaced by the expanded content of the
    referenced file(s).

    The containing file is dropped from glob results only. A non-glob include
    that names the containing file is reported as a circular include.

    Args:
        file_path: Resolved absolute path to the file to expand.
        visited: Set of absolute paths currently in the include chain.
            Mutated (add before recurse, remove after) for cycle detection.
        line_map: Accumulator for source attribution. Caller passes an empty
            list for the root file; recursive calls share the same list.

    Returns:
        Fully expanded text with all include directives substituted inline.

    Raises:
        FileNotFoundError: if file_path or a non-glob included file does not exist.
        ParseError: on circular include, unsupported extension, format prefix,
            a missing include path, or a glob pattern that matches no files.
    """
    text = file_path.read_text(encoding="utf-8")
    output: list[str] = []

    for lineno, line in enumerate(text.splitlines(), 1):
        m = _INCLUDE_LINE.match(line)
        if not m:
            line_map.append((file_path, lineno))
            output.append(line + "\n")
            continue

        raw_target = m.group(1).strip()
        if not raw_target:
            raise ParseError("include: missing file path", lineno)

        _check_format_prefix(raw_target, lineno, file_path)

        is_glob = any(c in raw_target for c in ("*", "?", "["))
        targets = _resolve_include_path(raw_target, file_path.parent)

        if is_glob:
            # A glob never pulls in the file that contains it.
            targets = [t for t in targets if t != file_path]
            if not targets:
                raise ParseError(
                    f"include: no files matched {raw_target!r}",
                    lineno,
                )
        elif not targets:
            # Defensive: the resolver produced nothing for a literal path.
            raise FileNotFoundError(
                f"include: file not found: {raw_target} "
                f"(referenced at {file_path}, line {lineno})"
            )

        for target in targets:
            _validate_extension(target, lineno=lineno, source=file_path)
            # Also catches a literal self-include, because file_path is
            # already in visited while it is being expanded.
            if target in visited:
                raise ParseError(
                    f"circular include detected: {target} is already being "
                    f"processed (referenced at {file_path}, line {lineno})",
                    lineno,
                )
            # Report a missing literal include with its referencing location
            # instead of letting read_text() fail with a bare OS error.
            if not is_glob and not target.exists():
                raise FileNotFoundError(
                    f"include: file not found: {target} "
                    f"(referenced at {file_path}, line {lineno})"
                )
            visited.add(target)
            try:
                output.append(_expand_includes(target, visited, line_map))
            finally:
                # Keep the include chain accurate even when expansion fails.
                visited.remove(target)

    return "".join(output)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def load_journal_stdin() -> Journal:
    """Parse everything available on stdin into a Journal.

    The full stdin contents are handed to ``parse_string`` unchanged; this
    function performs no include expansion of its own. The resulting
    journal's ``source_file`` is then set to ``"(stdin)"``.

    Returns:
        A :class:`~ledgerkit.models.Journal` with ``source_file``
        set to ``"(stdin)"``.

    Raises:
        ParseError: if the stdin content is malformed.
    """
    import sys

    stdin_text = sys.stdin.read()
    parsed = parse_string(stdin_text)
    parsed.source_file = "(stdin)"
    return parsed
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def merge_journals(journals: list[Journal]) -> Journal:
    """Combine several Journal objects into one.

    Every list-valued field (transactions, prices, and the declared
    accounts / commodities / payees / tags) is concatenated in input order.
    ``source_file`` comes from the first journal and ``included_files`` is
    the sum over all inputs.

    Args:
        journals: Non-empty list of Journal objects to merge.

    Returns:
        The sole input object itself when the list has exactly one entry;
        otherwise a new :class:`~ledgerkit.models.Journal` holding the
        combined data.

    Raises:
        ValueError: if ``journals`` is empty.
    """
    if not journals:
        raise ValueError("merge_journals: at least one journal required")

    first = journals[0]
    if len(journals) == 1:
        return first

    def _gather(field: str) -> list:
        # Concatenate one list-valued attribute across all inputs, in order.
        combined: list = []
        for journal in journals:
            combined.extend(getattr(journal, field))
        return combined

    return Journal(
        transactions=_gather("transactions"),
        prices=_gather("prices"),
        declared_accounts=_gather("declared_accounts"),
        declared_commodities=_gather("declared_commodities"),
        declared_payees=_gather("declared_payees"),
        declared_tags=_gather("declared_tags"),
        source_file=first.source_file,
        included_files=sum(journal.included_files for journal in journals),
    )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def load_journal(path: str | os.PathLike) -> Journal:
    """Load a .journal or .ledger file and return a Journal object.

    The root file is resolved to an absolute path, its extension is
    validated, and every hledger ``include`` directive is expanded inline
    (recursively) before the combined text is handed to ``parse_string``.
    Because expansion happens at the point of the directive, anything in
    effect before an ``include`` is still in effect for the included text.

    Include path resolution:
      - ``~/...``      tilde expanded to the home directory
      - ``/abs/path``  used as-is (absolute)
      - ``relative``   resolved relative to the containing file's directory
      - Glob patterns (``*``, ``**``, ``?``, ``[range]``) are expanded via
        :func:`glob.glob`; the containing file is excluded from the matches.

    Only ``.journal`` and ``.ledger`` files may be loaded or included.
    Format prefixes (e.g. ``timedot:``) and circular includes raise
    :class:`ParseError`.

    When parsing fails with a line number that falls inside the expanded
    text, the error is re-raised with the originating file prepended to the
    message and the line number translated back to that file.

    Args:
        path: Absolute or relative path to the root journal file.

    Returns:
        A :class:`~ledgerkit.models.Journal` with ``source_file`` set to the
        resolved root path and ``included_files`` set to the number of
        distinct non-root files that contributed at least one line.

    Raises:
        FileNotFoundError: if the root path or a non-glob included file does
            not exist.
        ParseError: if an extension is unsupported, a format prefix is used,
            a circular include is detected, a glob matches nothing, or the
            file contents are malformed.
    """
    root = Path(os.fspath(path)).resolve()
    _validate_extension(root)

    line_map: list[tuple[Path, int]] = []
    expanded = _expand_includes(root, {root}, line_map)

    # Distinct files other than the root that supplied lines to the output.
    contributors = {origin for origin, _ in line_map}
    contributors.discard(root)
    included_count = len(contributors)

    try:
        journal = parse_string(expanded, source_file=str(root))
    except ParseError as exc:
        failing_line = exc.line_number
        if failing_line is None or not 1 <= failing_line <= len(line_map):
            # Nothing to translate: propagate the parser's error untouched.
            raise

        origin_file, origin_lineno = line_map[failing_line - 1]
        # Drop any trailing " (line N)" that refers to the expanded text
        # before re-raising with the correctly attributed location.
        message = exc.args[0]
        if " (line " in message:
            message = message[: message.rfind(" (line ")]
        raise ParseError(f"{origin_file}: {message}", origin_lineno) from exc

    journal.source_file = str(root)
    journal.included_files = included_count
    return journal
|