ledgerkit 1.0.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ledgerkit/__init__.py +51 -0
- ledgerkit/__main__.py +12 -0
- ledgerkit/_pandas_compat.py +21 -0
- ledgerkit/checks.py +617 -0
- ledgerkit/cli.py +448 -0
- ledgerkit/commodity_style.py +274 -0
- ledgerkit/editor_model.py +193 -0
- ledgerkit/loader.py +311 -0
- ledgerkit/models.py +459 -0
- ledgerkit/parser.py +1547 -0
- ledgerkit/reports.py +573 -0
- ledgerkit/writer.py +97 -0
- ledgerkit-1.0.0.dev1.dist-info/METADATA +203 -0
- ledgerkit-1.0.0.dev1.dist-info/RECORD +18 -0
- ledgerkit-1.0.0.dev1.dist-info/WHEEL +5 -0
- ledgerkit-1.0.0.dev1.dist-info/entry_points.txt +2 -0
- ledgerkit-1.0.0.dev1.dist-info/licenses/LICENSE +21 -0
- ledgerkit-1.0.0.dev1.dist-info/top_level.txt +1 -0
ledgerkit/parser.py
ADDED
|
@@ -0,0 +1,1547 @@
|
|
|
1
|
+
"""Journal file parser for ledgerkit.
|
|
2
|
+
|
|
3
|
+
Converts raw .journal text into Journal/Transaction/Posting objects.
|
|
4
|
+
See docs/hledger-compatibility.md for the transaction block structure and
|
|
5
|
+
the list of supported format features.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import datetime
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from decimal import Decimal, InvalidOperation
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
from ledgerkit.models import Amount, BalanceAssertion, Journal, Posting, PriceDirective, SourceSpan, Transaction
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class _ParseContext:
|
|
21
|
+
"""Mutable parser state threaded through the parsing call chain.
|
|
22
|
+
|
|
23
|
+
Holds directive-accumulated values that must be visible to both
|
|
24
|
+
_parse_amount and _parse_posting without threading multiple individual
|
|
25
|
+
parameters. Fields are mutated in-place as directives are encountered.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
default_year: int
|
|
29
|
+
decimal_mark: str
|
|
30
|
+
default_commodity: Optional[str] = None
|
|
31
|
+
account_prefix: Optional[str] = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# Module-level constants
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
# Matches the two-or-more-space + comment-character separator used in directives.
|
|
39
|
+
#
|
|
40
|
+
# Purpose: split directive lines on the boundary between directive body and
|
|
41
|
+
# inline comment, per hledger's rule that a single space may appear
|
|
42
|
+
# inside a body value (e.g. an account name like "expenses:fun money")
|
|
43
|
+
# but two or more spaces before a comment character always begin a comment.
|
|
44
|
+
#
|
|
45
|
+
# Pattern: \s{2,}[;#]
|
|
46
|
+
# \s{2,} — two or more whitespace characters (spaces or tabs)
|
|
47
|
+
# [;#] — semicolon or hash: both are recognised comment introducers in ledgerkit
|
|
48
|
+
#
|
|
49
|
+
# Edge cases:
|
|
50
|
+
# - A single space before ';' or '#' is NOT a separator (belongs to the body)
|
|
51
|
+
# - Only the FIRST match is used (re.split with maxsplit=1)
|
|
52
|
+
# - Lines with no such pattern return the original body unchanged
|
|
53
|
+
# NOTE: the match covers the comment character as well as the whitespace run,
# so splitting on this pattern drops the ';' / '#' together with the separator.
_TWO_SPACE_SEP = re.compile(r"\s{2,}[;#]")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ParseError(ValueError):
    """Signals malformed hledger journal input.

    Attributes:
        line_number: 1-based line number at which the problem was detected,
            or None when no line applies.
    """

    def __init__(self, message: str, line_number: int | None = None) -> None:
        self.line_number = line_number
        # The location suffix is only appended when a line number is known.
        if line_number is None:
            text = f"{message}"
        else:
            text = f"{message} (line {line_number})"
        super().__init__(text)


class ParseWarning(ParseError):
    """Non-fatal parse notice; journal loading can still proceed."""
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Regex patterns
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
# Matches a transaction header line in hledger journal format.
|
|
79
|
+
#
|
|
80
|
+
# Purpose: extract date, optional status flag, optional transaction code,
|
|
81
|
+
# description text, and optional inline comment from a single
|
|
82
|
+
# non-indented line that begins a transaction block.
|
|
83
|
+
#
|
|
84
|
+
# Group breakdown:
|
|
85
|
+
# (1) primary date (and optional =DATE2)
|
|
86
|
+
# — captured as a single raw string and passed to _parse_txn_header,
|
|
87
|
+
# which splits on '=' when present to obtain both dates.
|
|
88
|
+
# Handles YYYY-MM-DD, YYYY/MM/DD, YYYY.MM.DD, and year-omitted forms
|
|
89
|
+
# (M/DD, MM-DD, etc.); leading zeros optional on month and day.
|
|
90
|
+
# The secondary date uses the same format rules as the primary date.
|
|
91
|
+
# (2) [*!]? — status flag: '*' = cleared, '!' = pending;
|
|
92
|
+
# absent means uncleared
|
|
93
|
+
# (3) (?:\(([^)]*)\))? — transaction code in parentheses, e.g. (INV-42);
|
|
94
|
+
# outer parens consumed, only inner text captured;
|
|
95
|
+
# [^)]* prevents greedily crossing a closing paren
|
|
96
|
+
# (4) .*? — description: lazy so the trailing comment anchor
|
|
97
|
+
# can match; stripped of surrounding whitespace
|
|
98
|
+
# after the match
|
|
99
|
+
# (5) (?:\s*;\s*(.*))? — inline comment following ';'; the ';' itself and
|
|
100
|
+
# surrounding spaces are consumed but not captured
|
|
101
|
+
#
|
|
102
|
+
# Edge cases:
|
|
103
|
+
# - Description may be empty (e.g. "2024-01-01 *" with no text after the flag)
|
|
104
|
+
# - Code may be absent even when a flag is present, and vice versa
|
|
105
|
+
# - A bare ';' anywhere after the date is treated as the comment delimiter;
|
|
106
|
+
# this matches hledger's own behaviour (first ';' ends the description)
|
|
107
|
+
# - Mixed separators (e.g. "2024-01/15") are captured here without complaint;
|
|
108
|
+
# _parse_simple_date accepts them since each separator pair is matched
|
|
109
|
+
# independently
|
|
110
|
+
# - "2024-02-20=2024-02-22 * desc" → group 1 = "2024-02-20=2024-02-22";
|
|
111
|
+
# _parse_txn_header splits on '=' to obtain both dates
|
|
112
|
+
# - A trailing '=' without a following date2 pattern is not matched by the
|
|
113
|
+
# optional group, so group 1 contains only the primary date
|
|
114
|
+
_TXN_HEADER = re.compile(
|
|
115
|
+
r"^((?:\d{4}[-/.])?(?:\d{1,2})[-/.](?:\d{1,2})"
|
|
116
|
+
r"(?:=(?:\d{4}[-/.])?(?:\d{1,2})[-/.](?:\d{1,2}))?)"
|
|
117
|
+
r"\s*([*!])?"
|
|
118
|
+
r"\s*(?:\(([^)]*)\))?"
|
|
119
|
+
r"\s*(.*?)"
|
|
120
|
+
r"(?:\s*;\s*(.*))?$"
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
# Parses a simple date string captured by _TXN_HEADER into its year, month,
|
|
124
|
+
# and day components.
|
|
125
|
+
#
|
|
126
|
+
# Purpose: decompose a raw date token (any of the accepted formats) into
|
|
127
|
+
# integer components so that datetime.date() can validate and
|
|
128
|
+
# construct the final date object.
|
|
129
|
+
#
|
|
130
|
+
# Group breakdown:
|
|
131
|
+
# (1) (\d{4}) — four-digit year; the entire (?:...) wrapper is optional,
|
|
132
|
+
# so this group is None when the year is omitted
|
|
133
|
+
# [-/.] — separator: hyphen, forward-slash, or dot; not captured
|
|
134
|
+
# (2) (\d{1,2}) — month, 1–2 digits, leading zero optional
|
|
135
|
+
# [-/.] — separator (same character classes; mixing is tolerated)
|
|
136
|
+
# (3) (\d{1,2}) — day of month, 1–2 digits, leading zero optional
|
|
137
|
+
#
|
|
138
|
+
# Edge cases:
|
|
139
|
+
# - Year absent ("1/31", "01-31"): group 1 is None; caller supplies default_year
|
|
140
|
+
# - Invalid calendar values ("2024-13-01"): regex matches but datetime.date()
|
|
141
|
+
# raises ValueError, which _parse_simple_date converts to ParseError
|
|
142
|
+
# - Dot separator ("2010.1.31"): matched by [-/.]; note this is unambiguous in
|
|
143
|
+
# header context because amounts (which also use '.') are on indented lines
|
|
144
|
+
_SIMPLE_DATE = re.compile(
|
|
145
|
+
r"^(?:(\d{4})[-/.])?(\d{1,2})[-/.](\d{1,2})$"
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
# Matches an hledger amount string in either prefix-symbol or suffix-symbol form.
|
|
149
|
+
#
|
|
150
|
+
# Purpose: parse the quantity and commodity out of an amount token that has
|
|
151
|
+
# already been separated from the account name. Supports both
|
|
152
|
+
# '£30.00' (symbol before number) and '30.00 EUR' (symbol after number),
|
|
153
|
+
# optional thousands-separator commas, a leading minus sign, and a
|
|
154
|
+
# mid-minus sign that appears after the prefix symbol (e.g. '$-300').
|
|
155
|
+
#
|
|
156
|
+
# Group breakdown:
|
|
157
|
+
# (1) (-?) — optional leading minus sign before the symbol;
|
|
158
|
+
# applies when the minus precedes everything
|
|
159
|
+
# (2) ([^\d,.\s-]*) — prefix commodity: any run of characters that
|
|
160
|
+
# are not digits, commas, dots, whitespace, or
|
|
161
|
+
# minus; matches '£', '$', '€', etc.; empty
|
|
162
|
+
# string when the commodity is a suffix
|
|
163
|
+
# (3) (-?) — optional mid-minus: sign that appears AFTER the
|
|
164
|
+
# prefix symbol, e.g. '$-300.00'; combined with
|
|
165
|
+
# group 1 in _parse_amount — effective sign is
|
|
166
|
+
# group 1 OR group 3
|
|
167
|
+
# (4) ([\d,]+(?:\.\d*)?(?:[Ee][+-]?\d+)?)
|
|
168
|
+
# — numeric quantity: one or more digits/commas
|
|
169
|
+
# optionally followed by a decimal part, then
|
|
170
|
+
# an optional E-notation exponent ([Ee][+-]?\d+);
|
|
171
|
+
# commas are stripped before Decimal conversion
|
|
172
|
+
# (5) ([A-Za-z][A-Za-z0-9]*)? — suffix commodity: a letter-started alphanumeric
|
|
173
|
+
# token (e.g. EUR, USD, AAPL); absent when the
|
|
174
|
+
# commodity is a prefix symbol
|
|
175
|
+
#
|
|
176
|
+
# Edge cases:
|
|
177
|
+
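# - Normally exactly one of group 2 or group 5 is non-empty; if both are empty
#   the caller falls back to the default commodity when one is set and
#   otherwise raises ParseError (no commodity); if both are present the
#   prefix symbol (group 2) is the one used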
|
|
179
|
+
# - A space between prefix symbol and quantity is allowed: '£ 30.00' matches
|
|
180
|
+
# because \s* between groups 2/3 and 4 absorbs it
|
|
181
|
+
# - Integer quantities ('£100') are valid; the decimal part is optional
|
|
182
|
+
# - Negative suffix amounts ('-30.00 EUR'): the minus in group 1 precedes the
|
|
183
|
+
# empty group 2, empty group 3, then the quantity in group 4, then EUR in 5
|
|
184
|
+
# - Mid-sign after prefix symbol ('$-300'): group 1 is empty, group 3 is '-';
|
|
185
|
+
# combined so Decimal('-300') is produced correctly
|
|
186
|
+
# - Both group 1 and group 3 present is malformed but harmless; either '-'
|
|
187
|
+
# makes the effective sign negative
|
|
188
|
+
# - Trailing decimal with no fractional digits ('$1,000.') is accepted;
|
|
189
|
+
# Python's Decimal('1000.') is valid and equals Decimal('1000')
|
|
190
|
+
# - E-notation ('1E3 EUR', '1.5e-2 GBP'): Decimal natively handles these forms;
|
|
191
|
+
# no extra conversion needed beyond comma-stripping
|
|
192
|
+
_AMOUNT = re.compile(
|
|
193
|
+
r"^(-?)"
|
|
194
|
+
r"([^\d,.\s-]*)"
|
|
195
|
+
r"(-?)"
|
|
196
|
+
r"\s*([\d,]+(?:\.\d*)?(?:[Ee][+-]?\d+)?)"
|
|
197
|
+
r"\s*([A-Za-z][A-Za-z0-9]*)?"
|
|
198
|
+
r"$"
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
# Matches an hledger amount string when `decimal-mark ,` is active (European
|
|
202
|
+
# notation where comma is the decimal mark and period is the thousands separator).
|
|
203
|
+
#
|
|
204
|
+
# Purpose: parse amounts of the form "1.234,56" or "€1.234,56" where the
|
|
205
|
+
# convention is the reverse of the default _AMOUNT regex. Selected
|
|
206
|
+
# by _parse_amount when decimal_mark == ",". Also supports a mid-minus
|
|
207
|
+
# sign after the prefix symbol (e.g. '€-1.234,56').
|
|
208
|
+
#
|
|
209
|
+
# Group breakdown: (mirrors _AMOUNT; only the numeric group differs)
|
|
210
|
+
# (1) (-?) — optional leading minus sign before the symbol
|
|
211
|
+
# (2) ([^\d,.\s-]*) — prefix commodity symbol (£, $, €, etc.)
|
|
212
|
+
# (3) (-?) — optional mid-minus: sign after prefix symbol;
|
|
213
|
+
# combined with group 1 in _parse_amount
|
|
214
|
+
# (4) ([\d.]*(?:,\d*)?(?:[Ee][+-]?\d+)?)
|
|
215
|
+
# — numeric quantity in comma-decimal form:
|
|
216
|
+
# zero or more digits/periods followed by
|
|
217
|
+
# optional comma+decimal digits, then
|
|
218
|
+
# an optional E-notation exponent;
|
|
219
|
+
# periods stripped and comma→period before Decimal
|
|
220
|
+
# (5) ([A-Za-z][A-Za-z0-9]*)? — suffix commodity symbol (EUR, USD, etc.)
|
|
221
|
+
#
|
|
222
|
+
# Edge cases:
|
|
223
|
+
# - "1.234,56" → period=thousands, comma=decimal → Decimal("1234.56")
|
|
224
|
+
# - "100,50" → no thousands separator → Decimal("100.50")
|
|
225
|
+
# - "1.234" → period=thousands, no decimal → Decimal("1234")
|
|
226
|
+
# - "100" → no separators at all → Decimal("100")
|
|
227
|
+
# - "€1.234,56" → prefix "€" + comma-decimal numeric
|
|
228
|
+
# - "€-1.234,56" → prefix "€", mid-minus '-', comma-decimal numeric
|
|
229
|
+
# - "1.234,56 EUR" → suffix "EUR" style
|
|
230
|
+
# - Trailing comma with no fractional digits ('1.234,') is accepted;
|
|
231
|
+
# Python's Decimal('1234.') is valid and equals Decimal('1234')
|
|
232
|
+
# - "1E3 EUR" in comma-decimal mode: no comma/period, exponent appended directly
|
|
233
|
+
_AMOUNT_COMMA = re.compile(
|
|
234
|
+
r"^(-?)"
|
|
235
|
+
r"([^\d,.\s-]*)"
|
|
236
|
+
r"(-?)"
|
|
237
|
+
r"\s*([\d.]*(?:,\d*)?(?:[Ee][+-]?\d+)?)"
|
|
238
|
+
r"\s*([A-Za-z][A-Za-z0-9]*)?"
|
|
239
|
+
r"$"
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
# Matches a P (market price) directive line.
|
|
243
|
+
#
|
|
244
|
+
# Purpose: detect a P directive and capture its three space-delimited components
|
|
245
|
+
# so the handler can extract date, commodity symbol, and price amount.
|
|
246
|
+
#
|
|
247
|
+
# Group breakdown:
|
|
248
|
+
# (1) (\S+) — raw date string; no whitespace; passed to _parse_simple_date
|
|
249
|
+
# (2) (\S+) — COMMODITY1SYMBOL: the commodity being priced, e.g. "€", "$", "AAPL";
|
|
250
|
+
# no whitespace — multi-word commodity names are not valid in this position
|
|
251
|
+
# (3) (.+) — raw COMMODITY2AMOUNT string, e.g. "$1.35", "1.40 USD";
|
|
252
|
+
# may include a trailing inline comment (" ; note") — caller strips
|
|
253
|
+
# with _strip_directive_comment before passing to _parse_amount
|
|
254
|
+
#
|
|
255
|
+
# Edge cases:
|
|
256
|
+
# - "P 2024-01-01 AAPL $179.00 ; note" → group 3 is "$179.00 ; note";
|
|
257
|
+
# _strip_directive_comment removes the trailing " ;" portion
|
|
258
|
+
# - "P 2024-01-01 AAPL" (no price amount) does not match; caller raises ParseError
|
|
259
|
+
# - Lines starting with "P" but not "P " never reach this handler because they
|
|
260
|
+
# are either transaction headers (start with a date digit) or posting lines
|
|
261
|
+
# (indented)
|
|
262
|
+
# - "P" alone (no whitespace) does not match because \s+ requires at least one space
|
|
263
|
+
# Groups: 1 = date token, 2 = priced commodity, 3 = rest of the line (greedy).
_P_DIRECTIVE = re.compile(r"^P\s+(\S+)\s+(\S+)\s+(.+)$")
|
|
264
|
+
|
|
265
|
+
# Matches an alias directive line.
|
|
266
|
+
#
|
|
267
|
+
# Purpose: detect an alias directive and capture the entire body after
|
|
268
|
+
# "alias " so the handler can determine whether it is a basic
|
|
269
|
+
# alias or a regex alias and parse it accordingly.
|
|
270
|
+
#
|
|
271
|
+
# Group breakdown:
|
|
272
|
+
# (1) (.+) — the raw alias body, e.g. "checking = assets:bank" or
|
|
273
|
+
# "/^(.+):bank/ = \1" or "/old/=new ; note" or "/old/=new # note";
|
|
274
|
+
# inline comments ( ; or #) are stripped by _strip_directive_comment
|
|
275
|
+
# before further parsing
|
|
276
|
+
#
|
|
277
|
+
# Edge cases:
|
|
278
|
+
# - "alias" with no body does not match because \s+ requires at least
|
|
279
|
+
# one space and (.+) requires at least one character after it
|
|
280
|
+
# - "aliases" (plural) does not match because \s+ requires whitespace
|
|
281
|
+
# immediately after the exact word "alias"
|
|
282
|
+
# - Leading whitespace on line: the outer guard (not line[0:1].isspace())
|
|
283
|
+
# prevents indented lines from ever reaching this check
|
|
284
|
+
# Group 1 is everything after "alias" and its following whitespace, to end-of-line.
_ALIAS_DIRECTIVE = re.compile(r"^alias\s+(.+)$")
|
|
285
|
+
|
|
286
|
+
# Matches an end aliases directive line (exact keyword, no trailing content).
|
|
287
|
+
#
|
|
288
|
+
# Purpose: detect the "end aliases" directive that clears all currently
|
|
289
|
+
# active alias rules from the parse state. The handler passes the
|
|
290
|
+
# line through _strip_directive_comment first, so trailing "; comment"
|
|
291
|
+
# and "# comment" sequences (with two-or-more-space prefix) are stripped
|
|
292
|
+
# before this regex is applied.
|
|
293
|
+
#
|
|
294
|
+
# Group breakdown:
|
|
295
|
+
# No capture groups — presence of the directive is sufficient.
|
|
296
|
+
#
|
|
297
|
+
# Edge cases:
|
|
298
|
+
# - "end aliases" (two spaces between words) matches because \s+ allows
|
|
299
|
+
# multiple spaces — consistent with hledger's lenient whitespace handling
|
|
300
|
+
# - "end aliases ; comment" or "end aliases # comment": the handler strips
|
|
301
|
+
# the trailing comment via _strip_directive_comment before matching, so
|
|
302
|
+
# both forms are recognised correctly
|
|
303
|
+
# - "end aliasesX" does not match because $ anchors immediately after "aliases"
|
|
304
|
+
# Anchored at both ends and has no capture groups.
_END_ALIASES = re.compile(r"^end\s+aliases$")
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# ---------------------------------------------------------------------------
|
|
308
|
+
# Private helpers
|
|
309
|
+
# ---------------------------------------------------------------------------
|
|
310
|
+
|
|
311
|
+
def _parse_simple_date(
    date_str: str, lineno: int, default_year: int
) -> datetime.date:
    """Parse a simple date string into a datetime.date.

    Accepts YYYY-MM-DD, YYYY/MM/DD, YYYY.MM.DD, and year-omitted forms such as
    M/DD or MM-DD. Leading zeros on month and day are optional.

    Args:
        date_str: The raw date token, with no surrounding whitespace.
        lineno: Line number attached to any ParseError raised.
        default_year: Year used when date_str does not carry one.

    Returns:
        The corresponding calendar date.

    Raises:
        ParseError: if the string does not match the simple date pattern or the
            resulting calendar date is invalid (e.g. month 13). In the latter
            case the underlying ValueError is attached as the cause.
    """
    match = _SIMPLE_DATE.match(date_str)
    if match is None:
        raise ParseError(f"invalid date {date_str!r}", lineno)

    year_str, month_str, day_str = match.groups()
    # Group 1 is None when the year was omitted.
    year = int(year_str) if year_str else default_year
    try:
        return datetime.date(year, int(month_str), int(day_str))
    except ValueError as exc:
        # Chain explicitly so the traceback shows the calendar error as the
        # direct cause rather than an unrelated failure during handling.
        raise ParseError(f"invalid date {date_str!r}: {exc}", lineno) from exc
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _parse_txn_header(line: str, lineno: int, default_year: int) -> Transaction:
    """Build a Transaction with no postings from one transaction header line.

    Args:
        line: The header line to parse.
        lineno: Line number recorded on the Transaction and on any error.
        default_year: Year applied to dates written without one.

    Returns:
        A Transaction with an empty postings list.

    Raises:
        ParseError: if the line does not match the header pattern or one of
            its dates is invalid.
    """
    match = _TXN_HEADER.match(line)
    if match is None:
        raise ParseError(f"invalid transaction header: {line!r}", lineno)

    date_token, status, code, description, comment = match.groups()

    # "DATE=DATE2": anything after the first '=' is the secondary date.
    primary_token, has_secondary, secondary_token = date_token.partition("=")
    date = _parse_simple_date(primary_token, lineno, default_year)
    if has_secondary:
        date2 = _parse_simple_date(secondary_token, lineno, default_year)
    else:
        date2 = None

    comment_text = (comment or "").strip()
    return Transaction(
        date=date,
        date2=date2,
        description=(description or "").strip(),
        postings=[],
        cleared=(status == "*"),
        pending=(status == "!"),
        code=(code or "").strip(),
        comment=comment_text,
        source_line=lineno,
        # Same text as `comment`, but None rather than "" when absent.
        inline_comment=comment_text or None,
    )
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _strip_cost_annotation(raw: str) -> tuple[str, str | None]:
|
|
366
|
+
"""Strip a cost annotation (@/@@ PRICE) from a raw amount token.
|
|
367
|
+
|
|
368
|
+
Returns (cleaned_amount, cost_raw) where cost_raw is the text after the
|
|
369
|
+
marker (without the marker itself), or None if absent.
|
|
370
|
+
Checks @@ before @ so the two-char marker takes priority.
|
|
371
|
+
"""
|
|
372
|
+
for marker in (" @@ ", " @ "):
|
|
373
|
+
idx = raw.find(marker)
|
|
374
|
+
if idx != -1:
|
|
375
|
+
return raw[:idx].strip(), raw[idx + len(marker):].strip()
|
|
376
|
+
return raw, None
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
# Matches one lot-annotation token appended to an amount in hledger journal format.
|
|
380
|
+
# Purpose: identify the four recognised annotation syntaxes so they can be removed
|
|
381
|
+
# before the amount regex is applied. hledger supports cost annotations
|
|
382
|
+
# ({AMOUNT}, {{AMOUNT}}), lot dates ([DATE]), and lot labels ((LABEL))
|
|
383
|
+
# appended after the commodity token with leading whitespace.
|
|
384
|
+
#
|
|
385
|
+
# Group breakdown: no capture groups — used only for substitution via re.sub
|
|
386
|
+
# \s+ — one or more whitespace chars separating amount from annotation
|
|
387
|
+
# (?:...) — non-capturing alternation of the four annotation forms:
|
|
388
|
+
# \{\{[^}]*\}\} — double-brace total-cost annotation: {{...}}
|
|
389
|
+
# \{[^}]*\} — single-brace per-unit-cost annotation: {...}
|
|
390
|
+
# \[[^\]]*\] — square-bracket lot date: [DATE]
|
|
391
|
+
# \([^)]*\) — round-bracket lot label: (LABEL)
|
|
392
|
+
#
|
|
393
|
+
# Edge cases:
|
|
394
|
+
# - Double-brace must be listed before single-brace (longer match wins)
|
|
395
|
+
# - Nested brackets are not supported (hledger itself does not allow them)
|
|
396
|
+
# - Multiple annotations in sequence (e.g. "{$1} [2024-01-01]") are all removed
|
|
397
|
+
# because re.sub replaces all non-overlapping matches
|
|
398
|
+
# - An annotation with no preceding whitespace is not matched (requires \s+);
|
|
399
|
+
# such input is malformed and will fail the amount regex anyway
|
|
400
|
+
_LOT_ANNOTATION_RE = re.compile(
|
|
401
|
+
r'\s+(?:\{\{[^}]*\}\}|\{[^}]*\}|\[[^\]]*\]|\([^)]*\))'
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def _strip_lot_annotations(raw: str) -> str:
|
|
406
|
+
"""Remove all lot-annotation tokens from a raw amount string."""
|
|
407
|
+
return _LOT_ANNOTATION_RE.sub("", raw).strip()
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
# Matches a space used as a digit-group separator between a digit and a group of
|
|
411
|
+
# exactly three digits (followed by another digit-group boundary or non-digit).
|
|
412
|
+
#
|
|
413
|
+
# Purpose: normalise space-separated amounts like '1 000 000 JPY' → '1000000 JPY'
|
|
414
|
+
# so the main amount regex (which does not allow spaces in the numeric
|
|
415
|
+
# group) can parse them. Applied iteratively in _normalise_space_separators.
|
|
416
|
+
#
|
|
417
|
+
# Group breakdown:
|
|
418
|
+
# (1) (\d) — the digit immediately before the space
|
|
419
|
+
# (2) (\d{3}) — exactly three digits forming the group after the space
|
|
420
|
+
# (?=\s|\D|$) — lookahead: next char is whitespace, non-digit, or end-of-string;
|
|
421
|
+
# prevents collapsing a space before a non-three-digit run
|
|
422
|
+
#
|
|
423
|
+
# Edge cases:
|
|
424
|
+
# - '1 000 000 JPY' → after two iterations → '1000000 JPY' ✓
|
|
425
|
+
# - '1 JPY' → single digit before space, followed by non-digit 'J' → NOT collapsed ✓
|
|
426
|
+
# - '1 00 JPY' → two-digit group → NOT collapsed (requires exactly 3 digits)
|
|
427
|
+
# - Applied iteratively because each pass collapses one separator; two passes
|
|
428
|
+
# handle '1 000 000' → '1000 000' → '1000000'
|
|
429
|
+
_SPACE_DIGIT_GROUP_RE = re.compile(r'(\d) (\d{3})(?=\s|\D|$)')
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def _normalise_space_separators(s: str) -> str:
|
|
433
|
+
"""Collapse space digit-group separators: '1 000 000 JPY' → '1000000 JPY'."""
|
|
434
|
+
prev = None
|
|
435
|
+
while prev != s:
|
|
436
|
+
prev = s
|
|
437
|
+
s = _SPACE_DIGIT_GROUP_RE.sub(r'\1\2', s)
|
|
438
|
+
return s
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
# Matches an amount with a quoted commodity suffix, e.g. '-3 "Chocolate Frogs"'.
|
|
442
|
+
#
|
|
443
|
+
# Purpose: handle commodity names that require double quotes because they
|
|
444
|
+
# contain spaces or start with non-letter characters. This branch is
|
|
445
|
+
# checked BEFORE the main _AMOUNT regex, which cannot match quoted names.
|
|
446
|
+
#
|
|
447
|
+
# Group breakdown:
|
|
448
|
+
# (1) (-?) — optional leading minus sign
|
|
449
|
+
# (2) (.*?) — numeric quantity, possibly with prefix symbol (lazy match
|
|
450
|
+
# stops before the quoted suffix); may include the sign if
|
|
451
|
+
# the commodity is a prefix symbol
|
|
452
|
+
# (3) ([^"]+) — quoted commodity name, inner text only; outer quotes consumed
|
|
453
|
+
#
|
|
454
|
+
# Edge cases:
|
|
455
|
+
# - '-3 "Chocolate Frogs"' → sign='-', numeric='3', sym='Chocolate Frogs'
|
|
456
|
+
# - '3 "Foo Bar"' → sign='', numeric='3', sym='Foo Bar'
|
|
457
|
+
# - Quantity with comma/period ('1,000 "AAAA"'): commas stripped in branch
|
|
458
|
+
# - Empty quoted name '""' is NOT matched: [^"]+ requires at least one character
#   between the quotes, so such input falls through to the main amount regex
|
|
460
|
+
# - Cost/lot annotations are already stripped before this branch is reached,
|
|
461
|
+
# so "@@ $10" tails will never appear in raw at this point
|
|
462
|
+
_QUOTED_SUFFIX_RE = re.compile(r'^(-?)(.*?)\s+"([^"]+)"\s*$')


def _quantity_text_for_decimal(quantity_str: str, decimal_mark: str) -> str:
    """Rewrite a journal quantity into the plain form Decimal() accepts."""
    if decimal_mark == ",":
        # Period is the digit-group mark; comma is the decimal mark.
        return quantity_str.replace(".", "").replace(",", ".")
    # Comma is the digit-group mark; period is the decimal mark (default).
    return quantity_str.replace(",", "")


def _parse_amount(raw: str, lineno: int, ctx: _ParseContext) -> tuple[Amount, str | None]:
    """Parse a raw amount string into an (Amount, cost_raw) tuple.

    Strips cost annotations (@ PRICE / @@ TOTAL) and lot annotations
    ({...}, {{...}}, [...], (...)) and collapses space digit-group
    separators before parsing.

    Supports prefix commodity (£30.00), suffix commodity (30.00 EUR),
    quoted suffix commodity (3 "Chocolate Frogs"), negative amounts
    (-£5.00, $-300, -30.00 EUR), and digit-group separators.

    When ctx.decimal_mark is "," periods are digit-group marks and commas are
    decimal marks (1.234,56); otherwise commas are digit-group marks and
    periods are decimal marks (1,234.56). This applies to the quoted-suffix
    form as well.

    Args:
        raw: The amount text, already separated from the account name.
        lineno: Line number attached to any ParseError raised.
        ctx: Parser state supplying decimal_mark and default_commodity.

    Returns:
        (amount, cost_raw), where cost_raw is the text that followed the
        cost marker, or None when no cost annotation was present.

    Raises:
        ParseError: if the text does not match the amount pattern, has no
            commodity and no default commodity is set, or its quantity is
            not a valid Decimal.
    """
    raw = raw.strip()
    raw, cost_raw = _strip_cost_annotation(raw)
    raw = _strip_lot_annotations(raw)
    raw = _normalise_space_separators(raw)

    # The quoted-suffix form is tried first; the main regexes cannot match it.
    quoted = _QUOTED_SUFFIX_RE.match(raw)
    if quoted:
        sign, numeric_part, quoted_sym = quoted.groups()
        # Honour the active decimal mark here too, so that '1.234,56 "Foo"'
        # is read as 1234.56 under `decimal-mark ,` rather than 1.23456.
        numeric_clean = _quantity_text_for_decimal(numeric_part.strip(), ctx.decimal_mark)
        try:
            quantity = Decimal(sign + numeric_clean) if numeric_clean else Decimal(0)
        except InvalidOperation as exc:
            raise ParseError(f"invalid numeric quantity in amount: {raw!r}", lineno) from exc
        return Amount(quantity=quantity, commodity=quoted_sym, raw=raw), cost_raw

    pattern = _AMOUNT_COMMA if ctx.decimal_mark == "," else _AMOUNT
    m = pattern.match(raw)
    if not m:
        raise ParseError(f"invalid amount: {raw!r}", lineno)

    minus, prefix_sym, mid_minus, quantity_str, suffix_sym = m.groups()
    # Either minus position makes the amount negative; each is "" or "-".
    negative = minus or mid_minus

    commodity = (prefix_sym or suffix_sym or "").strip()
    if not commodity:
        if not ctx.default_commodity:
            raise ParseError(f"amount has no commodity symbol: {raw!r}", lineno)
        commodity = ctx.default_commodity

    quantity_clean = _quantity_text_for_decimal(quantity_str, ctx.decimal_mark)
    try:
        quantity = Decimal(negative + quantity_clean)
    except InvalidOperation as exc:
        raise ParseError(f"invalid numeric quantity in amount: {raw!r}", lineno) from exc

    return Amount(quantity=quantity, commodity=commodity, raw=raw), cost_raw
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _strip_directive_comment(raw: str) -> str:
    """Drop an inline comment from a directive body using the two-space rule.

    The comment starts at the first run of two or more whitespace characters
    that is immediately followed by ';' or '#'. Returns the text before that
    point, stripped of surrounding whitespace; when there is no such run the
    whole input is returned stripped.
    """
    separator = _TWO_SPACE_SEP.search(raw)
    body = raw if separator is None else raw[:separator.start()]
    return body.strip()
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
# Matches a commodity directive body where the symbol is a quoted trailing name,
|
|
540
|
+
# e.g. '1,000. "Chocolate Frogs"' or '0. "My Fund"'.
|
|
541
|
+
#
|
|
542
|
+
# Purpose: detect the trailing-quoted-symbol form BEFORE _COMMODITY_AMOUNT.match
|
|
543
|
+
# is tried, since _COMMODITY_AMOUNT's suffix group cannot match quoted names
|
|
544
|
+
# containing spaces.
|
|
545
|
+
#
|
|
546
|
+
# Group breakdown:
|
|
547
|
+
# (1) ([\d,. ]*) — numeric sample: digits, commas, dots, internal spaces;
|
|
548
|
+
# may be empty if the directive is just a quoted name
|
|
549
|
+
# (2) ([^"]+) — quoted commodity name, inner text only; outer quotes consumed
|
|
550
|
+
#
|
|
551
|
+
# Edge cases:
|
|
552
|
+
# - '1,000. "Chocolate Frogs"' → numeric='1,000. ', sym='Chocolate Frogs'
|
|
553
|
+
# - '"Chocolate Frogs"' → numeric='', sym='Chocolate Frogs'
|
|
554
|
+
# (but this form is caught by the earlier startswith('"') block)
|
|
555
|
+
# - Trailing whitespace after closing quote: absorbed by \s*$
|
|
556
|
+
# - Empty quoted name '""' is NOT matched here ([^"]+ needs at least one character)
|
|
557
|
+
# NOTE: [^"]+ requires at least one character between the quotes.
_TRAILING_QUOTED_COMMODITY_RE = re.compile(r'^([\d,. ]*)\s*"([^"]+)"\s*$')
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
# Matches the numeric-and-optional-suffix part of a commodity sample amount.
|
|
561
|
+
#
|
|
562
|
+
# Purpose: extract the commodity symbol from a commodity directive whose body
|
|
563
|
+
# is an amount token (e.g. "$1,000.00", "1,000.00 EUR", "1000. AAAA").
|
|
564
|
+
# Also handles a bare symbol (e.g. "$", "INR") where no digits follow.
|
|
565
|
+
#
|
|
566
|
+
# Group breakdown:
|
|
567
|
+
# (1) [^\d,.\s"-]* — prefix symbol: any run of chars that are NOT digits,
|
|
568
|
+
# commas, dots, whitespace, quotes, or minus; captures
|
|
569
|
+
# '$', '£', '€', etc. when they lead the token; empty
|
|
570
|
+
# string when the commodity is a suffix token
|
|
571
|
+
# (2) [\d,. ]* — numeric portion: digits, commas, dots, and spaces
|
|
572
|
+
# (thousands-separated amounts can contain internal spaces)
|
|
573
|
+
# (3) \s*([^\d,.\s]*) — suffix symbol: any non-numeric run after the numeric
|
|
574
|
+
# portion, stripped of leading whitespace; captures
|
|
575
|
+
# 'EUR', 'USD', 'AAPL' etc.; empty when prefix symbol
|
|
576
|
+
#
|
|
577
|
+
# Edge cases:
|
|
578
|
+
# - "1000. AAAA" → prefix='' numeric='1000. ' suffix='AAAA'
|
|
579
|
+
# - "$1,000.00" → prefix='$' numeric='1,000.00' suffix=''
|
|
580
|
+
# - "$" → prefix='$' numeric='' suffix=''
|
|
581
|
+
# - "INR"        → prefix='INR' numeric='' suffix='' (the greedy prefix group takes a bare symbol)
|
|
582
|
+
# - '1 000 000.0000' → internal spaces handled by numeric group
|
|
583
|
+
_COMMODITY_AMOUNT = re.compile(
|
|
584
|
+
r'^([^\d,.\s"-]*)'
|
|
585
|
+
r'([\d,. ]*)'
|
|
586
|
+
r'\s*([^\d,.\s]*)'
|
|
587
|
+
r'$'
|
|
588
|
+
)
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def _extract_commodity_symbol(raw: str, lineno: int) -> str:
    """Return the commodity symbol named by a commodity directive body.

    Recognised body shapes, tried in this order:

    1. Leading quoted symbol:   "AAPL 2023"              -> AAPL 2023
       (an empty pair of quotes yields the empty-string commodity)
    2. Number + quoted symbol:  1,000. "Chocolate Frogs" -> Chocolate Frogs
    3. Symbol before a number:  $1,000.00                -> $
    4. Symbol after a number:   1,000.00 EUR             -> EUR
    5. Symbol on its own:       INR, $                   -> INR, $
    6. Number on its own:       1000.                    -> "" (no-symbol commodity)

    Args:
        raw: Directive text after the ``commodity`` keyword; surrounding
            whitespace is ignored.
        lineno: 1-based source line, attached to any error raised.

    Returns:
        The symbol text with quotes removed, or "" for the no-symbol commodity.

    Raises:
        ParseError: if the body is empty after stripping, if a leading quote
            is never closed, or if the body matches none of the shapes above.
    """
    body = raw.strip()
    if body == "":
        raise ParseError("commodity directive has no symbol", lineno)

    # Shape 1: everything up to the next double quote is the symbol.
    if body[0] == '"':
        closing = body.find('"', 1)
        if closing < 0:
            raise ParseError(f"commodity directive has unterminated quoted symbol: {body!r}", lineno)
        return body[1:closing]

    # Shape 2: a sample number followed by a quoted symbol.
    quoted_tail = _TRAILING_QUOTED_COMMODITY_RE.match(body)
    if quoted_tail is not None:
        return quoted_tail.group(2)

    # Shapes 3-6: unquoted symbol before and/or after an optional number.
    parsed = _COMMODITY_AMOUNT.match(body)
    if parsed is None:
        raise ParseError(f"commodity directive: cannot parse symbol from {body!r}", lineno)

    leading_symbol, _number, trailing_symbol = parsed.groups()
    # A leading symbol wins over a trailing one; with neither present the
    # expression evaluates to "" (the no-symbol commodity).
    return leading_symbol or trailing_symbol.strip()
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
# Matches a balance assertion marker embedded in a posting amount token.
|
|
636
|
+
# Purpose: detect the first occurrence of ==*, ==, =*, or = that is preceded
|
|
637
|
+
# and followed by whitespace, so we can split a posting amount token
|
|
638
|
+
# into "posting amount" and "assertion amount".
|
|
639
|
+
# Group breakdown:
|
|
640
|
+
# (1) ==* | == | =* | = — the assertion marker; alternatives ordered
|
|
641
|
+
# longest-first so ==* is tried before == and =* before =
|
|
642
|
+
# Edge cases:
|
|
643
|
+
# - ==* must precede == in the alternation to avoid consuming only ==
|
|
644
|
+
# - =* must precede = in the alternation to avoid consuming only =
|
|
645
|
+
# - surrounding \s+ prevents matching = inside commodity symbols or
|
|
646
|
+
# numbers (e.g. scientific notation, if ever supported)
|
|
647
|
+
# - a bare = at the start of amount_raw ("= $500") indicates a balance
|
|
648
|
+
# assignment (amount elided); this regex will not match it because
|
|
649
|
+
# there is no leading \s+ before the =
|
|
650
|
+
_ASSERTION_MARKER_RE = re.compile(r"\s+(==\*|==|=\*|=)\s+")
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def _parse_posting(line: str, lineno: int, ctx: _ParseContext) -> Posting:
    """Parse a single posting line (already stripped of leading whitespace).

    The line is split into account and amount on the first run of two or more
    whitespace characters, so account names containing single spaces
    (e.g. "expenses:fun money") stay intact.  Everything from the first ';'
    in the amount part onward is taken as the inline comment.

    The amount part may carry a balance assertion introduced by one of the
    markers ``=``, ``==``, ``=*`` or ``==*``:

    - ``AMOUNT = ASSERTED``  -> posting amount plus assertion
    - ``= ASSERTED``         -> balance assignment: posting amount elided
                                (amount=None) with the assertion attached

    Args:
        line: Posting text without its leading indentation.
        lineno: 1-based source line, stored on the Posting and used in errors.
        ctx: Parser context, passed through to ``_parse_amount``.

    Returns:
        A Posting; ``amount`` is None when no posting amount was written.

    Raises:
        ParseError: if the account name is empty, or whatever
            ``_parse_amount`` raises for a malformed amount.
    """
    # maxsplit=1: only the first wide gap separates account from amount; any
    # later double spaces stay inside the amount text.
    parts = re.split(r"\s{2,}", line, maxsplit=1)
    account = parts[0].strip()

    if not account:
        raise ParseError("posting has no account name", lineno)

    # Separate the trailing "; comment" (if any) from the amount text.  An
    # empty comment is stored as None rather than "".
    amount_raw = ""
    posting_inline_comment: str | None = None
    if len(parts) > 1:
        amount_part, semicolon, comment_text = parts[1].partition(";")
        if semicolon:
            posting_inline_comment = comment_text.strip() or None
        amount_raw = amount_part.strip()

    if not amount_raw:
        return Posting(account=account, amount=None, source_line=lineno, inline_comment=posting_inline_comment)

    # Look for a balance assertion marker.  _ASSERTION_MARKER_RE requires
    # whitespace *before* the marker, and amount_raw has been stripped, so it
    # can never match a marker at position 0.  The balance-assignment form
    # ("= $500") is therefore checked separately; without this the
    # elided-amount branch below would be unreachable.
    assertion: BalanceAssertion | None = None
    marker: str | None = None
    posting_amount_raw = amount_raw
    assertion_amount_raw = ""

    am = _ASSERTION_MARKER_RE.search(amount_raw)
    if am:
        marker = am.group(1)
        posting_amount_raw = amount_raw[: am.start()].strip()
        assertion_amount_raw = amount_raw[am.end():].strip()
    else:
        leading = re.match(r"(==\*|==|=\*|=)\s+", amount_raw)
        if leading:
            marker = leading.group(1)
            posting_amount_raw = ""
            assertion_amount_raw = amount_raw[leading.end():].strip()

    if marker is not None:
        assertion_amount, _ = _parse_amount(assertion_amount_raw, lineno, ctx)
        assertion = BalanceAssertion(
            amount=assertion_amount,
            inclusive="*" in marker,
            sole_commodity=marker.startswith("=="),
        )
        amount_raw = posting_amount_raw

    if not amount_raw:
        # Posting amount elided (balance assignment syntax) — amount stays None
        return Posting(
            account=account,
            amount=None,
            balance_assertion=assertion,
            source_line=lineno,
            inline_comment=posting_inline_comment,
        )

    posting_amount, cost_raw = _parse_amount(amount_raw, lineno, ctx)
    return Posting(
        account=account,
        amount=posting_amount,
        balance_assertion=assertion,
        cost_raw=cost_raw,
        source_line=lineno,
        inline_comment=posting_inline_comment,
    )
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def _parse_alias_body(body: str, lineno: int) -> tuple[str, str, bool]:
|
|
733
|
+
"""Parse alias directive body; return (old_or_pattern, replacement, is_regex)."""
|
|
734
|
+
if body.startswith("/"):
|
|
735
|
+
# Regex alias: /PATTERN/ = REPLACEMENT
|
|
736
|
+
# Scan forward to find the closing unescaped '/'.
|
|
737
|
+
i = 1
|
|
738
|
+
while i < len(body):
|
|
739
|
+
if body[i] == "/" and body[i - 1] != "\\":
|
|
740
|
+
break
|
|
741
|
+
i += 1
|
|
742
|
+
if i >= len(body):
|
|
743
|
+
raise ParseError(f"unclosed regex in alias directive: {body!r}", lineno)
|
|
744
|
+
pattern_str = body[1:i].replace("\\/", "/")
|
|
745
|
+
rest = body[i + 1 :].lstrip()
|
|
746
|
+
if not rest.startswith("="):
|
|
747
|
+
raise ParseError(f"missing '=' in alias directive: {body!r}", lineno)
|
|
748
|
+
replacement = rest[1:].lstrip()
|
|
749
|
+
try:
|
|
750
|
+
re.compile(pattern_str, re.IGNORECASE) # validate regex early
|
|
751
|
+
except re.error as exc:
|
|
752
|
+
raise ParseError(f"invalid regex in alias directive: {exc}", lineno)
|
|
753
|
+
return (pattern_str, replacement, True)
|
|
754
|
+
else:
|
|
755
|
+
# Basic alias: OLD = NEW (spaces around '=' are optional)
|
|
756
|
+
if "=" not in body:
|
|
757
|
+
raise ParseError(f"missing '=' in alias directive: {body!r}", lineno)
|
|
758
|
+
idx = body.index("=")
|
|
759
|
+
old = body[:idx].rstrip()
|
|
760
|
+
new = body[idx + 1 :].lstrip()
|
|
761
|
+
if not old:
|
|
762
|
+
raise ParseError(f"empty account name in alias directive: {body!r}", lineno)
|
|
763
|
+
return (old, new, False)
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
def _apply_aliases(account: str, aliases: list[tuple[str, str, bool]]) -> str:
|
|
767
|
+
"""Apply active alias rules to an account name, most-recently-defined first (LIFO).
|
|
768
|
+
|
|
769
|
+
Basic aliases match as an exact name or colon-delimited prefix.
|
|
770
|
+
Regex aliases use re.sub with IGNORECASE and support backreferences.
|
|
771
|
+
"""
|
|
772
|
+
for old, new, is_regex in reversed(aliases):
|
|
773
|
+
if is_regex:
|
|
774
|
+
# Purpose: substitute matching substring in account name.
|
|
775
|
+
# re.IGNORECASE per hledger spec ("REGEX is case-insensitive as usual").
|
|
776
|
+
# Backreferences in `new` (e.g. \1) are supported by re.sub natively.
|
|
777
|
+
account = re.sub(old, new, account, flags=re.IGNORECASE)
|
|
778
|
+
else:
|
|
779
|
+
# Basic alias: replace OLD as exact match or as colon-delimited prefix.
|
|
780
|
+
# "checking" rewrites "checking" → new and "checking:a" → new + ":a"
|
|
781
|
+
# but NOT "other:checking" (prefix boundary enforced by + ":").
|
|
782
|
+
if account == old:
|
|
783
|
+
account = new
|
|
784
|
+
elif account.startswith(old + ":"):
|
|
785
|
+
account = new + account[len(old):]
|
|
786
|
+
return account
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
# ---------------------------------------------------------------------------
|
|
790
|
+
# Public API
|
|
791
|
+
# ---------------------------------------------------------------------------
|
|
792
|
+
|
|
793
|
+
def resolve_elision(txn: Transaction) -> list[Posting]:
    """Return txn's postings with a single elided amount filled in.

    A posting is "elided" when its ``amount`` is None.  The result is always
    a new list; ``txn`` itself is not modified.

    - Exactly one elided posting and at least one explicit amount: the elided
      posting is replaced by one inferred Posting per commodity found in the
      explicit postings (ordered by commodity symbol).  Each inferred amount
      is the negated sum of that commodity, and each inferred Posting carries
      ``inferred=True`` plus the elided posting's account and source_line.
    - Zero elided postings, two or more elided postings, or no explicit
      amounts at all: a plain copy of ``txn.postings`` is returned.

    Args:
        txn: A Transaction, parsed or constructed programmatically.

    Returns:
        List of Posting objects representing the resolved transaction.
    """
    postings = list(txn.postings)

    missing = [position for position, entry in enumerate(postings) if entry.amount is None]
    if len(missing) != 1:
        return postings
    (slot,) = missing
    placeholder = postings[slot]

    # Net quantity per commodity over the postings that do have an amount.
    totals: dict[str, Decimal] = {}
    for entry in postings:
        explicit = entry.amount
        if explicit is not None:
            running = totals.get(explicit.commodity, Decimal(0))
            totals[explicit.commodity] = running + explicit.quantity

    if not totals:
        return postings

    # One balancing posting per commodity; sorted keys keep output stable.
    inferred: list[Posting] = []
    for symbol in sorted(totals):
        inferred.append(
            Posting(
                account=placeholder.account,
                amount=Amount(-totals[symbol], symbol),
                source_line=placeholder.source_line,
                inferred=True,
            )
        )

    return postings[:slot] + inferred + postings[slot + 1:]
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
def _flush_txn(
    txn: Transaction,
    end: int,
    all_lines: list[str],
    source_file: str,
) -> None:
    """Record where a finished transaction came from and its original text.

    Sets ``txn.source_span`` to the line range ``txn.source_line .. end`` in
    ``source_file`` and ``txn.raw_text`` to those lines joined with newlines,
    followed by a final newline.  A missing or zero ``source_line`` is
    treated as line 1.

    Args:
        txn: Transaction to annotate in place.
        end: 1-based number of the transaction's last line (inclusive).
        all_lines: Every line of the source text, without line terminators.
        source_file: File label stored in the span.
    """
    first_line = txn.source_line
    if not first_line:
        first_line = 1

    txn.source_span = SourceSpan(file=source_file, start_line=first_line, end_line=end)

    # Line numbers are 1-based and inclusive; list slices are 0-based and
    # exclusive at the end, hence the -1 on the start only.
    covered = all_lines[first_line - 1 : end]
    txn.raw_text = "\n".join(covered) + "\n"
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def _parse_string_impl(
|
|
862
|
+
text: str,
|
|
863
|
+
default_year: int,
|
|
864
|
+
errors_out: list[ParseError] | None,
|
|
865
|
+
source_file: str = "(string)",
|
|
866
|
+
) -> Journal:
|
|
867
|
+
"""Shared body for parse_string and parse_string_lenient.
|
|
868
|
+
|
|
869
|
+
When errors_out is None, raises ParseError on the first malformed line.
|
|
870
|
+
When errors_out is a list, appends errors and continues parsing; malformed
|
|
871
|
+
transactions are discarded and parsing resumes at the next boundary.
|
|
872
|
+
"""
|
|
873
|
+
all_lines = text.splitlines()
|
|
874
|
+
transactions: list[Transaction] = []
|
|
875
|
+
prices: list[PriceDirective] = []
|
|
876
|
+
declared_accounts: list[str] = []
|
|
877
|
+
declared_commodities: list[str] = []
|
|
878
|
+
declared_payees: list[str] = []
|
|
879
|
+
declared_tags: list[str] = []
|
|
880
|
+
commodity_directive_raws: dict = {} # symbol → raw amount string from directive
|
|
881
|
+
aliases: list[tuple[str, str, bool]] = [] # (old_or_pattern, replacement, is_regex)
|
|
882
|
+
ctx = _ParseContext(default_year=default_year, decimal_mark=".")
|
|
883
|
+
current_txn: Transaction | None = None
|
|
884
|
+
current_txn_last_lineno: int | None = None # tracks end_line for source_span
|
|
885
|
+
last_posting_in_txn: Posting | None = None # for standalone comment attribution
|
|
886
|
+
in_block_comment = False
|
|
887
|
+
in_subdirective = False # True while consuming indented subdirective lines
|
|
888
|
+
skip_until_blank = False # lenient mode: True while skipping a malformed transaction
|
|
889
|
+
|
|
890
|
+
for lineno, raw in enumerate(all_lines, start=1):
|
|
891
|
+
line = raw.rstrip()
|
|
892
|
+
|
|
893
|
+
# --- Block comment mode ---
|
|
894
|
+
#
|
|
895
|
+
# Purpose: skip every line between a `comment` directive and its
|
|
896
|
+
# matching `end comment` directive (or EOF).
|
|
897
|
+
#
|
|
898
|
+
# `comment` and `end comment` are non-indented directives. The
|
|
899
|
+
# `comment` keyword may be followed by an inline comment but must be
|
|
900
|
+
# the first word on the line. `end comment` is matched after stripping
|
|
901
|
+
# any surrounding whitespace to be lenient about trailing spaces.
|
|
902
|
+
#
|
|
903
|
+
# Edge cases:
|
|
904
|
+
# - A `comment` block that reaches EOF without `end comment` is
|
|
905
|
+
# silently accepted; the parser simply consumes the rest of the file
|
|
906
|
+
# - A `comment` directive encountered mid-transaction flushes the open
|
|
907
|
+
# transaction first, then enters block-comment mode
|
|
908
|
+
# - `end comment` outside a block comment falls through to the
|
|
909
|
+
# "silently skip" branch (no error raised)
|
|
910
|
+
# - Nested `comment` directives inside a block comment are ignored
|
|
911
|
+
if in_block_comment:
|
|
912
|
+
if line.strip() == "end comment":
|
|
913
|
+
in_block_comment = False
|
|
914
|
+
continue
|
|
915
|
+
|
|
916
|
+
# --- Lenient mode: skip remainder of a malformed transaction ---
|
|
917
|
+
#
|
|
918
|
+
# When errors_out is not None and a ParseError occurs mid-transaction,
|
|
919
|
+
# skip_until_blank is set True and current_txn is cleared. Subsequent
|
|
920
|
+
# lines are discarded until a blank line or a new transaction header is
|
|
921
|
+
# encountered, at which point normal parsing resumes.
|
|
922
|
+
if skip_until_blank:
|
|
923
|
+
if not line.strip():
|
|
924
|
+
skip_until_blank = False
|
|
925
|
+
in_subdirective = False
|
|
926
|
+
continue
|
|
927
|
+
if not re.match(r"^(?:\d{4}[-/.])?(?:\d{1,2})[-/.](?:\d{1,2})(?=[\s*!(=]|$)", line):
|
|
928
|
+
continue
|
|
929
|
+
skip_until_blank = False
|
|
930
|
+
# Fall through: treat this line as a new transaction header.
|
|
931
|
+
|
|
932
|
+
# --- Blank line: end the current block ---
|
|
933
|
+
if not line.strip():
|
|
934
|
+
if current_txn is not None:
|
|
935
|
+
end = current_txn_last_lineno or (current_txn.source_line or 1)
|
|
936
|
+
_flush_txn(current_txn, end, all_lines, source_file)
|
|
937
|
+
transactions.append(current_txn)
|
|
938
|
+
current_txn = None
|
|
939
|
+
current_txn_last_lineno = None
|
|
940
|
+
last_posting_in_txn = None
|
|
941
|
+
in_subdirective = False
|
|
942
|
+
continue
|
|
943
|
+
|
|
944
|
+
# --- Comment-only line (whole-line or indented follow-on `;` / `#`) ---
|
|
945
|
+
#
|
|
946
|
+
# Two distinct sub-cases share this branch:
|
|
947
|
+
#
|
|
948
|
+
# 1. Column-0 (non-indented) `#` or `;` — a standalone top-level comment.
|
|
949
|
+
# These are ALWAYS silently skipped regardless of whether a transaction
|
|
950
|
+
# block is open. They never contribute to a transaction's comment fields
|
|
951
|
+
# and never extend its source_span.
|
|
952
|
+
#
|
|
953
|
+
# 2. Indented (leading whitespace) `#` or `;` inside an open transaction —
|
|
954
|
+
# a follow-on comment line. `;`-led lines are attached to the preceding
|
|
955
|
+
# posting's inline_comment (or to the transaction's inline_comment if no
|
|
956
|
+
# posting has been seen yet in this block). `#`-led lines update the span
|
|
957
|
+
# but do not attach their text.
|
|
958
|
+
#
|
|
959
|
+
# The `is_indented` flag disambiguates the two sub-cases; `lstrip()` is
|
|
960
|
+
# applied only for the startswith test, not for the indentation check.
|
|
961
|
+
#
|
|
962
|
+
# Edge cases:
|
|
963
|
+
# - A column-0 `;` with no blank line between two transactions is a
|
|
964
|
+
# top-level comment; it must NOT be captured as a follow-on comment
|
|
965
|
+
# on the previous transaction's last posting.
|
|
966
|
+
# - An empty `;` (nothing after the semicolon) sets inline_comment to
|
|
967
|
+
# None, not to the empty string.
|
|
968
|
+
is_indented = line[0:1].isspace()
|
|
969
|
+
stripped = line.lstrip()
|
|
970
|
+
if stripped.startswith(";") or stripped.startswith("#"):
|
|
971
|
+
if current_txn is not None and is_indented:
|
|
972
|
+
current_txn_last_lineno = lineno
|
|
973
|
+
if stripped.startswith(";"):
|
|
974
|
+
comment_text = stripped[1:].strip()
|
|
975
|
+
if last_posting_in_txn is None:
|
|
976
|
+
if current_txn.inline_comment:
|
|
977
|
+
current_txn.inline_comment += "\n" + comment_text
|
|
978
|
+
else:
|
|
979
|
+
current_txn.inline_comment = comment_text or None
|
|
980
|
+
else:
|
|
981
|
+
if last_posting_in_txn.inline_comment:
|
|
982
|
+
last_posting_in_txn.inline_comment += "\n" + comment_text
|
|
983
|
+
else:
|
|
984
|
+
last_posting_in_txn.inline_comment = comment_text or None
|
|
985
|
+
continue
|
|
986
|
+
|
|
987
|
+
# --- ~ (periodic transaction rule) ---
|
|
988
|
+
#
|
|
989
|
+
# Purpose: recognise a periodic transaction rule header and skip its
|
|
990
|
+
# posting lines without raising ParseError. The rule and its
|
|
991
|
+
# postings are not stored; --forecast expansion is out of scope
|
|
992
|
+
# for v1. A ParseWarning is appended in lenient mode (not a
|
|
993
|
+
# hard error).
|
|
994
|
+
#
|
|
995
|
+
# Group breakdown: no capture groups — match is a boolean gate only.
|
|
996
|
+
#
|
|
997
|
+
# Edge cases:
|
|
998
|
+
# - "~ monthly budget goals" — leading ~ is sufficient; rest ignored
|
|
999
|
+
# - If a transaction is currently open, flush it first (malformed input)
|
|
1000
|
+
# - skip_until_blank consumes indented postings until next blank line
|
|
1001
|
+
if not line[0:1].isspace() and line.startswith("~"):
|
|
1002
|
+
if current_txn is not None:
|
|
1003
|
+
end = current_txn_last_lineno or (current_txn.source_line or 1)
|
|
1004
|
+
_flush_txn(current_txn, end, all_lines, source_file)
|
|
1005
|
+
transactions.append(current_txn)
|
|
1006
|
+
current_txn = None
|
|
1007
|
+
current_txn_last_lineno = None
|
|
1008
|
+
last_posting_in_txn = None
|
|
1009
|
+
if errors_out is not None:
|
|
1010
|
+
errors_out.append(ParseWarning(
|
|
1011
|
+
"periodic transaction rule (~) skipped (not supported in v1)",
|
|
1012
|
+
lineno,
|
|
1013
|
+
))
|
|
1014
|
+
skip_until_blank = True
|
|
1015
|
+
continue
|
|
1016
|
+
|
|
1017
|
+
# --- = (auto-posting rule) ---
|
|
1018
|
+
#
|
|
1019
|
+
# Purpose: recognise an auto-posting rule header and skip its posting
|
|
1020
|
+
# lines. Only the "= QUERY" form (space after =) is matched;
|
|
1021
|
+
# bare "=account" is not valid hledger auto-posting syntax.
|
|
1022
|
+
# A ParseWarning is appended in lenient mode (not a hard error).
|
|
1023
|
+
#
|
|
1024
|
+
# Group breakdown: no capture groups — match is a boolean gate only.
|
|
1025
|
+
#
|
|
1026
|
+
# Edge cases:
|
|
1027
|
+
# - "= expenses:food" matches; "=expenses:food" (no space) does NOT
|
|
1028
|
+
# - Balance-assignment postings begin with "=" but are indented; the
|
|
1029
|
+
# indentation guard (`not line[0:1].isspace()`) distinguishes them
|
|
1030
|
+
if not line[0:1].isspace() and re.match(r"^=\s+\S", line):
|
|
1031
|
+
if current_txn is not None:
|
|
1032
|
+
end = current_txn_last_lineno or (current_txn.source_line or 1)
|
|
1033
|
+
_flush_txn(current_txn, end, all_lines, source_file)
|
|
1034
|
+
transactions.append(current_txn)
|
|
1035
|
+
current_txn = None
|
|
1036
|
+
current_txn_last_lineno = None
|
|
1037
|
+
last_posting_in_txn = None
|
|
1038
|
+
if errors_out is not None:
|
|
1039
|
+
errors_out.append(ParseWarning(
|
|
1040
|
+
"auto-posting rule (=) skipped (not supported in v1)",
|
|
1041
|
+
lineno,
|
|
1042
|
+
))
|
|
1043
|
+
skip_until_blank = True
|
|
1044
|
+
continue
|
|
1045
|
+
|
|
1046
|
+
# --- Transaction header (non-indented line starting with a simple date) ---
|
|
1047
|
+
#
|
|
1048
|
+
# Purpose: quickly determine whether a non-indented line opens a new
|
|
1049
|
+
# transaction block before handing off to _parse_txn_header.
|
|
1050
|
+
# This check runs before posting detection so that a date-like
|
|
1051
|
+
# token at column 0 is always treated as a new header, never as
|
|
1052
|
+
# an un-indented posting inside an open block.
|
|
1053
|
+
# Pattern: ^(?:\d{4}[-/.])?(?:\d{1,2})[-/.](?:\d{1,2})(?=[\s*!(=]|$)
|
|
1054
|
+
# ^ — anchored to start of the rstripped line
|
|
1055
|
+
# (?:\d{4}[-/.])? — optional four-digit year + separator
|
|
1056
|
+
# (?:\d{1,2})[-/.] — month (1–2 digits) + separator
|
|
1057
|
+
# (?:\d{1,2}) — day (1–2 digits)
|
|
1058
|
+
# (?=[\s*!(=]|$) — lookahead: must be followed by whitespace,
|
|
1059
|
+
# a status flag, the start of a code, an '='
|
|
1060
|
+
# introducing a secondary date, or end-of-line;
|
|
1061
|
+
# prevents matching bare numeric expressions
|
|
1062
|
+
# Edge cases:
|
|
1063
|
+
# - "2024-13-45 Bad" passes this check but fails in _parse_simple_date
|
|
1064
|
+
# when datetime.date() rejects the invalid calendar values
|
|
1065
|
+
# - "1.5" without a trailing space/flag does NOT match (lookahead fails),
|
|
1066
|
+
# preventing accidental collision with decimal amounts on directive lines
|
|
1067
|
+
# - "2024-02-20=2024-02-22" passes because '=' is in the lookahead set;
|
|
1068
|
+
# the full _TXN_HEADER regex validates the secondary date format
|
|
1069
|
+
if re.match(r"^(?:\d{4}[-/.])?(?:\d{1,2})[-/.](?:\d{1,2})(?=[\s*!(=]|$)", line):
|
|
1070
|
+
if current_txn is not None:
|
|
1071
|
+
# No blank line between transactions — flush previous block
|
|
1072
|
+
end = current_txn_last_lineno or (current_txn.source_line or 1)
|
|
1073
|
+
_flush_txn(current_txn, end, all_lines, source_file)
|
|
1074
|
+
transactions.append(current_txn)
|
|
1075
|
+
in_subdirective = False
|
|
1076
|
+
current_txn_last_lineno = None
|
|
1077
|
+
last_posting_in_txn = None
|
|
1078
|
+
try:
|
|
1079
|
+
current_txn = _parse_txn_header(line, lineno, default_year)
|
|
1080
|
+
current_txn_last_lineno = lineno
|
|
1081
|
+
except ParseError as _err:
|
|
1082
|
+
if errors_out is None:
|
|
1083
|
+
raise
|
|
1084
|
+
errors_out.append(_err)
|
|
1085
|
+
current_txn = None
|
|
1086
|
+
skip_until_blank = True
|
|
1087
|
+
continue
|
|
1088
|
+
|
|
1089
|
+
# --- Block comment start (`comment` directive) ---
|
|
1090
|
+
#
|
|
1091
|
+
# A non-indented line whose first whitespace-delimited token is exactly
|
|
1092
|
+
# "comment" opens a block comment. Anything after "comment" on the same
|
|
1093
|
+
# line is ignored (treated as inline commentary on the directive itself).
|
|
1094
|
+
# Any open transaction is flushed before entering block-comment mode so
|
|
1095
|
+
# that a `comment` block sitting between transactions is parsed cleanly.
|
|
1096
|
+
if not line[0:1].isspace() and line.split()[0] == "comment":
|
|
1097
|
+
if current_txn is not None:
|
|
1098
|
+
transactions.append(current_txn)
|
|
1099
|
+
current_txn = None
|
|
1100
|
+
in_subdirective = False
|
|
1101
|
+
in_block_comment = True
|
|
1102
|
+
continue
|
|
1103
|
+
|
|
1104
|
+
# --- Subdirective lines (indented lines following account/commodity/payee) ---
|
|
1105
|
+
#
|
|
1106
|
+
# Purpose: consume Ledger-style indented subdirectives (e.g. "format …"
|
|
1107
|
+
# below a commodity directive) without treating them as postings
|
|
1108
|
+
# or raising ParseError. in_subdirective is set to True whenever
|
|
1109
|
+
# we finish processing an account/commodity/payee directive line;
|
|
1110
|
+
# it is cleared on the next non-indented, non-blank line.
|
|
1111
|
+
#
|
|
1112
|
+
# Edge cases:
|
|
1113
|
+
# - Blank lines above already clear in_subdirective via the blank-line
|
|
1114
|
+
# branch (which sets current_txn=None and falls through; the next
|
|
1115
|
+
# non-blank line will hit this check with in_subdirective still True
|
|
1116
|
+
# only if there was no blank line — so blank lines naturally end the
|
|
1117
|
+
# subdirective block)
|
|
1118
|
+
# - An indented subdirective line that contains a valid posting syntax
|
|
1119
|
+
# is still skipped here (subdirective wins); the containing file is
|
|
1120
|
+
# expected to be well-formed per hledger conventions
|
|
1121
|
+
if in_subdirective:
|
|
1122
|
+
if line[0:1].isspace():
|
|
1123
|
+
continue # consume indented subdirective silently
|
|
1124
|
+
in_subdirective = False
|
|
1125
|
+
# fall through to process this non-indented line normally
|
|
1126
|
+
|
|
1127
|
+
# --- account directive ---
|
|
1128
|
+
#
|
|
1129
|
+
# Purpose: record a declared account name for strict-mode checking.
|
|
1130
|
+
# The account name follows the keyword and may contain spaces
|
|
1131
|
+
# and ';' characters; an inline comment is delimited by the
|
|
1132
|
+
# first occurrence of two-or-more spaces followed by ';'.
|
|
1133
|
+
# Indented lines that follow (Ledger-style subdirectives) are
|
|
1134
|
+
# consumed silently via the in_subdirective flag.
|
|
1135
|
+
#
|
|
1136
|
+
# Edge cases:
|
|
1137
|
+
# - "account a:b;c" (single space before ';') → name is "a:b;c"
|
|
1138
|
+
# - "account a:b ; note" → name is "a:b"
|
|
1139
|
+
# - "accounts" does not match because \s+ requires whitespace after
|
|
1140
|
+
# the exact word "account"
|
|
1141
|
+
if not line[0:1].isspace() and re.match(r"^account\s+", line):
|
|
1142
|
+
body = line[len("account"):].lstrip()
|
|
1143
|
+
account_name = _strip_directive_comment(body)
|
|
1144
|
+
if ctx.account_prefix:
|
|
1145
|
+
account_name = f"{ctx.account_prefix}:{account_name}"
|
|
1146
|
+
if account_name:
|
|
1147
|
+
declared_accounts.append(_apply_aliases(account_name, aliases))
|
|
1148
|
+
in_subdirective = True
|
|
1149
|
+
continue
|
|
1150
|
+
|
|
1151
|
+
# --- commodity directive ---
|
|
1152
|
+
#
|
|
1153
|
+
# Purpose: record a declared commodity symbol for strict-mode checking.
|
|
1154
|
+
# Supports all hledger commodity directive forms: sample amount
|
|
1155
|
+
# with prefix symbol ($1,000.00), sample amount with suffix
|
|
1156
|
+
# symbol (1,000.00 EUR), bare symbol ($, INR), quoted symbol
|
|
1157
|
+
# ("AAPL 2023"), empty-quoted no-symbol (""), and numeric-only
|
|
1158
|
+
# (1000.) for format declarations.
|
|
1159
|
+
#
|
|
1160
|
+
# Edge cases:
|
|
1161
|
+
# - "commodity" with no body raises ParseError (empty symbol)
|
|
1162
|
+
# - Indented "format" subdirectives are consumed via in_subdirective
|
|
1163
|
+
# - The same symbol may be declared more than once; deduplication is
|
|
1164
|
+
# done at check time, not parse time
|
|
1165
|
+
if not line[0:1].isspace() and re.match(r"^commodity(\s|$)", line):
|
|
1166
|
+
rest = line[len("commodity"):].strip()
|
|
1167
|
+
body = _strip_directive_comment(rest)
|
|
1168
|
+
symbol = _extract_commodity_symbol(body, lineno)
|
|
1169
|
+
declared_commodities.append(symbol)
|
|
1170
|
+
# Store the raw directive body for style inference if it looks like
|
|
1171
|
+
# a sample amount (contains at least one digit).
|
|
1172
|
+
if symbol and any(ch.isdigit() for ch in body):
|
|
1173
|
+
commodity_directive_raws[symbol] = body
|
|
1174
|
+
in_subdirective = True
|
|
1175
|
+
continue
|
|
1176
|
+
|
|
1177
|
+
# --- payee directive ---
|
|
1178
|
+
#
|
|
1179
|
+
# Purpose: record a declared payee name for strict-mode / payee checking.
|
|
1180
|
+
# The payee name follows the keyword; inline comments are stripped
|
|
1181
|
+
# with the 2-space rule. Quoted empty-string payee ("") is stored
|
|
1182
|
+
# as the empty string. Tags in comments are ignored per the spec.
|
|
1183
|
+
#
|
|
1184
|
+
# Edge cases:
|
|
1185
|
+
# - 'payee ""' → stored as "" (the no-payee sentinel)
|
|
1186
|
+
# - "payee Whole Foods ; comment" → "Whole Foods"
|
|
1187
|
+
# - Indented Ledger-style subdirectives are consumed silently
|
|
1188
|
+
if not line[0:1].isspace() and re.match(r"^payee\s+", line):
|
|
1189
|
+
body = line[len("payee"):].lstrip()
|
|
1190
|
+
payee_name = _strip_directive_comment(body)
|
|
1191
|
+
# Unquote a quoted payee name (e.g. payee "" or payee "Smith & Co")
|
|
1192
|
+
if payee_name.startswith('"') and payee_name.endswith('"'):
|
|
1193
|
+
payee_name = payee_name[1:-1]
|
|
1194
|
+
declared_payees.append(payee_name)
|
|
1195
|
+
in_subdirective = True
|
|
1196
|
+
continue
|
|
1197
|
+
|
|
1198
|
+
# --- tag directive ---
|
|
1199
|
+
#
|
|
1200
|
+
# Purpose: record a declared tag name for strict-mode / tag checking.
|
|
1201
|
+
# TAGNAME follows the keyword with no spaces. Inline comments
|
|
1202
|
+
# are stripped using the 2-space separator rule. Indented
|
|
1203
|
+
# subdirectives are consumed silently via in_subdirective.
|
|
1204
|
+
#
|
|
1205
|
+
# Edge cases:
|
|
1206
|
+
# - "tag item-id ; note" → tag name is "item-id"
|
|
1207
|
+
# - "tags" does not match because \s+ requires whitespace after
|
|
1208
|
+
# the exact word "tag"
|
|
1209
|
+
if not line[0:1].isspace() and re.match(r"^tag\s+", line):
|
|
1210
|
+
body = line[len("tag"):].lstrip()
|
|
1211
|
+
tag_name = _strip_directive_comment(body)
|
|
1212
|
+
if tag_name:
|
|
1213
|
+
declared_tags.append(tag_name)
|
|
1214
|
+
in_subdirective = True
|
|
1215
|
+
continue
|
|
1216
|
+
|
|
1217
|
+
# --- decimal-mark directive ---
|
|
1218
|
+
#
|
|
1219
|
+
# Purpose: declare which character is the decimal mark for amount
|
|
1220
|
+
# parsing in this file. Affects all postings from this point
|
|
1221
|
+
# forward (typically placed at the top of file).
|
|
1222
|
+
# Only "." (default) and "," are valid values.
|
|
1223
|
+
#
|
|
1224
|
+
# Edge cases:
|
|
1225
|
+
# - "decimal-mark ." → default; no change to behaviour
|
|
1226
|
+
# - "decimal-mark ," → commas are decimal marks; periods are
|
|
1227
|
+
# digit-group marks (e.g. 1.234,56 = 1234.56)
|
|
1228
|
+
# - Any other value raises ParseError
|
|
1229
|
+
# - No subdirectives are defined for decimal-mark; in_subdirective
|
|
1230
|
+
# is NOT set (nothing to consume)
|
|
1231
|
+
if not line[0:1].isspace() and re.match(r"^decimal-mark(\s|$)", line):
|
|
1232
|
+
rest = line[len("decimal-mark"):].strip()
|
|
1233
|
+
dm = _strip_directive_comment(rest)
|
|
1234
|
+
if dm not in (".", ","):
|
|
1235
|
+
_err = ParseError(
|
|
1236
|
+
f"decimal-mark must be '.' or ',',"
|
|
1237
|
+
f" got {dm!r}",
|
|
1238
|
+
lineno,
|
|
1239
|
+
)
|
|
1240
|
+
if errors_out is None:
|
|
1241
|
+
raise _err
|
|
1242
|
+
errors_out.append(_err)
|
|
1243
|
+
continue
|
|
1244
|
+
ctx.decimal_mark = dm
|
|
1245
|
+
continue
|
|
1246
|
+
|
|
1247
|
+
# --- P directive ---
|
|
1248
|
+
#
|
|
1249
|
+
# Purpose: record a market price declaration (commodity conversion rate
|
|
1250
|
+
# on a date). Stored in `prices` for later use by valuation
|
|
1251
|
+
# reports. No subdirectives are defined for P; in_subdirective
|
|
1252
|
+
# is NOT set.
|
|
1253
|
+
#
|
|
1254
|
+
# Edge cases:
|
|
1255
|
+
# - "P DATE COMMODITY PRICE ; comment" → comment stripped before parse
|
|
1256
|
+
# - A P directive encountered while a transaction is open does NOT close
|
|
1257
|
+
# the transaction (consistent with other directive handlers); P inside
|
|
1258
|
+
# a transaction block is malformed but handled leniently
|
|
1259
|
+
# - A "P " line that fails the full regex (missing commodity or amount)
|
|
1260
|
+
# raises ParseError immediately
|
|
1261
|
+
if not line[0:1].isspace() and line.startswith("P "):
|
|
1262
|
+
m_p = _P_DIRECTIVE.match(line)
|
|
1263
|
+
if not m_p:
|
|
1264
|
+
_err = ParseError(f"invalid P directive: {line!r}", lineno)
|
|
1265
|
+
if errors_out is None:
|
|
1266
|
+
raise _err
|
|
1267
|
+
errors_out.append(_err)
|
|
1268
|
+
continue
|
|
1269
|
+
date_str, commodity1, amount_raw = m_p.groups()
|
|
1270
|
+
amount_clean = _strip_directive_comment(amount_raw)
|
|
1271
|
+
try:
|
|
1272
|
+
p_date = _parse_simple_date(date_str, lineno, default_year)
|
|
1273
|
+
p_price, _ = _parse_amount(amount_clean, lineno, ctx)
|
|
1274
|
+
except ParseError as _err:
|
|
1275
|
+
if errors_out is None:
|
|
1276
|
+
raise
|
|
1277
|
+
errors_out.append(_err)
|
|
1278
|
+
continue
|
|
1279
|
+
prices.append(PriceDirective(date=p_date, commodity=commodity1, price=p_price))
|
|
1280
|
+
continue
|
|
1281
|
+
|
|
1282
|
+
# --- alias directive ---
|
|
1283
|
+
#
|
|
1284
|
+
# Purpose: register an account-name alias rule. Rules accumulate in `aliases`
|
|
1285
|
+
# and are applied to every posting account name parsed after this point.
|
|
1286
|
+
# Basic aliases match as an exact name or colon-delimited prefix;
|
|
1287
|
+
# regex aliases match any substring (case-insensitive, backrefs supported).
|
|
1288
|
+
# Aliases are also applied to account names in `account` directives.
|
|
1289
|
+
# No subdirectives; in_subdirective is NOT set.
|
|
1290
|
+
#
|
|
1291
|
+
# Edge cases:
|
|
1292
|
+
# - "alias /invalid[/ = x" → ParseError (invalid regex detected early)
|
|
1293
|
+
# - "alias = new" (empty OLD) → ParseError
|
|
1294
|
+
# - "alias old = new ; comment" or "alias old = new # comment"
|
|
1295
|
+
# → comment stripped by _strip_directive_comment before body parse
|
|
1296
|
+
# - Rules accumulate; use "end aliases" to clear all
|
|
1297
|
+
if not line[0:1].isspace() and _ALIAS_DIRECTIVE.match(line):
|
|
1298
|
+
m_alias = _ALIAS_DIRECTIVE.match(line)
|
|
1299
|
+
body = _strip_directive_comment(m_alias.group(1))
|
|
1300
|
+
try:
|
|
1301
|
+
old, new, is_regex = _parse_alias_body(body, lineno)
|
|
1302
|
+
except ParseError as _err:
|
|
1303
|
+
if errors_out is None:
|
|
1304
|
+
raise
|
|
1305
|
+
errors_out.append(_err)
|
|
1306
|
+
continue
|
|
1307
|
+
aliases.append((old, new, is_regex))
|
|
1308
|
+
continue
|
|
1309
|
+
|
|
1310
|
+
# --- end aliases directive ---
|
|
1311
|
+
#
|
|
1312
|
+
# Purpose: clear all active alias rules so postings after this line are
|
|
1313
|
+
# not rewritten. A no-op if no aliases are currently active.
|
|
1314
|
+
# _strip_directive_comment is applied first so that trailing
|
|
1315
|
+
# "; comment" or "# comment" (with two-space separation) are
|
|
1316
|
+
# stripped before the keyword is matched.
|
|
1317
|
+
#
|
|
1318
|
+
# Edge cases:
|
|
1319
|
+
# - "end aliases ; done" or "end aliases # done" → comment stripped,
|
|
1320
|
+
# directive recognised correctly
|
|
1321
|
+
# - "end aliases" with no active aliases: silently clears empty list
|
|
1322
|
+
# - Inside a block comment: skipped by the in_block_comment guard above
|
|
1323
|
+
if not line[0:1].isspace() and _END_ALIASES.match(_strip_directive_comment(line)):
|
|
1324
|
+
aliases.clear()
|
|
1325
|
+
continue
|
|
1326
|
+
|
|
1327
|
+
# --- Y directive (default year) ---
|
|
1328
|
+
#
|
|
1329
|
+
# Purpose: set the default year for all year-omitted dates in this file,
|
|
1330
|
+
# overriding the default_year parameter passed to parse_string.
|
|
1331
|
+
# All yearless dates parsed AFTER this directive use the declared
|
|
1332
|
+
# year. Multiple Y directives are allowed; last one wins.
|
|
1333
|
+
#
|
|
1334
|
+
# Group breakdown: no capture groups — year extracted from directive body.
|
|
1335
|
+
#
|
|
1336
|
+
# Edge cases:
|
|
1337
|
+
# - "Y 2024 ; comment" → comment stripped, year = 2024
|
|
1338
|
+
# - "Y 20xx" (non-integer body) → ParseError in lenient; raises in strict
|
|
1339
|
+
if not line[0:1].isspace() and re.match(r"^Y\s", line):
|
|
1340
|
+
body = _strip_directive_comment(line[len("Y"):].strip())
|
|
1341
|
+
try:
|
|
1342
|
+
default_year = int(body)
|
|
1343
|
+
except ValueError:
|
|
1344
|
+
_err = ParseError(f"invalid Y directive year: {body!r}", lineno)
|
|
1345
|
+
if errors_out is None:
|
|
1346
|
+
raise _err
|
|
1347
|
+
errors_out.append(_err)
|
|
1348
|
+
continue
|
|
1349
|
+
|
|
1350
|
+
# --- D directive (default commodity + style) ---
|
|
1351
|
+
#
|
|
1352
|
+
# Purpose: declare a default commodity symbol and display style for
|
|
1353
|
+
# amounts that carry no explicit symbol. e.g. "D $1,000.00"
|
|
1354
|
+
# sets ctx.default_commodity = "$". The raw sample amount is
|
|
1355
|
+
# stored in commodity_directive_raws for style inference, just
|
|
1356
|
+
# like a commodity directive.
|
|
1357
|
+
#
|
|
1358
|
+
# Group breakdown: no capture groups — body extracted from directive text.
|
|
1359
|
+
#
|
|
1360
|
+
# Edge cases:
|
|
1361
|
+
# - "D $1,000.00 ; comment" → comment stripped before symbol extraction
|
|
1362
|
+
# - "D EUR" (bare symbol, no numeric sample) → default_commodity = "EUR"
|
|
1363
|
+
# - Invalid or unparseable body → ParseError in lenient; raises in strict
|
|
1364
|
+
if not line[0:1].isspace() and re.match(r"^D\s", line):
|
|
1365
|
+
body = _strip_directive_comment(line[len("D"):].strip())
|
|
1366
|
+
try:
|
|
1367
|
+
sym = _extract_commodity_symbol(body, lineno)
|
|
1368
|
+
except ParseError as _err:
|
|
1369
|
+
if errors_out is None:
|
|
1370
|
+
raise
|
|
1371
|
+
errors_out.append(_err)
|
|
1372
|
+
continue
|
|
1373
|
+
ctx.default_commodity = sym
|
|
1374
|
+
if body and sym not in commodity_directive_raws:
|
|
1375
|
+
commodity_directive_raws[sym] = body
|
|
1376
|
+
continue
|
|
1377
|
+
|
|
1378
|
+
# --- apply account / end apply account directives ---
|
|
1379
|
+
#
|
|
1380
|
+
# Purpose: prepend PREFIX: to every account name in postings and account
|
|
1381
|
+
# directives encountered inside this block. Mirrors hledger's
|
|
1382
|
+
# "apply account" / "end apply account" syntax. Aliases are
|
|
1383
|
+
# applied to the base account name BEFORE the prefix is prepended.
|
|
1384
|
+
#
|
|
1385
|
+
# Group breakdown: no capture groups — prefix extracted from directive body.
|
|
1386
|
+
#
|
|
1387
|
+
# Edge cases:
|
|
1388
|
+
# - Nested apply account: second directive replaces first; ParseWarning emitted in lenient mode
|
|
1389
|
+
# - "end apply account" with no active prefix: silently ignored
|
|
1390
|
+
# - Empty body "apply account ; comment" → prefix set to None
|
|
1391
|
+
if not line[0:1].isspace() and re.match(r"^apply\s+account\s+", line):
|
|
1392
|
+
body = _strip_directive_comment(line[len("apply account"):].strip())
|
|
1393
|
+
if ctx.account_prefix is not None and errors_out is not None:
|
|
1394
|
+
errors_out.append(ParseWarning(
|
|
1395
|
+
"nested apply account: previous prefix replaced (nesting not supported)",
|
|
1396
|
+
lineno,
|
|
1397
|
+
))
|
|
1398
|
+
ctx.account_prefix = body or None
|
|
1399
|
+
continue
|
|
1400
|
+
|
|
1401
|
+
if not line[0:1].isspace() and re.match(r"^end\s+apply\s+account\b", line):
|
|
1402
|
+
ctx.account_prefix = None
|
|
1403
|
+
continue
|
|
1404
|
+
|
|
1405
|
+
# --- Posting line ---
|
|
1406
|
+
#
|
|
1407
|
+
# Posting lines are conventionally written with 2+ leading spaces or a
|
|
1408
|
+
# tab, but indentation is not strictly required inside an open block.
|
|
1409
|
+
# Any line inside an open transaction block that has not been matched by
|
|
1410
|
+
# the blank-line, comment, transaction-header, or directive branches
|
|
1411
|
+
# above is treated as a posting.
|
|
1412
|
+
#
|
|
1413
|
+
# Indented lines (2+ spaces or tab) outside a transaction block still
|
|
1414
|
+
# raise ParseError — indentation unambiguously signals "this is a
|
|
1415
|
+
# posting", so encountering one with no open block is always an error.
|
|
1416
|
+
# Non-indented lines outside a block are silently skipped (directives,
|
|
1417
|
+
# stray text, etc.).
|
|
1418
|
+
if current_txn is None and (line.startswith(" ") or line.startswith("\t")):
|
|
1419
|
+
_err = ParseError("posting found outside a transaction block", lineno)
|
|
1420
|
+
if errors_out is None:
|
|
1421
|
+
raise _err
|
|
1422
|
+
errors_out.append(_err)
|
|
1423
|
+
skip_until_blank = True
|
|
1424
|
+
continue
|
|
1425
|
+
if current_txn is not None:
|
|
1426
|
+
try:
|
|
1427
|
+
posting = _parse_posting(stripped, lineno, ctx)
|
|
1428
|
+
except ParseError as _err:
|
|
1429
|
+
if errors_out is None:
|
|
1430
|
+
raise
|
|
1431
|
+
errors_out.append(_err)
|
|
1432
|
+
current_txn = None
|
|
1433
|
+
current_txn_last_lineno = None
|
|
1434
|
+
last_posting_in_txn = None
|
|
1435
|
+
skip_until_blank = True
|
|
1436
|
+
continue
|
|
1437
|
+
if aliases:
|
|
1438
|
+
posting = Posting(
|
|
1439
|
+
account=_apply_aliases(posting.account, aliases),
|
|
1440
|
+
amount=posting.amount,
|
|
1441
|
+
balance_assertion=posting.balance_assertion,
|
|
1442
|
+
cost_raw=posting.cost_raw,
|
|
1443
|
+
source_line=posting.source_line,
|
|
1444
|
+
inline_comment=posting.inline_comment,
|
|
1445
|
+
)
|
|
1446
|
+
if ctx.account_prefix:
|
|
1447
|
+
posting = Posting(
|
|
1448
|
+
account=f"{ctx.account_prefix}:{posting.account}",
|
|
1449
|
+
amount=posting.amount,
|
|
1450
|
+
balance_assertion=posting.balance_assertion,
|
|
1451
|
+
cost_raw=posting.cost_raw,
|
|
1452
|
+
source_line=posting.source_line,
|
|
1453
|
+
inline_comment=posting.inline_comment,
|
|
1454
|
+
)
|
|
1455
|
+
# Enforce at-most-one elided amount per block
|
|
1456
|
+
if posting.amount is None:
|
|
1457
|
+
elided = [p for p in current_txn.postings if p.amount is None]
|
|
1458
|
+
if elided:
|
|
1459
|
+
_err = ParseError(
|
|
1460
|
+
"a transaction block may have at most one elided amount", lineno
|
|
1461
|
+
)
|
|
1462
|
+
if errors_out is None:
|
|
1463
|
+
raise _err
|
|
1464
|
+
errors_out.append(_err)
|
|
1465
|
+
current_txn = None
|
|
1466
|
+
current_txn_last_lineno = None
|
|
1467
|
+
last_posting_in_txn = None
|
|
1468
|
+
skip_until_blank = True
|
|
1469
|
+
continue
|
|
1470
|
+
current_txn.postings.append(posting)
|
|
1471
|
+
current_txn_last_lineno = lineno
|
|
1472
|
+
last_posting_in_txn = posting
|
|
1473
|
+
continue
|
|
1474
|
+
|
|
1475
|
+
# --- Any other line outside a transaction block: silently skip ---
|
|
1476
|
+
|
|
1477
|
+
# Flush final block if file ends without a trailing blank line
|
|
1478
|
+
if current_txn is not None:
|
|
1479
|
+
end = current_txn_last_lineno or (current_txn.source_line or 1)
|
|
1480
|
+
_flush_txn(current_txn, end, all_lines, source_file)
|
|
1481
|
+
transactions.append(current_txn)
|
|
1482
|
+
|
|
1483
|
+
return Journal(
|
|
1484
|
+
transactions=transactions,
|
|
1485
|
+
prices=prices,
|
|
1486
|
+
declared_accounts=declared_accounts,
|
|
1487
|
+
declared_commodities=declared_commodities,
|
|
1488
|
+
declared_payees=declared_payees,
|
|
1489
|
+
declared_tags=declared_tags,
|
|
1490
|
+
_commodity_directive_raws=commodity_directive_raws,
|
|
1491
|
+
)
|
|
1492
|
+
|
|
1493
|
+
|
|
1494
|
+
def parse_string(
    text: str,
    default_year: int | None = None,
    source_file: str = "(string)",
) -> Journal:
    """Parse journal text in strict mode and return the resulting Journal.

    Dates may be written as YYYY-MM-DD, YYYY/MM/DD or YYYY.MM.DD, or with the
    year omitted (for example M/DD or MM-DD); leading zeros on the month and
    day are optional.

    Args:
        text: Raw journal text.
        default_year: Year applied to year-omitted dates.  When None, the
            current calendar year is used.
        source_file: Label recorded in every Transaction.source_span.file.
            Direct callers get "(string)"; loader.py supplies the resolved
            absolute path.

    Returns:
        The parsed Journal.

    Raises:
        ParseError: if the input is not valid hledger journal syntax.
    """
    # Only consult the clock when the caller did not pin a year.
    year = datetime.date.today().year if default_year is None else default_year
    # errors_out=None selects strict behaviour: the first error is raised.
    return _parse_string_impl(text, year, errors_out=None, source_file=source_file)
|
|
1515
|
+
|
|
1516
|
+
|
|
1517
|
+
def parse_string_lenient(
    text: str,
    default_year: int | None = None,
    source_file: str = "(string)",
) -> tuple[Journal, list[ParseError]]:
    """Parse journal text, collecting problems in a list instead of raising.

    Intended for editor integrations that re-parse on every text-changed
    event to show real-time diagnostics while the file is being edited.
    Transactions that parse cleanly are kept in the returned Journal;
    malformed ones are discarded and reported through the error list.

    Args:
        text: Raw journal text.
        default_year: Year applied to year-omitted dates.  When None, the
            current calendar year is used.
        source_file: Source label forwarded to the parser; defaults to
            "(string)".

    Returns:
        A (journal, errors) tuple.  ``errors`` is empty when the input is
        valid.
    """
    # Only consult the clock when the caller did not pin a year.
    year = default_year if default_year is not None else datetime.date.today().year
    # A list (rather than None) switches the parser into collecting mode.
    collected: list[ParseError] = []
    parsed = _parse_string_impl(text, year, errors_out=collected, source_file=source_file)
    return parsed, collected
|
|
1546
|
+
|
|
1547
|
+
|