ledgerkit 1.0.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ledgerkit/models.py ADDED
@@ -0,0 +1,459 @@
1
+ """Core data models for ledgerkit.
2
+
3
+ Defines the canonical Python data structures for journal entries.
4
+ No parsing or reporting logic lives here.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import datetime
10
+ from dataclasses import dataclass, field
11
+ from decimal import Decimal
12
+ from typing import TYPE_CHECKING, Optional
13
+
14
+ if TYPE_CHECKING:
15
+ from ledgerkit.reports import JournalStats
16
+
17
+
18
@dataclass
class Amount:
    """A decimal quantity tagged with the commodity symbol it is measured in.

    Examples:
        Amount(Decimal("30.00"), "£")
        Amount(Decimal("1500.00"), "EUR")
    """

    quantity: Decimal
    commodity: str
    # Excluded from both repr() and equality comparison.
    # NOTE(review): presumably the amount text as written in the journal —
    # confirm against the parser.
    raw: Optional[str] = field(compare=False, repr=False, default=None)

    def display(self, style: object) -> str:
        """Render this amount's quantity through ``style.format``.

        Args:
            style: The CommodityStyle to format with. Only its ``format``
                method is used, and only ``self.quantity`` is passed to it.

        Returns:
            Whatever ``style.format`` returns for the quantity.
        """
        formatter = style.format  # type: ignore[attr-defined]
        return formatter(self.quantity)
34
+
35
+
36
@dataclass
class BalanceAssertion:
    """The balance assertion carried by a posting line.

    The two flags together encode the four hledger operators:

        =    single-commodity, subaccount-exclusive
        ==   sole-commodity,   subaccount-exclusive
        =*   single-commodity, subaccount-inclusive
        ==*  sole-commodity,   subaccount-inclusive
    """

    amount: Amount
    # Set for the subaccount-inclusive operators (=* and ==*).
    inclusive: bool = False
    # Set for the sole-commodity operators (== and ==*).
    sole_commodity: bool = False
50
+
51
+
52
@dataclass
class SourceSpan:
    """Where a parsed transaction block sits in its source.

    Records the file and an inclusive line range, so editor tooling can find,
    rewrite, or remove the transaction in the original file.
    """

    # Absolute path, or the placeholder "(string)" / "(stdin)".
    file: str
    # 1-indexed, inclusive: the transaction header line.
    start_line: int
    # 1-indexed, inclusive: the last posting/comment line of the block.
    end_line: int
63
+
64
+
65
@dataclass
class Posting:
    """A single posting line of a transaction: an account plus an optional amount.

    A posting whose `amount` is None has its amount inferred from the other
    postings of the same transaction (hledger's elided-amount syntax).
    """

    account: str
    amount: Amount | None = None
    balance_assertion: BalanceAssertion | None = None
    # Raw cost annotation text, e.g. "$180.00" taken from "@ $180.00".
    cost_raw: Optional[str] = None
    # Hidden from repr() but still part of equality comparison.
    source_line: int | None = field(repr=False, default=None)
    # Hidden from repr() but still part of equality comparison.
    inferred: bool = field(repr=False, default=False)
    # Hidden from repr() and ignored by equality comparison.
    inline_comment: str | None = field(compare=False, repr=False, default=None)
80
+
81
+
82
@dataclass
class Transaction:
    """One complete transaction entry of a journal."""

    date: datetime.date
    description: str
    # Secondary / auxiliary date.
    date2: Optional[datetime.date] = None
    # Each instance gets its own fresh list.
    postings: list[Posting] = field(default_factory=list)
    # True when the entry is marked with "*".
    cleared: bool = False
    # True when the entry is marked with "!".
    pending: bool = False
    # Optional code written in parentheses before the description.
    code: str = ""
    # Inline or trailing comment text (kept for backward compatibility).
    comment: str = ""
    # Hidden from repr() but still part of equality comparison.
    source_line: int | None = field(repr=False, default=None)
    # The remaining fields are hidden from repr() and ignored by equality.
    source_span: SourceSpan | None = field(compare=False, repr=False, default=None)
    raw_text: str | None = field(compare=False, repr=False, default=None)
    inline_comment: str | None = field(compare=False, repr=False, default=None)
98
+
99
+
100
@dataclass
class PriceDirective:
    """A ``P`` directive: the market price of a commodity on a given date.

    Example journal line: P 2024-03-01 AAPL $179.00
    Instances are kept in Journal.prices for valuation reports to use.
    """

    date: datetime.date
    # The commodity being priced, e.g. "AAPL" or "EUR".
    commodity: str
    # The price itself, as an Amount (quantity + currency).
    price: Amount
111
+
112
+
113
@dataclass
class Query:
    """Filter criteria accepted by the report functions.

    Every field is optional and defaults to None. A Query with all fields
    left at None means "no filter": passing query=None and passing Query()
    give identical results in every report function.

    `account`, `not_account`, and `payee` use hledger's matching convention:
    a string containing regex metacharacters is treated as a regex pattern
    (re.search, case-insensitive); any other string is a plain
    case-insensitive substring match.
    """

    # Substring or regex, matched against posting account names.
    account: str | None = None
    # Exclusion filter on account names.
    not_account: str | None = None
    # Substring or regex, matched against the transaction description.
    payee: str | None = None
    # Inclusive lower date bound.
    date_from: datetime.date | None = None
    # Inclusive upper date bound.
    date_to: datetime.date | None = None
    # Maximum account tree depth, counted in colon-separated segments.
    depth: int | None = None
133
+
134
+
135
@dataclass
class RegisterRow:
    """A single row of a register report.

    Each row stands for one matching posting and carries the cumulative
    running balance up to and including that row.
    """

    date: datetime.date
    description: str
    account: str
    amount: Amount
    running_balance: Decimal
148
+
149
+
150
@dataclass
class BalanceRow:
    """A single row of a tree-mode balance report.

    Attributes:
        account: The full colon-separated account name.
        depth: How many ':' separators the name has (0 = single root segment).
        amounts: Commodity symbol mapped to net balance, covering the
            account's own postings plus all of its descendants.
        is_subtotal: True when the account has no direct postings and exists
            only as an implicit parent aggregating its descendants.
    """

    account: str
    depth: int
    amounts: dict[str, Decimal]
    is_subtotal: bool
167
+
168
+
169
+ # ---------------------------------------------------------------------------
170
+ # Report specification types
171
+ #
172
+ # Query answers "which transactions/postings to include" — a filter applied
173
+ # uniformly across a report.
174
+ #
175
+ # ReportSpec answers "how should the included data be structured and labelled"
176
+ # — it groups accounts into named sections, controls sign presentation, and
177
+ # overrides display labels.
178
+ #
179
+ # The two compose cleanly: a Query sets a date range; a ReportSpec controls
180
+ # the layout. Neither knows about the other.
181
+ # ---------------------------------------------------------------------------
182
+
183
@dataclass(frozen=True)
class ReportSection:
    """A named section of a ReportSpec.

    Account patterns follow the same hledger substring/regex rules as
    Query.account. The patterns in `accounts` are OR-combined, and those in
    `exclude` are subtracted afterwards as a final step.

    With `invert` set, every amount in the section is negated after
    aggregation. This is how income accounts (which carry negative balances
    in double-entry accounting) are shown as positive numbers.
    """

    # Display name, e.g. "Fixed Expenses".
    name: str
    # Account patterns to include (OR logic).
    accounts: tuple[str, ...]
    # Account patterns to exclude.
    exclude: tuple[str, ...] = ()
    # Override for the subtotal line; defaults to f"Total {name}".
    label: str | None = None
    # Depth cap for this section; overrides the outer Query depth.
    depth: int | None = None
    # Negate all amounts (use for income sections).
    invert: bool = False
202
+
203
+
204
@dataclass(frozen=True)
class ReportSpec:
    """A report definition built from named sections.

    Library callers build specs directly:

        spec = ReportSpec(
            name="Income Statement",
            sections=(
                ReportSection("Income", accounts=("income",), invert=True),
                ReportSection("Expenses", accounts=("expenses",)),
            ),
        )

    Parsing specs out of journal comments (the "; report" / "; end report"
    syntax) is deferred to Milestone 3.
    """

    # Report name, e.g. "Monthly Budget View".
    name: str
    sections: tuple[ReportSection, ...]
    # Render a subtotal row per section.
    show_subtotals: bool = True
    # Render a grand total row.
    show_total: bool = True
    # Label for the grand total row.
    total_label: str = "Net"
227
+
228
+
229
@dataclass
class ReportSectionResult:
    """The computed output of a single ReportSection."""

    section: ReportSection
    # Account name -> net balance (after invert).
    rows: dict[str, Decimal]
    # Sum of all values in rows (after invert).
    subtotal: Decimal
    # Commodity symbol -> style object exposing ``format``; hidden from repr().
    _commodity_styles: dict = field(default_factory=dict, repr=False)

    def _export_records(self) -> list[dict]:
        """Build one record per account row plus the trailing subtotal record."""
        # Single-commodity assumption: when there are rows, the first key of
        # the style map is used as the commodity for every record. With no
        # rows (or no styles) the commodity is left empty.
        commodity = next(iter(self._commodity_styles), "") if self.rows else ""
        style = self._commodity_styles.get(commodity) if commodity else None

        def render(value: Decimal) -> str:
            # Fall back to plain str() when no style is available.
            return style.format(value) if style else str(value)

        records = [
            {
                "account": account,
                "amount": amount,
                "commodity": commodity,
                "amount_formatted": render(amount),
            }
            for account, amount in self.rows.items()
        ]

        # Honour the section's subtotal-label override; without one, use the
        # documented default of "Total {name}".
        total_label = getattr(self.section, "label", None)
        if total_label is None:
            total_label = f"Total {self.section.name}"
        records.append({
            "account": total_label,
            "amount": self.subtotal,
            "commodity": commodity,
            "amount_formatted": render(self.subtotal),
        })
        return records

    def to_dataframe(self):
        """Export this section to a pandas DataFrame.

        Columns: account, amount (Decimal), commodity (str), amount_formatted.
        The final row carries the section subtotal; its ``account`` value is
        ``section.label`` when one is set, otherwise 'Total {section.name}'.
        Requires pandas: pip install ledgerkit[pandas]
        """
        from ledgerkit._pandas_compat import require_pandas
        pd = require_pandas()
        df = pd.DataFrame(
            self._export_records(),
            columns=["account", "amount", "commodity", "amount_formatted"],
        )
        # The subtotal record is always present, so the frame is never empty.
        # Keep the amount column as object dtype so values stay Decimal.
        df["amount"] = df["amount"].astype(object)
        return df
279
+
280
+
281
@dataclass
class Journal:
    """Top-level container for all parsed journal data.

    Returned by parse_string() and parse_file(). The report methods on this
    class delegate to ledgerkit.reports and import it inside the method body,
    which avoids a circular dependency between models.py and reports.py.
    """

    transactions: list[Transaction] = field(default_factory=list)
    prices: list[PriceDirective] = field(default_factory=list)
    declared_accounts: list[str] = field(default_factory=list)
    declared_commodities: list[str] = field(default_factory=list)
    declared_payees: list[str] = field(default_factory=list)
    declared_tags: list[str] = field(default_factory=list)
    source_file: str | None = None
    included_files: int = 0
    # Maps commodity symbol → raw amount string from an explicit `commodity`
    # directive (e.g. "USD" → "1,000.00 USD"). Used by commodity_styles to
    # give directive-declared styles priority over inferred ones.
    _commodity_directive_raws: dict = field(default_factory=dict, repr=False)

    # ------------------------------------------------------------------
    # Commodity style
    # ------------------------------------------------------------------

    @property
    def commodity_styles(self) -> dict:
        """Return Dict[str, CommodityStyle] inferred from journal data.

        Priority order (highest wins):
          1. Explicit ``commodity`` directives.
          2. First posting amount seen for each commodity.
          3. First price-directive amount seen for each commodity.

        The mapping is rebuilt on every access. A commodity whose style
        cannot be inferred from a given raw string is skipped for that
        string, not raised to the caller.
        """
        from ledgerkit.commodity_style import CommodityStyle
        styles: dict = {}

        def infer_into(commodity: str, raw: str) -> None:
            # Best effort by design: a raw string that cannot be interpreted
            # must not break style lookup for the rest of the journal.
            try:
                styles[commodity] = CommodityStyle.infer(commodity, raw)
            except Exception:
                pass

        # Pass 1: posting amounts; the first successful inference per
        # commodity wins.
        for txn in self.transactions:
            for posting in txn.postings:
                amt = posting.amount
                if amt and amt.raw and amt.commodity not in styles:
                    infer_into(amt.commodity, amt.raw)

        # Pass 2: price-directive amounts, only for commodities still unset.
        for directive in self.prices:
            priced = directive.price
            if priced.raw and priced.commodity not in styles:
                infer_into(priced.commodity, priced.raw)

        # Pass 3: explicit directives overwrite anything inferred above.
        for commodity, raw in self._commodity_directive_raws.items():
            infer_into(commodity, raw)
        return styles

    # ------------------------------------------------------------------
    # Report methods — thin wrappers that delegate to ledgerkit.reports.
    # Lazy imports are used to avoid a circular dependency:
    #   models.py → reports.py → models.py
    # ------------------------------------------------------------------

    @staticmethod
    def _query_from_accounts(accounts: list[str]) -> Query:
        """Convert the deprecated ``accounts`` list into an equivalent Query."""
        import re
        # NOTE(review): a single entry is passed through unescaped, so it is
        # matched by Query's own substring-or-regex rule; several entries are
        # each escaped and OR-joined, so they are matched literally.
        if len(accounts) == 1:
            return Query(account=accounts[0])
        return Query(account="|".join(f"(?:{re.escape(a)})" for a in accounts))

    def balance(
        self,
        accounts: list[str] | None = None,
        query: Query | None = None,
        tree: bool = False,
    ) -> dict[str, dict[str, Decimal]] | list[BalanceRow]:
        """Return per-commodity net balances for each account.

        Args:
            accounts: [Deprecated] Optional list of account name substrings to
                filter by. Use query= for new code.
            query: Optional Query to filter postings. Takes precedence over
                accounts when both are supplied.
            tree: When True, returns list[BalanceRow] with implicit parent
                accounts and subtotals. When False (default), returns a flat
                dict[str, dict[str, Decimal]] (account → commodity → net).
        """
        from ledgerkit.reports import balance as _balance
        # Deprecated 'accounts' param: only consulted when no query is given.
        if accounts is not None and query is None:
            query = self._query_from_accounts(accounts)
        return _balance(self, query, tree=tree)

    def register(
        self,
        accounts: list[str] | None = None,
        query: Query | None = None,
    ) -> list[RegisterRow]:
        """Return a chronological list of RegisterRow objects.

        Args:
            accounts: [Deprecated] Optional list of account name substrings to
                filter by. Use query= for new code.
            query: Optional Query to filter postings. Takes precedence over
                accounts when both are supplied.
        """
        from ledgerkit.reports import register as _register
        # Deprecated 'accounts' param: only consulted when no query is given.
        if accounts is not None and query is None:
            query = self._query_from_accounts(accounts)
        return _register(self, query)

    def accounts(self) -> list[str]:
        """Return a sorted list of all unique account names in the journal."""
        from ledgerkit.reports import accounts as _accounts
        return _accounts(self)

    def stats(self, query: Query | None = None) -> JournalStats:
        """Return a JournalStats object with summary statistics."""
        from ledgerkit.reports import stats as _stats
        return _stats(self, query)

    def to_dataframe(self, query: Query | None = None):
        """Export postings to a pandas DataFrame (one row per posting).

        Columns: date, description, cleared, pending, account,
                 amount (Decimal|None), commodity (str|None),
                 amount_formatted (str|None).

        Postings without an amount get None in the three amount columns.
        Accepts an optional Query to pre-filter the data.
        Requires pandas: pip install ledgerkit[pandas]
        """
        from ledgerkit._pandas_compat import require_pandas
        pd = require_pandas()
        from ledgerkit.reports import _posting_matches
        # Computed once up front; the property rebuilds the map on each access.
        styles = self.commodity_styles
        columns = [
            "date", "description", "cleared", "pending", "account",
            "amount", "commodity", "amount_formatted",
        ]
        records = []
        for txn in self.transactions:
            for posting in txn.postings:
                if not _posting_matches(posting, txn, query):
                    continue
                amt = posting.amount
                if amt is None:
                    amount_val = commodity_val = formatted_val = None
                else:
                    style = styles.get(amt.commodity)
                    amount_val = amt.quantity
                    commodity_val = amt.commodity
                    # Without a known style, fall back to the bare quantity.
                    formatted_val = amt.display(style) if style else str(amt.quantity)
                records.append({
                    "date": txn.date,
                    "description": txn.description,
                    "cleared": txn.cleared,
                    "pending": txn.pending,
                    "account": posting.account,
                    "amount": amount_val,
                    "commodity": commodity_val,
                    "amount_formatted": formatted_val,
                })
        df = pd.DataFrame(records, columns=columns)
        # Preserve Decimal type in amount column (avoid float coercion)
        if not df.empty and "amount" in df.columns:
            df["amount"] = df["amount"].astype(object)
        return df