florecon-host 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
florecon/__init__.py ADDED
@@ -0,0 +1,58 @@
1
+ """florecon — incremental financial reconciliation by min-cost flow.
2
+
3
+ The host is a thin, generic wasmtime driver. A *plugin* ``.wasm`` owns the
4
+ domain (preprocessing, identity, matching) and describes itself; the host ships
5
+ the raw columns it asks for and drives the interactive workspace.
6
+
7
+ from florecon import Workspace
8
+ import polars as pl
9
+
10
+ ws = Workspace("interco_plugin.wasm")
11
+ ws.upsert(pl.DataFrame([
12
+ {"row_id": 1, "company": "A", "icp": "B", "objsub": "61500",
13
+ "indicative_usd_amt": 100.0, "trx_currency": "USD", "trx_amt": 100.0,
14
+ "gl_date": 0, "reference": "INV0001", ...},
15
+ ]))
16
+ print(ws.solve())
17
+ """
18
+
19
+ from ._host import (
20
+ ABI_VERSION,
21
+ ContractMismatch,
22
+ Florecon,
23
+ PluginError,
24
+ SchemaError,
25
+ Workspace,
26
+ )
27
+ from .persist import (
28
+ decisions,
29
+ groups_csv,
30
+ load_workspace,
31
+ report_frames,
32
+ result_json,
33
+ results_csv,
34
+ save_workspace,
35
+ )
36
+ from .projections import connected_components, primary_assignments, strict_assignments
37
+ from .tags import TagStore
38
+
39
+ __all__ = [
40
+ "Florecon",
41
+ "Workspace",
42
+ "ABI_VERSION",
43
+ "ContractMismatch",
44
+ "PluginError",
45
+ "SchemaError",
46
+ "TagStore",
47
+ "strict_assignments",
48
+ "primary_assignments",
49
+ "connected_components",
50
+ "decisions",
51
+ "save_workspace",
52
+ "load_workspace",
53
+ "report_frames",
54
+ "groups_csv",
55
+ "results_csv",
56
+ "result_json",
57
+ ]
58
+ __version__ = "0.1.0"
florecon/_host.py ADDED
@@ -0,0 +1,277 @@
1
+ """The wasmtime host for a florecon plugin.
2
+
3
+ The host is generic and dumb: it loads a plugin ``.wasm``, asks it to
4
+ ``describe()`` itself (which raw columns it needs, which one is the headline
5
+ amount), then ships a columnar table as Arrow IPC and drives the planless
6
+ ``Cmd`` protocol. State lives inside the module; only JSON + Arrow cross the
7
+ boundary. The host knows *nothing* about any domain — the same code runs every
8
+ florecon plugin.
9
+ """
10
+
11
+ import json
12
+
13
+ import pyarrow as pa
14
+ import wasmtime
15
+
16
+ # Must equal the SDK's florecon::sdk::ABI_VERSION export.
17
+ ABI_VERSION = 1
18
+
19
+
20
class ContractMismatch(RuntimeError):
    """Signals that the plugin wasm and this host disagree on the ABI version."""
22
+
23
+
24
class SchemaError(RuntimeError):
    """Signals a dataframe that does not fit the plugin's declared input schema.

    Raised on the :meth:`Workspace.upsert` path when a declared column is
    missing from the frame or cannot be cast to its declared type.
    """
27
+
28
+
29
class PluginError(RuntimeError):
    """A typed failure returned by the plugin's dispatch envelope.

    Carries the stable ``code`` (e.g. ``"unknown_group"``, ``"frozen_group"``,
    ``"conservation_violated"``) plus the row ``id`` / ``group_id`` it concerns
    when the plugin knows them.

    Attributes:
        code: the plugin's stable error code.
        message: the plugin's human-readable message, without the code prefix.
        id: the row id the failure concerns, or ``None``.
        group_id: the group id the failure concerns, or ``None``.

    ``str(exc)`` is ``"<code>: <message>"``.
    """

    def __init__(self, code: str, message: str, id=None, group_id=None):
        super().__init__(f"{code}: {message}")
        self.code = code
        # Kept separately so callers need not parse it back out of str(exc).
        self.message = message
        self.id = id
        self.group_id = group_id
42
+
43
+
44
+ _PA_TYPE = {"i64": pa.int64(), "f64": pa.float64(), "utf8": pa.string()}
45
+
46
+
47
class Florecon:
    """Low-level handle around a plugin wasm: alloc/write/call/read over linear
    memory, plus the ``describe`` and ``dispatch`` exports.

    Raises:
        ContractMismatch: at construction, when the plugin's ``abi_version``
            export differs from the host's ``ABI_VERSION``.
    """

    def __init__(self, wasm_path):
        engine = wasmtime.Engine()
        self.store = wasmtime.Store(engine)
        module = wasmtime.Module.from_file(engine, str(wasm_path))
        # The plugin is instantiated with no imports.
        self.inst = wasmtime.Instance(self.store, module, [])
        ex = self.inst.exports(self.store)
        self.memory = ex["memory"]
        self._alloc = ex["alloc"]
        self._dealloc = ex["dealloc"]
        self._dispatch = ex["dispatch"]
        self._describe = ex["describe"]
        self.abi_version = ex["abi_version"](self.store)
        if self.abi_version != ABI_VERSION:
            raise ContractMismatch(f"plugin ABI v{self.abi_version} != host v{ABI_VERSION}")

    def _read_packed(self, packed: int) -> bytes:
        """Copy out and free a result buffer described by a packed 64-bit value.

        The low 32 bits are read as the pointer and the high 32 bits as the
        length; the buffer is handed back to the plugin's ``dealloc`` once its
        bytes have been copied.
        """
        out_ptr = packed & 0xFFFFFFFF
        out_len = (packed >> 32) & 0xFFFFFFFF
        out = self.memory.read(self.store, out_ptr, out_ptr + out_len)
        self._dealloc(self.store, out_ptr, out_len)
        return bytes(out)

    def describe(self) -> dict:
        """The plugin's self-description: ``{abi_version, domain, input, ...}``."""
        return json.loads(self._read_packed(self._describe(self.store)))

    def dispatch(self, command: dict, arrow_bytes: "bytes | None" = None) -> dict:
        """Drive the persistent session with one ``Cmd`` dict. ``init``/``upsert``
        carry their rows in ``arrow_bytes``. Returns the raw envelope.

        The command is sent as UTF-8 JSON; ``arrow_bytes`` (if non-empty) is
        copied into a second guest buffer. Both input buffers are released even
        when the write or the plugin call raises, so a failed call does not
        leak guest memory.
        """
        data = json.dumps(command).encode("utf-8")
        n = len(data)
        arrow_n = len(arrow_bytes) if arrow_bytes else 0

        ptr = self._alloc(self.store, n)
        arrow_ptr = 0
        arrow_allocated = False
        try:
            self.memory.write(self.store, data, ptr)
            if arrow_n > 0:
                arrow_ptr = self._alloc(self.store, arrow_n)
                arrow_allocated = True
                self.memory.write(self.store, arrow_bytes, arrow_ptr)
            packed = self._dispatch(self.store, ptr, n, arrow_ptr, arrow_n)
        finally:
            # Inputs are freed before the result is read, as on the success path.
            self._dealloc(self.store, ptr, n)
            if arrow_allocated:
                self._dealloc(self.store, arrow_ptr, arrow_n)
        return json.loads(self._read_packed(packed))
97
+
98
+
99
+ def _ok(env: dict) -> dict:
100
+ """Unwrap a dispatch envelope to its report, or raise the typed error."""
101
+ if not env.get("ok"):
102
+ e = env.get("error") or {}
103
+ raise PluginError(
104
+ e.get("code", "unknown"),
105
+ e.get("message", "unknown plugin error"),
106
+ id=e.get("id"),
107
+ group_id=e.get("group_id"),
108
+ )
109
+ return env.get("report", {})
110
+
111
+
112
def _ipc(batch: "pa.RecordBatch") -> bytes:
    """Encode one record batch as an Arrow IPC stream and return its bytes."""
    buffer = pa.BufferOutputStream()
    writer = pa.ipc.new_stream(buffer, batch.schema)
    try:
        writer.write_batch(batch)
    finally:
        # Closing finalizes the stream, as leaving the ``with`` block would.
        writer.close()
    return buffer.getvalue().to_pybytes()
117
+
118
+
119
class Workspace:
    """An interactive reconciliation session over one plugin.

    The plugin owns the domain (preprocessing, identity, matching). The host
    just ships the raw columns the plugin's ``describe()`` declares — as a
    **dataframe** (polars / pandas / pyarrow) whose columns are validated and
    cast against that schema — and drives the lifecycle (``pin``/``unpin``) and
    partition (``merge``/``detach``/``dissolve``) verbs. State lives in the wasm
    module, so repeated :meth:`solve` calls warm re-solve.

    ``config`` (a JSON-able dict) is handed to the plugin at ``init`` — runtime
    tunables (tolerances, windows) the plugin reads, so tuning needs no rebuild.

    A group lives on a lifecycle axis: ``proposed`` groups are the solver's
    current opinion (recomputed each :meth:`solve`); ``pinned`` groups are your
    decisions, kept verbatim across solves.

    Every verb except :meth:`report` stores the raw envelope in ``self.last``
    and raises ``PluginError`` when the plugin reports a failure. Row and group
    ids are coerced with ``int()`` before they are sent, so integer types that
    ``json`` cannot encode directly (e.g. numpy integers) are accepted.
    """

    def __init__(self, wasm_path=None, _engine: Florecon = None, config: dict = None):
        self.fe = _engine or Florecon(wasm_path)
        self.spec = self.fe.describe()
        self.fields = self.spec["input"]
        self.domain = self.spec.get("domain", {})
        #: Name of the column the plugin flags as the headline display amount.
        self.amount_field = next(
            (f["name"] for f in self.fields if f.get("amount")), None
        )
        self._schema = pa.schema(
            [pa.field(f["name"], _PA_TYPE[f["type"]]) for f in self.fields]
        )
        init_cmd = {"op": "init"}
        if config is not None:
            init_cmd["config"] = config
        # ``init`` ships a zero-row batch that carries the declared schema.
        self._send(init_cmd, _ipc(self._empty_batch()))

    # --- plumbing ------------------------------------------------------------

    def _send(self, cmd: dict, arrow_bytes=None) -> dict:
        """Dispatch ``cmd``, remember the envelope in ``self.last``, and return
        its report (raising ``PluginError`` on a failed envelope)."""
        self.last = self.fe.dispatch(cmd, arrow_bytes)
        return _ok(self.last)

    @staticmethod
    def _ids(ids) -> list:
        """Coerce an iterable of row ids to plain ``int`` for JSON encoding."""
        return [int(i) for i in ids]

    # --- schema bridge -------------------------------------------------------

    def _empty_batch(self) -> "pa.RecordBatch":
        """A zero-row batch carrying the declared schema (used by ``init``)."""
        arrays = [pa.array([], type=_PA_TYPE[f["type"]]) for f in self.fields]
        return pa.RecordBatch.from_arrays(arrays, schema=self._schema)

    @staticmethod
    def _as_table(frame) -> "pa.Table":
        """Normalize a polars / pandas / pyarrow frame to a pyarrow Table.

        Raises:
            TypeError: for any other object.
        """
        if isinstance(frame, pa.Table):
            return frame
        if isinstance(frame, pa.RecordBatch):
            return pa.Table.from_batches([frame])
        # Detect polars/pandas by module name so neither has to be imported.
        mod = type(frame).__module__.split(".", 1)[0]
        if mod == "polars" and hasattr(frame, "to_arrow"):
            return frame.to_arrow()
        if mod == "pandas":
            return pa.Table.from_pandas(frame, preserve_index=False)
        raise TypeError(
            f"upsert expects a polars/pandas/pyarrow dataframe, got {type(frame).__name__}"
        )

    def _batch(self, frame) -> "pa.RecordBatch":
        """Validate a frame against the declared schema and project it to the
        declared columns (extra columns ignored), cast to the declared types.

        Raises:
            SchemaError: when a declared column is missing or cannot be cast.
        """
        table = self._as_table(frame)
        present = set(table.column_names)
        missing = [f["name"] for f in self.fields if f["name"] not in present]
        if missing:
            raise SchemaError(
                f"dataframe is missing columns declared by {self.domain.get('id')}: {missing}"
            )
        arrays = []
        for f in self.fields:
            arr = table.column(f["name"]).combine_chunks()
            try:
                arr = arr.cast(_PA_TYPE[f["type"]])
            except (pa.ArrowInvalid, pa.ArrowTypeError, pa.ArrowNotImplementedError) as e:
                raise SchemaError(
                    f"column {f['name']!r}: cannot cast {arr.type} -> {f['type']}: {e}"
                ) from e
            arrays.append(arr)
        return pa.RecordBatch.from_arrays(arrays, schema=self._schema)

    # --- ledger --------------------------------------------------------------

    def upsert(self, *frames) -> "Workspace":
        """Insert/replace rows from one or more dataframes (polars / pandas /
        pyarrow). Each frame must carry the plugin's declared raw columns;
        extra columns are ignored, declared columns are cast to their wire type.
        Frames with no rows are validated but not sent. Returns ``self``."""
        for frame in frames:
            batch = self._batch(frame)
            if batch.num_rows:
                self._send({"op": "upsert"}, _ipc(batch))
        return self

    def remove(self, *ids: int) -> "Workspace":
        """Send a ``remove`` command for the given row ids. Returns ``self``."""
        self._send({"op": "remove", "ids": self._ids(ids)})
        return self

    # --- machine -------------------------------------------------------------

    def solve(self) -> dict:
        """Send a ``solve`` command and return the resulting report."""
        return self._send({"op": "solve"})

    # --- lifecycle -----------------------------------------------------------

    def pin(self, group_id: int) -> dict:
        """Pin one proposed group — keep it verbatim across later solves."""
        return self._send({"op": "pin", "by": "group", "group_id": int(group_id)})

    def pin_clean(self, tol: int = 0) -> dict:
        """Pin every proposed match that nets to zero within ``tol``."""
        return self._send({"op": "pin", "by": "clean", "tol": int(tol)})

    def pin_singletons(self, ids) -> dict:
        """Pin the named lone lots (proposed singletons) as accepted-as-is."""
        return self._send({"op": "pin", "by": "singletons", "ids": self._ids(ids)})

    def unpin(self, group_id: int) -> dict:
        """Release a pinned group back to the solver's control."""
        return self._send({"op": "unpin", "group_id": int(group_id)})

    # --- partition -----------------------------------------------------------

    def merge(self, allocations, label: str = "manual", reason=None) -> dict:
        """Assert a pinned group over exact allocations ``[{"id", "amount"}, ...]``.

        ``reason`` is sent (as a string) only when it is not ``None``.
        """
        cmd = {
            "op": "merge",
            "allocations": [
                {"id": int(a["id"]), "amount": int(a["amount"])} for a in allocations
            ],
            "label": label,
        }
        if reason is not None:
            cmd["reason"] = str(reason)
        return self._send(cmd)

    def detach(self, group_id: int, ids) -> dict:
        """Pull rows out of a proposed group into lone singletons."""
        return self._send(
            {"op": "detach", "group_id": int(group_id), "ids": self._ids(ids)}
        )

    def dissolve(self, group_id: int) -> dict:
        """Break a proposed group back into singletons."""
        return self._send({"op": "dissolve", "group_id": int(group_id)})

    # --- read ----------------------------------------------------------------

    def report(self) -> dict:
        """Fetch the current report without touching ``self.last``."""
        return _ok(self.fe.dispatch({"op": "report"}))
florecon/persist.py ADDED
@@ -0,0 +1,177 @@
1
+ """Workspace persistence and result export.
2
+
3
+ One module that knows how to (a) serialize/restore the *operator's* durable
4
+ decisions to a portable file, and (b) export a reconciliation result as
5
+ dataframes / CSV / JSON.
6
+
7
+ Design note — what is durable state?
8
+ Everything *proposed* is a deterministic function of (rows, plugin) via
9
+ :meth:`Workspace.solve`, so it never needs saving. The only durable operator
10
+ state is:
11
+ * the **pinned** groups (committed decisions), expressed allocation-native
12
+ in stable row-id terms (group ids are ephemeral across solves), and
13
+ * the **tag** overlay (review buckets), already keyed by stable row id.
14
+ So a saved workspace = pinned decisions + tags (+ optional metadata). On load
15
+ we re-solve to recover the proposals, then re-assert the pinned decisions on
16
+ top. Robust and small — it survives plugin tweaks that only move proposals.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ from pathlib import Path
23
+
24
+ from .projections import primary_assignments
25
+
26
+ WORKSPACE_KIND = "florecon.workspace"
27
+ WORKSPACE_VERSION = 1
28
+
29
+
30
+ # --- durable-decision extraction -------------------------------------------
31
+
32
def decisions(report: dict) -> list[dict]:
    """Extract the *pinned* groups of a report as allocation-native decisions.

    Each decision is ``{reason, origin, allocations: [{id, amount}, ...]}``,
    with ``origin`` defaulting to ``"manual"``. Decisions are keyed by row id
    rather than group id, and pinned groups without any allocation are dropped.
    """
    pinned: dict[int, dict] = {
        int(grp["group_id"]): {
            "reason": grp.get("reason"),
            "origin": grp.get("origin", "manual"),
            "allocations": [],
        }
        for grp in report.get("groups", [])
        if grp.get("status") == "pinned"
    }
    for alloc in report.get("allocations", []):
        entry = pinned.get(int(alloc["group_id"]))
        if entry is None:
            # Allocation belongs to a group that is not pinned.
            continue
        entry["allocations"].append(
            {"id": int(alloc["id"]), "amount": int(alloc["amount"])}
        )
    return [entry for entry in pinned.values() if entry["allocations"]]
48
+
49
+
50
def apply_decisions(ws, saved: list[dict]) -> dict:
    """Replay saved pinned decisions onto a freshly solved workspace.

    A decision with two or more non-zero allocations is re-asserted through
    :meth:`Workspace.merge` using only those non-zero legs (exact amounts, so
    splits survive). Every other decision contributes all of its row ids to a
    single :meth:`Workspace.pin_singletons` call. Merges run first so they pull
    their rows out of any proposed group before the singletons are pinned.

    Failures are collected rather than raised. Returns
    ``{"groups", "singles", "failed", "errors"}``.
    """

    def nonzero_legs(decision: dict) -> list:
        return [leg for leg in decision["allocations"] if leg["amount"]]

    merges = []
    lone_ids: list[int] = []
    for decision in saved:
        if len(nonzero_legs(decision)) >= 2:
            merges.append(decision)
        else:
            lone_ids.extend(leg["id"] for leg in decision["allocations"])

    problems = []
    merged = 0
    for decision in merges:
        try:
            ws.merge(
                nonzero_legs(decision),
                label=decision.get("origin", "manual"),
                reason=decision.get("reason"),
            )
        except Exception as exc:  # noqa: BLE001 - collected, not swallowed
            problems.append(str(exc))
        else:
            merged += 1

    pinned_singles = 0
    if lone_ids:
        try:
            ws.pin_singletons(lone_ids)
        except Exception as exc:  # noqa: BLE001
            problems.append(str(exc))
        else:
            pinned_singles = len(lone_ids)

    # Each failure adds exactly one message, so the count is the list length.
    return {
        "groups": merged,
        "singles": pinned_singles,
        "failed": len(problems),
        "errors": problems,
    }
84
+
85
+
86
+ # --- serialize / restore ----------------------------------------------------
87
+
88
def serialize(report: dict, *, tags=None, meta: dict | None = None) -> dict:
    """Build the portable workspace object for a report.

    The result carries the workspace kind/version markers, the pinned
    decisions, the tag overlay (``tags.dump()``, or an empty overlay when no
    tag store is given) and the metadata. ``domain`` is lifted from
    ``meta["domain"]``. Raw rows are *not* embedded — re-supply them with
    ``upsert`` before loading.
    """
    context = meta or {}
    workspace = {
        "kind": WORKSPACE_KIND,
        "version": WORKSPACE_VERSION,
        "domain": context.get("domain"),
    }
    workspace["decisions"] = decisions(report)
    if tags is None:
        workspace["tags"] = {"tags": {}, "meta": {}}
    else:
        workspace["tags"] = tags.dump()
    workspace["meta"] = context
    return workspace
99
+
100
+
101
def parse(obj_or_text) -> dict:
    """Validate a workspace object, decoding it from JSON text/bytes if needed.

    Raises:
        ValueError: when the value is not a dict tagged with the workspace
            kind, or its ``version`` is not the supported one.
    """
    obj = obj_or_text
    if isinstance(obj, (str, bytes)):
        obj = json.loads(obj)
    if not (isinstance(obj, dict) and obj.get("kind") == WORKSPACE_KIND):
        raise ValueError("not a florecon workspace")
    version = obj.get("version")
    if version != WORKSPACE_VERSION:
        raise ValueError(f"workspace version {version} != supported {WORKSPACE_VERSION}")
    return obj
108
+
109
+
110
def save_workspace(path, report: dict, *, tags=None, meta: dict | None = None) -> dict:
    """Serialize a workspace, write it to ``path`` as indented JSON, and return
    the serialized object."""
    payload = serialize(report, tags=tags, meta=meta)
    text = json.dumps(payload, indent=2)
    Path(path).write_text(text)
    return payload
115
+
116
+
117
def load_workspace(path_or_obj, ws, *, tags=None) -> dict:
    """Restore decisions (and optionally tags) onto a workspace that has already
    been re-``upsert``ed and ``solve``d. Returns the apply summary.

    ``path_or_obj`` may be a path to a saved workspace file (``str`` or
    ``Path``), JSON text/bytes, or an already-parsed workspace dict. A ``str``
    that does not name an existing file is treated as JSON text.

    Raises:
        ValueError: when a ``Path`` does not exist, or the content is not a
            supported florecon workspace (including undecodable JSON text).
    """
    source = path_or_obj
    if isinstance(source, (str, Path)):
        candidate = Path(source)
        try:
            on_disk = candidate.exists()
        except (OSError, ValueError):
            # A long JSON text probed as a file name can fail (e.g. "file name
            # too long") instead of reporting "missing"; treat it as text.
            on_disk = False
        if on_disk:
            source = candidate.read_text()
        elif isinstance(source, Path):
            # A Path can only mean a file; say so instead of failing later with
            # the misleading "not a florecon workspace".
            raise ValueError(f"workspace file not found: {candidate}")
    obj = parse(source)
    if tags is not None:
        tags.restore(obj.get("tags"))
    return apply_decisions(ws, obj.get("decisions", []))
124
+
125
+
126
+ # --- result export ----------------------------------------------------------
127
+
128
def report_frames(report: dict):
    """Convert a report into the pair of pyarrow Tables ``(groups, allocations)``.

    Handy for writing results back to Spark/Delta from a notebook. A missing
    key yields a table built from an empty list.
    """
    import pyarrow as pa

    def as_table(key: str):
        return pa.Table.from_pylist(report.get(key, []))

    groups = as_table("groups")
    allocations = as_table("allocations")
    return groups, allocations
136
+
137
+
138
+ def _csv(rows: list[list]) -> str:
139
+ def cell(v):
140
+ s = "" if v is None else str(v)
141
+ return '"' + s.replace('"', '""') + '"' if any(c in s for c in ',"\n') else s
142
+ return "\n".join(",".join(cell(c) for c in r) for r in rows)
143
+
144
+
145
def groups_csv(report: dict, *, money_scale: float = 0.01) -> str:
    """CSV with a header plus one line per group in the report.

    ``net`` is multiplied by ``money_scale`` and printed with two decimals; the
    default scale turns minor units (cents) into currency units. A missing or
    ``None`` reason renders as an empty field.
    """
    table = [["group_id", "origin", "reason", "status", "size", "net"]]
    table.extend(
        [
            grp["group_id"],
            grp.get("origin", ""),
            grp.get("reason", "") or "",
            grp.get("status", ""),
            grp.get("size", ""),
            f"{int(grp.get('net', 0)) * money_scale:.2f}",
        ]
        for grp in report.get("groups", [])
    )
    return _csv(table)
157
+
158
+
159
def results_csv(report: dict, *, policy: str = "largest_abs", money_scale: float = 0.01) -> str:
    """Row-level CSV: one line per row with the group it primarily landed in.

    The primary group comes from :func:`primary_assignments` under ``policy``;
    the group's origin/reason/status and scaled net are copied onto each line
    (blank, and ``0.00``, when the group is not listed in the report).
    """
    by_gid = {int(grp["group_id"]): grp for grp in report.get("groups", [])}

    def line(row_id, group_id) -> list:
        grp = by_gid.get(int(group_id), {})
        return [
            row_id,
            group_id,
            grp.get("origin", ""),
            grp.get("reason", "") or "",
            grp.get("status", ""),
            f"{int(grp.get('net', 0)) * money_scale:.2f}",
        ]

    header = ["row_id", "group_id", "origin", "reason", "status", "group_net"]
    body = [line(rid, gid) for rid, gid in primary_assignments(report, policy=policy)]
    return _csv([header, *body])
171
+
172
+
173
def result_json(report: dict, *, meta: dict | None = None) -> str:
    """Wrap the report in a ``florecon.result`` envelope and return it as
    indented JSON text; ``meta`` defaults to an empty object."""
    envelope = {"kind": "florecon.result", "meta": meta or {}, "report": report}
    return json.dumps(envelope, indent=2)
@@ -0,0 +1,86 @@
1
+ """Explicit projections from the allocation-native Report hypergraph."""
2
+
3
+
4
def strict_assignments(report: dict) -> list[tuple[int, int]]:
    """List ``(row id, group id)`` pairs, sorted by row id, for an unsplit report.

    Raises:
        ValueError: if any row id has allocations in more than one group.
    """
    homes_by_row = {}
    for alloc in report.get("allocations", []):
        row = int(alloc["id"])
        if row not in homes_by_row:
            homes_by_row[row] = set()
        homes_by_row[row].add(int(alloc["group_id"]))

    pairs = []
    for row in sorted(homes_by_row):
        homes = homes_by_row[row]
        if len(homes) != 1:
            raise ValueError(f"row {row} is split across groups {sorted(homes)}")
        (only_home,) = homes
        pairs.append((row, only_home))
    return pairs
18
+
19
+
20
def primary_assignments(report: dict, policy: str = "largest_abs") -> list[tuple[int, int]]:
    """Pick one home group per row and return ``(row id, group id)`` pairs
    sorted by row id.

    When a row is split across several groups, ``policy`` decides the winner:

    * ``largest_abs`` (default, and the fallback for any other value): largest
      absolute allocation amount, then a group whose net is zero, then the
      lowest group id;
    * ``prefer_clean``: a zero-net group first, then largest absolute amount,
      then the lowest group id;
    * ``first_group``: the lowest group id.

    Unlike :func:`strict_assignments`, a split row is never an error here.
    """
    allocs_by_row: dict[int, list[dict]] = {}
    for alloc in report.get("allocations", []):
        allocs_by_row.setdefault(int(alloc["id"]), []).append(alloc)
    group_meta = {int(grp["group_id"]): grp for grp in report.get("groups", [])}

    def rank(alloc: dict):
        gid = int(alloc["group_id"])
        net = int(group_meta.get(gid, {}).get("net", 0))
        is_clean = 1 if net == 0 else 0
        if policy == "first_group":
            # Negated so that max() selects the smallest group id.
            return (-gid,)
        magnitude = abs(int(alloc["amount"]))
        if policy == "prefer_clean":
            return (is_clean, magnitude, -gid)
        return (magnitude, is_clean, -gid)

    return [
        (row, int(max(allocs_by_row[row], key=rank)["group_id"]))
        for row in sorted(allocs_by_row)
    ]
47
+
48
+
49
def connected_components(report: dict) -> list[dict]:
    """Split the bipartite row-id <-> group-id graph into connected components.

    Each component is ``{"rows": [...], "groups": [...]}`` with both lists
    sorted; components are ordered by their smallest row id, then smallest
    group id.
    """
    groups_of_row = {}
    rows_of_group = {}
    for alloc in report.get("allocations", []):
        row = int(alloc["id"])
        grp = int(alloc["group_id"])
        groups_of_row.setdefault(row, []).append(grp)
        rows_of_group.setdefault(grp, []).append(row)

    visited_rows = set()
    visited_groups = set()
    components = []
    for seed in list(groups_of_row):
        if seed in visited_rows:
            continue
        comp_rows = set()
        comp_groups = set()
        # One work list of (is_row, node) entries walks both sides of the graph.
        frontier = [(True, seed)]
        while frontier:
            is_row, node = frontier.pop()
            if is_row:
                if node in visited_rows:
                    continue
                visited_rows.add(node)
                comp_rows.add(node)
                frontier.extend((False, g) for g in groups_of_row.get(node, []))
            else:
                if node in visited_groups:
                    continue
                visited_groups.add(node)
                comp_groups.add(node)
                frontier.extend((True, r) for r in rows_of_group.get(node, []))
        components.append({"rows": sorted(comp_rows), "groups": sorted(comp_groups)})

    def order(component: dict):
        first_row = component["rows"][0] if component["rows"] else 0
        first_group = component["groups"][0] if component["groups"] else 0
        return (first_row, first_group)

    components.sort(key=order)
    return components
florecon/tags.py ADDED
@@ -0,0 +1,99 @@
1
+ """Host-side review/attention overlay.
2
+
3
+ A *tag* is a many-to-many review axis (e.g. "needs-review", "escalate",
4
+ "FX-difference") that an operator drapes over rows. It is **orthogonal** to the
5
+ engine's lifecycle axis (``proposed`` | ``pinned``): the conservation engine
6
+ never learns the word "review". Tags are owned entirely by the host and keyed by
7
+ the **stable row id** (``ExtId``), never by group id — proposed group ids are
8
+ re-minted every solve, so keying by row id makes tags survive a re-solve for
9
+ free.
10
+
11
+ This mirrors the same orthogonality the engine enforces for groups, one layer
12
+ out: status is the machine's business, review is the operator's, and the two
13
+ never contaminate each other.
14
+ """
15
+
16
+ # A small, stable chip palette; tags get a colour by creation order.
17
+ _PALETTE = [
18
+ "#217346", "#6d3fd1", "#b7791f", "#0f8d80",
19
+ "#c2410c", "#2563eb", "#be185d", "#4d7c0f",
20
+ ]
21
+
22
+
23
class TagStore:
    """An in-memory overlay of ``row_id -> {tag_id}`` plus tag metadata.

    Nothing is persisted implicitly; serialize with :meth:`dump` (and embed it in
    a saved workspace) for durability. A row may carry several tags or none.

    Attributes:
        tags: row id -> set of tag ids on that row.
        meta: tag id -> ``{"label", "color", "kind"}``.
    """

    # Union/generic annotations below are written as strings so that defining
    # the class never evaluates ``str | None`` (a TypeError before Python 3.10).

    def __init__(self):
        self.tags: "dict[int, set[str]]" = {}
        self.meta: "dict[str, dict]" = {}

    def ensure_tag(self, label: str, kind: str = "bucket") -> "str | None":
        """Create (or look up) a tag by human label and return its id.

        The id is ``"tag:"`` plus the stripped, lower-cased label, so reusing a
        bucket name (in any letter case) reuses the same tag and colour. A new
        tag takes the next palette colour by creation order. Returns ``None``
        for an empty or whitespace-only label.
        """
        name = (label or "").strip()
        if not name:
            return None
        tid = "tag:" + name.lower()
        if tid not in self.meta:
            self.meta[tid] = {
                "label": name,
                "color": _PALETTE[len(self.meta) % len(_PALETTE)],
                "kind": kind,
            }
        return tid

    def tags_of(self, row_id: int) -> "set[str]":
        """Tag ids on a row, as a fresh set.

        A copy is returned so that editing the result cannot silently change
        the store; use :meth:`add` / :meth:`remove` to modify tags.
        """
        return set(self.tags.get(int(row_id), ()))

    def label(self, tid: str) -> str:
        """Display label of a tag; falls back to the tag id itself."""
        return self.meta.get(tid, {}).get("label", tid)

    def color(self, tid: str) -> str:
        """Colour of a tag; falls back to ``#6b7280`` for unknown tags."""
        return self.meta.get(tid, {}).get("color", "#6b7280")

    def add(self, row_id: int, tid: str) -> None:
        """Put tag ``tid`` on a row."""
        self.tags.setdefault(int(row_id), set()).add(tid)

    def remove(self, row_id: int, tid: str) -> None:
        """Take tag ``tid`` off a row; the row's entry is dropped once empty."""
        key = int(row_id)
        s = self.tags.get(key)
        if not s:
            return
        s.discard(tid)
        if not s:
            del self.tags[key]

    def clear(self, row_id: int) -> None:
        """Drop every tag on a row."""
        self.tags.pop(int(row_id), None)

    def tagged(self, tid: str) -> "list[int]":
        """Every row id currently carrying ``tid`` (sorted)."""
        return sorted(r for r, s in self.tags.items() if tid in s)

    def dump(self) -> dict:
        """Serialize the whole overlay to a plain JSON-able object.

        Row ids become string keys and each tag set becomes a sorted list.
        """
        return {
            "tags": {str(r): sorted(s) for r, s in self.tags.items() if s},
            "meta": dict(self.meta),
        }

    def restore(self, obj: "dict | None") -> "TagStore":
        """Replace the overlay from a :meth:`dump`. Row id keys are coerced back
        to ints (a key that is not an integer is kept as-is). A falsy ``obj``
        leaves the store empty. Returns self."""
        self.tags = {}
        self.meta = {}
        if not obj:
            return self
        for tid, m in (obj.get("meta") or {}).items():
            self.meta[tid] = m
        for r, arr in (obj.get("tags") or {}).items():
            try:
                key = int(r)
            except (TypeError, ValueError):
                key = r
            self.tags[key] = set(arr)
        return self
@@ -0,0 +1,69 @@
1
+ Metadata-Version: 2.4
2
+ Name: florecon-host
3
+ Version: 0.1.0
4
+ Summary: The Python host for florecon: a generic wasmtime embedder that drives self-describing reconciliation plugins. Brings its own plugin wasm; imports as `florecon`. Nothing created, nothing lost.
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/spoj/florecon
7
+ Project-URL: Repository, https://github.com/spoj/florecon
8
+ Project-URL: Issues, https://github.com/spoj/florecon/issues
9
+ Keywords: reconciliation,finance,matching,wasm,min-cost-flow
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Financial and Insurance Industry
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Rust
15
+ Classifier: Topic :: Office/Business :: Financial :: Accounting
16
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: wasmtime>=20
20
+ Requires-Dist: pyarrow>=14
21
+
22
+ # florecon (Python host)
23
+
24
+ A generic [wasmtime](https://github.com/bytecodealliance/wasmtime-py) host that
25
+ drives self-describing **florecon** reconciliation plugins. The host knows
26
+ nothing about any domain: it loads a plugin `.wasm`, reads its `describe()`, and
27
+ ships the raw columns the plugin declares. The same code runs every florecon
28
+ plugin.
29
+
30
+ Money is integer minor units; nothing is created or lost.
31
+
32
+ ```python
33
+ from florecon import Workspace
34
+ import pyarrow as pa
35
+ ws = Workspace("interco_plugin.wasm") # any florecon plugin wasm
36
+
37
+ ws.upsert(pa.Table.from_pylist([
38
+ {"row_id": 1, "company": "A", "icp": "B", "objsub": "61500",
39
+ "indicative_usd_amt": 100.0, "trx_currency": "USD", "trx_amt": 100.0,
40
+ "gl_date": 0, "reference": "INV0001"},
41
+ {"row_id": 2, "company": "B", "icp": "A", "objsub": "61500",
42
+ "indicative_usd_amt": -100.0, "trx_currency": "USD", "trx_amt": 100.0,
43
+ "gl_date": 1, "reference": "INV0001"},
44
+ ]))
45
+ rep = ws.solve() # the proposal: groups + per-row allocations
46
+ ws.pin_clean(tol=0) # sign off every clean net-zero match
47
+ ws.solve() # warm re-solve; pinned groups kept verbatim
48
+ ```
49
+
50
+ ## Surface
51
+
52
+ A group lives on a lifecycle axis — `proposed` (the solver's current opinion,
53
+ recomputed each `solve`) or `pinned` (your decision, kept verbatim).
54
+
55
+ ```text
56
+ ledger upsert(*frames) · remove(*ids)
57
+ machine solve()
58
+ lifecycle pin(gid) · pin_clean(tol) · pin_singletons(ids) · unpin(gid)
59
+ partition merge(allocs, label, reason) · detach(gid, ids) · dissolve(gid)
60
+ read report()
61
+ ```
62
+
63
+ Failures raise `PluginError` carrying a stable `code` (e.g. `"frozen_group"`,
64
+ `"conservation_violated"`) plus the `id` / `group_id` it concerns.
65
+ `strict_assignments` / `connected_components` project the allocation hypergraph
66
+ into per-row assignments or settlement clusters.
67
+
68
+ The plugin/host ABI is versioned: the host refuses a wasm whose `abi_version`
69
+ differs from `florecon.ABI_VERSION`.
@@ -0,0 +1,9 @@
1
+ florecon/__init__.py,sha256=6B2bZ39fD1U8dkcVUNuGA-p110Sy1TYbIxEjgno4qyY,1435
2
+ florecon/_host.py,sha256=RJRf97J6VJNIMCwAjy66nDV3_vqXVBm3An7IOK6Y5Sw,11215
3
+ florecon/persist.py,sha256=z2zo-UZ5towWgRm17i_6AiP9BF_schvhkoR6p4h9eCY,7270
4
+ florecon/projections.py,sha256=uEM58CZufryW-IL5gU3JeGB-GTL-ots6E1NC99ZnDwE,3219
5
+ florecon/tags.py,sha256=_3IlL7LuEm7LguZBMWIZSJG2ZlSftk4WIgtzccLKnGo,3548
6
+ florecon_host-0.1.0.dist-info/METADATA,sha256=qmG4HLBQQyjWq-iHkpFHuv04uOvQDHgnY6IkOxZ4tYM,2924
7
+ florecon_host-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
8
+ florecon_host-0.1.0.dist-info/top_level.txt,sha256=geeDHTlWp-eDsxPxclIC-Ambsr6x6X5J1oEQQhTBxPM,9
9
+ florecon_host-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ florecon