florecon-host 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- florecon_host-0.1.0/PKG-INFO +69 -0
- florecon_host-0.1.0/README.md +48 -0
- florecon_host-0.1.0/pyproject.toml +36 -0
- florecon_host-0.1.0/setup.cfg +4 -0
- florecon_host-0.1.0/src/florecon/__init__.py +58 -0
- florecon_host-0.1.0/src/florecon/_host.py +277 -0
- florecon_host-0.1.0/src/florecon/persist.py +177 -0
- florecon_host-0.1.0/src/florecon/projections.py +86 -0
- florecon_host-0.1.0/src/florecon/tags.py +99 -0
- florecon_host-0.1.0/src/florecon_host.egg-info/PKG-INFO +69 -0
- florecon_host-0.1.0/src/florecon_host.egg-info/SOURCES.txt +12 -0
- florecon_host-0.1.0/src/florecon_host.egg-info/dependency_links.txt +1 -0
- florecon_host-0.1.0/src/florecon_host.egg-info/requires.txt +2 -0
- florecon_host-0.1.0/src/florecon_host.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: florecon-host
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The Python host for florecon: a generic wasmtime embedder that drives self-describing reconciliation plugins. Brings its own plugin wasm; imports as `florecon`. Nothing created, nothing lost.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/spoj/florecon
|
|
7
|
+
Project-URL: Repository, https://github.com/spoj/florecon
|
|
8
|
+
Project-URL: Issues, https://github.com/spoj/florecon/issues
|
|
9
|
+
Keywords: reconciliation,finance,matching,wasm,min-cost-flow
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Rust
|
|
15
|
+
Classifier: Topic :: Office/Business :: Financial :: Accounting
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: wasmtime>=20
|
|
20
|
+
Requires-Dist: pyarrow>=14
|
|
21
|
+
|
|
22
|
+
# florecon (Python host)
|
|
23
|
+
|
|
24
|
+
A generic [wasmtime](https://github.com/bytecodealliance/wasmtime-py) host that
|
|
25
|
+
drives self-describing **florecon** reconciliation plugins. The host knows
|
|
26
|
+
nothing about any domain: it loads a plugin `.wasm`, reads its `describe()`, and
|
|
27
|
+
ships the raw columns the plugin declares. The same code runs every florecon
|
|
28
|
+
plugin.
|
|
29
|
+
|
|
30
|
+
Money is integer minor units; nothing is created or lost.
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from florecon import Workspace
|
|
34
|
+
|
|
35
|
+
ws = Workspace("interco_plugin.wasm") # any florecon plugin wasm
|
|
36
|
+
|
|
37
|
+
ws.upsert(
|
|
38
|
+
{"row_id": 1, "company": "A", "icp": "B", "objsub": "61500",
|
|
39
|
+
"indicative_usd_amt": 100.0, "trx_currency": "USD", "trx_amt": 100.0,
|
|
40
|
+
"gl_date": 0, "reference": "INV0001"},
|
|
41
|
+
{"row_id": 2, "company": "B", "icp": "A", "objsub": "61500",
|
|
42
|
+
"indicative_usd_amt": -100.0, "trx_currency": "USD", "trx_amt": 100.0,
|
|
43
|
+
"gl_date": 1, "reference": "INV0001"},
|
|
44
|
+
)
|
|
45
|
+
rep = ws.solve() # the proposal: groups + per-row allocations
|
|
46
|
+
ws.pin_clean(tol=0) # sign off every clean net-zero match
|
|
47
|
+
ws.solve() # warm re-solve; pinned groups kept verbatim
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Surface
|
|
51
|
+
|
|
52
|
+
A group lives on a lifecycle axis — `proposed` (the solver's current opinion,
|
|
53
|
+
recomputed each `solve`) or `pinned` (your decision, kept verbatim).
|
|
54
|
+
|
|
55
|
+
```text
|
|
56
|
+
ledger upsert(*rows) · remove(*ids)
|
|
57
|
+
machine solve()
|
|
58
|
+
lifecycle pin(gid) · pin_clean(tol) · pin_singletons(ids) · unpin(gid)
|
|
59
|
+
partition merge(allocs, label, reason) · detach(gid, ids) · dissolve(gid)
|
|
60
|
+
read report()
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Failures raise `PluginError` carrying a stable `code` (e.g. `"frozen_group"`,
|
|
64
|
+
`"conservation_violated"`) plus the `id` / `group_id` it concerns.
|
|
65
|
+
`strict_assignments` / `connected_components` project the allocation hypergraph
|
|
66
|
+
into per-row assignments or settlement clusters.
|
|
67
|
+
|
|
68
|
+
The plugin/host ABI is versioned: the host refuses a wasm whose `abi_version`
|
|
69
|
+
differs from `florecon.ABI_VERSION`.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# florecon (Python host)
|
|
2
|
+
|
|
3
|
+
A generic [wasmtime](https://github.com/bytecodealliance/wasmtime-py) host that
|
|
4
|
+
drives self-describing **florecon** reconciliation plugins. The host knows
|
|
5
|
+
nothing about any domain: it loads a plugin `.wasm`, reads its `describe()`, and
|
|
6
|
+
ships the raw columns the plugin declares. The same code runs every florecon
|
|
7
|
+
plugin.
|
|
8
|
+
|
|
9
|
+
Money is integer minor units; nothing is created or lost.
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from florecon import Workspace
|
|
13
|
+
|
|
14
|
+
ws = Workspace("interco_plugin.wasm") # any florecon plugin wasm
|
|
15
|
+
|
|
16
|
+
ws.upsert(
|
|
17
|
+
{"row_id": 1, "company": "A", "icp": "B", "objsub": "61500",
|
|
18
|
+
"indicative_usd_amt": 100.0, "trx_currency": "USD", "trx_amt": 100.0,
|
|
19
|
+
"gl_date": 0, "reference": "INV0001"},
|
|
20
|
+
{"row_id": 2, "company": "B", "icp": "A", "objsub": "61500",
|
|
21
|
+
"indicative_usd_amt": -100.0, "trx_currency": "USD", "trx_amt": 100.0,
|
|
22
|
+
"gl_date": 1, "reference": "INV0001"},
|
|
23
|
+
)
|
|
24
|
+
rep = ws.solve() # the proposal: groups + per-row allocations
|
|
25
|
+
ws.pin_clean(tol=0) # sign off every clean net-zero match
|
|
26
|
+
ws.solve() # warm re-solve; pinned groups kept verbatim
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Surface
|
|
30
|
+
|
|
31
|
+
A group lives on a lifecycle axis — `proposed` (the solver's current opinion,
|
|
32
|
+
recomputed each `solve`) or `pinned` (your decision, kept verbatim).
|
|
33
|
+
|
|
34
|
+
```text
|
|
35
|
+
ledger upsert(*rows) · remove(*ids)
|
|
36
|
+
machine solve()
|
|
37
|
+
lifecycle pin(gid) · pin_clean(tol) · pin_singletons(ids) · unpin(gid)
|
|
38
|
+
partition merge(allocs, label, reason) · detach(gid, ids) · dissolve(gid)
|
|
39
|
+
read report()
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Failures raise `PluginError` carrying a stable `code` (e.g. `"frozen_group"`,
|
|
43
|
+
`"conservation_violated"`) plus the `id` / `group_id` it concerns.
|
|
44
|
+
`strict_assignments` / `connected_components` project the allocation hypergraph
|
|
45
|
+
into per-row assignments or settlement clusters.
|
|
46
|
+
|
|
47
|
+
The plugin/host ABI is versioned: the host refuses a wasm whose `abi_version`
|
|
48
|
+
differs from `florecon.ABI_VERSION`.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "florecon-host"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "The Python host for florecon: a generic wasmtime embedder that drives self-describing reconciliation plugins. Brings its own plugin wasm; imports as `florecon`. Nothing created, nothing lost."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
keywords = ["reconciliation", "finance", "matching", "wasm", "min-cost-flow"]
|
|
13
|
+
dependencies = ["wasmtime>=20", "pyarrow>=14"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Financial and Insurance Industry",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Rust",
|
|
20
|
+
"Topic :: Office/Business :: Financial :: Accounting",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Homepage = "https://github.com/spoj/florecon"
|
|
26
|
+
Repository = "https://github.com/spoj/florecon"
|
|
27
|
+
Issues = "https://github.com/spoj/florecon/issues"
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.packages.find]
|
|
30
|
+
where = ["src"]
|
|
31
|
+
|
|
32
|
+
# The generic host ships no domain plugin: a `.wasm` is brought by the caller
|
|
33
|
+
# (or a thin per-domain wheel). Exclude the local interco test fixture from
|
|
34
|
+
# both wheel and sdist so the package stays a pure host.
|
|
35
|
+
[tool.setuptools.exclude-package-data]
|
|
36
|
+
"*" = ["_engine.wasm"]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""florecon — incremental financial reconciliation by min-cost flow.
|
|
2
|
+
|
|
3
|
+
The host is a thin, generic wasmtime driver. A *plugin* ``.wasm`` owns the
|
|
4
|
+
domain (preprocessing, identity, matching) and describes itself; the host ships
|
|
5
|
+
the raw columns it asks for and drives the interactive workspace.
|
|
6
|
+
|
|
7
|
+
from florecon import Workspace
|
|
8
|
+
import polars as pl
|
|
9
|
+
|
|
10
|
+
ws = Workspace("interco_plugin.wasm")
|
|
11
|
+
ws.upsert(pl.DataFrame([
|
|
12
|
+
{"row_id": 1, "company": "A", "icp": "B", "objsub": "61500",
|
|
13
|
+
"indicative_usd_amt": 100.0, "trx_currency": "USD", "trx_amt": 100.0,
|
|
14
|
+
"gl_date": 0, "reference": "INV0001", ...},
|
|
15
|
+
]))
|
|
16
|
+
print(ws.solve())
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from ._host import (
|
|
20
|
+
ABI_VERSION,
|
|
21
|
+
ContractMismatch,
|
|
22
|
+
Florecon,
|
|
23
|
+
PluginError,
|
|
24
|
+
SchemaError,
|
|
25
|
+
Workspace,
|
|
26
|
+
)
|
|
27
|
+
from .persist import (
|
|
28
|
+
decisions,
|
|
29
|
+
groups_csv,
|
|
30
|
+
load_workspace,
|
|
31
|
+
report_frames,
|
|
32
|
+
result_json,
|
|
33
|
+
results_csv,
|
|
34
|
+
save_workspace,
|
|
35
|
+
)
|
|
36
|
+
from .projections import connected_components, primary_assignments, strict_assignments
|
|
37
|
+
from .tags import TagStore
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
"Florecon",
|
|
41
|
+
"Workspace",
|
|
42
|
+
"ABI_VERSION",
|
|
43
|
+
"ContractMismatch",
|
|
44
|
+
"PluginError",
|
|
45
|
+
"SchemaError",
|
|
46
|
+
"TagStore",
|
|
47
|
+
"strict_assignments",
|
|
48
|
+
"primary_assignments",
|
|
49
|
+
"connected_components",
|
|
50
|
+
"decisions",
|
|
51
|
+
"save_workspace",
|
|
52
|
+
"load_workspace",
|
|
53
|
+
"report_frames",
|
|
54
|
+
"groups_csv",
|
|
55
|
+
"results_csv",
|
|
56
|
+
"result_json",
|
|
57
|
+
]
|
|
58
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""The wasmtime host for a florecon plugin.
|
|
2
|
+
|
|
3
|
+
The host is generic and dumb: it loads a plugin ``.wasm``, asks it to
|
|
4
|
+
``describe()`` itself (which raw columns it needs, which one is the headline
|
|
5
|
+
amount), then ships a columnar table as Arrow IPC and drives the planless
|
|
6
|
+
``Cmd`` protocol. State lives inside the module; only JSON + Arrow cross the
|
|
7
|
+
boundary. The host knows *nothing* about any domain — the same code runs every
|
|
8
|
+
florecon plugin.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
|
|
13
|
+
import pyarrow as pa
|
|
14
|
+
import wasmtime
|
|
15
|
+
|
|
16
|
+
# Must equal the SDK's florecon::sdk::ABI_VERSION export.
|
|
17
|
+
ABI_VERSION = 1
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ContractMismatch(RuntimeError):
    """Raised when a plugin wasm reports an ABI version other than the one
    this host was built for."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SchemaError(RuntimeError):
    """Raised when a frame given to :meth:`Workspace.upsert` cannot satisfy the
    plugin's declared input schema: a declared column is absent, or its values
    cannot be cast to the declared type."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PluginError(RuntimeError):
    """A typed failure reported by the plugin through its dispatch envelope.

    The exception text is ``"<code>: <message>"``. The pieces a caller usually
    branches on are kept as attributes:

    * ``code`` -- the stable error code (e.g. ``"unknown_group"``,
      ``"frozen_group"``, ``"conservation_violated"``);
    * ``id`` -- the row id the failure concerns, or ``None``;
    * ``group_id`` -- the group id the failure concerns, or ``None``.
    """

    def __init__(self, code: str, message: str, id=None, group_id=None):
        self.code = code
        self.id = id
        self.group_id = group_id
        super().__init__("{}: {}".format(code, message))
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
_PA_TYPE = {"i64": pa.int64(), "f64": pa.float64(), "utf8": pa.string()}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Florecon:
    """Low-level handle around a plugin wasm: alloc/write/call/read over linear
    memory, plus the ``describe`` and ``dispatch`` exports.

    Construction instantiates the module with an empty import list, looks up
    the ``memory``, ``alloc``, ``dealloc``, ``dispatch``, ``describe`` and
    ``abi_version`` exports, and raises :class:`ContractMismatch` when the
    plugin's ``abi_version()`` differs from the host's ``ABI_VERSION``.
    """

    def __init__(self, wasm_path):
        engine = wasmtime.Engine()
        self.store = wasmtime.Store(engine)
        module = wasmtime.Module.from_file(engine, str(wasm_path))
        self.inst = wasmtime.Instance(self.store, module, [])
        ex = self.inst.exports(self.store)
        self.memory = ex["memory"]
        self._alloc = ex["alloc"]
        self._dealloc = ex["dealloc"]
        self._dispatch = ex["dispatch"]
        self._describe = ex["describe"]
        self.abi_version = ex["abi_version"](self.store)
        if self.abi_version != ABI_VERSION:
            raise ContractMismatch(f"plugin ABI v{self.abi_version} != host v{ABI_VERSION}")

    def _read_packed(self, packed: int) -> bytes:
        """Copy out the buffer a plugin call returned, then release it.

        ``packed`` carries the buffer pointer in its low 32 bits and the byte
        length in its high 32 bits. The bytes are copied to the host before the
        guest buffer is handed back through ``dealloc``.
        """
        out_ptr = packed & 0xFFFFFFFF
        out_len = (packed >> 32) & 0xFFFFFFFF
        out = self.memory.read(self.store, out_ptr, out_ptr + out_len)
        self._dealloc(self.store, out_ptr, out_len)
        return bytes(out)

    def describe(self) -> dict:
        """The plugin's self-description: ``{abi_version, domain, input, ...}``."""
        return json.loads(self._read_packed(self._describe(self.store)))

    def dispatch(self, command: dict, arrow_bytes: bytes = None) -> dict:
        """Drive the persistent session with one ``Cmd`` dict.

        ``command`` is JSON-encoded as UTF-8 and copied into guest memory;
        ``init``/``upsert`` additionally carry their rows in ``arrow_bytes``
        (``None`` or empty means "no Arrow payload", passed to the plugin as
        pointer 0 / length 0). Returns the raw, JSON-decoded envelope.

        The input buffers allocated in the guest are released even when
        writing them or the plugin call itself raises, so a failing command
        does not leak guest memory in a long-lived session.
        """
        data = json.dumps(command).encode("utf-8")
        n = len(data)
        arrow_n = len(arrow_bytes) if arrow_bytes else 0
        arrow_ptr = 0
        arrow_allocated = False

        ptr = self._alloc(self.store, n)
        try:
            self.memory.write(self.store, data, ptr)
            if arrow_n > 0:
                arrow_ptr = self._alloc(self.store, arrow_n)
                arrow_allocated = True
                self.memory.write(self.store, arrow_bytes, arrow_ptr)
            packed = self._dispatch(self.store, ptr, n, arrow_ptr, arrow_n)
        finally:
            # Same release order as the success path always had: the command
            # buffer first, then the Arrow buffer if one was allocated.
            self._dealloc(self.store, ptr, n)
            if arrow_allocated:
                self._dealloc(self.store, arrow_ptr, arrow_n)
        return json.loads(self._read_packed(packed))
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _ok(env: dict) -> dict:
    """Return the ``report`` of a successful dispatch envelope.

    An envelope whose ``ok`` entry is missing or falsy is turned into a
    :class:`PluginError` built from its ``error`` object; absent fields fall
    back to ``"unknown"`` / ``"unknown plugin error"`` / ``None``. A successful
    envelope without a ``report`` yields an empty dict.
    """
    if env.get("ok"):
        return env.get("report", {})
    failure = env.get("error") or {}
    raise PluginError(
        failure.get("code", "unknown"),
        failure.get("message", "unknown plugin error"),
        id=failure.get("id"),
        group_id=failure.get("group_id"),
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _ipc(batch: "pa.RecordBatch") -> bytes:
    """Encode one record batch as an Arrow IPC stream and return its bytes."""
    buffer = pa.BufferOutputStream()
    writer = pa.ipc.new_stream(buffer, batch.schema)
    with writer:
        writer.write_batch(batch)
    encoded = buffer.getvalue()
    return encoded.to_pybytes()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class Workspace:
    """An interactive reconciliation session over one plugin.

    The plugin owns the domain (preprocessing, identity, matching). The host
    just ships the raw columns the plugin's ``describe()`` declares -- as a
    **dataframe** (polars / pandas / pyarrow) or as plain ``dict`` rows, whose
    columns are validated and cast against that schema -- and drives the
    lifecycle (``pin``/``unpin``) and partition (``merge``/``detach``/
    ``dissolve``) verbs. State lives in the wasm module, so repeated
    :meth:`solve` calls warm re-solve.

    ``config`` (a JSON-able dict) is handed to the plugin at ``init`` -- runtime
    tunables (tolerances, windows) the plugin reads, so tuning needs no rebuild.

    A group lives on a lifecycle axis: ``proposed`` groups are the solver's
    current opinion (recomputed each :meth:`solve`); ``pinned`` groups are your
    decisions, kept verbatim across solves.

    Every verb stores the raw envelope of its last dispatch in ``self.last``
    (:meth:`report` is the exception) and raises :class:`PluginError` when the
    plugin reports a failure.
    """

    def __init__(self, wasm_path=None, _engine: Florecon = None, config: dict = None):
        # An already-built engine may be injected; otherwise load the wasm.
        self.fe = _engine or Florecon(wasm_path)
        self.spec = self.fe.describe()
        self.fields = self.spec["input"]
        self.domain = self.spec.get("domain", {})
        #: Name of the column the plugin flags as the headline display amount.
        self.amount_field = next(
            (f["name"] for f in self.fields if f.get("amount")), None
        )
        self._schema = pa.schema(
            [pa.field(f["name"], _PA_TYPE[f["type"]]) for f in self.fields]
        )
        init_cmd = {"op": "init"}
        if config is not None:
            init_cmd["config"] = config
        # ``init`` is sent with a zero-row batch that carries the declared schema.
        self.last = self.fe.dispatch(init_cmd, _ipc(self._empty_batch()))
        _ok(self.last)

    # --- schema bridge -------------------------------------------------------

    def _empty_batch(self) -> "pa.RecordBatch":
        """A zero-row batch carrying the declared schema (used by ``init``)."""
        arrays = [pa.array([], type=_PA_TYPE[f["type"]]) for f in self.fields]
        return pa.RecordBatch.from_arrays(arrays, schema=self._schema)

    @staticmethod
    def _as_table(frame) -> "pa.Table":
        """Normalize an ``upsert`` argument to a pyarrow Table.

        Accepted inputs: a pyarrow ``Table`` or ``RecordBatch``, a polars or
        pandas dataframe, a single ``dict`` (one row, column name -> value), or
        a non-empty list/tuple of such ``dict`` rows.

        Raises:
            TypeError: for any other input.
        """
        if isinstance(frame, pa.Table):
            return frame
        if isinstance(frame, pa.RecordBatch):
            return pa.Table.from_batches([frame])
        # Plain Python rows: one mapping is one row; a sequence of mappings is
        # several rows. Column types are inferred here and cast in ``_batch``.
        if isinstance(frame, dict):
            return pa.Table.from_pylist([frame])
        if (
            isinstance(frame, (list, tuple))
            and frame
            and all(isinstance(row, dict) for row in frame)
        ):
            return pa.Table.from_pylist(list(frame))
        # Detect dataframe libraries by module name so neither has to be imported.
        mod = type(frame).__module__.split(".", 1)[0]
        if mod == "polars" and hasattr(frame, "to_arrow"):
            return frame.to_arrow()
        if mod == "pandas":
            return pa.Table.from_pandas(frame, preserve_index=False)
        raise TypeError(
            "upsert expects a polars/pandas/pyarrow dataframe (or dict rows), "
            f"got {type(frame).__name__}"
        )

    def _batch(self, frame) -> "pa.RecordBatch":
        """Validate a frame against the declared schema and project it to the
        declared columns (extra columns ignored), cast to the declared types.

        Raises:
            SchemaError: a declared column is missing or cannot be cast.
            TypeError: ``frame`` is not an accepted input kind.
        """
        table = self._as_table(frame)
        present = set(table.column_names)
        missing = [f["name"] for f in self.fields if f["name"] not in present]
        if missing:
            raise SchemaError(
                f"dataframe is missing columns declared by {self.domain.get('id')}: {missing}"
            )
        arrays = []
        for f in self.fields:
            arr = table.column(f["name"]).combine_chunks()
            try:
                arr = arr.cast(_PA_TYPE[f["type"]])
            except (pa.ArrowInvalid, pa.ArrowTypeError, pa.ArrowNotImplementedError) as e:
                raise SchemaError(
                    f"column {f['name']!r}: cannot cast {arr.type} -> {f['type']}: {e}"
                ) from e
            arrays.append(arr)
        return pa.RecordBatch.from_arrays(arrays, schema=self._schema)

    # --- ledger --------------------------------------------------------------

    def upsert(self, *frames) -> "Workspace":
        """Insert/replace rows from one or more inputs.

        Each argument is a dataframe (polars / pandas / pyarrow), a single
        ``dict`` row, or a non-empty list/tuple of ``dict`` rows. Each must
        carry the plugin's declared raw columns; extra columns are ignored and
        declared columns are cast to their wire type. Inputs that end up with
        zero rows are skipped. Returns ``self`` so calls can be chained.
        """
        for frame in frames:
            batch = self._batch(frame)
            if batch.num_rows:
                self.last = self.fe.dispatch({"op": "upsert"}, _ipc(batch))
                _ok(self.last)
        return self

    def remove(self, *ids: int) -> "Workspace":
        """Send a ``remove`` command for the given row ids; returns ``self``."""
        self.last = self.fe.dispatch({"op": "remove", "ids": list(ids)})
        _ok(self.last)
        return self

    # --- machine -------------------------------------------------------------

    def solve(self) -> dict:
        """Send a ``solve`` command and return the resulting report."""
        self.last = self.fe.dispatch({"op": "solve"})
        return _ok(self.last)

    # --- lifecycle -----------------------------------------------------------

    def pin(self, group_id: int) -> dict:
        """Pin one proposed group -- keep it verbatim across later solves."""
        self.last = self.fe.dispatch({"op": "pin", "by": "group", "group_id": group_id})
        return _ok(self.last)

    def pin_clean(self, tol: int = 0) -> dict:
        """Pin every proposed match that nets to zero within ``tol``."""
        self.last = self.fe.dispatch({"op": "pin", "by": "clean", "tol": int(tol)})
        return _ok(self.last)

    def pin_singletons(self, ids) -> dict:
        """Pin the named lone lots (proposed singletons) as accepted-as-is."""
        self.last = self.fe.dispatch({"op": "pin", "by": "singletons", "ids": list(ids)})
        return _ok(self.last)

    def unpin(self, group_id: int) -> dict:
        """Release a pinned group back to the solver's control."""
        self.last = self.fe.dispatch({"op": "unpin", "group_id": group_id})
        return _ok(self.last)

    # --- partition -----------------------------------------------------------

    def merge(self, allocations, label: str = "manual", reason=None) -> dict:
        """Assert a pinned group over exact allocations ``[{"id", "amount"}, ...]``.

        Ids and amounts are coerced to ``int`` before being sent; ``reason`` is
        sent as a string and omitted from the command when it is ``None``.
        """
        cmd = {
            "op": "merge",
            "allocations": [
                {"id": int(a["id"]), "amount": int(a["amount"])} for a in allocations
            ],
            "label": label,
        }
        if reason is not None:
            cmd["reason"] = str(reason)
        self.last = self.fe.dispatch(cmd)
        return _ok(self.last)

    def detach(self, group_id: int, ids) -> dict:
        """Pull rows out of a proposed group into lone singletons."""
        self.last = self.fe.dispatch(
            {"op": "detach", "group_id": group_id, "ids": list(ids)}
        )
        return _ok(self.last)

    def dissolve(self, group_id: int) -> dict:
        """Break a proposed group back into singletons."""
        self.last = self.fe.dispatch({"op": "dissolve", "group_id": group_id})
        return _ok(self.last)

    # --- read ----------------------------------------------------------------

    def report(self) -> dict:
        """Send a ``report`` command and return the report (``self.last`` is
        left untouched by this read)."""
        return _ok(self.fe.dispatch({"op": "report"}))
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""Workspace persistence and result export.
|
|
2
|
+
|
|
3
|
+
One module that knows how to (a) serialize/restore the *operator's* durable
|
|
4
|
+
decisions to a portable file, and (b) export a reconciliation result as
|
|
5
|
+
dataframes / CSV / JSON.
|
|
6
|
+
|
|
7
|
+
Design note — what is durable state?
|
|
8
|
+
Everything *proposed* is a deterministic function of (rows, plugin) via
|
|
9
|
+
:meth:`Workspace.solve`, so it never needs saving. The only durable operator
|
|
10
|
+
state is:
|
|
11
|
+
* the **pinned** groups (committed decisions), expressed allocation-native
|
|
12
|
+
in stable row-id terms (group ids are ephemeral across solves), and
|
|
13
|
+
* the **tag** overlay (review buckets), already keyed by stable row id.
|
|
14
|
+
So a saved workspace = pinned decisions + tags (+ optional metadata). On load
|
|
15
|
+
we re-solve to recover the proposals, then re-assert the pinned decisions on
|
|
16
|
+
top. Robust and small — it survives plugin tweaks that only move proposals.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
from .projections import primary_assignments
|
|
25
|
+
|
|
26
|
+
WORKSPACE_KIND = "florecon.workspace"
|
|
27
|
+
WORKSPACE_VERSION = 1
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# --- durable-decision extraction -------------------------------------------
|
|
31
|
+
|
|
32
|
+
def decisions(report: dict) -> list[dict]:
    """Extract the *pinned* decisions of a report in row-id terms.

    Every group whose ``status`` is ``"pinned"`` becomes one entry
    ``{reason, origin, allocations: [{id, amount}, ...]}`` (``origin`` defaults
    to ``"manual"``). Allocations are attached by ``group_id`` with ``id`` and
    ``amount`` coerced to ``int``. Pinned groups that end up with no
    allocations are dropped.
    """
    pinned: dict[int, dict] = {
        int(group["group_id"]): {
            "reason": group.get("reason"),
            "origin": group.get("origin", "manual"),
            "allocations": [],
        }
        for group in report.get("groups", [])
        if group.get("status") == "pinned"
    }
    for alloc in report.get("allocations", []):
        entry = pinned.get(int(alloc["group_id"]))
        if entry is None:
            # Allocation belongs to a group that is not pinned.
            continue
        entry["allocations"].append(
            {"id": int(alloc["id"]), "amount": int(alloc["amount"])}
        )
    return [entry for entry in pinned.values() if entry["allocations"]]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def apply_decisions(ws, saved: list[dict]) -> dict:
    """Re-assert saved pinned decisions onto a freshly solved workspace.

    A decision with at least two non-zero allocations is replayed through
    ``ws.merge`` using only those non-zero allocations (exact amounts, so
    splits survive). Any other decision contributes all of its row ids to a
    single ``ws.pin_singletons`` call. Merges run first so they pull their rows
    out of any proposed group, leaving accepted singletons free to pin.

    Failures are collected rather than raised. The returned summary is
    ``{"groups", "singles", "failed", "errors"}``: merges applied, singleton
    ids pinned, number of failed calls, and their messages.
    """
    merges = []
    lone_ids: list[int] = []
    for decision in saved:
        legs = [a for a in decision["allocations"] if a["amount"]]
        if len(legs) >= 2:
            merges.append((decision, legs))
        else:
            lone_ids.extend(a["id"] for a in decision["allocations"])

    summary = {"groups": 0, "singles": 0, "failed": 0, "errors": []}

    for decision, legs in merges:
        try:
            ws.merge(
                legs,
                label=decision.get("origin", "manual"),
                reason=decision.get("reason"),
            )
        except Exception as exc:  # noqa: BLE001 - collected, not swallowed
            summary["failed"] += 1
            summary["errors"].append(str(exc))
        else:
            summary["groups"] += 1

    if lone_ids:
        try:
            ws.pin_singletons(lone_ids)
        except Exception as exc:  # noqa: BLE001
            summary["failed"] += 1
            summary["errors"].append(str(exc))
        else:
            summary["singles"] = len(lone_ids)

    return summary
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# --- serialize / restore ----------------------------------------------------
|
|
87
|
+
|
|
88
|
+
def serialize(report: dict, *, tags=None, meta: dict | None = None) -> dict:
    """Build the portable workspace object for a report.

    The result holds the workspace ``kind`` and ``version`` markers, the
    ``domain`` entry taken from ``meta`` (``None`` when absent), the pinned
    ``decisions`` of ``report``, the tag overlay (``tags.dump()``, or an empty
    overlay when ``tags`` is ``None``) and ``meta`` itself (``{}`` when not
    given). The raw rows are *not* embedded (re-supply them with ``upsert``
    before load).
    """
    context = meta or {}
    workspace = {"kind": WORKSPACE_KIND, "version": WORKSPACE_VERSION}
    workspace["domain"] = context.get("domain")
    workspace["decisions"] = decisions(report)
    if tags is None:
        workspace["tags"] = {"tags": {}, "meta": {}}
    else:
        workspace["tags"] = tags.dump()
    workspace["meta"] = context
    return workspace
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def parse(obj_or_text) -> dict:
    """Decode and validate a serialized workspace.

    ``str``/``bytes`` input is JSON-decoded first; anything else is used as
    given. Returns the workspace dict.

    Raises:
        ValueError: the value is not a dict of the workspace ``kind``, or its
            ``version`` is not the supported one.
    """
    if isinstance(obj_or_text, (str, bytes)):
        workspace = json.loads(obj_or_text)
    else:
        workspace = obj_or_text
    if not (isinstance(workspace, dict) and workspace.get("kind") == WORKSPACE_KIND):
        raise ValueError("not a florecon workspace")
    if workspace.get("version") != WORKSPACE_VERSION:
        raise ValueError(
            f"workspace version {workspace.get('version')} != supported {WORKSPACE_VERSION}"
        )
    return workspace
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def save_workspace(path, report: dict, *, tags=None, meta: dict | None = None) -> dict:
    """Serialize ``report`` (plus optional ``tags`` / ``meta``) and write it to
    ``path`` as 2-space-indented JSON. Returns the serialized object."""
    payload = serialize(report, tags=tags, meta=meta)
    text = json.dumps(payload, indent=2)
    Path(path).write_text(text)
    return payload
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def load_workspace(path_or_obj, ws, *, tags=None) -> dict:
    """Restore decisions (and optionally tags) onto a workspace that has already
    been re-``upsert``ed and ``solve``d. Returns the apply summary.

    ``path_or_obj`` may be a path to a saved workspace file, the JSON text of a
    workspace, or an already-decoded workspace dict. A ``str``/``Path`` that
    names an existing filesystem entry is read as UTF-8; any other value is
    handed to ``parse`` unchanged.

    When ``tags`` is given, its ``restore`` is called with the saved tag
    overlay before the decisions are applied.

    Raises:
        ValueError: the input is not a supported florecon workspace.
    """
    source = path_or_obj
    if isinstance(source, (str, Path)):
        candidate = Path(source)
        try:
            on_disk = candidate.exists()
        except (OSError, ValueError):
            # JSON text is not a usable path: probing a long document can fail
            # with ENAMETOOLONG instead of simply reporting "does not exist".
            on_disk = False
        if on_disk:
            source = candidate.read_text(encoding="utf-8")
    obj = parse(source)
    if tags is not None:
        tags.restore(obj.get("tags"))
    return apply_decisions(ws, obj.get("decisions", []))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# --- result export ----------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
def report_frames(report: dict):
    """Return the report as a pair of pyarrow Tables ``(groups, allocations)``,
    built from the ``groups`` and ``allocations`` lists of ``report`` (each
    treated as empty when missing) -- the natural bridge for writing results
    back to Spark/Delta in a notebook."""
    import pyarrow as pa

    def table(key):
        return pa.Table.from_pylist(report.get(key, []))

    groups = table("groups")
    allocations = table("allocations")
    return groups, allocations
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _csv(rows: list[list]) -> str:
    """Render ``rows`` as CSV text: cells joined by ``,``, rows joined by
    ``\\n`` (no trailing newline).

    ``None`` becomes an empty cell and every other value is rendered with
    ``str()``. A cell is wrapped in double quotes (with embedded quotes
    doubled) when it contains a comma, a double quote, or a line break --
    including a bare carriage return, which CSV readers otherwise treat as a
    row boundary.
    """
    def cell(v):
        s = "" if v is None else str(v)
        if any(c in s for c in ',"\r\n'):
            return '"' + s.replace('"', '""') + '"'
        return s

    return "\n".join(",".join(cell(c) for c in r) for r in rows)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def groups_csv(report: dict, *, money_scale: float = 0.01) -> str:
    """Render one CSV line per group, after a header line.

    Columns are ``group_id, origin, reason, status, size, net``. ``net`` is the
    group's integer net multiplied by ``money_scale`` (default cents ->
    currency units) and formatted with two decimals; a missing or null
    ``reason`` is written as an empty cell.
    """
    def line(group):
        return [
            group["group_id"],
            group.get("origin", ""),
            group.get("reason", "") or "",
            group.get("status", ""),
            group.get("size", ""),
            f"{int(group.get('net', 0)) * money_scale:.2f}",
        ]

    header = ["group_id", "origin", "reason", "status", "size", "net"]
    return _csv([header, *map(line, report.get("groups", []))])
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def results_csv(report: dict, *, policy: str = "largest_abs", money_scale: float = 0.01) -> str:
    """Row-level result: every row with the group it primarily landed in.

    Rows come from ``primary_assignments(report, policy=policy)``. Each CSV
    line carries ``row_id, group_id`` plus that group's ``origin``, ``reason``,
    ``status`` and its net scaled by ``money_scale`` with two decimals. A group
    id with no entry in ``report["groups"]`` yields empty cells and a net of
    ``0.00``.
    """
    group_by_id = {}
    for group in report.get("groups", []):
        group_by_id[int(group["group_id"])] = group

    lines = [["row_id", "group_id", "origin", "reason", "status", "group_net"]]
    for row_id, group_id in primary_assignments(report, policy=policy):
        info = group_by_id.get(int(group_id), {})
        line = [row_id, group_id]
        line.append(info.get("origin", ""))
        line.append(info.get("reason", "") or "")
        line.append(info.get("status", ""))
        line.append(f"{int(info.get('net', 0)) * money_scale:.2f}")
        lines.append(line)
    return _csv(lines)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def result_json(report: dict, *, meta: dict | None = None) -> str:
    """Return the whole report as 2-space-indented JSON text, wrapped in an
    envelope ``{"kind": "florecon.result", "meta": ..., "report": ...}``
    (``meta`` defaults to an empty object)."""
    envelope = {"kind": "florecon.result"}
    envelope["meta"] = meta or {}
    envelope["report"] = report
    return json.dumps(envelope, indent=2)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Explicit projections from the allocation-native Report hypergraph."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def strict_assignments(report: dict) -> list[tuple[int, int]]:
    """Project the report to exactly one (row id, group id) pair per row.

    Pairs are returned in ascending row-id order. Ids are coerced with
    ``int()``; repeated allocations of a row to the same group count once.

    Raises ValueError as soon as a row id is found in more than one group.
    """
    homes = {}
    for alloc in report.get("allocations", []):
        row_id = int(alloc["id"])
        if row_id not in homes:
            homes[row_id] = set()
        homes[row_id].add(int(alloc["group_id"]))

    pairs = []
    for row_id in sorted(homes):
        group_ids = homes[row_id]
        if len(group_ids) != 1:
            raise ValueError(f"row {row_id} is split across groups {sorted(group_ids)}")
        (only_group,) = group_ids
        pairs.append((row_id, only_group))
    return pairs
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def primary_assignments(report: dict, policy: str = "largest_abs") -> list[tuple[int, int]]:
    """Pick a single home group for every row, even when a row is split.

    ``policy`` decides which of a row's allocations wins:

    * ``largest_abs`` (default): largest absolute allocation amount, then a
      group whose net is zero, then the lowest group id.
    * ``prefer_clean``: a group whose net is zero, then the largest absolute
      amount, then the lowest group id.
    * ``first_group``: the lowest group id.

    Any other ``policy`` value is ranked like ``largest_abs``. Pairs come back
    in ascending row-id order. In contrast to :func:`strict_assignments`, a
    split row is resolved here rather than reported as an error.
    """
    per_row: dict[int, list[dict]] = {}
    for alloc in report.get("allocations", []):
        per_row.setdefault(int(alloc["id"]), []).append(alloc)
    group_info = {int(grp["group_id"]): grp for grp in report.get("groups", [])}

    def rank(alloc: dict):
        info = group_info.get(int(alloc["group_id"]), {})
        # A group counts as clean when its net is exactly zero.
        is_clean = 1 if abs(int(info.get("net", 0))) == 0 else 0
        if policy == "first_group":
            return (-int(alloc["group_id"]),)
        magnitude = abs(int(alloc["amount"]))
        # Negated id: ``max`` then prefers the lowest group id on a tie.
        tie_break = -int(alloc["group_id"])
        if policy == "prefer_clean":
            return (is_clean, magnitude, tie_break)
        return (magnitude, is_clean, tie_break)

    return [
        (row_id, int(max(per_row[row_id], key=rank)["group_id"]))
        for row_id in sorted(per_row)
    ]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def connected_components(report: dict) -> list[dict]:
    """Split the row <-> group allocation graph into connected components.

    Two rows belong to the same component when a chain of shared groups links
    them. Each component is a dict ``{"rows": [...], "groups": [...]}`` with
    both lists sorted ascending; components are ordered by their smallest row
    id.
    """
    groups_of = {}
    rows_of = {}
    for alloc in report.get("allocations", []):
        row_id = int(alloc["id"])
        group_id = int(alloc["group_id"])
        groups_of.setdefault(row_id, []).append(group_id)
        rows_of.setdefault(group_id, []).append(row_id)

    visited_rows = set()
    visited_groups = set()
    components = []
    for seed in groups_of:
        if seed in visited_rows:
            continue
        visited_rows.add(seed)
        member_rows = {seed}
        member_groups = set()
        frontier = [seed]
        # Expand row -> its groups -> their rows until nothing new turns up.
        while frontier:
            current = frontier.pop()
            for group_id in groups_of[current]:
                if group_id in visited_groups:
                    continue
                visited_groups.add(group_id)
                member_groups.add(group_id)
                for neighbour in rows_of[group_id]:
                    if neighbour not in visited_rows:
                        visited_rows.add(neighbour)
                        member_rows.add(neighbour)
                        frontier.append(neighbour)
        components.append({"rows": sorted(member_rows), "groups": sorted(member_groups)})

    # Every component holds at least its seed row and one of that row's groups.
    components.sort(key=lambda comp: (comp["rows"][0], comp["groups"][0]))
    return components
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Host-side review/attention overlay.
|
|
2
|
+
|
|
3
|
+
A *tag* is a many-to-many review axis (e.g. "needs-review", "escalate",
|
|
4
|
+
"FX-difference") that an operator drapes over rows. It is **orthogonal** to the
|
|
5
|
+
engine's lifecycle axis (``proposed`` | ``pinned``): the conservation engine
|
|
6
|
+
never learns the word "review". Tags are owned entirely by the host and keyed by
|
|
7
|
+
the **stable row id** (``ExtId``), never by group id — proposed group ids are
|
|
8
|
+
re-minted every solve, so keying by row id makes tags survive a re-solve for
|
|
9
|
+
free.
|
|
10
|
+
|
|
11
|
+
This mirrors the same orthogonality the engine enforces for groups, one layer
|
|
12
|
+
out: status is the machine's business, review is the operator's, and the two
|
|
13
|
+
never contaminate each other.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
# A small, stable chip palette of eight hex colours. ``TagStore.ensure_tag``
# assigns a new tag the entry at ``len(meta) % len(_PALETTE)``, so colours
# follow creation order and wrap around once the palette is exhausted.
_PALETTE = [
    "#217346", "#6d3fd1", "#b7791f", "#0f8d80",
    "#c2410c", "#2563eb", "#be185d", "#4d7c0f",
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TagStore:
    """An in-memory overlay of ``row_id -> {tag_id}`` plus tag metadata.

    Nothing is persisted implicitly; serialize with :meth:`dump` (and embed it in
    a saved workspace) for durability. A row may carry several tags or none.

    Read accessors and :meth:`dump` hand out copies, so the overlay can only be
    changed through :meth:`add`, :meth:`remove`, :meth:`clear`,
    :meth:`ensure_tag` and :meth:`restore` (or by touching ``tags`` / ``meta``
    directly).
    """

    def __init__(self):
        # row id -> ids of the tags on that row; untagged rows have no entry.
        self.tags: dict[int, set[str]] = {}
        # tag id -> {"label": ..., "color": ..., "kind": ...}
        self.meta: dict[str, dict] = {}

    # NOTE: union annotations are written as strings on purpose. The package
    # declares Requires-Python >=3.9 and this module has no
    # ``from __future__ import annotations``, so a bare ``str | None`` in a
    # signature would raise TypeError at import time on Python 3.9.
    def ensure_tag(self, label: str, kind: str = "bucket") -> "str | None":
        """Create (or look up) a tag by human label.

        The tag id is ``"tag:"`` plus the stripped, lower-cased label, so
        reusing a name (in any letter case) returns the existing tag and keeps
        its label, colour and kind. Returns ``None`` for an empty or
        whitespace-only label.
        """
        name = (label or "").strip()
        if not name:
            return None
        tid = "tag:" + name.lower()
        if tid not in self.meta:
            self.meta[tid] = {
                "label": name,
                # Colour by creation order, cycling through the palette.
                "color": _PALETTE[len(self.meta) % len(_PALETTE)],
                "kind": kind,
            }
        return tid

    def tags_of(self, row_id: int) -> set[str]:
        """Return the tag ids on ``row_id`` (empty set when it has none).

        The result is always a fresh set: mutating it never changes the store.
        """
        return set(self.tags.get(int(row_id), ()))

    def label(self, tid: str) -> str:
        """Return the display label of ``tid``, or ``tid`` itself if unknown."""
        return self.meta.get(tid, {}).get("label", tid)

    def color(self, tid: str) -> str:
        """Return the colour of ``tid``, or ``"#6b7280"`` if it has none."""
        return self.meta.get(tid, {}).get("color", "#6b7280")

    def add(self, row_id: int, tid: str) -> None:
        """Put tag ``tid`` on ``row_id`` (a no-op if it is already there)."""
        self.tags.setdefault(int(row_id), set()).add(tid)

    def remove(self, row_id: int, tid: str) -> None:
        """Take tag ``tid`` off ``row_id``; the row's entry is dropped once it
        has no tags left."""
        key = int(row_id)
        s = self.tags.get(key)
        if not s:
            return
        s.discard(tid)
        if not s:
            del self.tags[key]

    def clear(self, row_id: int) -> None:
        """Drop every tag on a row."""
        self.tags.pop(int(row_id), None)

    def tagged(self, tid: str) -> list[int]:
        """Every row id currently carrying ``tid`` (sorted)."""
        return sorted(r for r, s in self.tags.items() if tid in s)

    @staticmethod
    def _copy_meta(entry):
        """Copy one tag's metadata dict; anything that is not a dict is passed
        through untouched."""
        return dict(entry) if isinstance(entry, dict) else entry

    def dump(self) -> dict:
        """Serialize the whole overlay to a plain JSON-able object.

        Row ids become string keys and tag sets become sorted lists. The
        per-tag metadata dicts are copied, so editing the dump does not alter
        the store.
        """
        return {
            "tags": {str(r): sorted(s) for r, s in self.tags.items() if s},
            "meta": {tid: self._copy_meta(m) for tid, m in self.meta.items()},
        }

    def restore(self, obj: "dict | None") -> "TagStore":
        """Replace the overlay from a :meth:`dump`. Returns self.

        Any current content is discarded first; a falsy ``obj`` leaves the
        store empty. Row id keys are coerced back to ints (a key that cannot be
        converted is kept as given). Rows whose tag list is empty or ``None``
        are skipped, matching the rule that untagged rows have no entry.
        Metadata dicts are copied so the store does not share state with
        ``obj``.
        """
        self.tags = {}
        self.meta = {}
        if not obj:
            return self
        for tid, m in (obj.get("meta") or {}).items():
            self.meta[tid] = self._copy_meta(m)
        for r, arr in (obj.get("tags") or {}).items():
            if not arr:
                continue
            try:
                key = int(r)
            except (TypeError, ValueError):
                key = r
            self.tags[key] = set(arr)
        return self
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: florecon-host
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The Python host for florecon: a generic wasmtime embedder that drives self-describing reconciliation plugins. Brings its own plugin wasm; imports as `florecon`. Nothing created, nothing lost.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/spoj/florecon
|
|
7
|
+
Project-URL: Repository, https://github.com/spoj/florecon
|
|
8
|
+
Project-URL: Issues, https://github.com/spoj/florecon/issues
|
|
9
|
+
Keywords: reconciliation,finance,matching,wasm,min-cost-flow
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Rust
|
|
15
|
+
Classifier: Topic :: Office/Business :: Financial :: Accounting
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: wasmtime>=20
|
|
20
|
+
Requires-Dist: pyarrow>=14
|
|
21
|
+
|
|
22
|
+
# florecon (Python host)
|
|
23
|
+
|
|
24
|
+
A generic [wasmtime](https://github.com/bytecodealliance/wasmtime-py) host that
|
|
25
|
+
drives self-describing **florecon** reconciliation plugins. The host knows
|
|
26
|
+
nothing about any domain: it loads a plugin `.wasm`, reads its `describe()`, and
|
|
27
|
+
ships the raw columns the plugin declares. The same code runs every florecon
|
|
28
|
+
plugin.
|
|
29
|
+
|
|
30
|
+
Money is integer minor units; nothing is created or lost.
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from florecon import Workspace
|
|
34
|
+
|
|
35
|
+
ws = Workspace("interco_plugin.wasm") # any florecon plugin wasm
|
|
36
|
+
|
|
37
|
+
ws.upsert(
|
|
38
|
+
{"row_id": 1, "company": "A", "icp": "B", "objsub": "61500",
|
|
39
|
+
"indicative_usd_amt": 100.0, "trx_currency": "USD", "trx_amt": 100.0,
|
|
40
|
+
"gl_date": 0, "reference": "INV0001"},
|
|
41
|
+
{"row_id": 2, "company": "B", "icp": "A", "objsub": "61500",
|
|
42
|
+
"indicative_usd_amt": -100.0, "trx_currency": "USD", "trx_amt": 100.0,
|
|
43
|
+
"gl_date": 1, "reference": "INV0001"},
|
|
44
|
+
)
|
|
45
|
+
rep = ws.solve() # the proposal: groups + per-row allocations
|
|
46
|
+
ws.pin_clean(tol=0) # sign off every clean net-zero match
|
|
47
|
+
ws.solve() # warm re-solve; pinned groups kept verbatim
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Surface
|
|
51
|
+
|
|
52
|
+
A group lives on a lifecycle axis — `proposed` (the solver's current opinion,
|
|
53
|
+
recomputed each `solve`) or `pinned` (your decision, kept verbatim).
|
|
54
|
+
|
|
55
|
+
```text
|
|
56
|
+
ledger upsert(*rows) · remove(*ids)
|
|
57
|
+
machine solve()
|
|
58
|
+
lifecycle pin(gid) · pin_clean(tol) · pin_singletons(ids) · unpin(gid)
|
|
59
|
+
partition merge(allocs, label, reason) · detach(gid, ids) · dissolve(gid)
|
|
60
|
+
read report()
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Failures raise `PluginError` carrying a stable `code` (e.g. `"frozen_group"`,
|
|
64
|
+
`"conservation_violated"`) plus the `id` / `group_id` it concerns.
|
|
65
|
+
`strict_assignments` / `connected_components` project the allocation hypergraph
|
|
66
|
+
into per-row assignments or settlement clusters.
|
|
67
|
+
|
|
68
|
+
The plugin/host ABI is versioned: the host refuses a wasm whose `abi_version`
|
|
69
|
+
differs from `florecon.ABI_VERSION`.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/florecon/__init__.py
|
|
4
|
+
src/florecon/_host.py
|
|
5
|
+
src/florecon/persist.py
|
|
6
|
+
src/florecon/projections.py
|
|
7
|
+
src/florecon/tags.py
|
|
8
|
+
src/florecon_host.egg-info/PKG-INFO
|
|
9
|
+
src/florecon_host.egg-info/SOURCES.txt
|
|
10
|
+
src/florecon_host.egg-info/dependency_links.txt
|
|
11
|
+
src/florecon_host.egg-info/requires.txt
|
|
12
|
+
src/florecon_host.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
florecon
|