goldenmatch 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +68 -0
- package/dist/core/index.cjs +269 -0
- package/dist/core/index.cjs.map +1 -1
- package/dist/core/index.d.cts +188 -1
- package/dist/core/index.d.ts +188 -1
- package/dist/core/index.js +262 -1
- package/dist/core/index.js.map +1 -1
- package/dist/index.cjs +269 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +262 -1
- package/dist/index.js.map +1 -1
- package/dist/node/index.cjs +269 -0
- package/dist/node/index.cjs.map +1 -1
- package/dist/node/index.d.cts +1 -1
- package/dist/node/index.d.ts +1 -1
- package/dist/node/index.js +262 -1
- package/dist/node/index.js.map +1 -1
- package/package.json +8 -2
- package/src/core/identity/in-memory-store.ts +193 -0
- package/src/core/identity/index.ts +19 -0
- package/src/core/identity/new-entity-id.ts +49 -0
- package/src/core/identity/query.ts +142 -0
- package/src/core/identity/types.ts +136 -0
- package/src/core/index.ts +1 -0
- package/tests/identity/in-memory-store.test.ts +220 -0
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# goldenmatch (TypeScript)
|
|
2
|
+
|
|
3
|
+
npm package `goldenmatch`. Three-wave parity port of the Python sibling at `packages/python/goldenmatch/`. Currently at **v0.7.0** (parity with Python v1.12).
|
|
4
|
+
|
|
5
|
+
## Wave history
|
|
6
|
+
| npm | Python parity | Headline |
|
|
7
|
+
|-----|---------------|----------|
|
|
8
|
+
| 0.4.0 | v1.6.0 | Learning Memory + scorer ground truth |
|
|
9
|
+
| 0.5.0 | v1.7 + v1.8 | AutoConfigController, ComplexityProfile, RunHistory, StopReason telemetry |
|
|
10
|
+
| 0.6.0 | v1.9 + v1.10 | 5 complexity indicators + indicator-aware refit rules; scorer selection aligned with Python |
|
|
11
|
+
| 0.7.0 | v1.11 + v1.12 | NegativeEvidenceField + Path Y (exact-MK post-filter) |
|
|
12
|
+
|
|
13
|
+
Each wave's spec/plan: `docs/superpowers/specs/2026-05-10-ts-parity-arc-design.md` + per-wave plans.
|
|
14
|
+
|
|
15
|
+
## Commands
|
|
16
|
+
```bash
|
|
17
|
+
cd packages/typescript/goldenmatch
|
|
18
|
+
pnpm --filter goldenmatch test # vitest (841 tests at v0.7.0)
|
|
19
|
+
pnpm --filter goldenmatch typecheck # tsc --noEmit (strict)
|
|
20
|
+
pnpm --filter goldenmatch build # tsup (5 entry points)
|
|
21
|
+
npx vitest run tests/parity/ # parity-only suite
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Edge-safety rule
|
|
25
|
+
`src/core/**` MUST NOT import `node:*`. Node-only code lives in `src/node/`. Memory backed by SQLite is `src/node/memory/`; the edge-safe interface is `src/core/memory/`. This is enforced by build separation, not by lint — verify when adding new imports.
|
|
26
|
+
|
|
27
|
+
## Strict TS
|
|
28
|
+
`noUncheckedIndexedAccess` + `exactOptionalPropertyTypes`. Idioms:
|
|
29
|
+
- Bounded-loop indices: use `arr[i]!` after a length check.
|
|
30
|
+
- Optional props: `...(x !== undefined ? { field: x } : {})` — never spread `undefined`.
|
|
31
|
+
- Optional peer deps (sqlite, sentence-transformers): `await import("pkg-name" as string)` — the `as string` cast prevents tsup from resolving at build time.
|
|
32
|
+
|
|
33
|
+
## Parity contract
|
|
34
|
+
- **Scorer output:** 4-decimal tolerance vs Python (`tests/parity/scorer-ground-truth.test.ts`).
|
|
35
|
+
- **Hash bytes:** SHA-256 truncated to 16 hex via Web Crypto. UTF-8 mandatory. Hash input = values joined by `|` (NOT `<col>=<val>`). `__row_id__` excluded from `record_hash` so corrections survive row reordering.
|
|
36
|
+
- **Cross-language fixtures:** committed under `tests/parity/fixtures/`. Regen via `packages/python/goldenmatch/tests/parity/memory/gen_memory_fixtures.py --rebuild-db` and the wave-specific emitters in `packages/python/goldenmatch/scripts/emit_ts_parity_fixtures.py`. Determinism clamp: pinned UUIDs, pinned `created_at` (no `datetime.now()`).
|
|
37
|
+
- **Negative-evidence parity** (v0.7.0): 6 fixture datasets exercising Path Y filtering on exact MKs + weighted-MK NE. Live in `tests/parity/negative-evidence-fixtures.json`.
|
|
38
|
+
- **Controller parity** (v0.5.0): structural-only on 4 of 6 fixtures, byte-equal on 2. Python-side `ModuleNotFoundError` on polars/sklearn in the divergent 4 — TS doesn't replicate that import wart.
|
|
39
|
+
|
|
40
|
+
## Public API surface (v0.7.0)
|
|
41
|
+
- `dedupeFile`, `dedupe`, `matchFile`, `match` — all return Promises.
|
|
42
|
+
- `autoConfigureRows` (sync, single-pass) and `autoConfigureRowsIterate` (Promise, full controller).
|
|
43
|
+
- `AutoConfigController`, `RunHistory`, `ComplexityProfile`, `HealthVerdict`, `StopReason`.
|
|
44
|
+
- `NegativeEvidenceField`, `applyNegativeEvidence`, `applyNegativeEvidenceToExactPairs`, `promoteNegativeEvidence`.
|
|
45
|
+
- Memory mirror: `getMemory`, `addCorrection`, `learn`, `memoryStats`.
|
|
46
|
+
- MCP tool count: 24 (19 base + 5 memory). Description literal at `src/node/mcp/server.ts:6` — keep in sync via the existing regex test.
|
|
47
|
+
|
|
48
|
+
## Build outputs
|
|
49
|
+
- tsup with 5 entry points: `index`, `core/index`, `node/index`, `cli`, `node/backends/score-worker` (piscina worker).
|
|
50
|
+
- Build artifacts to `dist/` (gitignored).
|
|
51
|
+
- Test count discipline: bump the test count recorded in the Commands section whenever you add parity datasets, so future audits can diff the number.
|
|
52
|
+
|
|
53
|
+
## Config-types invariants
|
|
54
|
+
- **No `make*` factory functions** for config types — test fixtures use full literals. Required fields:
|
|
55
|
+
- `MatchkeyField`: `field` + `transforms` + `scorer` + `weight`
|
|
56
|
+
- `BlockingKeyConfig`: `fields` + `transforms`
|
|
57
|
+
- `BlockingConfig`: `strategy` + `keys` + `maxBlockSize` + `skipOversized`
|
|
58
|
+
- **Scorer names are snake_case** (same as Python): `token_sort`, `record_embedding`, `soundex_match`, `ensemble`, `exact`, `jaro_winkler`, `levenshtein`.
|
|
59
|
+
- **`DOMAIN_EXTRACTED_COLS`** (in `src/core/domain.ts`) has only 3 entries (`__brand__`, `__model__`, `__version__`); Python's has 12. Don't assume parity when porting domain features.
|
|
60
|
+
|
|
61
|
+
## Vitest gotchas
|
|
62
|
+
- Default timeout 5s. Heavier integration tests (PPRL multi-level, postflight end-to-end) need `{ timeout: 15000 }`. Concurrent load in CI has caused these tests to time out before (it cost a release: v0.3.0 → v0.3.1).
|
|
63
|
+
|
|
64
|
+
## Publish workflow
|
|
65
|
+
- `.github/workflows/publish-goldenmatch-js.yml` at monorepo root. Triggers on `goldenmatch-js-v*` tag or `workflow_dispatch` with `ref` input.
|
|
66
|
+
- Tag MUST point at a commit that has the workflow file, otherwise the trigger doesn't fire (root CLAUDE.md "Workflow trigger ordering" gotcha).
|
|
67
|
+
- Uses `NPM_TOKEN` secret. Trusted publishing not configured.
|
|
68
|
+
- The tag-version-must-match-package.json check (in the workflow) means you cannot tag multiple versions at the same commit. Each release commit has its own version bump and tag.
|
package/dist/core/index.cjs
CHANGED
|
@@ -11661,6 +11661,267 @@ init_corrections();
|
|
|
11661
11661
|
init_learner();
|
|
11662
11662
|
init_hash();
|
|
11663
11663
|
|
|
11664
|
+
// src/core/identity/types.ts
|
|
11665
|
+
/**
 * Put two record ids into canonical order (smaller first) so that the pair
 * (a, b) and the pair (b, a) produce the same tuple.
 *
 * @param a - First record id.
 * @param b - Second record id.
 * @returns A two-element array `[min, max]` according to the `<=` operator.
 */
function canonRecordPair(a, b) {
  if (a <= b) {
    return [a, b];
  }
  return [b, a];
}
|
|
11668
|
+
|
|
11669
|
+
// src/core/identity/new-entity-id.ts
|
|
11670
|
+
/**
 * Generate a fresh entity id as a UUID-formatted string.
 *
 * Bit layout (matches the UUID version 7 layout):
 *   high 64 bits = 48-bit millisecond timestamp | version nibble 0x7 | 12 random bits
 *   low  64 bits = variant bits 0b10 | 62 random bits
 *
 * @returns The id in 8-4-4-4-12 lowercase hex form.
 */
function newEntityId() {
  // Keep only the low 48 bits of the current epoch time in milliseconds.
  const timestamp48 = BigInt(Date.now()) & ((1n << 48n) - 1n);
  const entropy = randomBytes(10);

  // 12 random bits: low nibble of byte 0 followed by all of byte 1.
  const rand12 = (BigInt(entropy[0] & 0x0f) << 8n) | BigInt(entropy[1]);

  // Fold bytes 2..9 into one 64-bit integer, then keep the low 62 bits.
  let tail = 0n;
  for (const byte of entropy.subarray(2, 10)) {
    tail = (tail << 8n) | BigInt(byte);
  }
  tail &= (1n << 62n) - 1n;

  const upperHalf = (timestamp48 << 16n) | (0x7n << 12n) | rand12;
  const lowerHalf = (0b10n << 62n) | tail;
  return formatUuid(upperHalf, lowerHalf);
}
|
|
11683
|
+
/**
 * Return `n` random bytes.
 *
 * Uses `crypto.getRandomValues` when a global `crypto` object exposing that
 * function is present; otherwise falls back to `Math.random()`, which is NOT
 * cryptographically secure.
 *
 * @param n - Number of bytes to produce.
 * @returns A `Uint8Array` of length `n`.
 */
function randomBytes(n) {
  const buffer = new Uint8Array(n);
  const hasWebCrypto =
    typeof crypto !== "undefined" && typeof crypto.getRandomValues === "function";

  if (!hasWebCrypto) {
    // Fallback path for runtimes without Web Crypto.
    for (let idx = 0; idx < n; idx += 1) {
      buffer[idx] = Math.floor(Math.random() * 256);
    }
    return buffer;
  }

  crypto.getRandomValues(buffer);
  return buffer;
}
|
|
11692
|
+
/**
 * Render two 64-bit halves as a UUID string in 8-4-4-4-12 hex form.
 *
 * Each half is converted to hexadecimal and left-padded with zeros to at
 * least 16 characters before being sliced into groups.
 *
 * @param high - BigInt holding the upper 64 bits.
 * @param low - BigInt holding the lower 64 bits.
 * @returns The hyphen-separated hex string.
 */
function formatUuid(high, low) {
  const upperHex = high.toString(16).padStart(16, "0");
  const lowerHex = low.toString(16).padStart(16, "0");
  const groups = [
    upperHex.slice(0, 8),
    upperHex.slice(8, 12),
    upperHex.slice(12, 16),
    lowerHex.slice(0, 4),
    lowerHex.slice(4, 16),
  ];
  return groups.join("-");
}
|
|
11697
|
+
|
|
11698
|
+
// src/core/identity/in-memory-store.ts
|
|
11699
|
+
/**
 * Identity store that keeps everything in process memory.
 *
 * State lives in plain Maps and arrays: identities keyed by entityId, records
 * keyed by recordId, append-only edge and event lists, and aliases keyed by
 * their (alias, kind, dataset) tuple. Every method is `async`, and objects are
 * shallow-copied on the way in and on the way out, so callers never hold a
 * reference to the stored top-level object (nested values are still shared).
 */
var InMemoryIdentityStore = class {
  identities = new Map();
  records = new Map();
  edges = [];
  events = [];
  aliases = new Map();
  nextEdgeId = 1;
  nextEventId = 1;

  /**
   * Insert or replace an identity node.
   * When the entityId already exists, the original `createdAt` is preserved.
   */
  async upsertIdentity(node) {
    const existing = this.identities.get(node.entityId);
    if (existing) {
      this.identities.set(node.entityId, {
        ...node,
        createdAt: existing.createdAt,
        updatedAt: node.updatedAt,
      });
      return;
    }
    this.identities.set(node.entityId, { ...node });
  }

  /** Return a shallow copy of the identity, or `null` when unknown. */
  async getIdentity(entityId) {
    const node = this.identities.get(entityId);
    return node ? { ...node } : null;
  }

  /**
   * List identities, newest `updatedAt` first.
   * `opts.dataset` / `opts.status` filter when provided; `opts.offset`
   * defaults to 0 and `opts.limit` defaults to 100.
   */
  async listIdentities(opts = {}) {
    const matching = Array.from(this.identities.values())
      .filter((n) => opts.dataset === undefined || n.dataset === opts.dataset)
      .filter((n) => opts.status === undefined || n.status === opts.status)
      .sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
    const offset = opts.offset ?? 0;
    const limit = opts.limit ?? 100;
    return matching.slice(offset, offset + limit).map((n) => ({ ...n }));
  }

  /** Count identities, optionally restricted to one dataset. */
  async countIdentities(dataset) {
    if (dataset === undefined) return this.identities.size;
    let count = 0;
    for (const node of this.identities.values()) {
      if (node.dataset === dataset) count++;
    }
    return count;
  }

  /**
   * Mark an identity as retired, or as `merged_into` when `mergedInto` is
   * given. Unknown entity ids are ignored.
   */
  async retireIdentity(entityId, mergedInto) {
    const node = this.identities.get(entityId);
    if (!node) return;
    this.identities.set(entityId, {
      ...node,
      status: mergedInto ? "merged_into" : "retired",
      mergedInto: mergedInto ?? null,
      updatedAt: new Date(),
    });
  }

  /**
   * Insert or replace a record.
   * When the recordId already exists, the original `firstSeenAt` is preserved.
   */
  async upsertRecord(rec) {
    const existing = this.records.get(rec.recordId);
    if (existing) {
      this.records.set(rec.recordId, {
        ...rec,
        firstSeenAt: existing.firstSeenAt,
        lastSeenAt: rec.lastSeenAt,
      });
      return;
    }
    this.records.set(rec.recordId, { ...rec });
  }

  /** Return a shallow copy of the record, or `null` when unknown. */
  async getRecord(recordId) {
    const rec = this.records.get(recordId);
    return rec ? { ...rec } : null;
  }

  /** Records attached to an entity, oldest `firstSeenAt` first. */
  async getRecordsForEntity(entityId) {
    return Array.from(this.records.values())
      .filter((r) => r.entityId === entityId)
      .sort((a, b) => a.firstSeenAt.getTime() - b.firstSeenAt.getTime())
      .map((r) => ({ ...r }));
  }

  /** Entity id that owns the record, or `null`. */
  async findEntityByRecord(recordId) {
    return this.records.get(recordId)?.entityId ?? null;
  }

  /**
   * Bulk lookup: Map of recordId -> entityId for every given record that is
   * known and has a truthy entityId. Other record ids are omitted.
   */
  async lookupEntityIds(recordIds) {
    const found = new Map();
    for (const rid of recordIds) {
      const eid = this.records.get(rid)?.entityId;
      if (eid) found.set(rid, eid);
    }
    return found;
  }

  /**
   * Add an edge between two records and return its edgeId.
   * The record pair is stored in canonical order. If an edge with the same
   * entityId, canonical pair and run name already exists, its edgeId is
   * returned and nothing is added.
   */
  async addEdge(edge) {
    const [a, b] = canonRecordPair(edge.recordAId, edge.recordBId);
    const runKey = edge.runName ?? "";
    const duplicate = this.edges.find(
      (e) =>
        e.entityId === edge.entityId &&
        e.recordAId === a &&
        e.recordBId === b &&
        (e.runName ?? "") === runKey
    );
    if (duplicate) return duplicate.edgeId;

    const stored = {
      ...edge,
      recordAId: a,
      recordBId: b,
      edgeId: this.nextEdgeId++,
    };
    this.edges.push(stored);
    return stored.edgeId;
  }

  /** Edges of an entity, oldest `recordedAt` first. */
  async edgesForEntity(entityId) {
    return this.edges
      .filter((e) => e.entityId === entityId)
      .sort((a, b) => a.recordedAt.getTime() - b.recordedAt.getTime())
      .map((e) => ({ ...e }));
  }

  /** `conflicts_with` edges, newest first, optionally for one dataset. */
  async findConflicts(dataset) {
    return this.edges
      .filter((e) => e.kind === "conflicts_with")
      .filter((e) => dataset === undefined || e.dataset === dataset)
      .sort((a, b) => b.recordedAt.getTime() - a.recordedAt.getTime())
      .map((e) => ({ ...e }));
  }

  /** Append an event, assigning the next sequential eventId; returns that id. */
  async emitEvent(event) {
    const stored = { ...event, eventId: this.nextEventId++ };
    this.events.push(stored);
    return stored.eventId;
  }

  /**
   * Events of an entity in ascending eventId order.
   * A falsy `limit` (undefined, null, 0) returns the full history; otherwise
   * the list is cut with `slice(0, limit)`.
   */
  async history(entityId, limit) {
    const ordered = this.events
      .filter((e) => e.entityId === entityId)
      .sort((a, b) => (a.eventId ?? 0) - (b.eventId ?? 0));
    return (limit ? ordered.slice(0, limit) : ordered).map((e) => ({ ...e }));
  }

  /** Whether an event with this entity, run name and kind has been emitted. */
  async hasRunEvent(entityId, runName, kind) {
    return this.events.some(
      (e) => e.entityId === entityId && e.runName === runName && e.kind === kind
    );
  }

  /**
   * Register an alias; a later alias with the same (alias, kind, dataset)
   * tuple replaces the earlier one.
   */
  async addAlias(alias) {
    // JSON-encode the tuple so the key stays unambiguous even when a component
    // contains a separator character such as "|".
    const key = JSON.stringify([alias.alias, alias.kind, alias.dataset ?? ""]);
    this.aliases.set(key, { ...alias });
  }

  /**
   * Resolve an alias to its entityId, or `null` when no alias matches.
   * The first registered alias with matching `alias` and `kind` wins; the
   * dataset is not considered. `kind` defaults to "external_id".
   */
  async resolveAlias(alias, kind = "external_id") {
    // Compare against the stored fields rather than parsing the Map key, so
    // aliases containing "|" resolve correctly.
    for (const entry of this.aliases.values()) {
      if (entry.alias === alias && entry.kind === kind) return entry.entityId;
    }
    return null;
  }

  /** Nothing to release for the in-memory store. */
  async close() {
  }
};
|
|
11828
|
+
|
|
11829
|
+
// src/core/identity/query.ts
|
|
11830
|
+
/**
 * Load an entity together with its records, edges and event history.
 *
 * @param store - Identity store to read from.
 * @param entityId - Id of the entity to load.
 * @param eventLimit - Limit passed to `store.history` (default 100).
 * @returns `{ node, records, edges, events }`, or `null` when the store has
 *   no identity for `entityId`.
 */
async function getEntity(store, entityId, eventLimit = 100) {
  const node = await store.getIdentity(entityId);
  if (node) {
    // The three lookups are independent, so they are started together.
    const pending = [
      store.getRecordsForEntity(entityId),
      store.edgesForEntity(entityId),
      store.history(entityId, eventLimit),
    ];
    const [records, edges, events] = await Promise.all(pending);
    return { node, records, edges, events };
  }
  return null;
}
|
|
11840
|
+
/**
 * Look up the entity that owns a record and load its full view.
 *
 * @param store - Identity store to read from.
 * @param recordId - Id of the record to look up.
 * @returns The result of `getEntity` for the owning entity, or `null` when
 *   the store reports no owner for the record.
 */
async function findByRecord(store, recordId) {
  const ownerId = await store.findEntityByRecord(recordId);
  return ownerId ? getEntity(store, ownerId) : null;
}
|
|
11845
|
+
/**
 * List identities from the store.
 *
 * @param store - Identity store to read from.
 * @param opts - Options forwarded unchanged to `store.listIdentities`
 *   (defaults to an empty object).
 * @returns Whatever `store.listIdentities` resolves to.
 */
async function listEntities(store, opts = {}) {
  const listing = await store.listIdentities(opts);
  return listing;
}
|
|
11848
|
+
/**
 * Manually merge one entity into another.
 *
 * Every record of `absorbEntityId` is re-pointed at `keepEntityId`, the
 * absorbed identity is retired as merged into the winner, and one
 * `manual_merge` event is emitted for each side.
 *
 * @param store - Identity store to operate on.
 * @param keepEntityId - Entity that survives the merge; must be active.
 * @param absorbEntityId - Entity that is merged away.
 * @param opts - Optional `runName` (defaults to "manual") and `reason`.
 * @returns `{ keep, absorbed, at }` where `at` is the ISO timestamp used for
 *   the record updates and events.
 * @throws Error when either entity is missing, when the winner is not active,
 *   or when both ids refer to the same entity.
 */
async function manualMerge(store, keepEntityId, absorbEntityId, opts = {}) {
  const winner = await store.getIdentity(keepEntityId);
  const loser = await store.getIdentity(absorbEntityId);
  if (!winner || !loser) throw new Error("Both entity_ids must exist");
  if (winner.status !== "active") throw new Error("Winner must be active");
  // A self-merge would retire the surviving entity into itself, so reject it
  // before anything is mutated.
  if (keepEntityId === absorbEntityId) {
    throw new Error("Cannot merge an entity into itself");
  }

  const mergedAt = new Date();
  const losersRecords = await store.getRecordsForEntity(absorbEntityId);
  for (const rec of losersRecords) {
    await store.upsertRecord({ ...rec, entityId: keepEntityId, lastSeenAt: mergedAt });
  }
  await store.retireIdentity(absorbEntityId, keepEntityId);

  const runName = opts.runName ?? "manual";
  const reason = opts.reason ?? null;
  // One event on the surviving entity...
  await store.emitEvent({
    eventId: null,
    entityId: keepEntityId,
    kind: "manual_merge",
    payload: { absorbed: absorbEntityId, reason },
    runName,
    dataset: winner.dataset,
    recordedAt: mergedAt,
  });
  // ...and a mirror event on the absorbed one.
  await store.emitEvent({
    eventId: null,
    entityId: absorbEntityId,
    kind: "manual_merge",
    payload: { merged_into: keepEntityId, reason },
    runName,
    dataset: loser.dataset,
    recordedAt: mergedAt,
  });
  return { keep: keepEntityId, absorbed: absorbEntityId, at: mergedAt.toISOString() };
}
|
|
11880
|
+
/**
 * Manually split some records out of an entity into a brand-new entity.
 *
 * A new active identity is created in the parent's dataset, each listed
 * record that currently belongs to `entityId` is re-pointed at it, and one
 * `manual_split` event is emitted for the parent and one for the new entity.
 * Record ids that are unknown or belong to another entity are skipped.
 *
 * @param store - Identity store to operate on.
 * @param entityId - Entity to split records out of.
 * @param recordIds - Record ids to move; must be non-empty.
 * @param opts - Optional `runName` (defaults to "manual") and `reason`.
 * @returns `{ newEntityId, moved, at }` where `moved` lists the record ids
 *   that were actually re-pointed and `at` is the ISO timestamp used.
 * @throws Error when the entity does not exist or `recordIds` is empty.
 */
async function manualSplit(store, entityId, recordIds, opts = {}) {
  const parent = await store.getIdentity(entityId);
  if (!parent) {
    throw new Error(`Entity ${entityId} not found`);
  }
  if (recordIds.length === 0) {
    throw new Error("recordIds must be non-empty");
  }

  const splitAt = new Date();
  const childId = newEntityId();
  await store.upsertIdentity({
    entityId: childId,
    status: "active",
    mergedInto: null,
    goldenRecord: null,
    confidence: null,
    dataset: parent.dataset,
    createdAt: splitAt,
    updatedAt: splitAt,
  });

  // Move only the records that exist and still belong to the parent.
  const moved = [];
  for (const recordId of recordIds) {
    const record = await store.getRecord(recordId);
    const belongsToParent = Boolean(record) && record.entityId === entityId;
    if (belongsToParent) {
      await store.upsertRecord({ ...record, entityId: childId, lastSeenAt: splitAt });
      moved.push(recordId);
    }
  }

  const runName = opts.runName ?? "manual";
  // Both events share everything except the target entity and the payload.
  const buildEvent = (targetId, payload) => ({
    eventId: null,
    entityId: targetId,
    kind: "manual_split",
    payload,
    runName,
    dataset: parent.dataset,
    recordedAt: splitAt,
  });
  await store.emitEvent(
    buildEvent(entityId, { split_to: childId, records: moved, reason: opts.reason ?? null })
  );
  await store.emitEvent(
    buildEvent(childId, { split_from: entityId, records: moved, reason: opts.reason ?? null })
  );

  return { newEntityId: childId, moved, at: splitAt.toISOString() };
}
|
|
11924
|
+
|
|
11664
11925
|
// src/core/pprl/protocol.ts
|
|
11665
11926
|
init_transforms();
|
|
11666
11927
|
init_scorer();
|
|
@@ -11757,6 +12018,7 @@ exports.BudgetTracker = BudgetTracker;
|
|
|
11757
12018
|
exports.CrossEncoderHttpError = CrossEncoderHttpError;
|
|
11758
12019
|
exports.CrossEncoderModel = CrossEncoderModel;
|
|
11759
12020
|
exports.HIGH_TRUST_SOURCES = HIGH_TRUST_SOURCES;
|
|
12021
|
+
exports.InMemoryIdentityStore = InMemoryIdentityStore;
|
|
11760
12022
|
exports.InMemoryStore = InMemoryStore;
|
|
11761
12023
|
exports.ReviewQueue = ReviewQueue;
|
|
11762
12024
|
exports.StreamProcessor = StreamProcessor;
|
|
@@ -11792,6 +12054,7 @@ exports.buildLineage = buildLineage;
|
|
|
11792
12054
|
exports.buildMst = buildMst;
|
|
11793
12055
|
exports.buildMultiPassBlocks = buildMultiPassBlocks;
|
|
11794
12056
|
exports.buildStaticBlocks = buildStaticBlocks;
|
|
12057
|
+
exports.canonRecordPair = canonRecordPair;
|
|
11795
12058
|
exports.compareClusters = compareClusters;
|
|
11796
12059
|
exports.complexityHealth = complexityHealth;
|
|
11797
12060
|
exports.computeClusterConfidence = computeClusterConfidence;
|
|
@@ -11819,12 +12082,14 @@ exports.evaluatePairs = evaluatePairs;
|
|
|
11819
12082
|
exports.explainCluster = explainCluster;
|
|
11820
12083
|
exports.explainPair = explainPair;
|
|
11821
12084
|
exports.extractFeatures = extractFeatures;
|
|
12085
|
+
exports.findByRecord = findByRecord;
|
|
11822
12086
|
exports.findExactMatches = findExactMatches;
|
|
11823
12087
|
exports.findExactMatchesOne = findExactMatchesOne;
|
|
11824
12088
|
exports.findFuzzyMatches = findFuzzyMatches;
|
|
11825
12089
|
exports.gatePairs = gatePairs;
|
|
11826
12090
|
exports.getClusterPairScores = getClusterPairScores;
|
|
11827
12091
|
exports.getEmbedder = getEmbedder;
|
|
12092
|
+
exports.getEntity = getEntity;
|
|
11828
12093
|
exports.getLastControllerRun = getLastControllerRun;
|
|
11829
12094
|
exports.getMatchkeys = getMatchkeys;
|
|
11830
12095
|
exports.indelDistance = indelDistance;
|
|
@@ -11838,6 +12103,7 @@ exports.levenshteinDistance = levenshteinDistance;
|
|
|
11838
12103
|
exports.levenshteinSimilarity = levenshteinSimilarity;
|
|
11839
12104
|
exports.lineageFromJson = lineageFromJson;
|
|
11840
12105
|
exports.lineageToJson = lineageToJson;
|
|
12106
|
+
exports.listEntities = listEntities;
|
|
11841
12107
|
exports.llmClusterPairs = llmClusterPairs;
|
|
11842
12108
|
exports.llmExplainPair = llmExplainPair;
|
|
11843
12109
|
exports.llmScorePairs = llmScorePairs;
|
|
@@ -11859,10 +12125,13 @@ exports.makePreflightReport = makePreflightReport;
|
|
|
11859
12125
|
exports.makeProfileMeta = makeProfileMeta;
|
|
11860
12126
|
exports.makeScoredPair = makeScoredPair;
|
|
11861
12127
|
exports.makeScoringProfile = makeScoringProfile;
|
|
12128
|
+
exports.manualMerge = manualMerge;
|
|
12129
|
+
exports.manualSplit = manualSplit;
|
|
11862
12130
|
exports.match = match;
|
|
11863
12131
|
exports.matchOne = matchOne;
|
|
11864
12132
|
exports.mergeField = mergeField;
|
|
11865
12133
|
exports.metaphone = metaphone;
|
|
12134
|
+
exports.newEntityId = newEntityId;
|
|
11866
12135
|
exports.normalizedSignalVector = normalizedSignalVector;
|
|
11867
12136
|
exports.pairKey = pairKey;
|
|
11868
12137
|
exports.parseConfig = parseConfig;
|