goldenmatch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/README.md +140 -0
  2. package/dist/cli.cjs +6079 -0
  3. package/dist/cli.cjs.map +1 -0
  4. package/dist/cli.d.cts +1 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.js +6076 -0
  7. package/dist/cli.js.map +1 -0
  8. package/dist/core/index.cjs +8449 -0
  9. package/dist/core/index.cjs.map +1 -0
  10. package/dist/core/index.d.cts +1972 -0
  11. package/dist/core/index.d.ts +1972 -0
  12. package/dist/core/index.js +8318 -0
  13. package/dist/core/index.js.map +1 -0
  14. package/dist/index.cjs +8449 -0
  15. package/dist/index.cjs.map +1 -0
  16. package/dist/index.d.cts +2 -0
  17. package/dist/index.d.ts +2 -0
  18. package/dist/index.js +8318 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/node/backends/score-worker.cjs +934 -0
  21. package/dist/node/backends/score-worker.cjs.map +1 -0
  22. package/dist/node/backends/score-worker.d.cts +14 -0
  23. package/dist/node/backends/score-worker.d.ts +14 -0
  24. package/dist/node/backends/score-worker.js +932 -0
  25. package/dist/node/backends/score-worker.js.map +1 -0
  26. package/dist/node/index.cjs +11430 -0
  27. package/dist/node/index.cjs.map +1 -0
  28. package/dist/node/index.d.cts +554 -0
  29. package/dist/node/index.d.ts +554 -0
  30. package/dist/node/index.js +11277 -0
  31. package/dist/node/index.js.map +1 -0
  32. package/dist/types-DhUdX5Rc.d.cts +304 -0
  33. package/dist/types-DhUdX5Rc.d.ts +304 -0
  34. package/examples/01-basic-dedupe.ts +60 -0
  35. package/examples/02-match-two-datasets.ts +48 -0
  36. package/examples/03-csv-file-pipeline.ts +62 -0
  37. package/examples/04-string-scoring.ts +63 -0
  38. package/examples/05-custom-config.ts +94 -0
  39. package/examples/06-probabilistic-fs.ts +72 -0
  40. package/examples/07-pprl-privacy.ts +76 -0
  41. package/examples/08-streaming.ts +79 -0
  42. package/examples/09-llm-scorer.ts +79 -0
  43. package/examples/10-explain.ts +60 -0
  44. package/examples/11-evaluate.ts +61 -0
  45. package/examples/README.md +53 -0
  46. package/package.json +66 -0
  47. package/src/cli.ts +372 -0
  48. package/src/core/ann-blocker.ts +593 -0
  49. package/src/core/api.ts +220 -0
  50. package/src/core/autoconfig.ts +363 -0
  51. package/src/core/autofix.ts +102 -0
  52. package/src/core/blocker.ts +655 -0
  53. package/src/core/cluster.ts +699 -0
  54. package/src/core/compare-clusters.ts +176 -0
  55. package/src/core/config/loader.ts +869 -0
  56. package/src/core/cross-encoder.ts +614 -0
  57. package/src/core/data.ts +430 -0
  58. package/src/core/domain.ts +277 -0
  59. package/src/core/embedder.ts +562 -0
  60. package/src/core/evaluate.ts +156 -0
  61. package/src/core/explain.ts +352 -0
  62. package/src/core/golden.ts +524 -0
  63. package/src/core/graph-er.ts +371 -0
  64. package/src/core/index.ts +314 -0
  65. package/src/core/ingest.ts +112 -0
  66. package/src/core/learned-blocking.ts +305 -0
  67. package/src/core/lineage.ts +221 -0
  68. package/src/core/llm/budget.ts +258 -0
  69. package/src/core/llm/cluster.ts +542 -0
  70. package/src/core/llm/scorer.ts +396 -0
  71. package/src/core/match-one.ts +95 -0
  72. package/src/core/matchkey.ts +97 -0
  73. package/src/core/memory/corrections.ts +179 -0
  74. package/src/core/memory/learner.ts +218 -0
  75. package/src/core/memory/store.ts +114 -0
  76. package/src/core/pipeline.ts +366 -0
  77. package/src/core/pprl/protocol.ts +216 -0
  78. package/src/core/probabilistic.ts +511 -0
  79. package/src/core/profiler.ts +212 -0
  80. package/src/core/quality.ts +197 -0
  81. package/src/core/review-queue.ts +177 -0
  82. package/src/core/scorer.ts +855 -0
  83. package/src/core/sensitivity.ts +196 -0
  84. package/src/core/standardize.ts +279 -0
  85. package/src/core/streaming.ts +128 -0
  86. package/src/core/transforms.ts +599 -0
  87. package/src/core/types.ts +570 -0
  88. package/src/core/validate.ts +243 -0
  89. package/src/index.ts +8 -0
  90. package/src/node/a2a/server.ts +470 -0
  91. package/src/node/api/server.ts +412 -0
  92. package/src/node/backends/duckdb.ts +130 -0
  93. package/src/node/backends/score-worker.ts +41 -0
  94. package/src/node/backends/workers.ts +212 -0
  95. package/src/node/config-file.ts +66 -0
  96. package/src/node/connectors/base.ts +57 -0
  97. package/src/node/connectors/bigquery.ts +61 -0
  98. package/src/node/connectors/databricks.ts +69 -0
  99. package/src/node/connectors/file.ts +350 -0
  100. package/src/node/connectors/hubspot.ts +62 -0
  101. package/src/node/connectors/index.ts +43 -0
  102. package/src/node/connectors/salesforce.ts +93 -0
  103. package/src/node/connectors/snowflake.ts +73 -0
  104. package/src/node/db/postgres.ts +173 -0
  105. package/src/node/db/sync.ts +103 -0
  106. package/src/node/dedupe-file.ts +156 -0
  107. package/src/node/index.ts +89 -0
  108. package/src/node/mcp/server.ts +940 -0
  109. package/src/node/tui/app.ts +756 -0
  110. package/src/node/tui/index.ts +6 -0
  111. package/src/node/tui/widgets.ts +128 -0
  112. package/tests/parity/scorer-ground-truth.test.ts +118 -0
  113. package/tests/smoke.test.ts +46 -0
  114. package/tests/unit/a2a-server.test.ts +175 -0
  115. package/tests/unit/ann-blocker.test.ts +117 -0
  116. package/tests/unit/api-server.test.ts +239 -0
  117. package/tests/unit/api.test.ts +77 -0
  118. package/tests/unit/autoconfig.test.ts +103 -0
  119. package/tests/unit/autofix.test.ts +71 -0
  120. package/tests/unit/blocker.test.ts +164 -0
  121. package/tests/unit/buildBlocksAsync.test.ts +63 -0
  122. package/tests/unit/cluster.test.ts +213 -0
  123. package/tests/unit/compare-clusters.test.ts +42 -0
  124. package/tests/unit/config-loader.test.ts +301 -0
  125. package/tests/unit/connectors-base.test.ts +48 -0
  126. package/tests/unit/cross-encoder-model.test.ts +198 -0
  127. package/tests/unit/cross-encoder.test.ts +173 -0
  128. package/tests/unit/db-connectors.test.ts +37 -0
  129. package/tests/unit/domain.test.ts +80 -0
  130. package/tests/unit/embedder.test.ts +151 -0
  131. package/tests/unit/evaluate.test.ts +85 -0
  132. package/tests/unit/explain.test.ts +73 -0
  133. package/tests/unit/golden.test.ts +97 -0
  134. package/tests/unit/graph-er.test.ts +173 -0
  135. package/tests/unit/hnsw-ann.test.ts +283 -0
  136. package/tests/unit/hubspot-connector.test.ts +118 -0
  137. package/tests/unit/ingest.test.ts +97 -0
  138. package/tests/unit/learned-blocking.test.ts +134 -0
  139. package/tests/unit/lineage.test.ts +135 -0
  140. package/tests/unit/match-one.test.ts +129 -0
  141. package/tests/unit/matchkey.test.ts +97 -0
  142. package/tests/unit/mcp-server.test.ts +183 -0
  143. package/tests/unit/memory.test.ts +119 -0
  144. package/tests/unit/pipeline.test.ts +118 -0
  145. package/tests/unit/pprl-protocol.test.ts +381 -0
  146. package/tests/unit/probabilistic.test.ts +494 -0
  147. package/tests/unit/profiler.test.ts +68 -0
  148. package/tests/unit/review-queue.test.ts +68 -0
  149. package/tests/unit/salesforce-connector.test.ts +148 -0
  150. package/tests/unit/scorer.test.ts +301 -0
  151. package/tests/unit/sensitivity.test.ts +154 -0
  152. package/tests/unit/standardize.test.ts +84 -0
  153. package/tests/unit/streaming.test.ts +82 -0
  154. package/tests/unit/transforms.test.ts +208 -0
  155. package/tests/unit/tui-widgets.test.ts +42 -0
  156. package/tests/unit/tui.test.ts +24 -0
  157. package/tests/unit/validate.test.ts +145 -0
  158. package/tests/unit/workers-parallel.test.ts +99 -0
  159. package/tests/unit/workers.test.ts +74 -0
  160. package/tsconfig.json +25 -0
  161. package/tsup.config.ts +37 -0
  162. package/vitest.config.ts +11 -0
@@ -0,0 +1,6 @@
1
+ /**
2
+ * index.ts -- TUI module entry point.
3
+ */
4
+
5
+ export { startTui } from "./app.js";
6
+ export type { TuiOptions } from "./app.js";
@@ -0,0 +1,128 @@
1
+ /**
2
+ * widgets.ts -- Optional ink ecosystem addon loaders.
3
+ *
4
+ * Each addon (ink-table, ink-select-input, ink-text-input, ink-spinner,
5
+ * ink-gradient) is an optional peer dependency. We provide a uniform
6
+ * mechanism to try-load them so callers can render rich UI when installed
7
+ * and fall back to plain `ink.Text` / `ink.Box` output otherwise.
8
+ *
9
+ * Two loading styles are provided:
10
+ * - `tryLoad<T>(name)`: synchronous `require`-based load. Works for CJS
11
+ * packages (ink-select-input, ink-text-input, ink-spinner, ink-gradient
12
+ * in older versions).
13
+ * - `loadAddons()`: async dynamic `import()` load. Works for ESM-only
14
+ * packages (ink-table v3+).
15
+ *
16
+ * Both return `null` for a missing package rather than throwing, so the
17
+ * caller can branch on presence.
18
+ */
19
+
20
+ import { createRequire } from "node:module";
21
+
22
+ /* eslint-disable @typescript-eslint/no-explicit-any */
23
+
// ES modules have no ambient `require`, so build one anchored at this
// module's URL; `tryLoad` below uses it for synchronous loads.
const require = createRequire(import.meta.url);

/**
 * Best-effort synchronous loader for an optional package.
 *
 * @param name - Module specifier handed to `require`.
 * @returns Whatever `require(name)` yields, or `null` when the call throws
 *   for any reason (package not installed, error while loading, ...).
 */
export function tryLoad<T = any>(name: string): T | null {
  let loaded: T | null;
  try {
    loaded = require(name) as T;
  } catch {
    // Deliberately swallowed: a missing optional addon is not an error.
    loaded = null;
  }
  return loaded;
}
37
+
/** Shared body of every `inkAddons` getter: one synchronous load attempt. */
const requireAddon = (pkg: string): any => tryLoad<any>(pkg);

/**
 * Synchronous accessors for the optional ink addons.
 *
 * Each property is a getter that calls `tryLoad` again on every read, so
 * nothing is cached and the value is `null` whenever the `require` fails.
 * Most callers will prefer the async `loadAddons()`, since ink-table is
 * ESM-only.
 */
export const inkAddons = {
  get table(): any {
    return requireAddon("ink-table");
  },
  get selectInput(): any {
    return requireAddon("ink-select-input");
  },
  get textInput(): any {
    return requireAddon("ink-text-input");
  },
  get spinner(): any {
    return requireAddon("ink-spinner");
  },
  get gradient(): any {
    return requireAddon("ink-gradient");
  },
};
60
+
/**
 * Collected addon components, each either the default export of its package
 * or `null` if the package isn't installed.
 *
 * This is the shape returned by `loadAddons()`; callers are expected to
 * branch on every field before rendering with it.
 */
export interface LoadedAddons {
  // Loaded from the "ink-table" package.
  Table: any | null;
  // Loaded from the "ink-select-input" package.
  SelectInput: any | null;
  // Loaded from the "ink-text-input" package.
  TextInput: any | null;
  // Loaded from the "ink-spinner" package.
  Spinner: any | null;
  // Loaded from the "ink-gradient" package.
  Gradient: any | null;
}
72
+
/**
 * Run one dynamic-import thunk and return the addon's default export.
 *
 * @param load - Thunk performing the `import()`; kept as a thunk so each
 *   call site retains its own literal specifier.
 * @returns The resolved component, or `null` if the import fails for any
 *   reason (missing package, import error, ...).
 */
async function importOptionalAddon(load: () => Promise<any>): Promise<any | null> {
  try {
    const mod: any = await load();
    const entry: any = mod.default ?? mod;
    // Node exposes a CommonJS module's whole `module.exports` as `default`.
    // For transpiled CJS builds that object is `{ __esModule: true, default }`,
    // so unwrap once more to reach the actual component.
    if (entry?.__esModule === true && entry.default != null) {
      return entry.default;
    }
    return entry;
  } catch {
    // Optional dependency: absence is expected and reported as `null`.
    return null;
  }
}

/**
 * Asynchronously load all optional ink addons via dynamic `import()`.
 *
 * Uses `import()` rather than `require()` because ink-table v3+ ships as
 * ESM-only and cannot be loaded from CJS. The other addons work either
 * way; we standardise on import for consistency.
 *
 * The five imports are independent, so they are started together and
 * awaited as a group. Each one has its own failure handling, so a single
 * missing addon doesn't poison the others: it is silently set to `null`.
 * Callers should branch on each field.
 *
 * @returns An object with one entry per addon (component or `null`).
 */
export async function loadAddons(): Promise<LoadedAddons> {
  // The `as string` casts keep the specifiers opaque to the type checker,
  // since these packages may not be installed at build time.
  const [Table, SelectInput, TextInput, Spinner, Gradient] = await Promise.all([
    importOptionalAddon(() => import("ink-table" as string)),
    importOptionalAddon(() => import("ink-select-input" as string)),
    importOptionalAddon(() => import("ink-text-input" as string)),
    importOptionalAddon(() => import("ink-spinner" as string)),
    importOptionalAddon(() => import("ink-gradient" as string)),
  ]);

  return { Table, SelectInput, TextInput, Spinner, Gradient };
}
127
+
128
+ /* eslint-enable @typescript-eslint/no-explicit-any */
@@ -0,0 +1,118 @@
1
+ /**
2
+ * Python parity ground-truth for every scorer.
3
+ *
4
+ * Each row: (scorer, input_a, input_b, python_score).
5
+ * Tolerance: 5e-5 (`toBeCloseTo(x, 4)` passes when |diff| < 0.5e-4) — tight enough to catch a real
6
+ * bug but loose enough to survive last-digit floating-point drift.
7
+ *
8
+ * The canonical Jaro-Winkler reference values (MARTHA/MARHTA = 0.9611,
9
+ * DIXON/DICKSONX = 0.8133, etc.) come from Winkler's original paper and
10
+ * are reproduced by rapidfuzz, jellyfish, and every other mainstream
11
+ * implementation. Other values (token_sort, levenshtein) are computed
12
+ * from their well-defined formulas:
13
+ * - Levenshtein similarity: 1 - dist / max(|a|, |b|)
14
+ * - Indel similarity (rapidfuzz token_sort base): 1 - d_indel / (|a| + |b|)
15
+ * - Soundex match: 1.0 iff soundex codes match
16
+ *
17
+ * To regenerate / extend: run the equivalent Python via
18
+ * from rapidfuzz.fuzz import token_sort_ratio
19
+ * from rapidfuzz.distance import JaroWinkler, Levenshtein, Indel
20
+ * and paste the results below.
21
+ */
22
+ import { describe, it, expect } from "vitest";
23
+ import {
24
+ scoreField,
25
+ jaro,
26
+ diceCoefficient,
27
+ jaccardSimilarity,
28
+ } from "../../src/core/index.js";
29
+
// One ground-truth row: scorer name, the two input strings, and the expected
// similarity rounded to four decimals.
type Case = readonly [scorer: string, a: string, b: string, expected: number];

// Rows are consumed in order by the parity suite; each row becomes one test.
const CASES: readonly Case[] = [
  // Jaro-Winkler — canonical reference values from Winkler's paper
  ["jaro_winkler", "MARTHA", "MARHTA", 0.9611],
  ["jaro_winkler", "DIXON", "DICKSONX", 0.8133],
  ["jaro_winkler", "JELLYFISH", "SMELLYFISH", 0.8963],
  ["jaro_winkler", "DWAYNE", "DUANE", 0.84],
  ["jaro_winkler", "abc", "abc", 1.0],
  ["jaro_winkler", "", "", 1.0],
  ["jaro_winkler", "abc", "", 0.0],
  // John/Jon: jaro = (3/4 + 3/3 + 1)/3 = 11/12 = 0.9167; common prefix "Jo" = 2,
  // jw = 0.9167 + 2*0.1*(1-0.9167) = 0.9333
  ["jaro_winkler", "John", "Jon", 0.9333],

  // Levenshtein similarity (1 - dist/max_len)
  ["levenshtein", "kitten", "sitting", 0.5714], // 1 - 3/7
  ["levenshtein", "saturday", "sunday", 0.625], // 1 - 3/8
  ["levenshtein", "abc", "abc", 1.0],
  ["levenshtein", "", "", 1.0],
  ["levenshtein", "abc", "xyz", 0.0],

  // token_sort via rapidfuzz Indel ratio, with lowercase + strip-nonalnum preprocessing
  ["token_sort", "New York Mets", "Mets New York", 1.0],
  // "john smith" (10) vs "johnson smith" (13): indel dist = 3 (insert 's','o','n')
  // similarity = 1 - 3/23 = 20/23 ≈ 0.8696
  ["token_sort", "John Smith", "Smith Johnson", 0.8696],
  ["token_sort", "the quick brown fox", "fox quick the brown", 1.0],
  ["token_sort", "a b c", "c b a", 1.0],
  ["token_sort", "John, Smith!", "smith john.", 1.0], // strips punctuation
  ["token_sort", "John SMITH", "smith john", 1.0], // lowercases

  // Exact
  ["exact", "abc", "abc", 1.0],
  ["exact", "abc", "xyz", 0.0],

  // Soundex
  ["soundex_match", "Robert", "Rupert", 1.0], // both R163
  ["soundex_match", "Robert", "Smith", 0.0],
  ["soundex_match", "Smith", "Smyth", 1.0], // both S530
];
70
+
// Generates one test per ground-truth row: the scorer must return a non-null
// value that agrees with the reference under `toBeCloseTo(…, 4)`.
describe("scorer Python parity (4-decimal tolerance)", () => {
  CASES.forEach(([scorer, a, b, expected]) => {
    const title = `${scorer}(${JSON.stringify(a)}, ${JSON.stringify(b)}) ≈ ${expected}`;
    it(title, () => {
      const actual = scoreField(a, b, scorer);
      expect(actual).not.toBeNull();
      expect(actual as number).toBeCloseTo(expected, 4);
    });
  });
});
80
+
// Plain Jaro is called directly here rather than through `scoreField`.
describe("jaro parity (not exposed via scoreField)", () => {
  it("jaro(MARTHA, MARHTA) ≈ 0.9444", () => {
    const similarity = jaro("MARTHA", "MARHTA");
    expect(similarity).toBeCloseTo(0.9444, 4);
  });
});
86
+
// Bloom-filter scorers (dice / jaccard). These values are not taken from
// Python: that implementation is just bitwise ops on CLK bitvectors, so the
// behavior is pinned with hand-computed edge cases instead.
describe("bloom-filter similarity sanity", () => {
  const SAME = "ff00ff00"; // compared against itself
  const LOW_BYTE_SET = "ff00"; // 8 bits set
  const HIGH_BYTE_SET = "00ff"; // 8 bits set, disjoint from LOW_BYTE_SET
  const ALL_SET = "ffff"; // 16 bits set

  it("identical bloom filters score 1.0 (dice)", () => {
    expect(diceCoefficient(SAME, SAME)).toBe(1.0);
  });

  it("identical bloom filters score 1.0 (jaccard)", () => {
    expect(jaccardSimilarity(SAME, SAME)).toBe(1.0);
  });

  it("non-overlapping bloom filters score 0.0 (dice)", () => {
    expect(diceCoefficient(LOW_BYTE_SET, HIGH_BYTE_SET)).toBe(0.0);
  });

  it("non-overlapping bloom filters score 0.0 (jaccard)", () => {
    expect(jaccardSimilarity(LOW_BYTE_SET, HIGH_BYTE_SET)).toBe(0.0);
  });

  it("half-overlap: ff00 vs ffff -> dice = 2*8/(8+16) = 0.6667", () => {
    // intersection = 8 bits, so dice = 2*8 / (8+16) = 16/24 = 0.6667
    const dice = diceCoefficient(LOW_BYTE_SET, ALL_SET);
    expect(dice).toBeCloseTo(0.6667, 4);
  });

  it("half-overlap: ff00 vs ffff -> jaccard = 8/16 = 0.5", () => {
    const jaccard = jaccardSimilarity(LOW_BYTE_SET, ALL_SET);
    expect(jaccard).toBeCloseTo(0.5, 4);
  });
});
@@ -0,0 +1,46 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import {
3
+ dedupe,
4
+ scoreField,
5
+ scorePair,
6
+ applyTransform,
7
+ applyTransforms,
8
+ buildClusters,
9
+ UnionFind,
10
+ scoreStrings,
11
+ } from "../src/core/index.js";
12
+
// Smoke suite: the core entry points are importable and a minimal dedupe run
// completes.
describe("smoke", () => {
  it("imports work", () => {
    const exported = [
      dedupe,
      scoreField,
      scorePair,
      applyTransform,
      applyTransforms,
      buildClusters,
      UnionFind,
      scoreStrings,
    ];
    // Classes report "function" under `typeof` too, so one check covers all.
    for (const symbol of exported) {
      expect(typeof symbol).toBe("function");
    }
  });

  it("basic dedupe works end-to-end", () => {
    const john = { id: 1, name: "John Smith", email: "john@example.com", zip: "12345" };
    const jon = { id: 2, name: "Jon Smith", email: "jon@example.com", zip: "12345" };
    const jane = { id: 3, name: "Jane Doe", email: "jane@example.com", zip: "54321" };
    const options = {
      fuzzy: { name: 0.7 },
      blocking: ["zip"],
      threshold: 0.7,
    };

    const result = dedupe([john, jon, jane], options);

    expect(result.stats.totalRecords).toBe(3);
  });

  it("scoreStrings returns a number between 0 and 1", () => {
    const identical = scoreStrings("hello", "hello");
    expect(identical).toBe(1.0);

    const different = scoreStrings("hello", "world");
    expect(different).toBeGreaterThanOrEqual(0);
    expect(different).toBeLessThanOrEqual(1);
  });
});
@@ -0,0 +1,175 @@
1
+ import { describe, it, expect, beforeAll, afterAll } from "vitest";
2
+ import type { Server } from "node:http";
3
+ import { startA2aServer, AGENT_CARD } from "../../src/node/a2a/server.js";
4
+
// Shared by every test in this file: the server under test and its base URL.
let server: Server;
let baseUrl: string;

/**
 * Start the A2A server on an OS-assigned port (port 0) and record its URL.
 *
 * Fails fast instead of guessing: a bind error rejects the hook rather than
 * leaving it waiting for a "listening" event that never comes, and a missing
 * TCP address throws rather than falling back to a fixed port that may
 * belong to some unrelated local service.
 */
beforeAll(async () => {
  server = startA2aServer({ port: 0, host: "127.0.0.1" });

  if (!server.listening) {
    await new Promise<void>((resolveFn, rejectFn) => {
      const onListening = (): void => {
        // Detach our error hook so later server errors behave as before.
        server.off("error", onError);
        resolveFn();
      };
      const onError = (err: Error): void => {
        server.off("listening", onListening);
        rejectFn(err);
      };
      server.once("listening", onListening);
      server.once("error", onError);
    });
  }

  const addr = server.address();
  if (typeof addr !== "object" || addr === null) {
    throw new Error(`A2A test server did not report a TCP address (got ${String(addr)})`);
  }
  baseUrl = `http://127.0.0.1:${addr.port}`;
});

/** Close the server, surfacing any close error as a hook failure. */
afterAll(async () => {
  if (!server) {
    // startA2aServer threw before assigning; nothing to clean up.
    return;
  }
  await new Promise<void>((resolveFn, rejectFn) => {
    server.close((err) => (err ? rejectFn(err) : resolveFn()));
  });
});
30
+
// Structural checks on the exported AGENT_CARD constant (no HTTP involved).
describe("A2A agent card (exported constant)", () => {
  it("has name, description, version, provider, skills", () => {
    const card = AGENT_CARD;
    expect(typeof card.name).toBe("string");
    expect(typeof card.description).toBe("string");
    expect(typeof card.version).toBe("string");
    expect(card.provider).toBeDefined();
    expect(typeof card.provider.organization).toBe("string");
    expect(Array.isArray(card.skills)).toBe(true);
  });

  it("has at least 5 skills", () => {
    const skillCount = AGENT_CARD.skills.length;
    expect(skillCount).toBeGreaterThanOrEqual(5);
  });

  it("every skill has name, description, inputModes, outputModes", () => {
    AGENT_CARD.skills.forEach((skill) => {
      const { name, description, inputModes, outputModes } = skill;
      expect(typeof name).toBe("string");
      expect(name.length).toBeGreaterThan(0);
      expect(typeof description).toBe("string");
      expect(Array.isArray(inputModes)).toBe(true);
      expect(inputModes.length).toBeGreaterThan(0);
      expect(Array.isArray(outputModes)).toBe(true);
      expect(outputModes.length).toBeGreaterThan(0);
    });
  });
});
57
+
// End-to-end checks against the live server started in `beforeAll`.
describe("A2A server HTTP endpoints", () => {
  type SkillShape = {
    name: string;
    description: string;
    inputModes: string[];
    outputModes: string[];
  };
  type CardShape = {
    name: string;
    description: string;
    version: string;
    provider: { organization: string };
    skills: Array<SkillShape>;
  };

  // POSTs a JSON payload to /tasks. `baseUrl` is read at call time because
  // it is only assigned once `beforeAll` has run.
  const postTask = (payload: unknown) =>
    fetch(`${baseUrl}/tasks`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });

  it("GET /.well-known/agent.json returns the AgentCard", async () => {
    const res = await fetch(`${baseUrl}/.well-known/agent.json`);
    expect(res.status).toBe(200);

    const card = (await res.json()) as CardShape;
    expect(card.name).toBe("goldenmatch-js");
    expect(typeof card.description).toBe("string");
    expect(typeof card.version).toBe("string");
    expect(card.provider.organization).toBe("goldenmatch");
    expect(card.skills.length).toBeGreaterThanOrEqual(5);
    card.skills.forEach((skill) => {
      expect(typeof skill.name).toBe("string");
      expect(typeof skill.description).toBe("string");
      expect(Array.isArray(skill.inputModes)).toBe(true);
      expect(Array.isArray(skill.outputModes)).toBe(true);
    });
  });

  it("POST /tasks with skill=dedupe completes and returns result", async () => {
    const rows = [
      { email: "a@x.com", name: "Alice" },
      { email: "a@x.com", name: "A." },
      { email: "b@x.com", name: "Bob" },
    ];
    const res = await postTask({
      skill: "dedupe",
      input: { rows, exact: ["email"] },
    });
    expect(res.status).toBe(200);

    const task = (await res.json()) as {
      id: string;
      status: string;
      skill: string;
      result?: unknown;
    };
    expect(typeof task.id).toBe("string");
    expect(task.id.length).toBeGreaterThan(0);
    expect(task.skill).toBe("dedupe");
    expect(["completed", "running", "pending"]).toContain(task.status);
    // A result is only required once the task reports completion.
    if (task.status === "completed") {
      expect(task.result).toBeDefined();
    }
  });

  it("GET /tasks/{id} returns task status after creation", async () => {
    const created = await postTask({
      skill: "score",
      input: { a: "John", b: "Jon", scorer: "jaro_winkler" },
    });
    const { id } = (await created.json()) as { id: string };
    expect(typeof id).toBe("string");

    const lookup = await fetch(`${baseUrl}/tasks/${id}`);
    expect(lookup.status).toBe(200);
    const task = (await lookup.json()) as { id: string; status: string; skill: string };
    expect(task.id).toBe(id);
    expect(task.skill).toBe("score");
    expect(["completed", "running", "pending", "failed"]).toContain(task.status);
  });

  it("GET /tasks/nonexistent returns 404", async () => {
    const res = await fetch(`${baseUrl}/tasks/does-not-exist-xyz`);
    expect(res.status).toBe(404);
    const payload = (await res.json()) as { error: string };
    expect(typeof payload.error).toBe("string");
  });

  it("POST /tasks with unknown skill returns failed task (or error)", async () => {
    const res = await postTask({ skill: "not_a_real_skill", input: {} });
    // Implementation returns 200 with status=failed. Some implementations use 400.
    expect([200, 400]).toContain(res.status);

    const payload = (await res.json()) as {
      status?: string;
      error?: string;
    };
    // Accept either signal: a non-empty error string or status === "failed".
    const reportsError = typeof payload.error === "string" && payload.error.length > 0;
    const reportsFailedTask = payload.status === "failed";
    expect(reportsError || reportsFailedTask).toBe(true);
  });

  it("POST /tasks without skill returns 400", async () => {
    const res = await postTask({ input: {} });
    expect(res.status).toBe(400);
    const payload = (await res.json()) as { error: string };
    expect(typeof payload.error).toBe("string");
  });
});
@@ -0,0 +1,117 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import {
3
+ ANNBlocker,
4
+ cosineSim,
5
+ euclideanDist,
6
+ } from "../../src/core/index.js";
7
+
/** Test helper: pack the given numbers into a Float32Array embedding. */
function vec(...nums: number[]): Float32Array {
  return Float32Array.from(nums);
}
11
+
// cosineSim on axis-aligned unit vectors plus the zero-vector edge case.
describe("cosineSim", () => {
  const xAxis = vec(1, 0, 0);

  it("orthogonal vectors -> 0", () => {
    const yAxis = vec(0, 1, 0);
    expect(cosineSim(xAxis, yAxis)).toBe(0);
  });

  it("identical vectors -> 1", () => {
    expect(cosineSim(xAxis, vec(1, 0, 0))).toBeCloseTo(1, 6);
  });

  it("opposite vectors -> -1", () => {
    const negX = vec(-1, 0, 0);
    expect(cosineSim(xAxis, negX)).toBeCloseTo(-1, 6);
  });

  it("zero vector -> 0 (no NaN)", () => {
    const zero = vec(0, 0, 0);
    expect(cosineSim(zero, vec(1, 2, 3))).toBe(0);
  });
});
29
+
// euclideanDist checked against two Pythagorean triples and the zero case.
describe("euclideanDist", () => {
  it("3-4-5 Pythagorean triple", () => {
    const dist = euclideanDist(vec(0, 0), vec(3, 4));
    expect(dist).toBeCloseTo(5, 6);
  });

  it("identical -> 0", () => {
    const dist = euclideanDist(vec(1, 2, 3), vec(1, 2, 3));
    expect(dist).toBe(0);
  });

  it("5-12-13 triple", () => {
    const dist = euclideanDist(vec(0, 0), vec(5, 12));
    expect(dist).toBeCloseTo(13, 6);
  });
});
43
+
// ANNBlocker: index construction, incremental adds, and the query variants.
describe("ANNBlocker", () => {
  it("buildIndex then query returns top-K", () => {
    const embeddings = [
      vec(1, 0, 0),
      vec(0.99, 0.01, 0), // near index 0
      vec(0, 1, 0),
      vec(0, 0.98, 0.02), // near index 2
    ];
    const blocker = new ANNBlocker({ topK: 2 });
    blocker.buildIndex(embeddings);

    const pairs = blocker.query(embeddings);

    // At minimum the two near-duplicate pairs (0,1) and (2,3) must appear.
    const seen = new Set(pairs.map(([left, right]) => `${left}-${right}`));
    expect(seen.has("0-1")).toBe(true);
    expect(seen.has("2-3")).toBe(true);
  });

  it("indexSize reflects buildIndex", () => {
    const blocker = new ANNBlocker();
    expect(blocker.indexSize).toBe(0);

    blocker.buildIndex([vec(1, 0), vec(0, 1)]);
    expect(blocker.indexSize).toBe(2);
  });

  it("addToIndex grows index and returns position", () => {
    const blocker = new ANNBlocker();
    blocker.buildIndex([vec(1, 0)]);
    expect(blocker.indexSize).toBe(1);

    const position = blocker.addToIndex(vec(0, 1));
    expect(position).toBe(1);
    expect(blocker.indexSize).toBe(2);
  });

  it("queryWithScores returns [a, b, score] tuples", () => {
    const embeddings = [vec(1, 0), vec(0.99, 0.01), vec(0, 1)];
    const blocker = new ANNBlocker({ topK: 2 });
    blocker.buildIndex(embeddings);

    // Query with the very same array reference that was indexed: that is
    // what enables self-pair filtering.
    const queries = embeddings as unknown as readonly Float32Array[];
    const scored = blocker.queryWithScores(queries);

    expect(scored.length).toBeGreaterThan(0);
    scored.forEach(([a, b, score]) => {
      expect(typeof a).toBe("number");
      expect(typeof b).toBe("number");
      expect(typeof score).toBe("number");
      // Pairs are canonicalized, so a <= b. Self-pairs are excluded when
      // the same array reference is used as both index and queries.
      expect(a).toBeLessThanOrEqual(b);
    });
  });

  it("queryOne returns top-K neighbors with scores", () => {
    const blocker = new ANNBlocker({ topK: 2 });
    blocker.buildIndex([vec(1, 0), vec(0, 1), vec(0.9, 0.1)]);

    const neighbors = blocker.queryOne(vec(1, 0));

    expect(neighbors.length).toBe(2);
    // Index 0 is identical to the query (cosine = 1), so it must rank first.
    expect(neighbors[0]![0]).toBe(0);
  });

  it("query on empty index -> empty", () => {
    const blocker = new ANNBlocker();
    blocker.buildIndex([]);

    const pairs = blocker.query([vec(1, 0)]);
    expect(pairs).toEqual([]);
  });

  it("euclidean metric still ranks closest first", () => {
    const blocker = new ANNBlocker({ topK: 1, metric: "euclidean" });
    blocker.buildIndex([vec(0, 0), vec(10, 10), vec(0.1, 0.1)]);

    const neighbors = blocker.queryOne(vec(0, 0));

    expect(neighbors[0]![0]).toBe(0); // the identical point is closest
  });
});