goldenmatch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -0
- package/dist/cli.cjs +6079 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +6076 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/index.cjs +8449 -0
- package/dist/core/index.cjs.map +1 -0
- package/dist/core/index.d.cts +1972 -0
- package/dist/core/index.d.ts +1972 -0
- package/dist/core/index.js +8318 -0
- package/dist/core/index.js.map +1 -0
- package/dist/index.cjs +8449 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +8318 -0
- package/dist/index.js.map +1 -0
- package/dist/node/backends/score-worker.cjs +934 -0
- package/dist/node/backends/score-worker.cjs.map +1 -0
- package/dist/node/backends/score-worker.d.cts +14 -0
- package/dist/node/backends/score-worker.d.ts +14 -0
- package/dist/node/backends/score-worker.js +932 -0
- package/dist/node/backends/score-worker.js.map +1 -0
- package/dist/node/index.cjs +11430 -0
- package/dist/node/index.cjs.map +1 -0
- package/dist/node/index.d.cts +554 -0
- package/dist/node/index.d.ts +554 -0
- package/dist/node/index.js +11277 -0
- package/dist/node/index.js.map +1 -0
- package/dist/types-DhUdX5Rc.d.cts +304 -0
- package/dist/types-DhUdX5Rc.d.ts +304 -0
- package/examples/01-basic-dedupe.ts +60 -0
- package/examples/02-match-two-datasets.ts +48 -0
- package/examples/03-csv-file-pipeline.ts +62 -0
- package/examples/04-string-scoring.ts +63 -0
- package/examples/05-custom-config.ts +94 -0
- package/examples/06-probabilistic-fs.ts +72 -0
- package/examples/07-pprl-privacy.ts +76 -0
- package/examples/08-streaming.ts +79 -0
- package/examples/09-llm-scorer.ts +79 -0
- package/examples/10-explain.ts +60 -0
- package/examples/11-evaluate.ts +61 -0
- package/examples/README.md +53 -0
- package/package.json +66 -0
- package/src/cli.ts +372 -0
- package/src/core/ann-blocker.ts +593 -0
- package/src/core/api.ts +220 -0
- package/src/core/autoconfig.ts +363 -0
- package/src/core/autofix.ts +102 -0
- package/src/core/blocker.ts +655 -0
- package/src/core/cluster.ts +699 -0
- package/src/core/compare-clusters.ts +176 -0
- package/src/core/config/loader.ts +869 -0
- package/src/core/cross-encoder.ts +614 -0
- package/src/core/data.ts +430 -0
- package/src/core/domain.ts +277 -0
- package/src/core/embedder.ts +562 -0
- package/src/core/evaluate.ts +156 -0
- package/src/core/explain.ts +352 -0
- package/src/core/golden.ts +524 -0
- package/src/core/graph-er.ts +371 -0
- package/src/core/index.ts +314 -0
- package/src/core/ingest.ts +112 -0
- package/src/core/learned-blocking.ts +305 -0
- package/src/core/lineage.ts +221 -0
- package/src/core/llm/budget.ts +258 -0
- package/src/core/llm/cluster.ts +542 -0
- package/src/core/llm/scorer.ts +396 -0
- package/src/core/match-one.ts +95 -0
- package/src/core/matchkey.ts +97 -0
- package/src/core/memory/corrections.ts +179 -0
- package/src/core/memory/learner.ts +218 -0
- package/src/core/memory/store.ts +114 -0
- package/src/core/pipeline.ts +366 -0
- package/src/core/pprl/protocol.ts +216 -0
- package/src/core/probabilistic.ts +511 -0
- package/src/core/profiler.ts +212 -0
- package/src/core/quality.ts +197 -0
- package/src/core/review-queue.ts +177 -0
- package/src/core/scorer.ts +855 -0
- package/src/core/sensitivity.ts +196 -0
- package/src/core/standardize.ts +279 -0
- package/src/core/streaming.ts +128 -0
- package/src/core/transforms.ts +599 -0
- package/src/core/types.ts +570 -0
- package/src/core/validate.ts +243 -0
- package/src/index.ts +8 -0
- package/src/node/a2a/server.ts +470 -0
- package/src/node/api/server.ts +412 -0
- package/src/node/backends/duckdb.ts +130 -0
- package/src/node/backends/score-worker.ts +41 -0
- package/src/node/backends/workers.ts +212 -0
- package/src/node/config-file.ts +66 -0
- package/src/node/connectors/base.ts +57 -0
- package/src/node/connectors/bigquery.ts +61 -0
- package/src/node/connectors/databricks.ts +69 -0
- package/src/node/connectors/file.ts +350 -0
- package/src/node/connectors/hubspot.ts +62 -0
- package/src/node/connectors/index.ts +43 -0
- package/src/node/connectors/salesforce.ts +93 -0
- package/src/node/connectors/snowflake.ts +73 -0
- package/src/node/db/postgres.ts +173 -0
- package/src/node/db/sync.ts +103 -0
- package/src/node/dedupe-file.ts +156 -0
- package/src/node/index.ts +89 -0
- package/src/node/mcp/server.ts +940 -0
- package/src/node/tui/app.ts +756 -0
- package/src/node/tui/index.ts +6 -0
- package/src/node/tui/widgets.ts +128 -0
- package/tests/parity/scorer-ground-truth.test.ts +118 -0
- package/tests/smoke.test.ts +46 -0
- package/tests/unit/a2a-server.test.ts +175 -0
- package/tests/unit/ann-blocker.test.ts +117 -0
- package/tests/unit/api-server.test.ts +239 -0
- package/tests/unit/api.test.ts +77 -0
- package/tests/unit/autoconfig.test.ts +103 -0
- package/tests/unit/autofix.test.ts +71 -0
- package/tests/unit/blocker.test.ts +164 -0
- package/tests/unit/buildBlocksAsync.test.ts +63 -0
- package/tests/unit/cluster.test.ts +213 -0
- package/tests/unit/compare-clusters.test.ts +42 -0
- package/tests/unit/config-loader.test.ts +301 -0
- package/tests/unit/connectors-base.test.ts +48 -0
- package/tests/unit/cross-encoder-model.test.ts +198 -0
- package/tests/unit/cross-encoder.test.ts +173 -0
- package/tests/unit/db-connectors.test.ts +37 -0
- package/tests/unit/domain.test.ts +80 -0
- package/tests/unit/embedder.test.ts +151 -0
- package/tests/unit/evaluate.test.ts +85 -0
- package/tests/unit/explain.test.ts +73 -0
- package/tests/unit/golden.test.ts +97 -0
- package/tests/unit/graph-er.test.ts +173 -0
- package/tests/unit/hnsw-ann.test.ts +283 -0
- package/tests/unit/hubspot-connector.test.ts +118 -0
- package/tests/unit/ingest.test.ts +97 -0
- package/tests/unit/learned-blocking.test.ts +134 -0
- package/tests/unit/lineage.test.ts +135 -0
- package/tests/unit/match-one.test.ts +129 -0
- package/tests/unit/matchkey.test.ts +97 -0
- package/tests/unit/mcp-server.test.ts +183 -0
- package/tests/unit/memory.test.ts +119 -0
- package/tests/unit/pipeline.test.ts +118 -0
- package/tests/unit/pprl-protocol.test.ts +381 -0
- package/tests/unit/probabilistic.test.ts +494 -0
- package/tests/unit/profiler.test.ts +68 -0
- package/tests/unit/review-queue.test.ts +68 -0
- package/tests/unit/salesforce-connector.test.ts +148 -0
- package/tests/unit/scorer.test.ts +301 -0
- package/tests/unit/sensitivity.test.ts +154 -0
- package/tests/unit/standardize.test.ts +84 -0
- package/tests/unit/streaming.test.ts +82 -0
- package/tests/unit/transforms.test.ts +208 -0
- package/tests/unit/tui-widgets.test.ts +42 -0
- package/tests/unit/tui.test.ts +24 -0
- package/tests/unit/validate.test.ts +145 -0
- package/tests/unit/workers-parallel.test.ts +99 -0
- package/tests/unit/workers.test.ts +74 -0
- package/tsconfig.json +25 -0
- package/tsup.config.ts +37 -0
- package/vitest.config.ts +11 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* file.ts -- CSV/JSON/JSONL file I/O connector.
|
|
3
|
+
*
|
|
4
|
+
* Node-only: uses node:fs, node:path. NOT edge-safe.
|
|
5
|
+
*
|
|
6
|
+
* CSV parser rules (CRITICAL):
|
|
7
|
+
* - Quoted fields preserve embedded commas and newlines
|
|
8
|
+
* - Doubled quotes inside quoted fields unescape to a single quote
|
|
9
|
+
* - Empty unquoted fields become null
|
|
10
|
+
* - Leading-zero strings (zip codes "01234", SSNs, phones) are NEVER
|
|
11
|
+
* coerced to numbers
|
|
12
|
+
* - Booleans "true"/"false" (case-insensitive) coerce to boolean
|
|
13
|
+
* - Numeric strings coerce to number only when fully numeric and not
|
|
14
|
+
* leading-zero
|
|
15
|
+
* - Supports both `\n` and `\r\n` line endings
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs";
|
|
19
|
+
import { resolve, extname, dirname } from "node:path";
|
|
20
|
+
import type { Row } from "../../core/types.js";
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Value coercion
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Coerce a raw CSV field string to string | number | boolean.
|
|
28
|
+
* Preserves leading-zero strings (zip codes, SSNs, phone numbers) as strings.
|
|
29
|
+
*/
|
|
30
|
+
function coerceValue(raw: string): string | number | boolean {
|
|
31
|
+
// Booleans (case insensitive)
|
|
32
|
+
const lower = raw.toLowerCase();
|
|
33
|
+
if (lower === "true") return true;
|
|
34
|
+
if (lower === "false") return false;
|
|
35
|
+
|
|
36
|
+
// Only try numeric coercion for strings that are already trimmed
|
|
37
|
+
if (raw.length === 0 || raw !== raw.trim()) return raw;
|
|
38
|
+
|
|
39
|
+
// NEVER coerce leading-zero strings to numbers, except "0" itself and
|
|
40
|
+
// decimals like "0.5".
|
|
41
|
+
if (raw.length > 1 && raw[0] === "0" && raw[1] !== ".") return raw;
|
|
42
|
+
|
|
43
|
+
// Also preserve negative leading-zero strings like "-0123" as strings
|
|
44
|
+
if (raw.length > 2 && raw[0] === "-" && raw[1] === "0" && raw[2] !== ".") {
|
|
45
|
+
return raw;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
const n = Number(raw);
|
|
49
|
+
if (Number.isFinite(n)) return n;
|
|
50
|
+
|
|
51
|
+
return raw;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// CSV parsing
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Parse a full CSV document honoring quoted fields, doubled quotes,
|
|
60
|
+
* embedded newlines/commas, and both CRLF/LF line endings.
|
|
61
|
+
*
|
|
62
|
+
* Returns an array of raw string rows. The caller is responsible for
|
|
63
|
+
* interpreting the first row as a header.
|
|
64
|
+
*/
|
|
65
|
+
function parseCsvDocument(content: string, delimiter: string): string[][] {
|
|
66
|
+
const rows: string[][] = [];
|
|
67
|
+
let current = "";
|
|
68
|
+
let row: string[] = [];
|
|
69
|
+
let inQuotes = false;
|
|
70
|
+
let i = 0;
|
|
71
|
+
const n = content.length;
|
|
72
|
+
|
|
73
|
+
while (i < n) {
|
|
74
|
+
const ch = content[i]!;
|
|
75
|
+
|
|
76
|
+
if (inQuotes) {
|
|
77
|
+
if (ch === '"') {
|
|
78
|
+
if (i + 1 < n && content[i + 1] === '"') {
|
|
79
|
+
current += '"';
|
|
80
|
+
i += 2;
|
|
81
|
+
continue;
|
|
82
|
+
}
|
|
83
|
+
inQuotes = false;
|
|
84
|
+
i++;
|
|
85
|
+
continue;
|
|
86
|
+
}
|
|
87
|
+
current += ch;
|
|
88
|
+
i++;
|
|
89
|
+
continue;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Not in quotes
|
|
93
|
+
if (ch === '"') {
|
|
94
|
+
inQuotes = true;
|
|
95
|
+
i++;
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
if (ch === delimiter) {
|
|
99
|
+
row.push(current);
|
|
100
|
+
current = "";
|
|
101
|
+
i++;
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
if (ch === "\r") {
|
|
105
|
+
// Swallow \r, then handle \n on next iteration
|
|
106
|
+
i++;
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
if (ch === "\n") {
|
|
110
|
+
row.push(current);
|
|
111
|
+
current = "";
|
|
112
|
+
rows.push(row);
|
|
113
|
+
row = [];
|
|
114
|
+
i++;
|
|
115
|
+
continue;
|
|
116
|
+
}
|
|
117
|
+
current += ch;
|
|
118
|
+
i++;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Flush trailing field/row if any
|
|
122
|
+
if (current.length > 0 || row.length > 0) {
|
|
123
|
+
row.push(current);
|
|
124
|
+
rows.push(row);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
return rows;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
// Public API
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
/** Options for readCsv. All fields default sensibly when omitted. */
export interface ReadCsvOptions {
  // Field delimiter; defaults to ",". Pass "\t" for TSV.
  readonly delimiter?: string;
  // Whether the first row is a header; defaults to true.
  readonly hasHeader?: boolean;
  // Encoding passed to readFileSync; defaults to "utf8".
  readonly encoding?: BufferEncoding;
}
|
|
139
|
+
|
|
140
|
+
/**
|
|
141
|
+
* Read a CSV (or TSV, via `delimiter: "\t"`) file from disk.
|
|
142
|
+
*
|
|
143
|
+
* Returns an array of `Row` objects (header -> coerced value). If
|
|
144
|
+
* `hasHeader` is false, synthetic headers `col_0`, `col_1`, ... are used.
|
|
145
|
+
*/
|
|
146
|
+
export function readCsv(path: string, options?: ReadCsvOptions): Row[] {
|
|
147
|
+
const resolved = resolve(path);
|
|
148
|
+
if (!existsSync(resolved)) {
|
|
149
|
+
throw new Error(`File not found: ${resolved}`);
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
const delimiter = options?.delimiter ?? ",";
|
|
153
|
+
const hasHeader = options?.hasHeader ?? true;
|
|
154
|
+
const encoding = options?.encoding ?? "utf8";
|
|
155
|
+
|
|
156
|
+
const content = readFileSync(resolved, encoding);
|
|
157
|
+
// Strip UTF-8 BOM if present (Excel-exported CSVs often include it)
|
|
158
|
+
const cleaned = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
|
|
159
|
+
|
|
160
|
+
const rawRows = parseCsvDocument(cleaned, delimiter);
|
|
161
|
+
if (rawRows.length === 0) return [];
|
|
162
|
+
|
|
163
|
+
// Skip completely empty rows (trailing newlines, blank lines between records)
|
|
164
|
+
const nonEmpty = rawRows.filter(
|
|
165
|
+
(r) => !(r.length === 1 && r[0] === ""),
|
|
166
|
+
);
|
|
167
|
+
if (nonEmpty.length === 0) return [];
|
|
168
|
+
|
|
169
|
+
let headers: string[];
|
|
170
|
+
let dataRows: string[][];
|
|
171
|
+
if (hasHeader) {
|
|
172
|
+
headers = nonEmpty[0]!.map((h) => h.trim());
|
|
173
|
+
dataRows = nonEmpty.slice(1);
|
|
174
|
+
} else {
|
|
175
|
+
const width = nonEmpty[0]!.length;
|
|
176
|
+
headers = Array.from({ length: width }, (_, i) => `col_${i}`);
|
|
177
|
+
dataRows = nonEmpty;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
const rows: Row[] = [];
|
|
181
|
+
for (const raw of dataRows) {
|
|
182
|
+
const record: Record<string, unknown> = {};
|
|
183
|
+
for (let j = 0; j < headers.length; j++) {
|
|
184
|
+
const field = raw[j] ?? "";
|
|
185
|
+
record[headers[j]!] = field === "" ? null : coerceValue(field);
|
|
186
|
+
}
|
|
187
|
+
rows.push(record);
|
|
188
|
+
}
|
|
189
|
+
return rows;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
/**
|
|
193
|
+
* Read a JSON or JSONL file.
|
|
194
|
+
*
|
|
195
|
+
* - `.json`: expects an array of objects at the top level.
|
|
196
|
+
* - `.jsonl` / `.ndjson`: one JSON object per line.
|
|
197
|
+
*
|
|
198
|
+
* Auto-detected based on whether the first non-whitespace character is `[`.
|
|
199
|
+
*/
|
|
200
|
+
export function readJson(path: string): Row[] {
|
|
201
|
+
const resolved = resolve(path);
|
|
202
|
+
if (!existsSync(resolved)) {
|
|
203
|
+
throw new Error(`File not found: ${resolved}`);
|
|
204
|
+
}
|
|
205
|
+
const content = readFileSync(resolved, "utf8");
|
|
206
|
+
const trimmed = content.trimStart();
|
|
207
|
+
|
|
208
|
+
// JSONL: one object per line, detected by lack of opening `[`
|
|
209
|
+
const ext = extname(resolved).toLowerCase();
|
|
210
|
+
const isJsonl =
|
|
211
|
+
ext === ".jsonl" ||
|
|
212
|
+
ext === ".ndjson" ||
|
|
213
|
+
(trimmed.length > 0 && trimmed[0] !== "[");
|
|
214
|
+
|
|
215
|
+
if (isJsonl) {
|
|
216
|
+
const rows: Row[] = [];
|
|
217
|
+
const lines = content.split(/\r?\n/);
|
|
218
|
+
for (let i = 0; i < lines.length; i++) {
|
|
219
|
+
const line = lines[i]!.trim();
|
|
220
|
+
if (line === "") continue;
|
|
221
|
+
try {
|
|
222
|
+
const parsed = JSON.parse(line);
|
|
223
|
+
if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
224
|
+
rows.push(parsed as Row);
|
|
225
|
+
} else {
|
|
226
|
+
throw new Error(
|
|
227
|
+
`JSONL line ${i + 1}: expected an object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`,
|
|
228
|
+
);
|
|
229
|
+
}
|
|
230
|
+
} catch (err) {
|
|
231
|
+
if (err instanceof SyntaxError) {
|
|
232
|
+
throw new Error(`JSONL parse error at line ${i + 1}: ${err.message}`);
|
|
233
|
+
}
|
|
234
|
+
throw err;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
return rows;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// Standard JSON array
|
|
241
|
+
let parsed: unknown;
|
|
242
|
+
try {
|
|
243
|
+
parsed = JSON.parse(content);
|
|
244
|
+
} catch (err) {
|
|
245
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
246
|
+
throw new Error(`JSON parse error in ${resolved}: ${msg}`);
|
|
247
|
+
}
|
|
248
|
+
if (!Array.isArray(parsed)) {
|
|
249
|
+
throw new Error(
|
|
250
|
+
`JSON file ${resolved}: expected an array of objects at the root`,
|
|
251
|
+
);
|
|
252
|
+
}
|
|
253
|
+
return parsed as Row[];
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
/**
|
|
257
|
+
* Dispatch to readCsv / readJson based on file extension.
|
|
258
|
+
*
|
|
259
|
+
* Supported: `.csv`, `.tsv`, `.json`, `.jsonl`, `.ndjson`.
|
|
260
|
+
*/
|
|
261
|
+
export function readFile(path: string): Row[] {
|
|
262
|
+
const ext = extname(path).toLowerCase();
|
|
263
|
+
if (ext === ".csv") return readCsv(path, { delimiter: "," });
|
|
264
|
+
if (ext === ".tsv") return readCsv(path, { delimiter: "\t" });
|
|
265
|
+
if (ext === ".json" || ext === ".jsonl" || ext === ".ndjson") {
|
|
266
|
+
return readJson(path);
|
|
267
|
+
}
|
|
268
|
+
throw new Error(
|
|
269
|
+
`Unsupported file format: ${ext}. Supported: .csv, .tsv, .json, .jsonl, .ndjson`,
|
|
270
|
+
);
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
// Serialization
|
|
275
|
+
// ---------------------------------------------------------------------------
|
|
276
|
+
|
|
277
|
+
/** Escape a CSV field: quote if it contains delimiter, quote, or newline. */
|
|
278
|
+
function escapeCsvField(value: unknown, delimiter: string): string {
|
|
279
|
+
if (value === null || value === undefined) return "";
|
|
280
|
+
const s = String(value);
|
|
281
|
+
const needsQuoting =
|
|
282
|
+
s.includes(delimiter) ||
|
|
283
|
+
s.includes('"') ||
|
|
284
|
+
s.includes("\n") ||
|
|
285
|
+
s.includes("\r");
|
|
286
|
+
if (!needsQuoting) return s;
|
|
287
|
+
return `"${s.replace(/"/g, '""')}"`;
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
/** Options for writeCsv. */
export interface WriteCsvOptions {
  // Explicit column order; defaults to the union of row keys ordered by
  // first appearance.
  readonly columns?: readonly string[];
  // Field delimiter; defaults to ",".
  readonly delimiter?: string;
}
|
|
294
|
+
|
|
295
|
+
/**
|
|
296
|
+
* Write rows to a CSV file. Creates parent directories as needed.
|
|
297
|
+
*
|
|
298
|
+
* If `columns` is not supplied, the union of keys from all rows is used,
|
|
299
|
+
* ordered by first appearance.
|
|
300
|
+
*/
|
|
301
|
+
export function writeCsv(
|
|
302
|
+
path: string,
|
|
303
|
+
rows: readonly Row[],
|
|
304
|
+
options?: WriteCsvOptions,
|
|
305
|
+
): void {
|
|
306
|
+
const resolved = resolve(path);
|
|
307
|
+
const dir = dirname(resolved);
|
|
308
|
+
if (dir && dir !== "." && !existsSync(dir)) {
|
|
309
|
+
mkdirSync(dir, { recursive: true });
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
const delimiter = options?.delimiter ?? ",";
|
|
313
|
+
|
|
314
|
+
let columns: string[];
|
|
315
|
+
if (options?.columns && options.columns.length > 0) {
|
|
316
|
+
columns = [...options.columns];
|
|
317
|
+
} else if (rows.length === 0) {
|
|
318
|
+
writeFileSync(resolved, "", "utf8");
|
|
319
|
+
return;
|
|
320
|
+
} else {
|
|
321
|
+
const seen = new Set<string>();
|
|
322
|
+
columns = [];
|
|
323
|
+
for (const row of rows) {
|
|
324
|
+
for (const key of Object.keys(row)) {
|
|
325
|
+
if (!seen.has(key)) {
|
|
326
|
+
seen.add(key);
|
|
327
|
+
columns.push(key);
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
const lines: string[] = [];
|
|
334
|
+
lines.push(columns.map((c) => escapeCsvField(c, delimiter)).join(delimiter));
|
|
335
|
+
for (const row of rows) {
|
|
336
|
+
const fields = columns.map((c) => escapeCsvField(row[c], delimiter));
|
|
337
|
+
lines.push(fields.join(delimiter));
|
|
338
|
+
}
|
|
339
|
+
writeFileSync(resolved, lines.join("\n") + "\n", "utf8");
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
/** Write rows to a JSON file as a pretty-printed array. */
|
|
343
|
+
export function writeJson(path: string, rows: readonly Row[]): void {
|
|
344
|
+
const resolved = resolve(path);
|
|
345
|
+
const dir = dirname(resolved);
|
|
346
|
+
if (dir && dir !== "." && !existsSync(dir)) {
|
|
347
|
+
mkdirSync(dir, { recursive: true });
|
|
348
|
+
}
|
|
349
|
+
writeFileSync(resolved, JSON.stringify(rows, null, 2), "utf8");
|
|
350
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* hubspot.ts -- HubSpot CRM connector via REST API + fetch().
|
|
3
|
+
*
|
|
4
|
+
* `query.table` selects the HubSpot object: "contacts", "companies", "deals", etc.
|
|
5
|
+
* SQL strings are not supported -- use object queries.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Row } from "../../core/types.js";
|
|
9
|
+
import type { BaseConnector } from "./base.js";
|
|
10
|
+
|
|
11
|
+
/** Configuration for the HubSpot connector. */
export interface HubSpotConfig {
  readonly apiKey: string; // HubSpot private app access token (sent as a Bearer token)
  readonly apiBase?: string; // API origin override; default "https://api.hubapi.com"
}

/** One object record as returned by the HubSpot CRM v3 objects API. */
interface HubSpotResult {
  id: string;
  properties: Record<string, unknown>;
}

/** A single page of a CRM v3 list response. `paging.next.link` is used
 *  directly as the URL of the next page when present. */
interface HubSpotPage {
  results: HubSpotResult[];
  paging?: { next?: { link: string } };
}
|
|
25
|
+
|
|
26
|
+
export function createHubSpotConnector(config: HubSpotConfig): BaseConnector {
|
|
27
|
+
const base = config.apiBase ?? "https://api.hubapi.com";
|
|
28
|
+
|
|
29
|
+
return {
|
|
30
|
+
name: "hubspot",
|
|
31
|
+
async connect() {
|
|
32
|
+
// No-op: stateless REST.
|
|
33
|
+
},
|
|
34
|
+
async read(query) {
|
|
35
|
+
if (typeof query === "string") {
|
|
36
|
+
throw new Error("HubSpot connector requires an object query (table/columns/limit), not SQL.");
|
|
37
|
+
}
|
|
38
|
+
const limit = query.limit ?? 100;
|
|
39
|
+
const propsParam = query.columns?.length ? `&properties=${query.columns.join(",")}` : "";
|
|
40
|
+
const endpoint = `${base}/crm/v3/objects/${query.table}?limit=${limit}${propsParam}`;
|
|
41
|
+
const rows: Row[] = [];
|
|
42
|
+
let next: string | undefined = endpoint;
|
|
43
|
+
while (next) {
|
|
44
|
+
const resp = await fetch(next, {
|
|
45
|
+
headers: { Authorization: `Bearer ${config.apiKey}` },
|
|
46
|
+
});
|
|
47
|
+
if (!resp.ok) {
|
|
48
|
+
throw new Error(`HubSpot query failed: ${resp.status} ${await resp.text()}`);
|
|
49
|
+
}
|
|
50
|
+
const j = (await resp.json()) as HubSpotPage;
|
|
51
|
+
for (const r of j.results) {
|
|
52
|
+
rows.push({ id: r.id, ...r.properties });
|
|
53
|
+
}
|
|
54
|
+
next = j.paging?.next?.link;
|
|
55
|
+
}
|
|
56
|
+
return rows;
|
|
57
|
+
},
|
|
58
|
+
async close() {
|
|
59
|
+
// Stateless.
|
|
60
|
+
},
|
|
61
|
+
};
|
|
62
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
 * index.ts -- Connector registry bootstrap and re-exports.
 *
 * Importing this module has a side effect: it registers every built-in
 * connector factory under its name so loadConnector() can resolve them
 * without callers importing each factory module directly.
 */

import { registerConnector, loadConnector, listConnectors } from "./base.js";
import { createSnowflakeConnector, type SnowflakeConfig } from "./snowflake.js";
import { createBigQueryConnector, type BigQueryConfig } from "./bigquery.js";
import { createDatabricksConnector, type DatabricksConfig } from "./databricks.js";
import { createSalesforceConnector, type SalesforceConfig } from "./salesforce.js";
import { createHubSpotConnector, type HubSpotConfig } from "./hubspot.js";

// Each factory receives the registry's untyped config object; the double
// cast narrows it to the connector-specific shape (the connector itself
// is responsible for rejecting invalid configs at runtime).
registerConnector("snowflake", (c) => createSnowflakeConnector(c as unknown as SnowflakeConfig));
registerConnector("bigquery", (c) => createBigQueryConnector(c as unknown as BigQueryConfig));
registerConnector("databricks", (c) => createDatabricksConnector(c as unknown as DatabricksConfig));
registerConnector("salesforce", (c) => createSalesforceConnector(c as unknown as SalesforceConfig));
registerConnector("hubspot", (c) => createHubSpotConnector(c as unknown as HubSpotConfig));

// Factories and registry helpers.
export {
  createSnowflakeConnector,
  createBigQueryConnector,
  createDatabricksConnector,
  createSalesforceConnector,
  createHubSpotConnector,
  registerConnector,
  loadConnector,
  listConnectors,
};

// Connector contract types.
export type {
  BaseConnector,
  ConnectorConfig,
  ConnectorFactory,
  ConnectorQuery,
} from "./base.js";
export type { SnowflakeConfig } from "./snowflake.js";
export type { BigQueryConfig } from "./bigquery.js";
export type { DatabricksConfig } from "./databricks.js";
export type { SalesforceConfig } from "./salesforce.js";
export type { HubSpotConfig } from "./hubspot.js";

// Re-export the existing local-file connector for convenience.
export { readFile, readCsv, readJson, writeCsv, writeJson } from "./file.js";
export type { ReadCsvOptions, WriteCsvOptions } from "./file.js";
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* salesforce.ts -- Salesforce connector using the REST API via fetch().
|
|
3
|
+
*
|
|
4
|
+
* No SDK dependency: stays edge-adjacent. Supports either a pre-issued
|
|
5
|
+
* accessToken or the OAuth 2.0 password grant flow.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Row } from "../../core/types.js";
|
|
9
|
+
import type { BaseConnector, ConnectorQuery } from "./base.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Configuration for the Salesforce connector.
 *
 * Either supply a pre-issued `accessToken` (connect() becomes a no-op),
 * or supply the OAuth 2.0 password-grant credentials (clientId,
 * clientSecret, username, password, and `securityToken` when the org
 * requires one) and connect() will authenticate.
 */
export interface SalesforceConfig {
  readonly instanceUrl: string; // e.g. "https://xxx.my.salesforce.com"
  readonly accessToken?: string; // pre-issued token; skips the OAuth flow
  readonly clientId?: string;
  readonly clientSecret?: string;
  readonly username?: string;
  readonly password?: string;
  readonly securityToken?: string; // appended to the password during auth
  readonly apiVersion?: string; // default "v61.0"
}

/** Fields this module reads from the OAuth token endpoint response. */
interface OAuthResponse {
  access_token: string;
  instance_url: string;
}

/** Fields this module reads from one /query REST response page. */
interface QueryResponse {
  records: Row[];
  done: boolean;
  nextRecordsUrl?: string; // instance-relative URL of the next page
}
|
|
32
|
+
|
|
33
|
+
export function createSalesforceConnector(config: SalesforceConfig): BaseConnector {
|
|
34
|
+
const apiVersion = config.apiVersion ?? "v61.0";
|
|
35
|
+
let accessToken = config.accessToken;
|
|
36
|
+
let instanceUrl = config.instanceUrl;
|
|
37
|
+
|
|
38
|
+
return {
|
|
39
|
+
name: "salesforce",
|
|
40
|
+
async connect() {
|
|
41
|
+
if (accessToken) return;
|
|
42
|
+
const body = new URLSearchParams({
|
|
43
|
+
grant_type: "password",
|
|
44
|
+
client_id: config.clientId ?? "",
|
|
45
|
+
client_secret: config.clientSecret ?? "",
|
|
46
|
+
username: config.username ?? "",
|
|
47
|
+
password: (config.password ?? "") + (config.securityToken ?? ""),
|
|
48
|
+
});
|
|
49
|
+
const resp = await fetch(`${instanceUrl}/services/oauth2/token`, {
|
|
50
|
+
method: "POST",
|
|
51
|
+
headers: { "Content-Type": "application/x-www-form-urlencoded" },
|
|
52
|
+
body: body.toString(),
|
|
53
|
+
});
|
|
54
|
+
if (!resp.ok) {
|
|
55
|
+
throw new Error(`Salesforce auth failed: ${resp.status} ${await resp.text()}`);
|
|
56
|
+
}
|
|
57
|
+
const j = (await resp.json()) as OAuthResponse;
|
|
58
|
+
accessToken = j.access_token;
|
|
59
|
+
instanceUrl = j.instance_url;
|
|
60
|
+
},
|
|
61
|
+
async read(query) {
|
|
62
|
+
if (!accessToken) {
|
|
63
|
+
throw new Error("Salesforce connector not connected. Call connect() first.");
|
|
64
|
+
}
|
|
65
|
+
const soql = typeof query === "string" ? query : buildSOQL(query);
|
|
66
|
+
const url = `${instanceUrl}/services/data/${apiVersion}/query?q=${encodeURIComponent(soql)}`;
|
|
67
|
+
const rows: Row[] = [];
|
|
68
|
+
let next: string | undefined = url;
|
|
69
|
+
while (next) {
|
|
70
|
+
const resp = await fetch(next, {
|
|
71
|
+
headers: { Authorization: `Bearer ${accessToken}` },
|
|
72
|
+
});
|
|
73
|
+
if (!resp.ok) {
|
|
74
|
+
throw new Error(`Salesforce query failed: ${resp.status} ${await resp.text()}`);
|
|
75
|
+
}
|
|
76
|
+
const j = (await resp.json()) as QueryResponse;
|
|
77
|
+
rows.push(...j.records);
|
|
78
|
+
next = j.nextRecordsUrl ? `${instanceUrl}${j.nextRecordsUrl}` : undefined;
|
|
79
|
+
}
|
|
80
|
+
return rows;
|
|
81
|
+
},
|
|
82
|
+
async close() {
|
|
83
|
+
// Stateless: nothing to tear down.
|
|
84
|
+
},
|
|
85
|
+
};
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
function buildSOQL(q: ConnectorQuery): string {
|
|
89
|
+
const cols = q.columns?.length ? q.columns.join(",") : "Id";
|
|
90
|
+
let sql = `SELECT ${cols} FROM ${q.table}`;
|
|
91
|
+
if (q.limit) sql += ` LIMIT ${q.limit}`;
|
|
92
|
+
return sql;
|
|
93
|
+
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* snowflake.ts -- Snowflake connector via the optional `snowflake-sdk` peer dep.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { createRequire } from "node:module";
|
|
6
|
+
import type { Row } from "../../core/types.js";
|
|
7
|
+
import type { BaseConnector, ConnectorQuery } from "./base.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Configuration passed through verbatim to `snowflake-sdk`'s
 * createConnection().
 * NOTE(review): presumably `password` and `privateKey` are alternative
 * auth methods -- confirm against the snowflake-sdk documentation.
 */
export interface SnowflakeConfig {
  readonly account: string; // Snowflake account identifier
  readonly username: string;
  readonly password?: string;
  readonly privateKey?: string; // key-pair auth alternative
  readonly warehouse?: string;
  readonly database?: string;
  readonly schema?: string;
  readonly role?: string;
}
|
|
19
|
+
|
|
20
|
+
export function createSnowflakeConnector(config: SnowflakeConfig): BaseConnector {
|
|
21
|
+
const require = createRequire(import.meta.url);
|
|
22
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
23
|
+
let snowflake: any;
|
|
24
|
+
try {
|
|
25
|
+
snowflake = require("snowflake-sdk");
|
|
26
|
+
} catch {
|
|
27
|
+
throw new Error(
|
|
28
|
+
"'snowflake-sdk' is required for the Snowflake connector. Install: npm install snowflake-sdk",
|
|
29
|
+
);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const connection = snowflake.createConnection({
|
|
33
|
+
account: config.account,
|
|
34
|
+
username: config.username,
|
|
35
|
+
password: config.password,
|
|
36
|
+
privateKey: config.privateKey,
|
|
37
|
+
warehouse: config.warehouse,
|
|
38
|
+
database: config.database,
|
|
39
|
+
schema: config.schema,
|
|
40
|
+
role: config.role,
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
return {
|
|
44
|
+
name: "snowflake",
|
|
45
|
+
connect() {
|
|
46
|
+
return new Promise<void>((resolve, reject) => {
|
|
47
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
48
|
+
connection.connect((err: any) => (err ? reject(err) : resolve()));
|
|
49
|
+
});
|
|
50
|
+
},
|
|
51
|
+
async read(query) {
|
|
52
|
+
const sql = typeof query === "string" ? query : buildSelect(query);
|
|
53
|
+
return new Promise<Row[]>((resolve, reject) => {
|
|
54
|
+
connection.execute({
|
|
55
|
+
sqlText: sql,
|
|
56
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
57
|
+
complete: (err: any, _stmt: any, rows: Row[]) =>
|
|
58
|
+
err ? reject(err) : resolve(rows ?? []),
|
|
59
|
+
});
|
|
60
|
+
});
|
|
61
|
+
},
|
|
62
|
+
async close() {
|
|
63
|
+
await new Promise<void>((resolve) => connection.destroy(() => resolve()));
|
|
64
|
+
},
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function buildSelect(q: ConnectorQuery): string {
|
|
69
|
+
const cols = q.columns?.length ? q.columns.map((c) => `"${c}"`).join(",") : "*";
|
|
70
|
+
let sql = `SELECT ${cols} FROM ${q.table}`;
|
|
71
|
+
if (q.limit) sql += ` LIMIT ${q.limit}`;
|
|
72
|
+
return sql;
|
|
73
|
+
}
|