@caracal-lynx/sluice 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. package/CLAUDE.md +1822 -0
  2. package/LICENCE-FAQ.md +74 -0
  3. package/LICENSE +92 -0
  4. package/README.md +582 -0
  5. package/dist/adapters/source/csv.d.ts +10 -0
  6. package/dist/adapters/source/csv.d.ts.map +1 -0
  7. package/dist/adapters/source/csv.js +110 -0
  8. package/dist/adapters/source/csv.js.map +1 -0
  9. package/dist/adapters/source/index.d.ts +9 -0
  10. package/dist/adapters/source/index.d.ts.map +1 -0
  11. package/dist/adapters/source/index.js +26 -0
  12. package/dist/adapters/source/index.js.map +1 -0
  13. package/dist/adapters/source/mssql.d.ts +11 -0
  14. package/dist/adapters/source/mssql.d.ts.map +1 -0
  15. package/dist/adapters/source/mssql.js +230 -0
  16. package/dist/adapters/source/mssql.js.map +1 -0
  17. package/dist/adapters/source/pg.d.ts +11 -0
  18. package/dist/adapters/source/pg.d.ts.map +1 -0
  19. package/dist/adapters/source/pg.js +88 -0
  20. package/dist/adapters/source/pg.js.map +1 -0
  21. package/dist/adapters/source/registry.d.ts +10 -0
  22. package/dist/adapters/source/registry.d.ts.map +1 -0
  23. package/dist/adapters/source/registry.js +36 -0
  24. package/dist/adapters/source/registry.js.map +1 -0
  25. package/dist/adapters/source/rest.d.ts +16 -0
  26. package/dist/adapters/source/rest.d.ts.map +1 -0
  27. package/dist/adapters/source/rest.js +182 -0
  28. package/dist/adapters/source/rest.js.map +1 -0
  29. package/dist/adapters/source/rest.types.d.ts +15 -0
  30. package/dist/adapters/source/rest.types.d.ts.map +1 -0
  31. package/dist/adapters/source/rest.types.js +6 -0
  32. package/dist/adapters/source/rest.types.js.map +1 -0
  33. package/dist/adapters/source/types.d.ts +23 -0
  34. package/dist/adapters/source/types.d.ts.map +1 -0
  35. package/dist/adapters/source/types.js +4 -0
  36. package/dist/adapters/source/types.js.map +1 -0
  37. package/dist/adapters/source/xlsx.d.ts +10 -0
  38. package/dist/adapters/source/xlsx.d.ts.map +1 -0
  39. package/dist/adapters/source/xlsx.js +71 -0
  40. package/dist/adapters/source/xlsx.js.map +1 -0
  41. package/dist/adapters/target/bc.d.ts +21 -0
  42. package/dist/adapters/target/bc.d.ts.map +1 -0
  43. package/dist/adapters/target/bc.js +188 -0
  44. package/dist/adapters/target/bc.js.map +1 -0
  45. package/dist/adapters/target/bluecherry.d.ts +10 -0
  46. package/dist/adapters/target/bluecherry.d.ts.map +1 -0
  47. package/dist/adapters/target/bluecherry.js +127 -0
  48. package/dist/adapters/target/bluecherry.js.map +1 -0
  49. package/dist/adapters/target/csv.d.ts +10 -0
  50. package/dist/adapters/target/csv.d.ts.map +1 -0
  51. package/dist/adapters/target/csv.js +40 -0
  52. package/dist/adapters/target/csv.js.map +1 -0
  53. package/dist/adapters/target/ifs.d.ts +10 -0
  54. package/dist/adapters/target/ifs.d.ts.map +1 -0
  55. package/dist/adapters/target/ifs.js +55 -0
  56. package/dist/adapters/target/ifs.js.map +1 -0
  57. package/dist/adapters/target/index.d.ts +8 -0
  58. package/dist/adapters/target/index.d.ts.map +1 -0
  59. package/dist/adapters/target/index.js +22 -0
  60. package/dist/adapters/target/index.js.map +1 -0
  61. package/dist/adapters/target/pg.d.ts +11 -0
  62. package/dist/adapters/target/pg.d.ts.map +1 -0
  63. package/dist/adapters/target/pg.js +103 -0
  64. package/dist/adapters/target/pg.js.map +1 -0
  65. package/dist/adapters/target/registry.d.ts +9 -0
  66. package/dist/adapters/target/registry.d.ts.map +1 -0
  67. package/dist/adapters/target/registry.js +29 -0
  68. package/dist/adapters/target/registry.js.map +1 -0
  69. package/dist/adapters/target/types.d.ts +15 -0
  70. package/dist/adapters/target/types.d.ts.map +1 -0
  71. package/dist/adapters/target/types.js +4 -0
  72. package/dist/adapters/target/types.js.map +1 -0
  73. package/dist/cli.d.ts +25 -0
  74. package/dist/cli.d.ts.map +1 -0
  75. package/dist/cli.js +354 -0
  76. package/dist/cli.js.map +1 -0
  77. package/dist/config/index.d.ts +4 -0
  78. package/dist/config/index.d.ts.map +1 -0
  79. package/dist/config/index.js +6 -0
  80. package/dist/config/index.js.map +1 -0
  81. package/dist/config/loader.d.ts +5 -0
  82. package/dist/config/loader.d.ts.map +1 -0
  83. package/dist/config/loader.js +135 -0
  84. package/dist/config/loader.js.map +1 -0
  85. package/dist/config/schema.d.ts +4162 -0
  86. package/dist/config/schema.d.ts.map +1 -0
  87. package/dist/config/schema.js +263 -0
  88. package/dist/config/schema.js.map +1 -0
  89. package/dist/config/types.d.ts +3 -0
  90. package/dist/config/types.d.ts.map +1 -0
  91. package/dist/config/types.js +4 -0
  92. package/dist/config/types.js.map +1 -0
  93. package/dist/dq/engine.d.ts +10 -0
  94. package/dist/dq/engine.d.ts.map +1 -0
  95. package/dist/dq/engine.js +114 -0
  96. package/dist/dq/engine.js.map +1 -0
  97. package/dist/dq/index.d.ts +6 -0
  98. package/dist/dq/index.d.ts.map +1 -0
  99. package/dist/dq/index.js +6 -0
  100. package/dist/dq/index.js.map +1 -0
  101. package/dist/dq/reporter.d.ts +5 -0
  102. package/dist/dq/reporter.d.ts.map +1 -0
  103. package/dist/dq/reporter.js +41 -0
  104. package/dist/dq/reporter.js.map +1 -0
  105. package/dist/dq/rules/allowedValues.d.ts +7 -0
  106. package/dist/dq/rules/allowedValues.d.ts.map +1 -0
  107. package/dist/dq/rules/allowedValues.js +26 -0
  108. package/dist/dq/rules/allowedValues.js.map +1 -0
  109. package/dist/dq/rules/email.d.ts +7 -0
  110. package/dist/dq/rules/email.d.ts.map +1 -0
  111. package/dist/dq/rules/email.js +24 -0
  112. package/dist/dq/rules/email.js.map +1 -0
  113. package/dist/dq/rules/index.d.ts +15 -0
  114. package/dist/dq/rules/index.d.ts.map +1 -0
  115. package/dist/dq/rules/index.js +30 -0
  116. package/dist/dq/rules/index.js.map +1 -0
  117. package/dist/dq/rules/maxLength.d.ts +7 -0
  118. package/dist/dq/rules/maxLength.d.ts.map +1 -0
  119. package/dist/dq/rules/maxLength.js +25 -0
  120. package/dist/dq/rules/maxLength.js.map +1 -0
  121. package/dist/dq/rules/minMax.d.ts +11 -0
  122. package/dist/dq/rules/minMax.d.ts.map +1 -0
  123. package/dist/dq/rules/minMax.js +52 -0
  124. package/dist/dq/rules/minMax.js.map +1 -0
  125. package/dist/dq/rules/notNull.d.ts +7 -0
  126. package/dist/dq/rules/notNull.d.ts.map +1 -0
  127. package/dist/dq/rules/notNull.js +21 -0
  128. package/dist/dq/rules/notNull.js.map +1 -0
  129. package/dist/dq/rules/pattern.d.ts +7 -0
  130. package/dist/dq/rules/pattern.d.ts.map +1 -0
  131. package/dist/dq/rules/pattern.js +31 -0
  132. package/dist/dq/rules/pattern.js.map +1 -0
  133. package/dist/dq/rules/types.d.ts +6 -0
  134. package/dist/dq/rules/types.d.ts.map +1 -0
  135. package/dist/dq/rules/types.js +4 -0
  136. package/dist/dq/rules/types.js.map +1 -0
  137. package/dist/dq/rules/ukPostcode.d.ts +7 -0
  138. package/dist/dq/rules/ukPostcode.d.ts.map +1 -0
  139. package/dist/dq/rules/ukPostcode.js +24 -0
  140. package/dist/dq/rules/ukPostcode.js.map +1 -0
  141. package/dist/dq/rules/unique.d.ts +14 -0
  142. package/dist/dq/rules/unique.d.ts.map +1 -0
  143. package/dist/dq/rules/unique.js +9 -0
  144. package/dist/dq/rules/unique.js.map +1 -0
  145. package/dist/dq/types.d.ts +29 -0
  146. package/dist/dq/types.d.ts.map +1 -0
  147. package/dist/dq/types.js +4 -0
  148. package/dist/dq/types.js.map +1 -0
  149. package/dist/enrich/types.d.ts +87 -0
  150. package/dist/enrich/types.d.ts.map +1 -0
  151. package/dist/enrich/types.js +4 -0
  152. package/dist/enrich/types.js.map +1 -0
  153. package/dist/index.d.ts +17 -0
  154. package/dist/index.d.ts.map +1 -0
  155. package/dist/index.js +17 -0
  156. package/dist/index.js.map +1 -0
  157. package/dist/merge/conflict-log.d.ts +9 -0
  158. package/dist/merge/conflict-log.d.ts.map +1 -0
  159. package/dist/merge/conflict-log.js +28 -0
  160. package/dist/merge/conflict-log.js.map +1 -0
  161. package/dist/merge/engine.d.ts +7 -0
  162. package/dist/merge/engine.d.ts.map +1 -0
  163. package/dist/merge/engine.js +19 -0
  164. package/dist/merge/engine.js.map +1 -0
  165. package/dist/merge/index.d.ts +11 -0
  166. package/dist/merge/index.d.ts.map +1 -0
  167. package/dist/merge/index.js +34 -0
  168. package/dist/merge/index.js.map +1 -0
  169. package/dist/merge/sql-builder.d.ts +19 -0
  170. package/dist/merge/sql-builder.d.ts.map +1 -0
  171. package/dist/merge/sql-builder.js +148 -0
  172. package/dist/merge/sql-builder.js.map +1 -0
  173. package/dist/merge/strategies/coalesce.d.ts +17 -0
  174. package/dist/merge/strategies/coalesce.d.ts.map +1 -0
  175. package/dist/merge/strategies/coalesce.js +77 -0
  176. package/dist/merge/strategies/coalesce.js.map +1 -0
  177. package/dist/merge/strategies/index.d.ts +5 -0
  178. package/dist/merge/strategies/index.d.ts.map +1 -0
  179. package/dist/merge/strategies/index.js +7 -0
  180. package/dist/merge/strategies/index.js.map +1 -0
  181. package/dist/merge/strategies/intersect.d.ts +17 -0
  182. package/dist/merge/strategies/intersect.d.ts.map +1 -0
  183. package/dist/merge/strategies/intersect.js +75 -0
  184. package/dist/merge/strategies/intersect.js.map +1 -0
  185. package/dist/merge/strategies/priority-override.d.ts +16 -0
  186. package/dist/merge/strategies/priority-override.d.ts.map +1 -0
  187. package/dist/merge/strategies/priority-override.js +78 -0
  188. package/dist/merge/strategies/priority-override.js.map +1 -0
  189. package/dist/merge/strategies/registry.d.ts +8 -0
  190. package/dist/merge/strategies/registry.d.ts.map +1 -0
  191. package/dist/merge/strategies/registry.js +19 -0
  192. package/dist/merge/strategies/registry.js.map +1 -0
  193. package/dist/merge/strategies/union.d.ts +15 -0
  194. package/dist/merge/strategies/union.d.ts.map +1 -0
  195. package/dist/merge/strategies/union.js +75 -0
  196. package/dist/merge/strategies/union.js.map +1 -0
  197. package/dist/merge/types.d.ts +24 -0
  198. package/dist/merge/types.d.ts.map +1 -0
  199. package/dist/merge/types.js +4 -0
  200. package/dist/merge/types.js.map +1 -0
  201. package/dist/multi-source-runner.d.ts +22 -0
  202. package/dist/multi-source-runner.d.ts.map +1 -0
  203. package/dist/multi-source-runner.js +398 -0
  204. package/dist/multi-source-runner.js.map +1 -0
  205. package/dist/plugins/index.d.ts +4 -0
  206. package/dist/plugins/index.d.ts.map +1 -0
  207. package/dist/plugins/index.js +5 -0
  208. package/dist/plugins/index.js.map +1 -0
  209. package/dist/plugins/loader.d.ts +22 -0
  210. package/dist/plugins/loader.d.ts.map +1 -0
  211. package/dist/plugins/loader.js +151 -0
  212. package/dist/plugins/loader.js.map +1 -0
  213. package/dist/plugins/registry.d.ts +25 -0
  214. package/dist/plugins/registry.d.ts.map +1 -0
  215. package/dist/plugins/registry.js +42 -0
  216. package/dist/plugins/registry.js.map +1 -0
  217. package/dist/plugins/types.d.ts +61 -0
  218. package/dist/plugins/types.d.ts.map +1 -0
  219. package/dist/plugins/types.js +4 -0
  220. package/dist/plugins/types.js.map +1 -0
  221. package/dist/runner.d.ts +97 -0
  222. package/dist/runner.d.ts.map +1 -0
  223. package/dist/runner.js +520 -0
  224. package/dist/runner.js.map +1 -0
  225. package/dist/staging/index.d.ts +3 -0
  226. package/dist/staging/index.d.ts.map +1 -0
  227. package/dist/staging/index.js +5 -0
  228. package/dist/staging/index.js.map +1 -0
  229. package/dist/staging/schema.d.ts +19 -0
  230. package/dist/staging/schema.d.ts.map +1 -0
  231. package/dist/staging/schema.js +15 -0
  232. package/dist/staging/schema.js.map +1 -0
  233. package/dist/staging/store.d.ts +71 -0
  234. package/dist/staging/store.d.ts.map +1 -0
  235. package/dist/staging/store.js +270 -0
  236. package/dist/staging/store.js.map +1 -0
  237. package/dist/transform/cleanse.d.ts +2 -0
  238. package/dist/transform/cleanse.d.ts.map +1 -0
  239. package/dist/transform/cleanse.js +59 -0
  240. package/dist/transform/cleanse.js.map +1 -0
  241. package/dist/transform/engine.d.ts +10 -0
  242. package/dist/transform/engine.d.ts.map +1 -0
  243. package/dist/transform/engine.js +225 -0
  244. package/dist/transform/engine.js.map +1 -0
  245. package/dist/transform/expression.d.ts +5 -0
  246. package/dist/transform/expression.d.ts.map +1 -0
  247. package/dist/transform/expression.js +52 -0
  248. package/dist/transform/expression.js.map +1 -0
  249. package/dist/transform/index.d.ts +6 -0
  250. package/dist/transform/index.d.ts.map +1 -0
  251. package/dist/transform/index.js +7 -0
  252. package/dist/transform/index.js.map +1 -0
  253. package/dist/transform/lookup.d.ts +10 -0
  254. package/dist/transform/lookup.d.ts.map +1 -0
  255. package/dist/transform/lookup.js +66 -0
  256. package/dist/transform/lookup.js.map +1 -0
  257. package/dist/transform/types.d.ts +10 -0
  258. package/dist/transform/types.d.ts.map +1 -0
  259. package/dist/transform/types.js +4 -0
  260. package/dist/transform/types.js.map +1 -0
  261. package/dist/utils/env.d.ts +3 -0
  262. package/dist/utils/env.d.ts.map +1 -0
  263. package/dist/utils/env.js +26 -0
  264. package/dist/utils/env.js.map +1 -0
  265. package/dist/utils/errors.d.ts +26 -0
  266. package/dist/utils/errors.d.ts.map +1 -0
  267. package/dist/utils/errors.js +39 -0
  268. package/dist/utils/errors.js.map +1 -0
  269. package/dist/utils/index.d.ts +5 -0
  270. package/dist/utils/index.d.ts.map +1 -0
  271. package/dist/utils/index.js +7 -0
  272. package/dist/utils/index.js.map +1 -0
  273. package/dist/utils/logger.d.ts +14 -0
  274. package/dist/utils/logger.d.ts.map +1 -0
  275. package/dist/utils/logger.js +16 -0
  276. package/dist/utils/logger.js.map +1 -0
  277. package/dist/utils/progress.d.ts +66 -0
  278. package/dist/utils/progress.d.ts.map +1 -0
  279. package/dist/utils/progress.js +283 -0
  280. package/dist/utils/progress.js.map +1 -0
  281. package/package.json +92 -0
@@ -0,0 +1,71 @@
1
+ import { type ColumnMeta } from './schema.js';
2
+ export interface ExportToCsvOptions { // Options accepted by StagingStore.exportToCsv.
3
+ delimiter?: string; // Field delimiter; exportToCsv falls back to ',' when omitted.
4
+ header?: boolean; // Whether to write a header row; exportToCsv falls back to true when omitted.
5
+ /** Token to emit for NULL values. When omitted no NULL clause is sent, so DuckDB's default (empty string) applies. */
6
+ nullValue?: string;
7
+ /** Accepted for API symmetry only; DuckDB always writes UTF-8 and exportToCsv ignores this value. */
8
+ encoding?: string;
9
+ }
10
+ export declare class StagingStore { // Promise-based wrapper around one DuckDB instance and one connection.
11
+ private readonly dbPath; // Path handed to DuckDBInstance.create() by open().
12
+ private instance; // DuckDB instance; null until open() succeeds and again after close().
13
+ private conn; // DuckDB connection; null until open() succeeds and again after close().
14
+ /** `:memory:` for dryRun/tests, else a filesystem path. */
15
+ constructor(dbPath: string);
16
+ open(): Promise<void>; // Creates the instance and connects; no-op when already open; rejects with StagingError on failure.
17
+ close(): Promise<void>; // Disconnects and closes the instance; no-op when not open; rejects with StagingError on failure.
18
+ createTable(name: string, columns: ColumnMeta[]): Promise<void>; // Executes the statement produced by buildCreateTableSql(name, columns).
19
+ /**
20
+ * Bulk-insert rows. Column schema is taken from the first row; rows missing
21
+ * a column get `null`. All rows must share the first row's column set.
22
+ *
23
+ * Builds a single multi-row prepared INSERT for efficiency:
24
+ * `INSERT INTO "t" ("a","b") VALUES ($1,$2), ($3,$4), ($5,$6)`
25
+ * and binds all params positionally.
26
+ */
27
+ insertBatch(table: string, rows: Record<string, unknown>[]): Promise<void>;
28
+ query<T>(sql: string, params?: unknown[]): Promise<T[]>; // Runs sql; a non-empty params array is bound positionally through a prepared statement. Rows are returned uncast.
29
+ tableExists(name: string): Promise<boolean>; // True when information_schema.tables has at least one row with this table_name.
30
+ dropTable(name: string): Promise<void>; // Issues DROP TABLE IF EXISTS for the quoted name.
31
+ rowCount(table: string): Promise<number>; // count(*) of the table, converted with Number().
32
+ columnNames(table: string): Promise<string[]>; // Column names from information_schema.columns, ordered by ordinal_position.
33
+ /**
34
+ * Export a table to a CSV file. DuckDB always emits UTF-8; the `encoding`
35
+ * option is accepted for API symmetry and currently ignored.
36
+ */
37
+ exportToCsv(table: string, outputPath: string, options?: ExportToCsvOptions): Promise<void>;
38
+ /**
39
+ * Rename columns in place. Implemented as CREATE OR REPLACE TABLE ... AS
40
+ * SELECT to sidestep DuckDB's column-rename limitations. Unknown keys are
41
+ * warned and skipped (not an error). No-op when `renames` is empty.
42
+ *
43
+ * Used by `MultiSourcePipelineRunner` to apply per-source rename maps
44
+ * after extract and before merge.
45
+ */
46
+ renameColumns(tableName: string, renames: Record<string, string>): Promise<void>;
47
+ /**
48
+ * Returns distinct non-null, non-empty values for a column in stg_raw.
49
+ * Used by the Phase 4a enrich runner to deduplicate API calls.
50
+ */
51
+ selectDistinct(_field: string): Promise<string[]>; // In this package the method is a stub that always rejects with StagingError.
52
+ /** Adds a column to stg_raw if it does not already exist. */
53
+ addColumnIfNotExists(_column: string, _type: 'BOOLEAN' | 'VARCHAR'): Promise<void>; // In this package the method is a stub that always rejects with StagingError.
54
+ /**
55
+ * Batch-updates stg_raw. `updates` is keyed by rowid and maps each row to a
56
+ * partial column-value record to UPDATE.
57
+ */
58
+ batchUpdateColumns(_updates: Map<number, Record<string, unknown>>): Promise<void>; // In this package the method is a stub that always rejects with StagingError.
59
+ /**
60
+ * Run any SQL statement. Returns native-JS row objects (Date for TIMESTAMP,
61
+ * bigint for BIGINT, null for NULL) for SELECTs; for DDL/DML the returned
62
+ * array is empty or a `[{ Count: <n>n }]` summary that callers ignore.
63
+ */
64
+ private exec;
65
+ /**
66
+ * Run a parameterised statement via a prepared statement. Translates `?`
67
+ * placeholders to `$N` and binds positional params with type sniffing.
68
+ */
69
+ private execPrepared;
70
+ }
71
+ //# sourceMappingURL=store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"store.d.ts","sourceRoot":"","sources":["../../src/staging/store.ts"],"names":[],"mappings":"AAsBA,OAAO,EAAmC,KAAK,UAAU,EAAE,MAAM,aAAa,CAAC;AAE/E,MAAM,WAAW,kBAAkB;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,+EAA+E;IAC/E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,6DAA6D;IAC7D,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAqDD,qBAAa,YAAY;IAKX,OAAO,CAAC,QAAQ,CAAC,MAAM;IAJnC,OAAO,CAAC,QAAQ,CAA+B;IAC/C,OAAO,CAAC,IAAI,CAAiC;IAE7C,2DAA2D;gBAC9B,MAAM,EAAE,MAAM;IAErC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAerB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAiBtB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAIrE;;;;;;;OAOG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAmB1E,KAAK,CAAC,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,GAAE,OAAO,EAAO,GAAG,OAAO,CAAC,CAAC,EAAE,CAAC;IAK3D,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAS3C,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAItC,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAOxC,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAQnD;;;OAGG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,IAAI,CAAC;IAiBjG;;;;;;;OAOG;IACG,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA6BtF;;;OAGG;IACG,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAMvD,6DAA6D;IACvD,oBAAoB,CACxB,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,SAAS,GAAG,SAAS,GAC3B,OAAO,CAAC,IAAI,CAAC;IAMhB;;;OAGG;IACG,kBAAkB,CACtB,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GAC7C,OAAO,CAAC,IAAI,CAAC;IAQhB;;;;OAIG;YACW,IAAI;IAkBlB;;;OAGG;YACW,YAAY;CA6B3B"}
@@ -0,0 +1,270 @@
1
+ // SPDX-License-Identifier: Elastic-2.0
2
+ // Copyright (c) 2026 Caracal Lynx Ltd.
3
+ /**
4
+ * StagingStore — thin promisified wrapper around DuckDB.
5
+ *
6
+ * The only place in `src/` that imports `@duckdb/node-api` directly.
7
+ * PipelineRunner owns the single instance.
8
+ *
9
+ * Public API is preserved verbatim across the Node 20→24 / `duckdb` →
10
+ * `@duckdb/node-api` migration: callers still do `new StagingStore(path)`
11
+ * followed by `await store.open()`.
12
+ */
13
+ import { DuckDBInstance, } from '@duckdb/node-api';
14
+ import { StagingError } from '../utils/errors.js';
15
+ import { logger } from '../utils/logger.js';
16
+ import { buildCreateTableSql, quoteIdent } from './schema.js';
17
/**
 * Translate `?` positional placeholders (the convention inherited from the
 * old `duckdb` driver) into the `$N` form that `@duckdb/node-api` expects,
 * numbering them from 1 in order of appearance.
 *
 * A `?` that sits inside a single-quoted string literal, a double-quoted
 * identifier, a `--` line comment or a block comment is left untouched, so
 * SQL such as `WHERE note = 'why?' AND id = ?` keeps its literal intact and
 * quoted table/column names containing `?` are not corrupted. Doubled quotes
 * (`''` and `""`) are treated as escapes inside their respective regions.
 * Existing `$N` markers pass through unchanged.
 *
 * An unterminated quote or comment is not treated as a region; any `?`
 * after it is translated as a bind marker.
 *
 * @param sql SQL text that may contain `?` bind markers.
 * @returns The SQL with every bind-marker `?` replaced by `$1`, `$2`, ...
 */
function translatePlaceholders(sql) {
    // Alternatives are tried left to right at each position, so a quoted or
    // commented region is consumed whole before the bare `?` branch can match.
    const tokenPattern = /'(?:[^']|'')*'|"(?:[^"]|"")*"|--[^\n]*|\/\*[\s\S]*?\*\/|\?/g;
    let ordinal = 0;
    return sql.replace(tokenPattern, (token) => (token === '?' ? `$${++ordinal}` : token));
}
28
/**
 * Bind one positional parameter on a prepared statement, choosing the typed
 * bind method from the runtime JS type of `value`. The column type does not
 * need to be known up-front; only the JS value is inspected.
 *
 * Routing:
 * - `null` / `undefined`               -> `bindNull`
 * - `boolean`                          -> `bindBoolean`
 * - `bigint`                           -> `bindBigInt`
 * - integer `number` within int32      -> `bindInteger`
 * - other safe-integer `number`        -> `bindBigInt`, so the value stays an
 *   exact integer instead of being routed through a double
 * - any other `number` (fractions, NaN, +/-Infinity, beyond 2^53) -> `bindDouble`
 * - `Date`                             -> `bindVarchar` with `toISOString()`
 * - plain object (prototype is `Object.prototype` or `null`) -> `bindVarchar`
 *   with its JSON text, rather than the useless `"[object Object]"`
 * - everything else                    -> `bindVarchar` with `String(value)`
 *
 * @param prep  Prepared statement exposing the `bind*` methods used below.
 * @param pos   Parameter position; callers in this file pass 1-based indexes.
 * @param value Value to bind.
 */
function bindParam(prep, pos, value) {
    if (value === null || value === undefined) {
        prep.bindNull(pos);
        return;
    }
    switch (typeof value) {
        case 'boolean':
            prep.bindBoolean(pos, value);
            return;
        case 'bigint':
            prep.bindBigInt(pos, value);
            return;
        case 'number':
            if (Number.isInteger(value) && value >= -2_147_483_648 && value <= 2_147_483_647) {
                prep.bindInteger(pos, value);
            }
            else if (Number.isSafeInteger(value)) {
                // Too wide for a 32-bit bind but still exactly representable.
                prep.bindBigInt(pos, BigInt(value));
            }
            else {
                prep.bindDouble(pos, value);
            }
            return;
        default:
            break;
    }
    // Dates are sent as ISO-8601 text and left for the database to coerce.
    if (value instanceof Date) {
        prep.bindVarchar(pos, value.toISOString());
        return;
    }
    if (typeof value === 'object') {
        const proto = Object.getPrototypeOf(value);
        if (proto === Object.prototype || proto === null) {
            let json;
            try {
                json = JSON.stringify(value);
            }
            catch {
                // Circular references or nested bigints: fall through to String().
                json = undefined;
            }
            if (typeof json === 'string') {
                prep.bindVarchar(pos, json);
                return;
            }
        }
    }
    prep.bindVarchar(pos, String(value));
}
67
/**
 * Promise-based wrapper around a single DuckDB instance and connection.
 *
 * Construct with a database path, call `open()` before any other method and
 * `close()` when done. Every failure raised by DuckDB is rethrown as a
 * `StagingError` carrying the original error as its second argument.
 */
export class StagingStore {
    dbPath;
    instance = null;
    conn = null;
    /** `:memory:` for dryRun/tests, else a filesystem path. */
    constructor(dbPath) {
        this.dbPath = dbPath;
    }
    /**
     * Create the DuckDB instance and connect to it. No-op when already open.
     *
     * If connecting fails after the instance was created, the instance is
     * closed (best effort) before the error is reported, so it is not leaked.
     *
     * @throws {StagingError} when the instance cannot be created or connected.
     */
    async open() {
        if (this.instance)
            return;
        try {
            this.instance = await DuckDBInstance.create(this.dbPath);
            this.conn = await this.instance.connect();
        }
        catch (err) {
            const halfOpen = this.instance;
            this.instance = null;
            this.conn = null;
            try {
                halfOpen?.closeSync();
            }
            catch {
                /* best effort — the open failure below is the error worth reporting */
            }
            throw new StagingError(`failed to open DuckDB at ${this.dbPath}: ${err instanceof Error ? err.message : String(err)}`, err);
        }
    }
    /**
     * Disconnect and close the instance. No-op when not open. The store is
     * marked closed before the native handles are released, and the instance
     * is closed even when disconnecting the connection throws.
     *
     * @throws {StagingError} when disconnecting or closing fails.
     */
    async close() {
        if (!this.instance)
            return;
        const conn = this.conn;
        const instance = this.instance;
        this.conn = null;
        this.instance = null;
        try {
            try {
                conn?.disconnectSync();
            }
            finally {
                instance.closeSync();
            }
        }
        catch (err) {
            throw new StagingError(`failed to close DuckDB: ${err instanceof Error ? err.message : String(err)}`, err);
        }
    }
    /** Create a table by executing the SQL from `buildCreateTableSql(name, columns)`. */
    async createTable(name, columns) {
        await this.exec(buildCreateTableSql(name, columns));
    }
    /**
     * Bulk-insert rows. Column schema is taken from the first row; rows missing
     * a column get `null`. All rows must share the first row's column set.
     * No-op for an empty `rows` array.
     *
     * Builds a single multi-row prepared INSERT for efficiency:
     *   `INSERT INTO "t" ("a","b") VALUES ($1,$2), ($3,$4), ($5,$6)`
     * and binds all params positionally.
     *
     * @throws {StagingError} when the first row has no columns or the insert fails.
     */
    async insertBatch(table, rows) {
        if (rows.length === 0)
            return;
        const first = rows[0];
        const cols = Object.keys(first);
        if (cols.length === 0) {
            throw new StagingError('insertBatch: rows have no columns');
        }
        const colList = cols.map(quoteIdent).join(', ');
        let n = 0;
        const rowPlaceholders = rows
            .map(() => `(${cols.map(() => `$${++n}`).join(', ')})`)
            .join(', ');
        // Row-major flattening matches the $N numbering generated above.
        const flatParams = rows.flatMap((r) => cols.map((c) => (r[c] === undefined ? null : r[c])));
        const sql = `INSERT INTO ${quoteIdent(table)} (${colList}) VALUES ${rowPlaceholders}`;
        await this.execPrepared(sql, flatParams);
    }
    /**
     * Run `sql` and return its rows. With a non-empty `params` array the
     * statement goes through a prepared statement and `?` markers are bound
     * positionally; otherwise it is executed directly.
     */
    async query(sql, params = []) {
        const rows = await this.exec(sql, params);
        return rows;
    }
    /** True when `information_schema.tables` lists at least one table with this name. */
    async tableExists(name) {
        const rows = await this.query('SELECT count(*) AS n FROM information_schema.tables WHERE table_name = ?', [name]);
        const n = rows[0]?.n ?? 0;
        return Number(n) > 0;
    }
    /** Drop the table if it exists. */
    async dropTable(name) {
        await this.exec(`DROP TABLE IF EXISTS ${quoteIdent(name)}`);
    }
    /** Number of rows in `table`, converted from the query result with `Number()`. */
    async rowCount(table) {
        const rows = await this.query(`SELECT count(*) AS n FROM ${quoteIdent(table)}`);
        return Number(rows[0]?.n ?? 0);
    }
    /** Column names of `table` from `information_schema.columns`, in ordinal order. */
    async columnNames(table) {
        const rows = await this.query('SELECT column_name FROM information_schema.columns WHERE table_name = ? ORDER BY ordinal_position', [table]);
        return rows.map((r) => r.column_name);
    }
    /**
     * Export a table to a CSV file. DuckDB always emits UTF-8; the `encoding`
     * option is accepted for API symmetry and currently ignored.
     *
     * Backslashes in `outputPath` are converted to forward slashes, and single
     * quotes in the path, delimiter and NULL token are doubled so they are
     * safe inside the SQL string literals of the COPY statement.
     */
    async exportToCsv(table, outputPath, options) {
        const delim = options?.delimiter ?? ',';
        const header = options?.header ?? true;
        const safePath = outputPath.replace(/\\/g, '/').replace(/'/g, "''");
        const safeDelim = delim.replace(/'/g, "''");
        const clauses = [
            `HEADER ${header ? 'TRUE' : 'FALSE'}`,
            `DELIMITER '${safeDelim}'`,
        ];
        if (options?.nullValue !== undefined) {
            clauses.push(`NULL '${options.nullValue.replace(/'/g, "''")}'`);
        }
        await this.exec(`COPY ${quoteIdent(table)} TO '${safePath}' (${clauses.join(', ')})`);
    }
    /**
     * Rename columns in place. Implemented as CREATE OR REPLACE TABLE ... AS
     * SELECT to sidestep DuckDB's column-rename limitations. Unknown keys are
     * warned and skipped (not an error). No-op when `renames` is empty.
     *
     * Only own properties of `renames` are consulted, so a column that happens
     * to be called `toString` or `constructor` is not renamed by accident.
     *
     * Used by `MultiSourcePipelineRunner` to apply per-source rename maps
     * after extract and before merge.
     */
    async renameColumns(tableName, renames) {
        if (Object.keys(renames).length === 0)
            return;
        const existingColumns = await this.columnNames(tableName);
        const unknownKeys = Object.keys(renames).filter((k) => !existingColumns.includes(k));
        if (unknownKeys.length > 0) {
            logger.warn({ tableName, unknownKeys }, 'rename map contains columns not found in table');
        }
        const selectList = existingColumns
            .map((col) => {
            // Own-property lookup: `renames[col]` alone would also find members
            // inherited from Object.prototype for columns named like them.
            const newName = Object.prototype.hasOwnProperty.call(renames, col)
                ? renames[col]
                : undefined;
            return newName !== undefined
                ? `${quoteIdent(col)} AS ${quoteIdent(newName)}`
                : quoteIdent(col);
        })
            .join(', ');
        await this.exec(`CREATE OR REPLACE TABLE ${quoteIdent(tableName)} AS SELECT ${selectList} FROM ${quoteIdent(tableName)}`);
    }
    // ── Phase 4a stubs (implemented by @caracal-lynx/sluice-enrich) ────────
    //
    // The private `@caracal-lynx/sluice-enrich` package overwrites these three
    // methods on `StagingStore.prototype` at import time via a `patchStagingStore()`
    // helper. Until that package is installed and imported, the methods throw.
    /**
     * Returns distinct non-null, non-empty values for a column in stg_raw.
     * Used by the Phase 4a enrich runner to deduplicate API calls.
     * The implementation in this file always throws `StagingError`.
     */
    async selectDistinct(_field) {
        throw new StagingError('StagingStore.selectDistinct: not yet implemented — install @caracal-lynx/sluice-enrich');
    }
    /**
     * Adds a column to stg_raw if it does not already exist.
     * The implementation in this file always throws `StagingError`.
     */
    async addColumnIfNotExists(_column, _type) {
        throw new StagingError('StagingStore.addColumnIfNotExists: not yet implemented — install @caracal-lynx/sluice-enrich');
    }
    /**
     * Batch-updates stg_raw. `updates` is keyed by rowid and maps each row to a
     * partial column-value record to UPDATE.
     * The implementation in this file always throws `StagingError`.
     */
    async batchUpdateColumns(_updates) {
        throw new StagingError('StagingStore.batchUpdateColumns: not yet implemented — install @caracal-lynx/sluice-enrich');
    }
    // ── internal ───────────────────────────────────────────────────────────
    /**
     * Run any SQL statement. Returns native-JS row objects (Date for TIMESTAMP,
     * bigint for BIGINT, null for NULL) for SELECTs; for DDL/DML the returned
     * array is empty or a `[{ Count: <n>n }]` summary that callers ignore.
     *
     * Statements without params run directly on the connection; statements
     * with params are delegated to `execPrepared`.
     *
     * @throws {StagingError} when the store is not open or the statement fails.
     */
    async exec(sql, params = []) {
        if (!this.conn) {
            throw new StagingError('StagingStore is not open');
        }
        if (params.length === 0) {
            try {
                const reader = await this.conn.runAndReadAll(sql);
                return reader.getRowObjectsJS();
            }
            catch (err) {
                throw new StagingError(`DuckDB query failed: ${err instanceof Error ? err.message : String(err)}`, err);
            }
        }
        return this.execPrepared(sql, params);
    }
    /**
     * Run a parameterised statement via a prepared statement. Translates `?`
     * placeholders to `$N` and binds positional params with type sniffing.
     * The prepared statement is destroyed afterwards whether or not it ran.
     *
     * @throws {StagingError} when the store is not open or prepare/bind/run fails.
     */
    async execPrepared(sql, params) {
        if (!this.conn) {
            throw new StagingError('StagingStore is not open');
        }
        const translated = translatePlaceholders(sql);
        let prep = null;
        try {
            prep = await this.conn.prepare(translated);
            params.forEach((value, i) => {
                // Bind positions are 1-based.
                bindParam(prep, i + 1, value);
            });
            const reader = await prep.runAndReadAll();
            return reader.getRowObjectsJS();
        }
        catch (err) {
            throw new StagingError(`DuckDB query failed: ${err instanceof Error ? err.message : String(err)}`, err);
        }
        finally {
            try {
                prep?.destroySync();
            }
            catch {
                /* noop — destroy errors swallowed; the connection is already done with it */
            }
        }
    }
}
270
+ //# sourceMappingURL=store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"store.js","sourceRoot":"","sources":["../../src/staging/store.ts"],"names":[],"mappings":"AAAA,uCAAuC;AACvC,uCAAuC;AAEvC;;;;;;;;;GASG;AAEH,OAAO,EACL,cAAc,GAGf,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,mBAAmB,EAAE,UAAU,EAAmB,MAAM,aAAa,CAAC;AAW/E;;;;;;GAMG;AACH,SAAS,qBAAqB,CAAC,GAAW;IACxC,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;AAC7C,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,SAAS,CAAC,IAA6B,EAAE,GAAW,EAAE,KAAc;IAC3E,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QAC1C,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACnB,OAAO;IACT,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;QAC/B,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAC7B,OAAO;IACT,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAC5B,OAAO;IACT,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,aAAa,IAAI,KAAK,IAAI,aAAa,EAAE,CAAC;YACjF,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAC/B,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO;IACT,CAAC;IACD,4EAA4E;IAC5E,qEAAqE;IACrE,IAAI,KAAK,YAAY,IAAI,EAAE,CAAC;QAC1B,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;QAC3C,OAAO;IACT,CAAC;IACD,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,OAAO,YAAY;IAKM;IAJrB,QAAQ,GAA0B,IAAI,CAAC;IACvC,IAAI,GAA4B,IAAI,CAAC;IAE7C,2DAA2D;IAC3D,YAA6B,MAAc;QAAd,WAAM,GAAN,MAAM,CAAQ;IAAG,CAAC;IAE/C,KAAK,CAAC,IAAI;QACR,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAC1B,IAAI,CAAC;YACH,IAAI,CAAC,QAAQ,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACzD,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QAC5C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;YACrB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YACjB,MAAM,IAAI,YAAY,CACpB,4BAA4B,IAAI,CAAC,MAAM,KAAK,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,
CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC9F,GAAG,CACJ,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,IAAI,CAAC,QAAQ;YAAE,OAAO;QAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC/B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACrB,IAAI,CAAC;YACH,IAAI,EAAE,cAAc,EAAE,CAAC;YACvB,QAAQ,CAAC,SAAS,EAAE,CAAC;QACvB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,YAAY,CACpB,2BAA2B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC7E,GAAG,CACJ,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY,EAAE,OAAqB;QACnD,MAAM,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;IACtD,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,WAAW,CAAC,KAAa,EAAE,IAA+B;QAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAE,CAAC;QACvB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,MAAM,IAAI,YAAY,CAAC,mCAAmC,CAAC,CAAC;QAC9D,CAAC;QACD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,IAAI,CAAC,GAAG,CAAC,CAAC;QACV,MAAM,eAAe,GAAG,IAAI;aACzB,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;aACtD,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CACpD,CAAC;QACF,MAAM,GAAG,GAAG,eAAe,UAAU,CAAC,KAAK,CAAC,KAAK,OAAO,YAAY,eAAe,EAAE,CAAC;QACtF,MAAM,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAC3C,CAAC;IAED,KAAK,CAAC,KAAK,CAAI,GAAW,EAAE,SAAoB,EAAE;QAChD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QAC1C,OAAO,IAAW,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY;QAC5B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAC3B,0EAA0E,EAC1E,CAAC,IAAI,CAAC,CACP,CAAC;QACF,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QAC1B,OAAO,MAAM,CAAC,C
AAC,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY;QAC1B,MAAM,IAAI,CAAC,IAAI,CAAC,wBAAwB,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC9D,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,KAAa;QAC1B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAC3B,6BAA6B,UAAU,CAAC,KAAK,CAAC,EAAE,CACjD,CAAC;QACF,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,KAAa;QAC7B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAC3B,mGAAmG,EACnG,CAAC,KAAK,CAAC,CACR,CAAC;QACF,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IACxC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,WAAW,CAAC,KAAa,EAAE,UAAkB,EAAE,OAA4B;QAC/E,MAAM,KAAK,GAAG,OAAO,EAAE,SAAS,IAAI,GAAG,CAAC;QACxC,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,IAAI,CAAC;QACvC,MAAM,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACpE,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAC5C,MAAM,OAAO,GAAG;YACd,UAAU,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE;YACrC,cAAc,SAAS,GAAG;SAC3B,CAAC;QACF,IAAI,OAAO,EAAE,SAAS,KAAK,SAAS,EAAE,CAAC;YACrC,OAAO,CAAC,IAAI,CAAC,SAAS,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QAClE,CAAC;QACD,MAAM,IAAI,CAAC,IAAI,CACb,QAAQ,UAAU,CAAC,KAAK,CAAC,QAAQ,QAAQ,MAAM,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CACrE,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,aAAa,CAAC,SAAiB,EAAE,OAA+B;QACpE,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAE9C,MAAM,eAAe,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAC1D,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACrF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,MAAM,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,WAAW,EAAE,EAAE,gDAAgD,CAAC,CAAC;QAC5F,CAAC;QAED,MAAM,UAAU,GAAG,eAAe;aAC/B,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YACX,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;YAC7B,OAAO,OAAO,KAAK,SAAS;gBAC1B,CAAC,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,OAAO,UAAU,CAAC,OAAO,CAAC,EAAE;gBAChD,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC,CAAC;aA
CD,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,MAAM,IAAI,CAAC,IAAI,CACb,2BAA2B,UAAU,CAAC,SAAS,CAAC,cAAc,UAAU,SAAS,UAAU,CAAC,SAAS,CAAC,EAAE,CACzG,CAAC;IACJ,CAAC;IAED,0EAA0E;IAC1E,EAAE;IACF,2EAA2E;IAC3E,iFAAiF;IACjF,2EAA2E;IAE3E;;;OAGG;IACH,KAAK,CAAC,cAAc,CAAC,MAAc;QACjC,MAAM,IAAI,YAAY,CACpB,wFAAwF,CACzF,CAAC;IACJ,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,oBAAoB,CACxB,OAAe,EACf,KAA4B;QAE5B,MAAM,IAAI,YAAY,CACpB,8FAA8F,CAC/F,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,kBAAkB,CACtB,QAA8C;QAE9C,MAAM,IAAI,YAAY,CACpB,4FAA4F,CAC7F,CAAC;IACJ,CAAC;IAED,0EAA0E;IAE1E;;;;OAIG;IACK,KAAK,CAAC,IAAI,CAAC,GAAW,EAAE,SAAoB,EAAE;QACpD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,YAAY,CAAC,0BAA0B,CAAC,CAAC;QACrD,CAAC;QACD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBAClD,OAAO,MAAM,CAAC,eAAe,EAA+B,CAAC;YAC/D,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,IAAI,YAAY,CACpB,wBAAwB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC1E,GAAG,CACJ,CAAC;YACJ,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACxC,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,YAAY,CACxB,GAAW,EACX,MAAiB;QAEjB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,YAAY,CAAC,0BAA0B,CAAC,CAAC;QACrD,CAAC;QACD,MAAM,UAAU,GAAG,qBAAqB,CAAC,GAAG,CAAC,CAAC;QAC9C,IAAI,IAAI,GAAmC,IAAI,CAAC;QAChD,IAAI,CAAC;YACH,IAAI,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YAC3C,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;gBAC1B,SAAS,CAAC,IAAK,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,CAAC;YACjC,CAAC,CAAC,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;YAC1C,OAAO,MAAM,CAAC,eAAe,EAA+B,CAAC;QAC/D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,YAAY,CACpB,wBAAwB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC1E,GAAG,CACJ,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC;gBACH,IAAI,EAAE,WAAW,EAAE,CAAC;YACtB,CAAC;YAAC,MAAM,CAAC;gBACP,6EAA6E;YAC/E,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,2 @@
1
/**
 * Type declaration for `applyCleanse`, implemented in cleanse.js.
 *
 * `value` is the value to cleanse and `spec` is the pipe-separated list of
 * cleanse operations (for example `trim|uppercase`). The result is typed
 * `unknown` because the implementation returns `null`/`undefined` inputs
 * unchanged and otherwise a string, or `null` once a step produces `null`.
 */
+ export declare function applyCleanse(value: unknown, spec: string): unknown;
2
+ //# sourceMappingURL=cleanse.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cleanse.d.ts","sourceRoot":"","sources":["../../src/transform/cleanse.ts"],"names":[],"mappings":"AA+BA,wBAAgB,YAAY,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAkClE"}
@@ -0,0 +1,59 @@
1
+ // SPDX-License-Identifier: Elastic-2.0
2
+ // Copyright (c) 2026 Caracal Lynx Ltd.
3
+ /**
4
+ * Cleanse operations — pure string functions applied left-to-right through
5
+ * the pipe-separated `cleanse:` value on a field mapping.
6
+ *
7
+ * Each op is referenced by name in CLAUDE.md's cleanse operations table. Do
8
+ * NOT add new ops without updating that table.
9
+ */
10
+ import { TransformError } from '../utils/errors.js';
11
/**
 * Argument-free cleanse operations, keyed by the name used in a `cleanse:` spec.
 *
 * Each function receives the current string and returns the cleansed string;
 * `nullIfEmpty` is the only entry that may return `null` instead.
 */
const OPS = {
    // Strip leading and trailing whitespace.
    trim: (v) => v.trim(),
    uppercase: (v) => v.toUpperCase(),
    lowercase: (v) => v.toLowerCase(),
    // Upper-case the first character of every word and lower-case the rest.
    // A word starts at any Unicode letter, digit or underscore (a superset of
    // ASCII `\w`) and runs to the next whitespace, so "élan" becomes "Élan"
    // rather than "éLan". The first character is taken by code point so a
    // surrogate pair is never split in half.
    titleCase: (v) => v.replace(/[\p{L}\p{N}_]\S*/gu, (word) => {
        const first = String.fromCodePoint(word.codePointAt(0));
        return first.toUpperCase() + word.slice(first.length).toLowerCase();
    }),
    // Keep ASCII letters only.
    stripNonAlpha: (v) => v.replace(/[^a-zA-Z]/g, ''),
    // Keep ASCII digits only.
    stripNonNumeric: (v) => v.replace(/[^0-9]/g, ''),
    // Remove every whitespace character, not just leading/trailing ones.
    stripWhitespace: (v) => v.replace(/\s+/g, ''),
    // Turn the empty string into null; applyCleanse stops the chain on null.
    nullIfEmpty: (v) => (v === '' ? null : v),
    // Replace curly single/double quotes with their straight ASCII forms.
    normaliseQuotes: (v) => v.replace(/[\u2018\u2019]/g, "'").replace(/[\u201C\u201D]/g, '"'),
    // Decompose to NFD, then drop combining marks U+0300–U+036F. This removes
    // accents from decomposable letters; characters that have no canonical
    // decomposition are left unchanged, so the result is not guaranteed ASCII.
    normaliseUnicode: (v) => v.normalize('NFD').replace(/[\u0300-\u036f]/g, ''),
};
24
/**
 * Applies a pipe-separated chain of cleanse operations to a value.
 *
 * `null` and `undefined` are returned untouched. Any other value is converted
 * with `String()` and passed through each step of `spec` from left to right.
 * A step is either the name of an entry in `OPS` or one of the parameterised
 * steps `padStart:<width>[:<pad>]` and `truncate:<length>`. Empty steps are
 * skipped, and once a step yields `null` the remaining steps are not run.
 *
 * @param value - Value to cleanse.
 * @param spec - Pipe-separated steps, e.g. `trim|uppercase|padStart:6:0`.
 * @returns The cleansed string, `null` if a step produced `null`, or the
 *   original `null`/`undefined` input.
 * @throws {TransformError} If a step names an unknown operation, or if the
 *   `padStart` width / `truncate` length does not parse to a non-negative
 *   integer.
 */
export function applyCleanse(value, spec) {
    if (value === null || value === undefined)
        return value;
    let current = String(value);
    for (const step of spec.split('|')) {
        // An earlier step (nullIfEmpty) produced null: nothing left to cleanse.
        if (current === null)
            return null;
        const [name, ...args] = step.split(':');
        if (!name)
            continue;
        if (name === 'padStart') {
            // A missing width defaults to 0 (no padding); the pad defaults to a space.
            const width = Number.parseInt(args[0] ?? '0', 10);
            const pad = args[1] ?? ' ';
            if (!Number.isFinite(width) || width < 0) {
                throw new TransformError(`padStart requires a non-negative width, got "${args[0]}"`);
            }
            current = current.padStart(width, pad);
            continue;
        }
        if (name === 'truncate') {
            const len = Number.parseInt(args[0] ?? '0', 10);
            if (!Number.isFinite(len) || len < 0) {
                throw new TransformError(`truncate requires a non-negative length, got "${args[0]}"`);
            }
            current = current.slice(0, len);
            continue;
        }
        // Resolve the name as an OWN property only. A plain `OPS[name]` would also
        // find inherited Object.prototype members, so a spec such as `toString`,
        // `constructor` or `__proto__` would run an unrelated function (or crash
        // with a raw TypeError) instead of being reported as unknown.
        const op = Object.prototype.hasOwnProperty.call(OPS, name) ? OPS[name] : undefined;
        if (!op) {
            throw new TransformError(`Unknown cleanse operation: "${name}"`);
        }
        current = op(current);
    }
    return current;
}
59
+ //# sourceMappingURL=cleanse.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cleanse.js","sourceRoot":"","sources":["../../src/transform/cleanse.ts"],"names":[],"mappings":"AAAA,uCAAuC;AACvC,uCAAuC;AAEvC;;;;;;GAMG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAIpD,MAAM,GAAG,GAA8B;IACrC,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE;IACrB,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE;IACjC,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE;IACjC,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CACf,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IAClF,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;IACjD,eAAe,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;IAChD,eAAe,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAC7C,WAAW,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACzC,eAAe,EAAE,CAAC,CAAC,EAAE,EAAE,CACrB,CAAC,CAAC,OAAO,CAAC,iBAAiB,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,iBAAiB,EAAE,GAAG,CAAC;IACnE,iDAAiD;IACjD,gBAAgB,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC;CAC5E,CAAC;AAEF,MAAM,UAAU,YAAY,CAAC,KAAc,EAAE,IAAY;IACvD,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IACxD,IAAI,OAAO,GAAkB,MAAM,CAAC,KAAK,CAAC,CAAC;IAE3C,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,IAAI,OAAO,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC;QAClC,MAAM,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACxC,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;YAClD,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;YAC3B,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACzC,MAAM,IAAI,cAAc,CAAC,gDAAgD,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACvF,CAAC;YACD,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YACvC,SAAS;QACX,C
AAC;QACD,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;YACxB,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;YAChD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC;gBACrC,MAAM,IAAI,cAAc,CAAC,iDAAiD,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACxF,CAAC;YACD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YAChC,SAAS;QACX,CAAC;QAED,MAAM,EAAE,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;QACrB,IAAI,CAAC,EAAE,EAAE,CAAC;YACR,MAAM,IAAI,cAAc,CAAC,+BAA+B,IAAI,GAAG,CAAC,CAAC;QACnE,CAAC;QACD,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,10 @@
1
+ import { TransformRegistry } from '../plugins/registry.js';
2
+ import type { Pipeline } from '../config/types.js';
3
+ import type { StagingStore } from '../staging/index.js';
4
+ import type { TransformResult } from './types.js';
5
/**
 * Type declaration for the `TransformEngine` class.
 *
 * Only the public shape is visible here: an optional `TransformRegistry`
 * passed to the constructor and a single asynchronous `run` method.
 */
+ export declare class TransformEngine {
6
+     private readonly transforms;
7
// `transforms` is optional; the default used when it is omitted is not visible in this declaration.
+     constructor(transforms?: TransformRegistry);
8
// Takes the pipeline config and a staging store and resolves to a TransformResult.
// `sourceTable` / `targetTable` are optional table names (their defaults are not shown here).
// `onProgress` receives a row count — presumably rows processed so far; confirm in engine.js.
+     run(config: Pipeline, store: StagingStore, sourceTable?: string, targetTable?: string, onProgress?: (rows: number) => void): Promise<TransformResult>;
9
+ }
10
+ //# sourceMappingURL=engine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../../src/transform/engine.ts"],"names":[],"mappings":"AAkBA,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAE3D,OAAO,KAAK,EAAgB,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,KAAK,EAAc,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAOpE,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAIlD,qBAAa,eAAe;IACd,OAAO,CAAC,QAAQ,CAAC,UAAU;gBAAV,UAAU,GAAE,iBAA2C;IAE9E,GAAG,CACP,MAAM,EAAE,QAAQ,EAChB,KAAK,EAAE,YAAY,EACnB,WAAW,SAAY,EACvB,WAAW,SAAoB,EAC/B,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,GAClC,OAAO,CAAC,eAAe,CAAC;CAgE5B"}