@murumets-ee/imports 0.12.0 → 0.13.1

This diff compares the contents of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the versions exactly as they appear in that registry.
package/dist/index.d.mts CHANGED
@@ -1,3 +1,3 @@
1
- import { C as IMPORT_RUN_STATUSES, E as ImportRunStatus, S as ErrorTrackerConfig, T as ImportRunClient, _ as DEFAULT_MAX_PATTERNS, b as ErrorSample, d as RunImportOptions, f as RunImportResult, g as streamFeed, h as StreamFeedRow, l as DEFAULT_BATCH_SIZE, m as StreamFeedOptions, n as FilePathResolver, p as runImport, t as EsClientResolver, u as ImportRunProgress, v as DEFAULT_MAX_SAMPLES_PER_PATTERN, w as ImportRun, x as ErrorTracker, y as ErrorPattern } from "./worker-DerGVTSI.mjs";
2
- import { a as TransformName, c as registerImportTransform, i as TransformContext, n as RowResult, o as TransformRegistry, r as RowTransform, s as getTransformRegistry, t as RowError } from "./transform-D_uhdLeo.mjs";
3
- export { DEFAULT_BATCH_SIZE, DEFAULT_MAX_PATTERNS, DEFAULT_MAX_SAMPLES_PER_PATTERN, type ErrorPattern, type ErrorSample, ErrorTracker, type ErrorTrackerConfig, type EsClientResolver, type FilePathResolver, IMPORT_RUN_STATUSES, ImportRun, type ImportRunClient, type ImportRunProgress, type ImportRunStatus, type RowError, type RowResult, type RowTransform, type RunImportOptions, type RunImportResult, type StreamFeedOptions, type StreamFeedRow, type TransformContext, type TransformName, TransformRegistry, getTransformRegistry, registerImportTransform, runImport, streamFeed };
1
+ import { C as ErrorSample, D as ImportRun, E as IMPORT_RUN_STATUSES, O as ImportRunClient, S as ErrorPattern, T as ErrorTrackerConfig, _ as StreamFeedRow, b as DEFAULT_MAX_PATTERNS, d as DEFAULT_BATCH_SIZE, f as ImportRunProgress, g as StreamFeedOptions, h as runImport, k as ImportRunStatus, m as RunImportResult, n as FilePathResolver, p as RunImportOptions, t as EsClientResolver, v as countDataRows, w as ErrorTracker, x as DEFAULT_MAX_SAMPLES_PER_PATTERN, y as streamFeed } from "./worker-B7ADOFEV.mjs";
2
+ import { a as TransformName, c as registerImportTransform, i as TransformContext, n as RowResult, o as TransformRegistry, r as RowTransform, s as getTransformRegistry, t as RowError } from "./transform-CqCKV88O.mjs";
3
+ export { DEFAULT_BATCH_SIZE, DEFAULT_MAX_PATTERNS, DEFAULT_MAX_SAMPLES_PER_PATTERN, type ErrorPattern, type ErrorSample, ErrorTracker, type ErrorTrackerConfig, type EsClientResolver, type FilePathResolver, IMPORT_RUN_STATUSES, ImportRun, type ImportRunClient, type ImportRunProgress, type ImportRunStatus, type RowError, type RowResult, type RowTransform, type RunImportOptions, type RunImportResult, type StreamFeedOptions, type StreamFeedRow, type TransformContext, type TransformName, TransformRegistry, countDataRows, getTransformRegistry, registerImportTransform, runImport, streamFeed };
package/dist/index.mjs CHANGED
@@ -1 +1 @@
1
- import{a as e,i as t,n,r,t as i}from"./transform-BUGBTotp.mjs";import{a,i as o,n as s,o as c,r as l,t as u}from"./runner-DdhiNybk.mjs";export{u as DEFAULT_BATCH_SIZE,o as DEFAULT_MAX_PATTERNS,a as DEFAULT_MAX_SAMPLES_PER_PATTERN,c as ErrorTracker,t as IMPORT_RUN_STATUSES,e as ImportRun,i as TransformRegistry,n as getTransformRegistry,r as registerImportTransform,s as runImport,l as streamFeed};
1
+ import{a as e,i as t,n,r,t as i}from"./transform-BUGBTotp.mjs";import{a,i as o,n as s,o as c,r as l,s as u,t as d}from"./runner-D9FtnIBn.mjs";export{d as DEFAULT_BATCH_SIZE,a as DEFAULT_MAX_PATTERNS,c as DEFAULT_MAX_SAMPLES_PER_PATTERN,u as ErrorTracker,t as IMPORT_RUN_STATUSES,e as ImportRun,i as TransformRegistry,l as countDataRows,n as getTransformRegistry,r as registerImportTransform,s as runImport,o as streamFeed};
package/dist/plugin.d.mts CHANGED
@@ -1,4 +1,5 @@
1
- import { n as FilePathResolver, t as EsClientResolver } from "./worker-DerGVTSI.mjs";
1
+ import { n as FilePathResolver, t as EsClientResolver } from "./worker-B7ADOFEV.mjs";
2
+ import { ImportsStorageFactory } from "./storage.mjs";
2
3
  import { Plugin } from "@murumets-ee/core";
3
4
 
4
5
  //#region src/plugin.d.ts
@@ -31,6 +32,24 @@ interface ImportsPluginOptions {
31
32
  * the absolute path on `import_run.filePath`.
32
33
  */
33
34
  resolveFilePath?: FilePathResolver;
35
+ /**
36
+ * Optional dedicated `StorageClient` factory for imports. When
37
+ * provided, the upload route (in `@murumets-ee/commerce`) and the
38
+ * worker resolver consult this client instead of the global
39
+ * `getStorageConfig()` default — so a deployment can pin imports
40
+ * to a local-disk adapter (single-VPS / dev) while media,
41
+ * ticketing attachments, and the rest of the app keep using the
42
+ * default R2 (or S3) adapter.
43
+ *
44
+ * Lazy / async so consumers can defer adapter construction until
45
+ * the first import — useful when the local-disk root is
46
+ * environment-dependent or when the factory needs `getApp()` to
47
+ * have resolved.
48
+ *
49
+ * Omit to keep the legacy single-storage behaviour (everything on
50
+ * the global default).
51
+ */
52
+ storage?: ImportsStorageFactory;
34
53
  }
35
54
  declare function imports(options: ImportsPluginOptions): Plugin;
36
55
  //#endregion
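
The new `storage` option documented above slots into the existing `imports()` plugin configuration. A sketch only: the zero-argument async factory shape is inferred from the "Lazy / async" wording in the JSDoc, and the local-disk adapter constructor, env var name, and `@murumets-ee/queue` import path are assumptions, not part of this diff.

import { defineLumiConfig } from '@murumets-ee/core'
import { imports } from '@murumets-ee/imports/plugin'
import { queue } from '@murumets-ee/queue' // import path assumed
import { Client } from '@elastic/elasticsearch'

// HYPOTHETICAL: stands in for whatever local-disk StorageClient adapter the
// deployment provides; not an export confirmed by this diff.
declare function createLocalDiskStorage(opts: { root: string }): unknown

const es = new Client({ node: process.env.ES_URL })

export default defineLumiConfig({
  plugins: [
    queue(),
    imports({
      esClient: () => es,
      esIndex: 'parts',
      // New in this diff: dedicated, lazily constructed storage for import
      // uploads while media/ticketing keep the global R2/S3 default.
      storage: async () => createLocalDiskStorage({ root: process.env.IMPORTS_DIR ?? '/var/lib/imports' }),
    }),
  ],
})

Because the factory is registered at `imports()` call time rather than inside the `init` hook (see the plugin.mjs change below), it is already visible to the upload route before the first request arrives.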
@@ -1 +1 @@
1
- {"version":3,"file":"plugin.d.mts","names":[],"sources":["../src/plugin.ts"],"mappings":";;;;UAuCiB,oBAAA;;;;;;EAMf,QAAA,EAAU,gBAAA;;;;;;;EAOV,OAAA;;;;;;;;;;;;;;;EAeA,eAAA,GAAkB,gBAAA;AAAA;AAAA,iBAGJ,OAAA,CAAQ,OAAA,EAAS,oBAAA,GAAuB,MAAA"}
1
+ {"version":3,"file":"plugin.d.mts","names":[],"sources":["../src/plugin.ts"],"mappings":";;;;;UA+DiB,oBAAA;;;;;;EAMf,QAAA,EAAU,gBAAA;;;;;;;EAOV,OAAA;;;;;;;;;;;;;;;EAeA,eAAA,GAAkB,gBAAA;;;;;;;;;;;;;;;;;;EAkBlB,OAAA,GAAU,qBAAA;AAAA;AAAA,iBAGI,OAAA,CAAQ,OAAA,EAAS,oBAAA,GAAuB,MAAA"}
package/dist/plugin.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import{a as e,n as t}from"./transform-BUGBTotp.mjs";import{createRunImportHandler as n,importsRunJob as r}from"./worker.mjs";import{createAdminClient as i}from"@murumets-ee/core/clients";function a(a){let o=a.esIndex??`parts`;return{name:`@murumets-ee/imports`,server:{entities:[e],init:async s=>{if(!s.plugins.all().some(e=>e.name===`@murumets-ee/queue`)){s.logger.warn(`imports: queue() plugin not in plugins array — imports:run jobs will not be processed`);return}let c=await import(`@murumets-ee/queue/client`),l=i(e,s),u=t();c.registerJob(r,n({importRuns:l,transforms:u,esClient:a.esClient,esIndex:o,...a.resolveFilePath!==void 0&&{resolveFilePath:a.resolveFilePath},logger:s.logger.child({pkg:`imports`})})),s.logger.info({esIndex:o},`Imports plugin initialized`)}}}}export{a as imports};
1
+ import{a as e,n as t}from"./transform-BUGBTotp.mjs";import{registerImportsStorageFactory as n}from"./storage.mjs";import{createImportsRunDeadListener as r,createRunImportHandler as i,importsRunJob as a}from"./worker.mjs";const o=Symbol.for(`@murumets-ee/imports:dead-listener-unsubscribe`);function s(s){let c=s.esIndex??`parts`;return s.storage&&n(s.storage),{name:`@murumets-ee/imports`,server:{entities:[e],init:async n=>{if(!n.plugins.all().some(e=>e.name===`@murumets-ee/queue`)){n.logger.warn(`imports: queue() plugin not in plugins array — imports:run jobs will not be processed`);return}let l=await import(`@murumets-ee/queue/client`),{createAdminClient:u}=await import(`@murumets-ee/core/clients`),d=u(e,n),f=t();l.registerJob(a,i({importRuns:d,transforms:f,esClient:s.esClient,esIndex:c,...s.resolveFilePath!==void 0&&{resolveFilePath:s.resolveFilePath},logger:n.logger.child({pkg:`imports`})}));let p=globalThis[o];p&&p();let m=l.addQueueTerminalListener(r({importRuns:d,logger:n.logger.child({pkg:`imports`})}));globalThis[o]=m,n.logger.info({esIndex:c},`Imports plugin initialized`)}}}}export{s as imports};
2
2
  //# sourceMappingURL=plugin.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"plugin.mjs","names":[],"sources":["../src/plugin.ts"],"sourcesContent":["/**\n * Imports plugin — registers the `import_run` entity and (when ES + queue\n * are wired) registers the `imports:run` job handler.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope): plumbing only. The bespoke\n * admin pages (upload form, parts search) live in PR 8a. Per-feed\n * transforms (carmaker, …) live in PR 8 and self-register against the\n * process-global transform registry — this plugin's init does NOT\n * import any transform.\n *\n * @example\n * ```ts\n * import { defineLumiConfig } from '@murumets-ee/core'\n * import { imports } from '@murumets-ee/imports/plugin'\n * import { Client } from '@elastic/elasticsearch'\n *\n * const es = new Client({ node: process.env.ES_URL })\n *\n * export default defineLumiConfig({\n * plugins: [\n * // queue() and the carmaker-transform plugin must be present too.\n * queue(),\n * imports({ esClient: () => es, esIndex: 'parts' }),\n * ],\n * })\n * ```\n */\n\nimport type { Plugin } from '@murumets-ee/core'\nimport { createAdminClient } from '@murumets-ee/core/clients'\nimport { ImportRun } from './entities/import-run.js'\nimport {\n type EsClientResolver,\n type FilePathResolver,\n createRunImportHandler,\n importsRunJob,\n} from './worker.js'\nimport { getTransformRegistry } from './transform.js'\n\nexport interface ImportsPluginOptions {\n /**\n * Elasticsearch client resolver. Lazy so consumers can construct\n * the client inside a route initialiser instead of at plugin-init\n * time. Required — without it, the worker can't bulk-write.\n */\n esClient: EsClientResolver\n /**\n * ES index alias to write to. Defaults to `'parts'` (the\n * `PARTS_INDEX_ALIAS` re-exported from `@murumets-ee/search-elasticsearch`,\n * inlined here to avoid a runtime import for a one-line constant).\n * Per D6 always pass an alias, never a physical index.\n */\n esIndex?: string\n /**\n * Resolve `import_run.filePath` to a local FS path before streaming.\n * **Required when uploads land in remote storage** (R2/S3/etc.) —\n * without it, the worker hands the storage key to `createReadStream`\n * and crashes with `ENOENT`.\n *\n * Typical wiring downloads the storage object to a tmpfile and\n * returns its path; the optional `cleanup` callback runs after the\n * run finishes (success or failure).\n *\n * Omit this option for the original on-disk PoC setup where the\n * upload route writes directly to a local directory and persists\n * the absolute path on `import_run.filePath`.\n */\n resolveFilePath?: FilePathResolver\n}\n\nexport function imports(options: ImportsPluginOptions): Plugin {\n const esIndex = options.esIndex ?? 'parts'\n\n return {\n name: '@murumets-ee/imports',\n server: {\n entities: [ImportRun],\n init: async (app) => {\n // Queue PACKAGE being importable doesn't mean the queue() PLUGIN\n // is in the consumer's plugins array. Without it, registerJob()\n // succeeds but no worker ever picks up the job. 
Probe and warn.\n const queuePluginPresent = app.plugins\n .all()\n .some((p) => p.name === '@murumets-ee/queue')\n if (!queuePluginPresent) {\n app.logger.warn(\n 'imports: queue() plugin not in plugins array — imports:run jobs will not be processed',\n )\n return\n }\n\n // Queue client stays dynamic-imported: we already early-returned\n // above when the queue() plugin isn't in the consumer's plugins\n // array, so static-importing here would force-load the queue\n // client module in deployments that don't enable the worker.\n const queueClientModule = await import('@murumets-ee/queue/client')\n\n const importRuns = createAdminClient(ImportRun, app)\n const transforms = getTransformRegistry()\n\n queueClientModule.registerJob(\n importsRunJob,\n createRunImportHandler({\n importRuns,\n transforms,\n esClient: options.esClient,\n esIndex,\n ...(options.resolveFilePath !== undefined && {\n resolveFilePath: options.resolveFilePath,\n }),\n logger: app.logger.child({ pkg: 'imports' }),\n }),\n )\n\n app.logger.info({ esIndex }, 'Imports plugin initialized')\n },\n },\n }\n}\n"],"mappings":"2LAsEA,SAAgB,EAAQ,EAAuC,CAC7D,IAAM,EAAU,EAAQ,SAAW,QAEnC,MAAO,CACL,KAAM,uBACN,OAAQ,CACN,SAAU,CAAC,EAAU,CACrB,KAAM,KAAO,IAAQ,CAOnB,GAAI,CAHuB,EAAI,QAC5B,KAAK,CACL,KAAM,GAAM,EAAE,OAAS,qBACH,CAAE,CACvB,EAAI,OAAO,KACT,wFACD,CACD,OAOF,IAAM,EAAoB,MAAM,OAAO,6BAEjC,EAAa,EAAkB,EAAW,EAAI,CAC9C,EAAa,GAAsB,CAEzC,EAAkB,YAChB,EACA,EAAuB,CACrB,aACA,aACA,SAAU,EAAQ,SAClB,UACA,GAAI,EAAQ,kBAAoB,IAAA,IAAa,CAC3C,gBAAiB,EAAQ,gBAC1B,CACD,OAAQ,EAAI,OAAO,MAAM,CAAE,IAAK,UAAW,CAAC,CAC7C,CAAC,CACH,CAED,EAAI,OAAO,KAAK,CAAE,UAAS,CAAE,6BAA6B,EAE7D,CACF"}
1
+ {"version":3,"file":"plugin.mjs","names":[],"sources":["../src/plugin.ts"],"sourcesContent":["/**\n * Imports plugin — registers the `import_run` entity and (when ES + queue\n * are wired) registers the `imports:run` job handler.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope): plumbing only. The bespoke\n * admin pages (upload form, parts search) live in PR 8a. Per-feed\n * transforms (carmaker, …) live in PR 8 and self-register against the\n * process-global transform registry — this plugin's init does NOT\n * import any transform.\n *\n * @example\n * ```ts\n * import { defineLumiConfig } from '@murumets-ee/core'\n * import { imports } from '@murumets-ee/imports/plugin'\n * import { Client } from '@elastic/elasticsearch'\n *\n * const es = new Client({ node: process.env.ES_URL })\n *\n * export default defineLumiConfig({\n * plugins: [\n * // queue() and the carmaker-transform plugin must be present too.\n * queue(),\n * imports({ esClient: () => es, esIndex: 'parts' }),\n * ],\n * })\n * ```\n */\n\nimport type { Plugin } from '@murumets-ee/core'\n// `@murumets-ee/core/clients` is dynamic-imported inside `init` below —\n// its module begins with `import 'server-only'`, which throws when this\n// plugin module is loaded at `lumi.config.ts` evaluation time by the\n// CLI (`lumi migrate`/`seed`/`doctor` use jiti, which doesn't apply the\n// `react-server` export condition). Static-importing here would propagate\n// that throw all the way through `lumi.config.ts` → CLI startup, blocking\n// every CLI invocation. Same documented pattern as `getDb()` in\n// `@murumets-ee/queue/admin.ts`. The `init` hook only runs at\n// `createApp()` time (Next.js request init / worker boot), where the\n// `server-only` resolves to its empty stub.\nimport { ImportRun } from './entities/import-run.js'\nimport { type ImportsStorageFactory, registerImportsStorageFactory } from './storage.js'\nimport {\n type EsClientResolver,\n type FilePathResolver,\n createRunImportHandler,\n createImportsRunDeadListener,\n importsRunJob,\n} from './worker.js'\nimport { getTransformRegistry } from './transform.js'\n\n/**\n * `Symbol.for`-keyed slot for the active dead-listener unsubscribe\n * handle. Symbol.for so an HMR-replaced module instance still finds\n * the previous unsubscribe (`Symbol`-by-reference would skip the\n * lookup across module-evaluation boundaries). Same pattern as\n * `transform.ts`'s registry slot and `storage.ts`'s factory slot.\n */\nconst IMPORTS_DEAD_LISTENER_KEY = Symbol.for('@murumets-ee/imports:dead-listener-unsubscribe')\n\ninterface GlobalThisWithUnsubscribe {\n [IMPORTS_DEAD_LISTENER_KEY]?: () => void\n}\n\nexport interface ImportsPluginOptions {\n /**\n * Elasticsearch client resolver. Lazy so consumers can construct\n * the client inside a route initialiser instead of at plugin-init\n * time. Required — without it, the worker can't bulk-write.\n */\n esClient: EsClientResolver\n /**\n * ES index alias to write to. Defaults to `'parts'` (the\n * `PARTS_INDEX_ALIAS` re-exported from `@murumets-ee/search-elasticsearch`,\n * inlined here to avoid a runtime import for a one-line constant).\n * Per D6 always pass an alias, never a physical index.\n */\n esIndex?: string\n /**\n * Resolve `import_run.filePath` to a local FS path before streaming.\n * **Required when uploads land in remote storage** (R2/S3/etc.) 
—\n * without it, the worker hands the storage key to `createReadStream`\n * and crashes with `ENOENT`.\n *\n * Typical wiring downloads the storage object to a tmpfile and\n * returns its path; the optional `cleanup` callback runs after the\n * run finishes (success or failure).\n *\n * Omit this option for the original on-disk PoC setup where the\n * upload route writes directly to a local directory and persists\n * the absolute path on `import_run.filePath`.\n */\n resolveFilePath?: FilePathResolver\n /**\n * Optional dedicated `StorageClient` factory for imports. When\n * provided, the upload route (in `@murumets-ee/commerce`) and the\n * worker resolver consult this client instead of the global\n * `getStorageConfig()` default — so a deployment can pin imports\n * to a local-disk adapter (single-VPS / dev) while media,\n * ticketing attachments, and the rest of the app keep using the\n * default R2 (or S3) adapter.\n *\n * Lazy / async so consumers can defer adapter construction until\n * the first import — useful when the local-disk root is\n * environment-dependent or when the factory needs `getApp()` to\n * have resolved.\n *\n * Omit to keep the legacy single-storage behaviour (everything on\n * the global default).\n */\n storage?: ImportsStorageFactory\n}\n\nexport function imports(options: ImportsPluginOptions): Plugin {\n const esIndex = options.esIndex ?? 'parts'\n\n // Register the storage factory eagerly — runs at `imports()` factory\n // call time during `lumi.config.ts` evaluation, NOT inside the\n // `init` hook. The hook only fires at `createApp()` time (Next.js\n // request boot / worker boot), but the factory needs to be visible\n // to consumers that read it at module-load (e.g. the commerce\n // route's lazy `getStorageClient` thunk closes over a value that\n // resolves the moment a request comes in). Late registration in\n // `init` would race the first request.\n if (options.storage) {\n registerImportsStorageFactory(options.storage)\n }\n\n return {\n name: '@murumets-ee/imports',\n server: {\n entities: [ImportRun],\n init: async (app) => {\n // Queue PACKAGE being importable doesn't mean the queue() PLUGIN\n // is in the consumer's plugins array. Without it, registerJob()\n // succeeds but no worker ever picks up the job. 
Probe and warn.\n const queuePluginPresent = app.plugins\n .all()\n .some((p) => p.name === '@murumets-ee/queue')\n if (!queuePluginPresent) {\n app.logger.warn(\n 'imports: queue() plugin not in plugins array — imports:run jobs will not be processed',\n )\n return\n }\n\n // Queue client stays dynamic-imported: we already early-returned\n // above when the queue() plugin isn't in the consumer's plugins\n // array, so static-importing here would force-load the queue\n // client module in deployments that don't enable the worker.\n const queueClientModule = await import('@murumets-ee/queue/client')\n const { createAdminClient } = await import('@murumets-ee/core/clients')\n\n const importRuns = createAdminClient(ImportRun, app)\n const transforms = getTransformRegistry()\n\n queueClientModule.registerJob(\n importsRunJob,\n createRunImportHandler({\n importRuns,\n transforms,\n esClient: options.esClient,\n esIndex,\n ...(options.resolveFilePath !== undefined && {\n resolveFilePath: options.resolveFilePath,\n }),\n logger: app.logger.child({ pkg: 'imports' }),\n }),\n )\n\n // Subscribe to the queue's terminal-state event stream so a\n // worker that crashes mid-import — and whose `imports:run` job\n // ultimately reaches `dead` without the handler reaching its\n // own `markFailed` path — flips the mirror `import_run` row to\n // `failed`. This replaces what would otherwise be a per-domain\n // periodic sweep; the queue's terminal notifier is the generic\n // primitive (graphile-worker's `job:failed` event, BullMQ's\n // `failed` listener — same shape).\n //\n // The listener is multi-subscriber, so we coexist with the\n // notifications plugin's admin-email handler without either of\n // us clobbering the other. Filtering on `event.jobType` keeps\n // this listener cheap on jobs we don't own.\n //\n // **HMR**: each re-init of this `init` hook constructs a new\n // listener instance. The notifier registry stores listeners\n // by reference, so without explicit cleanup an HMR cycle would\n // STACK duplicate listeners — `findMany`+`update` per dead\n // event growing linearly with the number of HMR rounds. 
Stash\n // the unsubscribe on a globalThis Symbol-keyed slot (same\n // pattern as `transform.ts` / `storage.ts`) and call it before\n // re-subscribing so HMR is idempotent.\n const previousUnsubscribe = (globalThis as GlobalThisWithUnsubscribe)[\n IMPORTS_DEAD_LISTENER_KEY\n ]\n if (previousUnsubscribe) previousUnsubscribe()\n const unsubscribe = queueClientModule.addQueueTerminalListener(\n createImportsRunDeadListener({\n importRuns,\n logger: app.logger.child({ pkg: 'imports' }),\n }),\n )\n ;(globalThis as GlobalThisWithUnsubscribe)[IMPORTS_DEAD_LISTENER_KEY] = unsubscribe\n\n app.logger.info({ esIndex }, 'Imports plugin initialized')\n },\n },\n }\n}\n"],"mappings":"6NAyDA,MAAM,EAA4B,OAAO,IAAI,iDAAiD,CAuD9F,SAAgB,EAAQ,EAAuC,CAC7D,IAAM,EAAU,EAAQ,SAAW,QAcnC,OAJI,EAAQ,SACV,EAA8B,EAAQ,QAAQ,CAGzC,CACL,KAAM,uBACN,OAAQ,CACN,SAAU,CAAC,EAAU,CACrB,KAAM,KAAO,IAAQ,CAOnB,GAAI,CAHuB,EAAI,QAC5B,KAAK,CACL,KAAM,GAAM,EAAE,OAAS,qBACH,CAAE,CACvB,EAAI,OAAO,KACT,wFACD,CACD,OAOF,IAAM,EAAoB,MAAM,OAAO,6BACjC,CAAE,qBAAsB,MAAM,OAAO,6BAErC,EAAa,EAAkB,EAAW,EAAI,CAC9C,EAAa,GAAsB,CAEzC,EAAkB,YAChB,EACA,EAAuB,CACrB,aACA,aACA,SAAU,EAAQ,SAClB,UACA,GAAI,EAAQ,kBAAoB,IAAA,IAAa,CAC3C,gBAAiB,EAAQ,gBAC1B,CACD,OAAQ,EAAI,OAAO,MAAM,CAAE,IAAK,UAAW,CAAC,CAC7C,CAAC,CACH,CAwBD,IAAM,EAAuB,WAC3B,GAEE,GAAqB,GAAqB,CAC9C,IAAM,EAAc,EAAkB,yBACpC,EAA6B,CAC3B,aACA,OAAQ,EAAI,OAAO,MAAM,CAAE,IAAK,UAAW,CAAC,CAC7C,CAAC,CACH,CACC,WAAyC,GAA6B,EAExE,EAAI,OAAO,KAAK,CAAE,UAAS,CAAE,6BAA6B,EAE7D,CACF"}
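
The plugin.mjs change above also introduces an HMR guard for the new dead-listener: the unsubscribe handle lives in a `Symbol.for`-keyed `globalThis` slot so a re-run of `init` tears down the previous subscription before adding a new one. Distilled into a generic sketch (names are illustrative, nothing below is a package export):

// Generic sketch of the HMR-idempotent subscription pattern used above.
// `addListener` stands in for addQueueTerminalListener.
const SLOT = Symbol.for('example:listener-unsubscribe')

interface GlobalWithSlot {
  [SLOT]?: () => void
}

function subscribeOnce(
  addListener: (listener: (event: unknown) => void) => () => void,
  onEvent: (event: unknown) => void,
): void {
  const g = globalThis as GlobalWithSlot
  // Unsubscribe whatever a previous module instance registered (HMR re-init)...
  g[SLOT]?.()
  // ...then re-subscribe and stash the new unsubscribe handle in the shared slot.
  g[SLOT] = addListener(onEvent)
}

Using `Symbol.for` rather than a plain `Symbol` is what lets a hot-reloaded module instance find the slot written by its predecessor.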
@@ -0,0 +1,2 @@
1
+ import{createLogger as e}from"@murumets-ee/logging";const t={runCreated:`imports.run.created`,runStarted:`imports.run.started`,runSucceeded:`imports.run.succeeded`,runFailed:`imports.run.failed`},n=e({name:`imports:realtime`}),r={admin:!0};let i=null;function a(e,t){e instanceof Error&&e.name===`RealtimeOversizeError`?n.error({err:e,topic:t},`realtime publish failed — payload oversize`):n.warn({err:e,topic:t},`realtime publish failed — event dropped`)}function o(e,t){if(i){try{i({topic:e,payload:t,scope:r})}catch(t){a(t,e)}return}import(`@murumets-ee/core/realtime`).then(n=>{i=n.publishEvent,i({topic:e,payload:t,scope:r})}).catch(t=>{a(t,e)})}async function s(){i||=(await import(`@murumets-ee/core/realtime`)).publishEvent}function c(e,n){o(t.runCreated,{importRunId:e,...n.brandSlug!==void 0&&{brandSlug:n.brandSlug},...n.supplierDisplayName!==void 0&&{supplierDisplayName:n.supplierDisplayName},filename:n.filename})}function l(e){o(t.runStarted,{importRunId:e})}function u(e,n){o(t.runSucceeded,{importRunId:e,rowsRead:n.rowsRead,rowsSucceeded:n.rowsSucceeded,rowsFailed:n.rowsFailed})}function d(e,n){o(t.runFailed,{importRunId:e,reason:n})}export{u as a,l as i,c as n,t as o,d as r,s as t};
2
+ //# sourceMappingURL=publish-Dw2vnCoo.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"publish-Dw2vnCoo.mjs","names":[],"sources":["../src/realtime/topics.ts","../src/realtime/publish.ts"],"sourcesContent":["/**\n * Imports realtime topic registry.\n *\n * Topics use the `imports.run.<event>` namespace. Subscribers listen\n * via `imports.run.**` for everything or narrow per event.\n *\n * Payload shapes are intentionally untyped here — callers pass\n * `Record<string, unknown>` and consumers (the imports admin page)\n * read fields by name. The point of this layer is \"an import_run\n * row's status changed, refetch\" rather than diffing event payloads\n * against the page's snapshot.\n *\n * Why a separate namespace from `queue.job.*`: queue events are\n * job-shaped (id, type, attempts, …) and admin-only. Imports events\n * are row-shaped (importRunId, status, brand, supplier, …) and\n * track the entity lifecycle independent of the underlying queue\n * mechanics — a future refactor could swap the queue for a different\n * driver without changing the imports event surface.\n */\n\nexport const IMPORTS_TOPICS = {\n /**\n * The upload route inserted an `import_run` row + enqueued its\n * driving queue job. Fired AFTER the outbox txn commits, so a\n * subscriber's refetch sees the row.\n */\n runCreated: 'imports.run.created',\n /**\n * The worker claimed the row and transitioned its status to\n * `running` — the streaming reader is open, the first batch is\n * about to bulk-write. Distinct from `queue.job.claimed` because\n * imports' state machine flips on the row, not the job.\n */\n runStarted: 'imports.run.started',\n /**\n * The worker finished cleanly — `import_run.status = 'succeeded'`,\n * `errorSummary` populated, totals locked in. Subscribers refetch\n * the history slice to bring the row out of the Active panel and\n * into Recent.\n */\n runSucceeded: 'imports.run.succeeded',\n /**\n * The worker (or `markFailed`) wrote `import_run.status = 'failed'`\n * with an error in `errorSummary`. Same refetch trigger; the page\n * differentiates terminal-success vs terminal-failure by reading\n * the row.\n */\n runFailed: 'imports.run.failed',\n} as const\n\nexport type ImportsTopic = (typeof IMPORTS_TOPICS)[keyof typeof IMPORTS_TOPICS]\n","/**\n * Typed publisher wrappers around `@murumets-ee/core/realtime.publishEvent`\n * for the `imports.run.*` topic surface.\n *\n * Like queue events, every imports event is admin-only — a future\n * refactor that opens these to non-admin operators (e.g. an\n * `imports:view` permission scope) would update {@link ADMIN_SCOPE}\n * here, not the call sites.\n *\n * Publish failures are caught and logged, never thrown — these\n * wrappers are called from worker state-transition paths and the\n * upload route, and a missing realtime delivery must NEVER roll back\n * the database write that triggered it.\n *\n * Why dynamic import: `@murumets-ee/core/realtime` carries\n * `import 'server-only'`, but this module is reachable from the\n * imports package's entrypoints (e.g. `worker.ts`), which are loaded\n * by jiti/tsx in `lumi doctor` / `lumi migrate` / `pnpm worker` —\n * none of which set the `react-server` export condition that turns\n * `server-only` into the empty stub. A static value-import would\n * therefore throw at boot. Same dynamic-import pattern as\n * `packages/queue/src/realtime/publish.ts` and\n * `packages/ticketing/src/realtime/publish.ts`.\n *\n * The `cachedPublish` + `publishSafe` + `_warmRealtimePublisher`\n * scaffolding here is the third copy of the same shape (queue,\n * ticketing, imports). 
Tracked for extraction to a shared\n * `@murumets-ee/core/realtime/safe-publisher` helper in #201 —\n * reach for that when it lands instead of copy-pasting a fourth\n * time.\n */\n\nimport type { Scope } from '@murumets-ee/core/realtime'\nimport { createLogger } from '@murumets-ee/logging'\nimport { IMPORTS_TOPICS } from './topics.js'\n\nconst logger = createLogger({ name: 'imports:realtime' })\n\n/** All imports events fan out to admin connections only. */\nconst ADMIN_SCOPE: Scope = { admin: true }\n\n// Cached publisher — once the dynamic import has resolved, every\n// subsequent publish is fully synchronous. Only the very first call\n// after process start pays the import cost.\nlet cachedPublish: typeof import('@murumets-ee/core/realtime').publishEvent | null = null\n\nfunction logPublishError(err: unknown, topic: string): void {\n if (err instanceof Error && err.name === 'RealtimeOversizeError') {\n // Oversize is a code bug (payload above the 7900-byte pg_notify\n // cap). For imports row events the payload is tiny — id, status,\n // brand/supplier label fragments — well under the cap. If this\n // ever fires the deriver smuggled too much in.\n logger.error({ err, topic }, 'realtime publish failed — payload oversize')\n } else {\n logger.warn({ err, topic }, 'realtime publish failed — event dropped')\n }\n}\n\nfunction publishSafe(topic: string, payload: Record<string, unknown>): void {\n if (cachedPublish) {\n try {\n cachedPublish({ topic, payload, scope: ADMIN_SCOPE })\n } catch (err) {\n logPublishError(err, topic)\n }\n return\n }\n // First call: resolve the import then fan out. Subsequent calls in\n // the same microtask race the import — that's fine, they queue\n // behind the same promise and use the resolved cache when it lands.\n import('@murumets-ee/core/realtime')\n .then((mod) => {\n cachedPublish = mod.publishEvent\n cachedPublish({ topic, payload, scope: ADMIN_SCOPE })\n })\n .catch((err: unknown) => {\n logPublishError(err, topic)\n })\n}\n\n/**\n * @internal — tests pre-resolve the dynamic import so the first\n * publish is synchronous, matching the post-warmup behaviour in\n * production.\n */\nexport async function _warmRealtimePublisher(): Promise<void> {\n if (cachedPublish) return\n const mod = await import('@murumets-ee/core/realtime')\n cachedPublish = mod.publishEvent\n}\n\n/**\n * Fire after the upload route's outbox txn commits, NOT inside it.\n * The realtime fanout is best-effort and a failure must not roll\n * back the row insert.\n */\nexport function publishImportRunCreated(\n importRunId: string,\n fields: { brandSlug?: string; supplierDisplayName?: string; filename: string },\n): void {\n publishSafe(IMPORTS_TOPICS.runCreated, {\n importRunId,\n ...(fields.brandSlug !== undefined && { brandSlug: fields.brandSlug }),\n ...(fields.supplierDisplayName !== undefined && {\n supplierDisplayName: fields.supplierDisplayName,\n }),\n filename: fields.filename,\n })\n}\n\n/**\n * Fire after the worker writes `status: 'running'` on the row.\n * Subscribers refetch to move the row from \"queued\" to \"running\" in\n * the Active panel and update the History snapshot.\n */\nexport function publishImportRunStarted(importRunId: string): void {\n publishSafe(IMPORTS_TOPICS.runStarted, { importRunId })\n}\n\n/**\n * Fire after the worker writes `status: 'succeeded'` + final totals.\n * Subscribers refetch to move the row from Active → Recent.\n */\nexport function publishImportRunSucceeded(\n importRunId: string,\n totals: { rowsRead: number; rowsSucceeded: 
number; rowsFailed: number },\n): void {\n publishSafe(IMPORTS_TOPICS.runSucceeded, {\n importRunId,\n rowsRead: totals.rowsRead,\n rowsSucceeded: totals.rowsSucceeded,\n rowsFailed: totals.rowsFailed,\n })\n}\n\n/**\n * Fire after the worker writes `status: 'failed'` with an error in\n * `errorSummary`. Subscribers refetch to surface the failure in\n * Recent.\n */\nexport function publishImportRunFailed(importRunId: string, reason: string): void {\n publishSafe(IMPORTS_TOPICS.runFailed, { importRunId, reason })\n}\n"],"mappings":"oDAoBA,MAAa,EAAiB,CAM5B,WAAY,sBAOZ,WAAY,sBAOZ,aAAc,wBAOd,UAAW,qBACZ,CCZK,EAAS,EAAa,CAAE,KAAM,mBAAoB,CAAC,CAGnD,EAAqB,CAAE,MAAO,GAAM,CAK1C,IAAI,EAAiF,KAErF,SAAS,EAAgB,EAAc,EAAqB,CACtD,aAAe,OAAS,EAAI,OAAS,wBAKvC,EAAO,MAAM,CAAE,MAAK,QAAO,CAAE,6CAA6C,CAE1E,EAAO,KAAK,CAAE,MAAK,QAAO,CAAE,0CAA0C,CAI1E,SAAS,EAAY,EAAe,EAAwC,CAC1E,GAAI,EAAe,CACjB,GAAI,CACF,EAAc,CAAE,QAAO,UAAS,MAAO,EAAa,CAAC,OAC9C,EAAK,CACZ,EAAgB,EAAK,EAAM,CAE7B,OAKF,OAAO,8BACJ,KAAM,GAAQ,CACb,EAAgB,EAAI,aACpB,EAAc,CAAE,QAAO,UAAS,MAAO,EAAa,CAAC,EACrD,CACD,MAAO,GAAiB,CACvB,EAAgB,EAAK,EAAM,EAC3B,CAQN,eAAsB,GAAwC,CACxD,AAEJ,KAAgB,MADE,OAAO,+BACL,aAQtB,SAAgB,EACd,EACA,EACM,CACN,EAAY,EAAe,WAAY,CACrC,cACA,GAAI,EAAO,YAAc,IAAA,IAAa,CAAE,UAAW,EAAO,UAAW,CACrE,GAAI,EAAO,sBAAwB,IAAA,IAAa,CAC9C,oBAAqB,EAAO,oBAC7B,CACD,SAAU,EAAO,SAClB,CAAC,CAQJ,SAAgB,EAAwB,EAA2B,CACjE,EAAY,EAAe,WAAY,CAAE,cAAa,CAAC,CAOzD,SAAgB,EACd,EACA,EACM,CACN,EAAY,EAAe,aAAc,CACvC,cACA,SAAU,EAAO,SACjB,cAAe,EAAO,cACtB,WAAY,EAAO,WACpB,CAAC,CAQJ,SAAgB,EAAuB,EAAqB,EAAsB,CAChF,EAAY,EAAe,UAAW,CAAE,cAAa,SAAQ,CAAC"}
@@ -0,0 +1,123 @@
1
+ //#region src/realtime/publish.d.ts
2
+ /**
3
+ * Typed publisher wrappers around `@murumets-ee/core/realtime.publishEvent`
4
+ * for the `imports.run.*` topic surface.
5
+ *
6
+ * Like queue events, every imports event is admin-only — a future
7
+ * refactor that opens these to non-admin operators (e.g. an
8
+ * `imports:view` permission scope) would update {@link ADMIN_SCOPE}
9
+ * here, not the call sites.
10
+ *
11
+ * Publish failures are caught and logged, never thrown — these
12
+ * wrappers are called from worker state-transition paths and the
13
+ * upload route, and a missing realtime delivery must NEVER roll back
14
+ * the database write that triggered it.
15
+ *
16
+ * Why dynamic import: `@murumets-ee/core/realtime` carries
17
+ * `import 'server-only'`, but this module is reachable from the
18
+ * imports package's entrypoints (e.g. `worker.ts`), which are loaded
19
+ * by jiti/tsx in `lumi doctor` / `lumi migrate` / `pnpm worker` —
20
+ * none of which set the `react-server` export condition that turns
21
+ * `server-only` into the empty stub. A static value-import would
22
+ * therefore throw at boot. Same dynamic-import pattern as
23
+ * `packages/queue/src/realtime/publish.ts` and
24
+ * `packages/ticketing/src/realtime/publish.ts`.
25
+ *
26
+ * The `cachedPublish` + `publishSafe` + `_warmRealtimePublisher`
27
+ * scaffolding here is the third copy of the same shape (queue,
28
+ * ticketing, imports). Tracked for extraction to a shared
29
+ * `@murumets-ee/core/realtime/safe-publisher` helper in #201 —
30
+ * reach for that when it lands instead of copy-pasting a fourth
31
+ * time.
32
+ */
33
+ /**
34
+ * @internal — tests pre-resolve the dynamic import so the first
35
+ * publish is synchronous, matching the post-warmup behaviour in
36
+ * production.
37
+ */
38
+ declare function _warmRealtimePublisher(): Promise<void>;
39
+ /**
40
+ * Fire after the upload route's outbox txn commits, NOT inside it.
41
+ * The realtime fanout is best-effort and a failure must not roll
42
+ * back the row insert.
43
+ */
44
+ declare function publishImportRunCreated(importRunId: string, fields: {
45
+ brandSlug?: string;
46
+ supplierDisplayName?: string;
47
+ filename: string;
48
+ }): void;
49
+ /**
50
+ * Fire after the worker writes `status: 'running'` on the row.
51
+ * Subscribers refetch to move the row from "queued" to "running" in
52
+ * the Active panel and update the History snapshot.
53
+ */
54
+ declare function publishImportRunStarted(importRunId: string): void;
55
+ /**
56
+ * Fire after the worker writes `status: 'succeeded'` + final totals.
57
+ * Subscribers refetch to move the row from Active → Recent.
58
+ */
59
+ declare function publishImportRunSucceeded(importRunId: string, totals: {
60
+ rowsRead: number;
61
+ rowsSucceeded: number;
62
+ rowsFailed: number;
63
+ }): void;
64
+ /**
65
+ * Fire after the worker writes `status: 'failed'` with an error in
66
+ * `errorSummary`. Subscribers refetch to surface the failure in
67
+ * Recent.
68
+ */
69
+ declare function publishImportRunFailed(importRunId: string, reason: string): void;
70
+ //#endregion
71
+ //#region src/realtime/topics.d.ts
72
+ /**
73
+ * Imports realtime topic registry.
74
+ *
75
+ * Topics use the `imports.run.<event>` namespace. Subscribers listen
76
+ * via `imports.run.**` for everything or narrow per event.
77
+ *
78
+ * Payload shapes are intentionally untyped here — callers pass
79
+ * `Record<string, unknown>` and consumers (the imports admin page)
80
+ * read fields by name. The point of this layer is "an import_run
81
+ * row's status changed, refetch" rather than diffing event payloads
82
+ * against the page's snapshot.
83
+ *
84
+ * Why a separate namespace from `queue.job.*`: queue events are
85
+ * job-shaped (id, type, attempts, …) and admin-only. Imports events
86
+ * are row-shaped (importRunId, status, brand, supplier, …) and
87
+ * track the entity lifecycle independent of the underlying queue
88
+ * mechanics — a future refactor could swap the queue for a different
89
+ * driver without changing the imports event surface.
90
+ */
91
+ declare const IMPORTS_TOPICS: {
92
+ /**
93
+ * The upload route inserted an `import_run` row + enqueued its
94
+ * driving queue job. Fired AFTER the outbox txn commits, so a
95
+ * subscriber's refetch sees the row.
96
+ */
97
+ readonly runCreated: "imports.run.created";
98
+ /**
99
+ * The worker claimed the row and transitioned its status to
100
+ * `running` — the streaming reader is open, the first batch is
101
+ * about to bulk-write. Distinct from `queue.job.claimed` because
102
+ * imports' state machine flips on the row, not the job.
103
+ */
104
+ readonly runStarted: "imports.run.started";
105
+ /**
106
+ * The worker finished cleanly — `import_run.status = 'succeeded'`,
107
+ * `errorSummary` populated, totals locked in. Subscribers refetch
108
+ * the history slice to bring the row out of the Active panel and
109
+ * into Recent.
110
+ */
111
+ readonly runSucceeded: "imports.run.succeeded";
112
+ /**
113
+ * The worker (or `markFailed`) wrote `import_run.status = 'failed'`
114
+ * with an error in `errorSummary`. Same refetch trigger; the page
115
+ * differentiates terminal-success vs terminal-failure by reading
116
+ * the row.
117
+ */
118
+ readonly runFailed: "imports.run.failed";
119
+ };
120
+ type ImportsTopic = (typeof IMPORTS_TOPICS)[keyof typeof IMPORTS_TOPICS];
121
+ //#endregion
122
+ export { IMPORTS_TOPICS, type ImportsTopic, _warmRealtimePublisher, publishImportRunCreated, publishImportRunFailed, publishImportRunStarted, publishImportRunSucceeded };
123
+ //# sourceMappingURL=index.d.mts.map
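
The declarations above describe a fire-and-forget publisher: each `publishImportRun*` wrapper is called after the corresponding `import_run` status write commits, never throws, and logs-and-drops on failure, while subscribers listen on `imports.run.**` and treat every event as a refetch trigger. A worker-side usage sketch: the `@murumets-ee/imports/realtime` specifier is assumed from the dist/realtime layout, and only the function signatures come from this diff.

import {
  publishImportRunFailed,
  publishImportRunStarted,
  publishImportRunSucceeded,
} from '@murumets-ee/imports/realtime'

interface RunTotals {
  rowsRead: number
  rowsSucceeded: number
  rowsFailed: number
}

// Illustrative wrapper: assumes the import_run status writes happen inside
// `run()` / before each publish call, per the "fire after the write commits" docs.
export async function reportRun(importRunId: string, run: () => Promise<RunTotals>): Promise<void> {
  publishImportRunStarted(importRunId)
  try {
    const totals = await run()
    publishImportRunSucceeded(importRunId, totals)
  } catch (err) {
    publishImportRunFailed(importRunId, err instanceof Error ? err.message : String(err))
    throw err
  }
}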
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../../src/realtime/publish.ts","../../src/realtime/topics.ts"],"mappings":";;AAqFA;;;;;AAWA;;;;;;;;;;;AAmBA;;;;;AAQA;;;;;;;;;;;AAiBA;;;iBAvDsB,sBAAA,CAAA,GAA0B,OAAA;;;;;ACjEhD;iBD4EgB,uBAAA,CACd,WAAA,UACA,MAAA;EAAU,SAAA;EAAoB,mBAAA;EAA8B,QAAA;AAAA;;;;;AChD9D;iBDiEgB,uBAAA,CAAwB,WAAA;;;;;iBAQxB,yBAAA,CACd,WAAA,UACA,MAAA;EAAU,QAAA;EAAkB,aAAA;EAAuB,UAAA;AAAA;;;;;;iBAerC,sBAAA,CAAuB,WAAA,UAAqB,MAAA;;;;AAvD5D;;;;;AAWA;;;;;;;;;;;AAmBA;;cC/Fa,cAAA;ED+F2B;;AAQxC;;;EARwC;EAStC;;;;;;EAAA;EAgBc;;;;;;EAAA;;ACxHhB;;;;;;;KA8BY,YAAA,WAAuB,cAAA,eAA6B,cAAA"}
@@ -0,0 +1 @@
1
+ import{a as e,i as t,n,o as r,r as i,t as a}from"../publish-Dw2vnCoo.mjs";export{r as IMPORTS_TOPICS,a as _warmRealtimePublisher,n as publishImportRunCreated,i as publishImportRunFailed,t as publishImportRunStarted,e as publishImportRunSucceeded};
@@ -0,0 +1,2 @@
1
+ import{bulkUpsert as e}from"@murumets-ee/search-elasticsearch";import{createReadStream as t}from"node:fs";import{parse as n}from"csv-parse";const r=50,i=5;var a=class{patterns=new Map;maxPatterns;maxSamplesPerPattern;droppedSignatures=0;constructor(e={}){this.maxPatterns=e.maxPatterns??50,this.maxSamplesPerPattern=e.maxSamplesPerPattern??5}addError(e,t,n,r,i){let a=`${t}:${r??`GENERAL`}:${n}`,o=this.patterns.get(a);if(!o){if(this.patterns.size>=this.maxPatterns){this.droppedSignatures+=1;return}o={errorType:t,field:r??null,message:n,count:0,firstOccurrence:e,lastOccurrence:e,samples:[]},this.patterns.set(a,o)}o.count+=1,o.lastOccurrence=e,o.samples.length<this.maxSamplesPerPattern&&o.samples.push({rowNumber:e,rowData:i})}getTotalErrorCount(){let e=0;for(let t of this.patterns.values())e+=t.count;return e}getDistinctPatternCount(){return this.patterns.size}getDroppedSignatureCount(){return this.droppedSignatures}getTopPatterns(){let e=Array.from(this.patterns.values()).sort((e,t)=>t.count-e.count),t=e.reduce((e,t)=>e+t.count,0);return e.map(e=>({errorType:e.errorType,field:e.field,message:e.message,count:e.count,firstOccurrence:e.firstOccurrence,lastOccurrence:e.lastOccurrence,samples:e.samples.slice(),percentage:t>0?e.count/t*100:0}))}snapshot(){return{totalErrors:this.getTotalErrorCount(),distinctPatterns:this.getDistinctPatternCount(),droppedSignatures:this.droppedSignatures,patterns:this.getTopPatterns()}}};async function o(e,n={}){let{hasHeader:r=!0}=n,i=t(e),a=0,o=0;for await(let e of i){for(let t=0;t<e.length;t+=1)e[t]===10&&(a+=1);e.length>0&&(o=e[e.length-1]??0)}return o!==0&&o!==10&&(a+=1),r?Math.max(0,a-1):a}async function*s(e){let{filePath:r,delimiter:i=` `,hasHeader:a=!0,columns:o,relaxColumnCount:s=!1,quote:c=!1}=e,l=o?Array.from(o):a,u=t(r),d=u.pipe(n({delimiter:i,columns:l,bom:!0,skip_empty_lines:!0,relax_column_count:s,quote:c})),f=0;try{for await(let e of d){f+=1;let t={};if(Array.isArray(e)){let n=e;for(let e=0;e<n.length;e+=1)t[String(e)]=n[e]??``}else for(let[n,r]of Object.entries(e))t[n]=r??``;yield{rowNumber:f,row:t}}}finally{u.destroy()}}const c=1e3;async function l(t){let{importRunId:n,runLabel:r,params:i,transform:o,feed:l,esClient:u,esIndex:d,batchSize:f=c,onProgress:p,rowLimit:m,signal:h,errorTracker:g=new a,logger:_}=t;if(f<1)throw Error(`batchSize must be >= 1 (got ${f})`);let v=Date.now();_?.info({importRunId:n,runLabel:r,batchSize:f,esIndex:d},`imports.runner: started`);let y=0,b=0,x=0,S=0,C=0,w=[],T=async()=>{if(w.length===0)return;let t=w;w=[];let r=Date.now(),i;try{i=await e(u,{index:d,docs:t.map(({id:e,doc:t})=>({id:e,doc:t})),...h!==void 0&&{signal:h}})}catch(e){let n=h?.aborted??(e instanceof Error&&(e.name===`AbortError`||/abort/i.test(e.message)))?`aborted`:`bulk_request_failed`,r=e instanceof Error?e.message:String(e);for(let{rowNumber:e}of t)g.addError(e,n,r,void 0,null);throw x+=t.length,e}b+=i.succeeded,x+=i.failures.length,C+=1;let a=Date.now()-r,o=(Date.now()-v)/1e3;if(_?.info({importRunId:n,batch:C,flushMs:a,submitted:t.length,succeeded:i.succeeded,failed:i.failures.length,rowsRead:y,rowsPerSecond:o>0?Math.round(y/o):0},`imports.runner: batch flushed`),i.failures.length>0){let e=new Map(t.map(e=>[e.id,e.rowNumber]));for(let t of i.failures){let n=e.get(t.id)??-1;g.addError(n,t.type,t.reason,void 0,{id:t.id})}}if(p){let e=(Date.now()-v)/1e3;p({rowsRead:y,rowsSucceeded:b,rowsFailed:x,rowsSkipped:S,batchesCompleted:C,elapsedSeconds:e,rowsPerSecond:e>0?y/e:0,distinctErrorPatterns:g.getDistinctPatternCount()})}},E=f,D=new Map;for 
await(let{rowNumber:e,row:t}of s(l)){if(h?.aborted||m!==void 0&&y>=m)break;if(y+=1,y===1&&_?.info({importRunId:n,rowNumber:e,sampleKeys:Object.keys(t)},`imports.runner: STREAM_FIRST_ROW`),y%E===0){let e=(Date.now()-v)/1e3;_?.info({importRunId:n,rowsRead:y,rowsSucceeded:b,rowsFailed:x,rowsSkipped:S,pending:w.length,elapsedSec:Math.round(e*10)/10,rowsPerSecond:e>0?Math.round(y/e):0,distinctFirstErrorTypes:D.size},`imports.runner: STREAM_TICK`)}let a={importRunId:n,params:i,runLabel:r,rowNumber:e},s;try{s=await o(t,a)}catch(r){let i=r instanceof Error?r.message:String(r);g.addError(e,`transform_threw`,i,void 0,t),D.has(`transform_threw`)||(D.set(`transform_threw`,{rowNumber:e,message:i}),_?.warn({importRunId:n,rowNumber:e,errorType:`transform_threw`,message:i},`imports.runner: FIRST_ERROR transform_threw`)),x+=1;continue}if(s.kind===`skip`){S+=1;continue}if(s.kind===`error`){g.addError(e,s.error.errorType,s.error.message,s.error.field,t),D.has(s.error.errorType)||(D.set(s.error.errorType,{rowNumber:e,message:s.error.message}),_?.warn({importRunId:n,rowNumber:e,errorType:s.error.errorType,field:s.error.field,message:s.error.message},`imports.runner: FIRST_ERROR ${s.error.errorType}`)),x+=1;continue}w.push({id:s.id,doc:s.doc,rowNumber:e}),w.length>=f&&await T()}await T();let O=(Date.now()-v)/1e3;return _?.info({importRunId:n,rowsRead:y,rowsSucceeded:b,rowsFailed:x,rowsSkipped:S,batchesCompleted:C,totalSec:Math.round(O*10)/10,rowsPerSecond:O>0?Math.round(y/O):0},`imports.runner: finished`),{rowsRead:y,rowsSucceeded:b,rowsFailed:x,rowsSkipped:S,batchesCompleted:C,errors:g.snapshot()}}export{r as a,s as i,l as n,i as o,o as r,a as s,c as t};
2
+ //# sourceMappingURL=runner-D9FtnIBn.mjs.map
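
The runner chunk above bundles `ErrorTracker`, `streamFeed`, `runImport`, and the new `countDataRows` pre-pass that `dist/index` now re-exports. A sketch of how the standalone pieces fit together; the file path, column name, and validation logic are illustrative, and only the API shapes come from this diff.

import { ErrorTracker, countDataRows, streamFeed } from '@murumets-ee/imports'

// Pre-pass: raw newline tally (header excluded by default); an upper bound for
// quoted CSV with embedded newlines, exact for the default quote:false feeds.
const totalRows = await countDataRows('/tmp/feed.txt')

// Collapse per-row failures into top-N signature buckets; caps shown are the defaults.
const tracker = new ErrorTracker({ maxPatterns: 50, maxSamplesPerPattern: 5 })

for await (const { rowNumber, row } of streamFeed({ filePath: '/tmp/feed.txt', delimiter: '\t' })) {
  const raw = row['NetPrice'] ?? '' // 'NetPrice' is an illustrative column name
  if (Number.isNaN(Number.parseFloat(raw))) {
    // (rowNumber, errorType, message, field, rowData): matches addError above.
    tracker.addError(rowNumber, 'invalid_price', `invalid number '${raw}'`, 'NetPrice', row)
  }
}

// JSON-serialisable summary destined for import_run.errorSummary:
// { totalErrors, distinctPatterns, droppedSignatures, patterns: [...] }
console.log(totalRows, tracker.snapshot())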
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner-D9FtnIBn.mjs","names":[],"sources":["../src/error-tracker.ts","../src/streaming.ts","../src/runner.ts"],"sourcesContent":["/**\n * Aggregates per-row errors into top-N pattern buckets so a feed of 1M\n * malformed rows surfaces as a handful of actionable signatures rather\n * than a million identical strings.\n *\n * Lifted from giga-test (`backend/src/workers/csv-importer.ts`) and\n * generalized:\n * - Configurable caps so the importer can tune memory bounds per run.\n * - Pure data — no logging side effects, no I/O.\n * - JSON-serialisable output via {@link ErrorTracker.snapshot} for the\n * `import_run.errorSummary` column.\n *\n * Pattern signature shape: `${errorType}:${field || 'GENERAL'}:${message}`.\n * Same `errorType + field + message` collapses to one bucket; differing\n * messages stay separate. This is intentional: a parser error on column\n * `NetPrice/Discount` (\"invalid number 'NA,5'\") and the same on column\n * `GrossPrice` are operationally distinct even if the parser is the same.\n *\n * Memory bounds: the patterns map is capped at `maxPatterns`. Once full,\n * additional NEW signatures are dropped — known patterns keep accumulating\n * counts. This is the \"top-N most common\" model: rare-but-novel errors\n * past the cap are invisible, but the cap protects against a runaway\n * adversarial feed exploding the map. Sample arrays are independently\n * capped at `maxSamplesPerPattern`.\n */\n\n/**\n * Recursively-defined JSON-serialisable value. Mirrors the shape of\n * `JsonValue` in `@murumets-ee/entity` without taking a dependency on\n * that package — error-tracker is otherwise standalone, and the\n * `ImportRun.errorSummary` JSONB column accepts anything in this shape.\n */\nexport type ImportJsonValue =\n | string\n | number\n | boolean\n | null\n | ImportJsonValue[]\n | { [key: string]: ImportJsonValue }\n\n/** One sample row attached to a pattern. `rowData` is the originally-parsed row. */\nexport interface ErrorSample {\n rowNumber: number\n rowData: ImportJsonValue\n}\n\n/** Public shape of an aggregated pattern as returned by {@link ErrorTracker.getTopPatterns}. */\nexport interface ErrorPattern {\n errorType: string\n field: string | null\n message: string\n count: number\n firstOccurrence: number\n lastOccurrence: number\n samples: ReadonlyArray<ErrorSample>\n /** `count / totalErrors`, scaled 0..100. `0` when there are zero errors total. */\n percentage: number\n}\n\nexport interface ErrorTrackerConfig {\n /** Hard cap on distinct signatures. Defaults to 50. New signatures past the cap are dropped. */\n maxPatterns?: number\n /** Hard cap on samples retained per pattern. Defaults to 5. Excess samples are dropped. */\n maxSamplesPerPattern?: number\n}\n\n/** Default caps — match giga-test for compatibility. */\nexport const DEFAULT_MAX_PATTERNS = 50\nexport const DEFAULT_MAX_SAMPLES_PER_PATTERN = 5\n\ninterface InternalPattern {\n errorType: string\n field: string | null\n message: string\n count: number\n firstOccurrence: number\n lastOccurrence: number\n samples: ErrorSample[]\n}\n\nexport class ErrorTracker {\n private readonly patterns = new Map<string, InternalPattern>()\n private readonly maxPatterns: number\n private readonly maxSamplesPerPattern: number\n private droppedSignatures = 0\n\n constructor(config: ErrorTrackerConfig = {}) {\n this.maxPatterns = config.maxPatterns ?? DEFAULT_MAX_PATTERNS\n this.maxSamplesPerPattern = config.maxSamplesPerPattern ?? 
DEFAULT_MAX_SAMPLES_PER_PATTERN\n }\n\n /**\n * Record one error. Same `(errorType, field, message)` triple bumps the\n * existing bucket; a new triple opens a new one (subject to {@link maxPatterns}).\n *\n * `field` is optional — pass `undefined` for errors not tied to a single\n * column (e.g. parse errors at row level). Internally normalised to the\n * literal string `'GENERAL'` so it shares a bucket with other genericised\n * errors of the same type+message.\n */\n addError(\n rowNumber: number,\n errorType: string,\n message: string,\n field: string | undefined,\n rowData: ImportJsonValue,\n ): void {\n const fieldKey = field ?? 'GENERAL'\n const signature = `${errorType}:${fieldKey}:${message}`\n\n let pattern = this.patterns.get(signature)\n if (!pattern) {\n if (this.patterns.size >= this.maxPatterns) {\n this.droppedSignatures += 1\n return\n }\n pattern = {\n errorType,\n field: field ?? null,\n message,\n count: 0,\n firstOccurrence: rowNumber,\n lastOccurrence: rowNumber,\n samples: [],\n }\n this.patterns.set(signature, pattern)\n }\n\n pattern.count += 1\n pattern.lastOccurrence = rowNumber\n if (pattern.samples.length < this.maxSamplesPerPattern) {\n pattern.samples.push({ rowNumber, rowData })\n }\n }\n\n /** Total count across every pattern. Counts errors, not patterns. */\n getTotalErrorCount(): number {\n let sum = 0\n for (const p of this.patterns.values()) sum += p.count\n return sum\n }\n\n /** Number of distinct signatures retained in the map (≤ `maxPatterns`). */\n getDistinctPatternCount(): number {\n return this.patterns.size\n }\n\n /**\n * Number of NEW signatures dropped because the map was already at\n * capacity. Surfacing this in the import_run summary tells the operator\n * \"the top-N was saturated — there's a long tail you're not seeing\".\n */\n getDroppedSignatureCount(): number {\n return this.droppedSignatures\n }\n\n /**\n * Top patterns sorted by descending count, capped at `maxPatterns`.\n * Stable secondary order is insertion order (Map iteration order is\n * insertion order; Array.sort is stable in V8).\n */\n getTopPatterns(): ErrorPattern[] {\n const all = Array.from(this.patterns.values()).sort((a, b) => b.count - a.count)\n const totalErrors = all.reduce((sum, p) => sum + p.count, 0)\n return all.map((p) => ({\n errorType: p.errorType,\n field: p.field,\n message: p.message,\n count: p.count,\n firstOccurrence: p.firstOccurrence,\n lastOccurrence: p.lastOccurrence,\n samples: p.samples.slice(),\n percentage: totalErrors > 0 ? (p.count / totalErrors) * 100 : 0,\n }))\n }\n\n /**\n * Compact JSON-serialisable snapshot for `import_run.errorSummary`.\n * Aside from the patterns array, includes the totals so a reader of\n * just this column doesn't have to re-derive them.\n */\n snapshot(): ErrorTrackerSnapshot {\n return {\n totalErrors: this.getTotalErrorCount(),\n distinctPatterns: this.getDistinctPatternCount(),\n droppedSignatures: this.droppedSignatures,\n patterns: this.getTopPatterns(),\n }\n }\n}\n\nexport interface ErrorTrackerSnapshot {\n totalErrors: number\n distinctPatterns: number\n droppedSignatures: number\n patterns: ErrorPattern[]\n}\n","/**\n * Tab-delimited / CSV streaming reader. 
Generic over delimiter so the\n * same path handles `.txt` (tab), `.csv` (comma), and the rare `;`\n * European export dialect.\n *\n * Built on `csv-parse` per giga-test precedent — node-stream-based, low\n * memory, handles UTF-8 BOM (the carmaker feeds are Windows-exported\n * and ship with a BOM that breaks naive split-on-tab parsers).\n *\n * The reader yields `{ rowNumber, row }` pairs where:\n * - `rowNumber` is 1-based and counts the header as row 0.\n * - `row` is `Record<string, string>` keyed by header name. Empty cells\n * are the empty string, NOT `undefined` — feed transforms test with\n * `value === ''` consistently.\n *\n * Why this lives in a streaming reader and not inside the transform\n * itself: the transform sees one already-parsed row at a time, never\n * the file. That keeps transform implementations free of I/O concerns\n * and makes them trivially unit-testable with a fixture row map.\n */\n\nimport { createReadStream } from 'node:fs'\nimport { parse } from 'csv-parse'\n\n/**\n * Pre-pass: count data rows in a delimited file (excluding the header\n * row when `hasHeader` is true; default `true` to match `streamFeed`).\n *\n * Implementation: stream the file as raw bytes and tally `\\n`. NOT a\n * full parse — escaped newlines inside quoted fields would over-count.\n * That's acceptable for the supplier-feed use case because we default\n * `quote: false` (every `\\n` IS a row boundary). For genuine RFC-4180\n * CSV inputs with embedded newlines, treat the result as an upper\n * bound; the actual parsed row count from `streamFeed` is authoritative.\n *\n * Why bytes-only and not csv-parse: a 16 MB feed counts in ~50 ms with\n * `\\n` tally vs ~500 ms with csv-parse parsing. For 500 MB the gap\n * matters — pre-pass needs to be a small fraction of the actual import\n * or the operator perceives the run as \"stuck on pre-pass\" with no UI.\n *\n * Returns the number of DATA rows (header excluded when present).\n * Empty trailing lines (no `\\n` after the final row, or a single\n * trailing `\\n`) are handled identically to how csv-parse treats them\n * with `skip_empty_lines: true`.\n */\nexport async function countDataRows(\n filePath: string,\n options: { hasHeader?: boolean } = {},\n): Promise<number> {\n const { hasHeader = true } = options\n const stream = createReadStream(filePath)\n let count = 0\n let lastByte = 0\n for await (const chunk of stream as AsyncIterable<Buffer>) {\n for (let i = 0; i < chunk.length; i += 1) {\n if (chunk[i] === 0x0a /* \\n */) count += 1\n }\n if (chunk.length > 0) lastByte = chunk[chunk.length - 1] ?? 0\n }\n // If the file doesn't end with a newline, the final row didn't get\n // counted by the `\\n` tally — add one. Skipping when the file is\n // entirely empty (count=0 and lastByte=0).\n if (lastByte !== 0 && lastByte !== 0x0a) count += 1\n return hasHeader ? Math.max(0, count - 1) : count\n}\n\nexport interface StreamFeedOptions {\n /** Path to the file on disk. The PoC uploads land on local disk; S3-keyed reads come later. */\n filePath: string\n /**\n * Single-character field delimiter. Default `\\t` (the carmaker feed\n * format). Pass `,` for CSV, `;` for some European dialects.\n */\n delimiter?: string\n /**\n * `true` (default): the first row is the header and column names come\n * from it. `false`: rows are emitted as positional `{ \"0\": ..., \"1\": ... 
}`\n * and the transform reads by index — useful for headerless feeds that\n * commit to a documented column order.\n */\n hasHeader?: boolean\n /**\n * Optional explicit column-name list. When provided, takes precedence\n * over `hasHeader` (header row, if present, is skipped but its values\n * are ignored). Useful when the upstream header is unstable but the\n * positional shape isn't.\n */\n columns?: ReadonlyArray<string>\n /**\n * Forward to `csv-parse` `relax_column_count`. Default `false` —\n * a row whose column count doesn't match the header surfaces as a\n * parser error so the transform isn't silently fed truncated data.\n */\n relaxColumnCount?: boolean\n /**\n * Forward to `csv-parse` `quote`. The character (or `false`) that\n * encloses fields containing the delimiter. Default `false` — most\n * supplier feeds in this package's target use case are tab-delimited\n * exports that contain literal `\"` characters in description fields\n * (e.g. `LIVRE \"INVERSION D'IMAGE\"` from the Mercedes carmaker feed)\n * with NO quote-as-field-wrapper convention. csv-parse's library\n * default `'\"'` then misreads the literal `\"` as an opening quote\n * and rejects the row with `Invalid Opening Quote`.\n *\n * Pass `'\"'` (or any character) to opt back into RFC-4180-style\n * CSV parsing for genuine quoted-CSV inputs. Pass `'\\''` for\n * single-quoted dialects.\n */\n quote?: string | false\n}\n\nexport interface StreamFeedRow {\n /** 1-based row number. Header (when present) is row 0; first data row is row 1. */\n rowNumber: number\n /**\n * Cell values keyed by column name (or string-position when\n * `hasHeader: false` AND no `columns`).\n *\n * **Cell-value invariants:**\n * - Empty cells (`A\\t\\tC`) → `''` (empty string).\n * - Missing TRAILING cells in `relaxColumnCount: true` mode → the\n * key is **absent** from the object, not present-with-`''`. csv-parse\n * does not emit keys for short rows. Transforms reading those\n * columns get `undefined` from `row['col']` and must handle it\n * (`row['col'] ?? ''` is the canonical idiom).\n * - With the default `relaxColumnCount: false`, short rows reject at\n * the parser, so this case never reaches the transform.\n */\n row: Record<string, string>\n}\n\n/**\n * Async-iterable over the parsed rows of a delimited file. Use with\n * `for await (const { rowNumber, row } of streamFeed({ filePath, ... }))`.\n *\n * The iterator owns its file descriptor — the `for await` loop closes\n * the underlying stream when it returns or breaks. Aborting mid-stream\n * (`break`, `throw`, signal) is safe; csv-parse propagates the close.\n */\nexport async function* streamFeed(options: StreamFeedOptions): AsyncIterable<StreamFeedRow> {\n const {\n filePath,\n delimiter = '\\t',\n hasHeader = true,\n columns,\n relaxColumnCount = false,\n quote = false,\n } = options\n\n // When the caller provides explicit `columns`, prefer them. When the\n // file has a header but no explicit `columns`, csv-parse takes the\n // first row as the column source. When neither is true, rows are\n // emitted with string-position keys.\n //\n // Typed against csv-parse's actual `columns?: ColumnOption[] | boolean`\n // signature — `string[]` satisfies `ColumnOption[]` since\n // `ColumnOption = string | undefined | null | false | { name: string }`.\n const columnConfig: string[] | boolean = columns ? 
Array.from(columns) : hasHeader\n\n const stream = createReadStream(filePath)\n const parser = stream.pipe(\n parse({\n delimiter,\n columns: columnConfig,\n bom: true,\n skip_empty_lines: true,\n // csv-parse defaults to strict column count; opt-in relaxation only.\n relax_column_count: relaxColumnCount,\n // Default `quote: false` — see StreamFeedOptions.quote JSDoc.\n // Carmaker / parts-feed exports contain literal `\"` characters\n // in description fields and would otherwise reject as\n // `Invalid Opening Quote`. Genuine RFC-4180 CSV consumers opt\n // back in by passing `quote: '\"'`.\n quote,\n }),\n )\n\n let rowNumber = 0\n try {\n for await (const rawRow of parser as AsyncIterable<\n ReadonlyArray<string> | Record<string, string | undefined>\n >) {\n rowNumber += 1\n // csv-parse emits records with string keys when `columns` is\n // truthy, otherwise an Array. Normalise both shapes to\n // `Record<string, string>` so transforms can rely on `value === ''`\n // for missing cells (csv-parse leaves trailing missing cells as\n // `undefined` when `relax_column_count: true`; this collapses\n // them to `''` to keep the contract uniform).\n const row: Record<string, string> = {}\n if (Array.isArray(rawRow)) {\n const arr = rawRow as ReadonlyArray<string | undefined>\n for (let i = 0; i < arr.length; i += 1) {\n row[String(i)] = arr[i] ?? ''\n }\n } else {\n for (const [k, v] of Object.entries(rawRow)) {\n row[k] = v ?? ''\n }\n }\n yield { rowNumber, row }\n }\n } finally {\n // Safety: ensure the underlying file descriptor closes even if the\n // consumer breaks mid-iteration. Node closes streams on\n // garbage-collection but this makes it deterministic under tests.\n stream.destroy()\n }\n}\n","/**\n * One-shot importer: stream rows → transform → batched bulk-write to ES,\n * accumulating per-row errors into `ErrorTracker` and reporting progress\n * to the queue every batch.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope):\n * - **Batch size 1000.** Matches giga-test precedent. Configurable for\n * integration tests that don't want a 1k row floor.\n * - **No resumability and no automatic retries.** The `imports:run` queue\n * job is registered with `defaultRetries: 0` (see `worker.ts`) so a\n * failed handler does NOT re-enqueue itself — re-running a multi-batch\n * import against the same `import_run.id` while the previous attempt\n * may still be writing is a footgun (duplicate batches, double-counted\n * progress). Operator retries by creating a NEW `import_run` row.\n * - **No per-supplier transform plugin.** The runner takes a single\n * `RowTransform<TDoc>` from the registry and applies it to every row;\n * PR 8 may diverge but only by registering a different transform name.\n * - **Direct `bulkUpsert` into the live aliased index** (D6 alias is\n * set up by PR 4's `ensureAliasedIndex`; the importer doesn't reindex).\n *\n * Per D21 (sanctioned bulk path): this runner intentionally bypasses\n * AdminClient and entity hooks. Per-batch audit / observability lives\n * on the surrounding `import_run` row + queue progress, NOT per-row.\n */\n\nimport { bulkUpsert, type BulkIndexResult, type EsClientLike } from '@murumets-ee/search-elasticsearch'\nimport { ErrorTracker } from './error-tracker.js'\nimport { streamFeed, type StreamFeedOptions } from './streaming.js'\nimport type { RowTransform, TransformContext } from './transform.js'\n\n/** Soft default; chosen to match giga-test. ES bulk requests over ~5MB get split server-side anyway. 
*/\nexport const DEFAULT_BATCH_SIZE = 1000\n\nexport interface RunImportOptions<TDoc> {\n /** UUID of the `import_run` row driving this run. Forwarded to every transform invocation. */\n importRunId: string\n /** Operator-supplied label for the run. Forwarded to the transform context. */\n runLabel: string\n /** Opaque per-run params copied from `import_run.params`. */\n params: Record<string, unknown>\n /** Transform applied to every parsed row. */\n transform: RowTransform<TDoc>\n /** Streaming reader options — file path, delimiter, header config. */\n feed: StreamFeedOptions\n /** ES client (low-level shape from `@murumets-ee/search-elasticsearch`). */\n esClient: EsClientLike\n /** Index alias to write to. Per D6, callers always pass an alias, never a physical index. */\n esIndex: string\n /** Rows per `bulkUpsert` call. Default {@link DEFAULT_BATCH_SIZE}. */\n batchSize?: number\n /**\n * Callback invoked after every batch. The handler in `worker.ts`\n * forwards this to `ctx.updateProgress` for the queue UI; tests\n * inspect it directly. Synchronous + cheap so a slow callback can't\n * back-pressure the importer.\n */\n onProgress?: (progress: ImportRunProgress) => void\n /** Optional: stop processing after this many rows. Tests use it; production passes `undefined`. */\n rowLimit?: number\n /** Abort signal threaded into the underlying ES client request — cooperative cancel. */\n signal?: AbortSignal\n /** Optional ErrorTracker config (caps). Default: top-50 patterns × 5 samples. */\n errorTracker?: ErrorTracker\n /**\n * Optional structural logger for per-batch telemetry. Receives\n * `{ batch, ms, succeeded, failed, rowsRead, rowsPerSecond }` after\n * every flush, plus a one-shot `started` and `finished` event. The\n * worker forwards `app.logger.child({ pkg: 'imports' })`; tests\n * leave it undefined.\n *\n * Why: without per-batch visibility, a stalled run looks identical\n * to \"first batch in progress\" forever — the only feedback is the\n * queue's `progress` jsonb (which writes after each batch) and the\n * eventual `succeeded`/`failed` job event. Adding this lets the\n * operator tail the worker log and immediately see whether ES is\n * rate-limiting, the transform is rejecting every row, or the\n * stream is actually progressing.\n */\n logger?: {\n info: (data: Record<string, unknown>, msg: string) => void\n warn: (data: Record<string, unknown>, msg: string) => void\n }\n}\n\n/**\n * Progress payload written to `toolkit_jobs.progress` after every batch.\n * Caps + flush rules live on the queue's `updateProgress` debounce —\n * callers don't need to throttle.\n */\nexport interface ImportRunProgress {\n rowsRead: number\n rowsSucceeded: number\n rowsFailed: number\n rowsSkipped: number\n batchesCompleted: number\n /** Wall-clock seconds since the runner started. */\n elapsedSeconds: number\n /** Rows / second, computed at every batch. */\n rowsPerSecond: number\n /** Distinct error patterns currently held by the tracker. Saturates at the cap. */\n distinctErrorPatterns: number\n /**\n * Total data-row count from a pre-pass scan, when the worker computed one.\n * Lets the UI render `processed / total` percent. Absent when the worker\n * skipped the count (file too large for the count budget, count failed,\n * etc.) — UI must treat it as optional.\n */\n totalRows?: number\n}\n\n/**\n * Final result returned by {@link runImport}. 
The handler writes these\n * onto the `import_run` row alongside the ErrorTracker snapshot.\n */\nexport interface RunImportResult {\n /** Total rows read from the file (excludes skipped empty lines). */\n rowsRead: number\n /** Rows the transform turned into a successful doc AND the ES cluster acknowledged. */\n rowsSucceeded: number\n /**\n * Rows that the transform rejected (`{ kind: 'error' }`) OR that ES\n * rejected on bulk-write (per-doc failure). Both are aggregated by\n * `errorTracker` for the import_run summary.\n */\n rowsFailed: number\n /** Rows that the transform skipped (`{ kind: 'skip' }`) — header noise, blank lines, intentional drop. */\n rowsSkipped: number\n /** Number of `bulkUpsert` calls made. */\n batchesCompleted: number\n /** Final value of {@link ErrorTracker.snapshot}. */\n errors: ReturnType<ErrorTracker['snapshot']>\n}\n\n/**\n * Apply the runner against a feed file. Stops on rowLimit OR end-of-file\n * OR if `signal` aborts. Throws if the streaming reader / ES client\n * throws — caller (the queue handler) catches that and writes\n * `import_run.status = 'failed'` with the error message in\n * `errorSummary.fatal`.\n */\nexport async function runImport<TDoc>(options: RunImportOptions<TDoc>): Promise<RunImportResult> {\n const {\n importRunId,\n runLabel,\n params,\n transform,\n feed,\n esClient,\n esIndex,\n batchSize = DEFAULT_BATCH_SIZE,\n onProgress,\n rowLimit,\n signal,\n errorTracker = new ErrorTracker(),\n logger,\n } = options\n\n if (batchSize < 1) {\n throw new Error(`batchSize must be >= 1 (got ${batchSize})`)\n }\n\n const startedAt = Date.now()\n logger?.info(\n { importRunId, runLabel, batchSize, esIndex },\n 'imports.runner: started',\n )\n let rowsRead = 0\n let rowsSucceeded = 0\n let rowsFailed = 0\n let rowsSkipped = 0\n let batchesCompleted = 0\n\n let pending: Array<{ id: string; doc: TDoc; rowNumber: number }> = []\n\n const flush = async (): Promise<void> => {\n if (pending.length === 0) return\n const batch = pending\n pending = []\n const flushStartedAt = Date.now()\n let result: BulkIndexResult\n try {\n result = await bulkUpsert<TDoc>(esClient, {\n index: esIndex,\n docs: batch.map(({ id, doc }) => ({ id, doc })),\n ...(signal !== undefined && { signal }),\n })\n } catch (err) {\n // Cluster- or transport-level failure — the whole batch is\n // unaccounted for. Distinguish abort (operator-driven cancel) from\n // a real cluster failure so the errorSummary doesn't mislabel a\n // cancelled run as broken cluster connectivity.\n const isAbort =\n signal?.aborted ??\n (err instanceof Error && (err.name === 'AbortError' || /abort/i.test(err.message)))\n const errorType = isAbort ? 'aborted' : 'bulk_request_failed'\n const reason = err instanceof Error ? err.message : String(err)\n for (const { rowNumber } of batch) {\n errorTracker.addError(rowNumber, errorType, reason, undefined, null)\n }\n rowsFailed += batch.length\n throw err\n }\n\n rowsSucceeded += result.succeeded\n rowsFailed += result.failures.length\n batchesCompleted += 1\n\n const flushMs = Date.now() - flushStartedAt\n const elapsedSec = (Date.now() - startedAt) / 1000\n logger?.info(\n {\n importRunId,\n batch: batchesCompleted,\n flushMs,\n submitted: batch.length,\n succeeded: result.succeeded,\n failed: result.failures.length,\n rowsRead,\n rowsPerSecond: elapsedSec > 0 ? Math.round(rowsRead / elapsedSec) : 0,\n },\n 'imports.runner: batch flushed',\n )\n\n if (result.failures.length > 0) {\n // Map each ES failure back to its source row via `id`. 
The bulk\n // response order matches the request order, but ES doesn't promise\n // that; matching by `id` is the safe path. PoC volume is small\n // enough that the O(failures × batch) cost is irrelevant.\n const byId = new Map(batch.map((b) => [b.id, b.rowNumber]))\n for (const fail of result.failures) {\n const rowNumber = byId.get(fail.id) ?? -1\n errorTracker.addError(rowNumber, fail.type, fail.reason, undefined, { id: fail.id })\n }\n }\n\n if (onProgress) {\n const elapsedSeconds = (Date.now() - startedAt) / 1000\n onProgress({\n rowsRead,\n rowsSucceeded,\n rowsFailed,\n rowsSkipped,\n batchesCompleted,\n elapsedSeconds,\n rowsPerSecond: elapsedSeconds > 0 ? rowsRead / elapsedSeconds : 0,\n distinctErrorPatterns: errorTracker.getDistinctPatternCount(),\n })\n }\n }\n\n // Streaming heartbeat — sample every Nth row so the operator sees the\n // streamer is alive even when a transform-rejected file would prevent\n // any flush from firing. Without this, a feed that's 100% rejected\n // looks identical to \"stuck\" until end-of-file. Sample frequency is\n // batchSize so the cadence matches the natural batch rhythm.\n const streamSampleEvery = batchSize\n\n // First-error capture per error type — surface what's actually rejecting\n // the rows when 100% of a feed is rejected. Without this, the only signal\n // is `errorTracker.snapshot()` which only writes to import_run.errorSummary\n // at the END of the run. Mid-run we want to know NOW.\n const firstErrorByType = new Map<string, { rowNumber: number; message: string }>()\n\n for await (const { rowNumber, row } of streamFeed(feed)) {\n if (signal?.aborted) break\n if (rowLimit !== undefined && rowsRead >= rowLimit) break\n rowsRead += 1\n\n if (rowsRead === 1) {\n logger?.info({ importRunId, rowNumber, sampleKeys: Object.keys(row) }, 'imports.runner: STREAM_FIRST_ROW')\n }\n if (rowsRead % streamSampleEvery === 0) {\n const elapsedSec = (Date.now() - startedAt) / 1000\n // Telemetry only — no row content, no error sample text.\n // PR #262 review (M-5): the prior `firstErrors:\n // Object.fromEntries(firstErrorByType)` payload included the\n // raw error message captured from the transform, which for the\n // carmaker feed embeds supplier codes + price fragments. The\n // detailed first-error breakdown is already persisted on\n // `import_run.errorSummary` via `errorTracker.snapshot()` —\n // operators inspect it there, not in the worker log.\n logger?.info(\n {\n importRunId,\n rowsRead,\n rowsSucceeded,\n rowsFailed,\n rowsSkipped,\n pending: pending.length,\n elapsedSec: Math.round(elapsedSec * 10) / 10,\n rowsPerSecond: elapsedSec > 0 ? Math.round(rowsRead / elapsedSec) : 0,\n distinctFirstErrorTypes: firstErrorByType.size,\n },\n 'imports.runner: STREAM_TICK',\n )\n }\n\n const ctx: TransformContext = { importRunId, params, runLabel, rowNumber }\n let result: Awaited<ReturnType<typeof transform>>\n try {\n result = await transform(row, ctx)\n } catch (err) {\n // A throw from the transform is a programmer error — surface it as\n // a row-level error so the run can continue. (If the bug is\n // catastrophic, the operator sees the same message repeated and\n // can stop the run.)\n const reason = err instanceof Error ? 
err.message : String(err)\n errorTracker.addError(rowNumber, 'transform_threw', reason, undefined, row)\n if (!firstErrorByType.has('transform_threw')) {\n firstErrorByType.set('transform_threw', { rowNumber, message: reason })\n // PR #262 review (M-5): drop the `row` payload from the\n // log line — supplier feeds carry pricing + internal codes.\n // The full row is on `import_run.errorSummary.samples` for\n // post-mortem review by operators with view permission.\n logger?.warn(\n { importRunId, rowNumber, errorType: 'transform_threw', message: reason },\n 'imports.runner: FIRST_ERROR transform_threw',\n )\n }\n rowsFailed += 1\n continue\n }\n\n if (result.kind === 'skip') {\n rowsSkipped += 1\n continue\n }\n if (result.kind === 'error') {\n errorTracker.addError(\n rowNumber,\n result.error.errorType,\n result.error.message,\n result.error.field,\n row,\n )\n if (!firstErrorByType.has(result.error.errorType)) {\n firstErrorByType.set(result.error.errorType, {\n rowNumber,\n message: result.error.message,\n })\n logger?.warn(\n {\n importRunId,\n rowNumber,\n errorType: result.error.errorType,\n field: result.error.field,\n message: result.error.message,\n },\n `imports.runner: FIRST_ERROR ${result.error.errorType}`,\n )\n }\n rowsFailed += 1\n continue\n }\n\n pending.push({ id: result.id, doc: result.doc, rowNumber })\n if (pending.length >= batchSize) {\n await flush()\n }\n }\n\n await flush()\n\n const totalSec = (Date.now() - startedAt) / 1000\n logger?.info(\n {\n importRunId,\n rowsRead,\n rowsSucceeded,\n rowsFailed,\n rowsSkipped,\n batchesCompleted,\n totalSec: Math.round(totalSec * 10) / 10,\n rowsPerSecond: totalSec > 0 ? Math.round(rowsRead / totalSec) : 0,\n },\n 'imports.runner: finished',\n )\n\n return {\n rowsRead,\n rowsSucceeded,\n rowsFailed,\n rowsSkipped,\n batchesCompleted,\n errors: errorTracker.snapshot(),\n 
}\n}\n"],"mappings":"4IAmEA,MAAa,EAAuB,GACvB,EAAkC,EAY/C,IAAa,EAAb,KAA0B,CACxB,SAA4B,IAAI,IAChC,YACA,qBACA,kBAA4B,EAE5B,YAAY,EAA6B,EAAE,CAAE,CAC3C,KAAK,YAAc,EAAO,aAAA,GAC1B,KAAK,qBAAuB,EAAO,sBAAA,EAYrC,SACE,EACA,EACA,EACA,EACA,EACM,CAEN,IAAM,EAAY,GAAG,EAAU,GADd,GAAS,UACiB,GAAG,IAE1C,EAAU,KAAK,SAAS,IAAI,EAAU,CAC1C,GAAI,CAAC,EAAS,CACZ,GAAI,KAAK,SAAS,MAAQ,KAAK,YAAa,CAC1C,KAAK,mBAAqB,EAC1B,OAEF,EAAU,CACR,YACA,MAAO,GAAS,KAChB,UACA,MAAO,EACP,gBAAiB,EACjB,eAAgB,EAChB,QAAS,EAAE,CACZ,CACD,KAAK,SAAS,IAAI,EAAW,EAAQ,CAGvC,EAAQ,OAAS,EACjB,EAAQ,eAAiB,EACrB,EAAQ,QAAQ,OAAS,KAAK,sBAChC,EAAQ,QAAQ,KAAK,CAAE,YAAW,UAAS,CAAC,CAKhD,oBAA6B,CAC3B,IAAI,EAAM,EACV,IAAK,IAAM,KAAK,KAAK,SAAS,QAAQ,CAAE,GAAO,EAAE,MACjD,OAAO,EAIT,yBAAkC,CAChC,OAAO,KAAK,SAAS,KAQvB,0BAAmC,CACjC,OAAO,KAAK,kBAQd,gBAAiC,CAC/B,IAAM,EAAM,MAAM,KAAK,KAAK,SAAS,QAAQ,CAAC,CAAC,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAC1E,EAAc,EAAI,QAAQ,EAAK,IAAM,EAAM,EAAE,MAAO,EAAE,CAC5D,OAAO,EAAI,IAAK,IAAO,CACrB,UAAW,EAAE,UACb,MAAO,EAAE,MACT,QAAS,EAAE,QACX,MAAO,EAAE,MACT,gBAAiB,EAAE,gBACnB,eAAgB,EAAE,eAClB,QAAS,EAAE,QAAQ,OAAO,CAC1B,WAAY,EAAc,EAAK,EAAE,MAAQ,EAAe,IAAM,EAC/D,EAAE,CAQL,UAAiC,CAC/B,MAAO,CACL,YAAa,KAAK,oBAAoB,CACtC,iBAAkB,KAAK,yBAAyB,CAChD,kBAAmB,KAAK,kBACxB,SAAU,KAAK,gBAAgB,CAChC,GC9IL,eAAsB,EACpB,EACA,EAAmC,EAAE,CACpB,CACjB,GAAM,CAAE,YAAY,IAAS,EACvB,EAAS,EAAiB,EAAS,CACrC,EAAQ,EACR,EAAW,EACf,UAAW,IAAM,KAAS,EAAiC,CACzD,IAAK,IAAI,EAAI,EAAG,EAAI,EAAM,OAAQ,GAAK,EACjC,EAAM,KAAO,KAAe,GAAS,GAEvC,EAAM,OAAS,IAAG,EAAW,EAAM,EAAM,OAAS,IAAM,GAM9D,OADI,IAAa,GAAK,IAAa,KAAM,GAAS,GAC3C,EAAY,KAAK,IAAI,EAAG,EAAQ,EAAE,CAAG,EA4E9C,eAAuB,EAAW,EAA0D,CAC1F,GAAM,CACJ,WACA,YAAY,IACZ,YAAY,GACZ,UACA,mBAAmB,GACnB,QAAQ,IACN,EAUE,EAAmC,EAAU,MAAM,KAAK,EAAQ,CAAG,EAEnE,EAAS,EAAiB,EAAS,CACnC,EAAS,EAAO,KACpB,EAAM,CACJ,YACA,QAAS,EACT,IAAK,GACL,iBAAkB,GAElB,mBAAoB,EAMpB,QACD,CAAC,CACH,CAEG,EAAY,EAChB,GAAI,CACF,UAAW,IAAM,KAAU,EAExB,CACD,GAAa,EAOb,IAAM,EAA8B,EAAE,CACtC,GAAI,MAAM,QAAQ,EAAO,CAAE,CACzB,IAAM,EAAM,EACZ,IAAK,IAAI,EAAI,EAAG,EAAI,EAAI,OAAQ,GAAK,EACnC,EAAI,OAAO,EAAE,EAAI,EAAI,IAAM,QAG7B,IAAK,GAAM,CAAC,EAAG,KAAM,OAAO,QAAQ,EAAO,CACzC,EAAI,GAAK,GAAK,GAGlB,KAAM,CAAE,YAAW,MAAK,SAElB,CAIR,EAAO,SAAS,EC/KpB,MAAa,EAAqB,IA6GlC,eAAsB,EAAgB,EAA2D,CAC/F,GAAM,CACJ,cACA,WACA,SACA,YACA,OACA,WACA,UACA,YAAY,EACZ,aACA,WACA,SACA,eAAe,IAAI,EACnB,UACE,EAEJ,GAAI,EAAY,EACd,MAAU,MAAM,+BAA+B,EAAU,GAAG,CAG9D,IAAM,EAAY,KAAK,KAAK,CAC5B,GAAQ,KACN,CAAE,cAAa,WAAU,YAAW,UAAS,CAC7C,0BACD,CACD,IAAI,EAAW,EACX,EAAgB,EAChB,EAAa,EACb,EAAc,EACd,EAAmB,EAEnB,EAA+D,EAAE,CAE/D,EAAQ,SAA2B,CACvC,GAAI,EAAQ,SAAW,EAAG,OAC1B,IAAM,EAAQ,EACd,EAAU,EAAE,CACZ,IAAM,EAAiB,KAAK,KAAK,CAC7B,EACJ,GAAI,CACF,EAAS,MAAM,EAAiB,EAAU,CACxC,MAAO,EACP,KAAM,EAAM,KAAK,CAAE,KAAI,UAAW,CAAE,KAAI,MAAK,EAAE,CAC/C,GAAI,IAAW,IAAA,IAAa,CAAE,SAAQ,CACvC,CAAC,OACK,EAAK,CAQZ,IAAM,EAFJ,GAAQ,UACP,aAAe,QAAU,EAAI,OAAS,cAAgB,SAAS,KAAK,EAAI,QAAQ,GACvD,UAAY,sBAClC,EAAS,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CAC/D,IAAK,GAAM,CAAE,eAAe,EAC1B,EAAa,SAAS,EAAW,EAAW,EAAQ,IAAA,GAAW,KAAK,CAGtE,KADA,IAAc,EAAM,OACd,EAGR,GAAiB,EAAO,UACxB,GAAc,EAAO,SAAS,OAC9B,GAAoB,EAEpB,IAAM,EAAU,KAAK,KAAK,CAAG,EACvB,GAAc,KAAK,KAAK,CAAG,GAAa,IAe9C,GAdA,GAAQ,KACN,CACE,cACA,MAAO,EACP,UACA,UAAW,EAAM,OACjB,UAAW,EAAO,UAClB,OAAQ,EAAO,SAAS,OACxB,WACA,cAAe,EAAa,EAAI,KAAK,MAAM,EAAW,EAAW,CAAG,EACrE,CACD,gCACD,CAEG,EAAO,SAAS,OAAS,EAAG,CAK9B,IAAM,EAAO,IAAI,IAAI,EAAM,IAAK,GAAM,CAAC,EAAE,GAAI,EAAE,UAAU,CAAC,CAAC,CAC3D,IAAK,IAAM,KAAQ,EAAO,SAAU,CAClC,IAAM,EAAY,EAAK,IAAI,EAAK,GAAG,EAAI,GACvC,EAAa,SAAS,EAAW,EAAK,KAAM,EAAK,OAAQ,IAAA,GAAW,CAAE,GAAI,EAAK,GAAI,CAAC,EAIxF,GAAI,EAAY,CACd,IAAM,GAAkB,KAAK,KAAK,CAAG,GAAa,IAClD,EAAW,CACT,WACA,gBAC
A,aACA,cACA,mBACA,iBACA,cAAe,EAAiB,EAAI,EAAW,EAAiB,EAChE,sBAAuB,EAAa,yBAAyB,CAC9D,CAAC,GASA,EAAoB,EAMpB,EAAmB,IAAI,IAE7B,UAAW,GAAM,CAAE,YAAW,SAAS,EAAW,EAAK,CAAE,CAEvD,GADI,GAAQ,SACR,IAAa,IAAA,IAAa,GAAY,EAAU,MAMpD,GALA,GAAY,EAER,IAAa,GACf,GAAQ,KAAK,CAAE,cAAa,YAAW,WAAY,OAAO,KAAK,EAAI,CAAE,CAAE,mCAAmC,CAExG,EAAW,IAAsB,EAAG,CACtC,IAAM,GAAc,KAAK,KAAK,CAAG,GAAa,IAS9C,GAAQ,KACN,CACE,cACA,WACA,gBACA,aACA,cACA,QAAS,EAAQ,OACjB,WAAY,KAAK,MAAM,EAAa,GAAG,CAAG,GAC1C,cAAe,EAAa,EAAI,KAAK,MAAM,EAAW,EAAW,CAAG,EACpE,wBAAyB,EAAiB,KAC3C,CACD,8BACD,CAGH,IAAM,EAAwB,CAAE,cAAa,SAAQ,WAAU,YAAW,CACtE,EACJ,GAAI,CACF,EAAS,MAAM,EAAU,EAAK,EAAI,OAC3B,EAAK,CAKZ,IAAM,EAAS,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CAC/D,EAAa,SAAS,EAAW,kBAAmB,EAAQ,IAAA,GAAW,EAAI,CACtE,EAAiB,IAAI,kBAAkB,GAC1C,EAAiB,IAAI,kBAAmB,CAAE,YAAW,QAAS,EAAQ,CAAC,CAKvE,GAAQ,KACN,CAAE,cAAa,YAAW,UAAW,kBAAmB,QAAS,EAAQ,CACzE,8CACD,EAEH,GAAc,EACd,SAGF,GAAI,EAAO,OAAS,OAAQ,CAC1B,GAAe,EACf,SAEF,GAAI,EAAO,OAAS,QAAS,CAC3B,EAAa,SACX,EACA,EAAO,MAAM,UACb,EAAO,MAAM,QACb,EAAO,MAAM,MACb,EACD,CACI,EAAiB,IAAI,EAAO,MAAM,UAAU,GAC/C,EAAiB,IAAI,EAAO,MAAM,UAAW,CAC3C,YACA,QAAS,EAAO,MAAM,QACvB,CAAC,CACF,GAAQ,KACN,CACE,cACA,YACA,UAAW,EAAO,MAAM,UACxB,MAAO,EAAO,MAAM,MACpB,QAAS,EAAO,MAAM,QACvB,CACD,+BAA+B,EAAO,MAAM,YAC7C,EAEH,GAAc,EACd,SAGF,EAAQ,KAAK,CAAE,GAAI,EAAO,GAAI,IAAK,EAAO,IAAK,YAAW,CAAC,CACvD,EAAQ,QAAU,GACpB,MAAM,GAAO,CAIjB,MAAM,GAAO,CAEb,IAAM,GAAY,KAAK,KAAK,CAAG,GAAa,IAe5C,OAdA,GAAQ,KACN,CACE,cACA,WACA,gBACA,aACA,cACA,mBACA,SAAU,KAAK,MAAM,EAAW,GAAG,CAAG,GACtC,cAAe,EAAW,EAAI,KAAK,MAAM,EAAW,EAAS,CAAG,EACjE,CACD,2BACD,CAEM,CACL,WACA,gBACA,aACA,cACA,mBACA,OAAQ,EAAa,UAAU,CAChC"}
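The streaming contract documented in the bundled streaming.ts source above (empty cells arrive as `''`, the header counts as row 0, missing trailing cells only appear under `relaxColumnCount: true`) can be exercised roughly like this. This is a sketch only: the file path and the `sku` / `price` column names are placeholders, and the root-export import path is assumed rather than taken from this diff.

```ts
import { streamFeed } from '@murumets-ee/imports'

// Sketch: './feed.txt' and the column names are placeholders.
for await (const { rowNumber, row } of streamFeed({
  filePath: './feed.txt',
  delimiter: '\t', // package default
  hasHeader: true, // package default
})) {
  // Empty cells are '' by contract; only relax-column-count-truncated
  // trailing cells can be absent, hence the `?? ''` idiom from the JSDoc.
  const sku = row['sku'] ?? ''
  if (sku === '') continue
  console.log(rowNumber, sku, row['price'] ?? '')
}
```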
@@ -1,11 +1,26 @@
1
- import { n as FilePathResolver } from "./worker-DerGVTSI.mjs";
1
+ import { n as FilePathResolver } from "./worker-B7ADOFEV.mjs";
2
2
 
3
3
  //#region src/storage-resolver.d.ts
4
4
  /**
5
- * Read the storage object at `key` into a tmpfile and return the
6
- * path. Cleans up via `fs.unlink` after the run finishes — best-
7
- * effort, swallows `ENOENT` (the file may already be gone if the
8
- * worker crashed and the OS cleaned `/tmp`).
5
+ * Resolve a storage key to a path the streaming reader can open.
6
+ *
7
+ * Two paths:
8
+ *
9
+ * 1. **Local-disk shortcut** — when the configured adapter implements
10
+ * the optional `getLocalPath(key)` method (LocalDiskAdapter does;
11
+ * R2Adapter does not), the bytes are already on a filesystem
12
+ * visible to this process. Return that path directly with **no
13
+ * cleanup** (the file is the canonical storage object — the
14
+ * cleanup callback's job is to remove the *copy* we made into
15
+ * /tmp; with no copy there's nothing to remove). This eliminates
16
+ * the upload→R2→download→tmpfile round trip on single-VPS / dev
17
+ * setups where it's pure overhead — a 137 MB feed that took ~25s
18
+ * to round-trip completes in the time it takes to `fs.access` the
19
+ * path.
20
+ *
21
+ * 2. **Remote-storage path** (R2/S3/etc.) — download the object into
22
+ * a tmpfile and return that. The caller is required to await the
23
+ * `cleanup` so the tmpfile doesn't outlive the run.
9
24
  *
10
25
  * Lazy-imports `@murumets-ee/storage` + `@murumets-ee/core` so
11
26
  * deployments that don't ingest from remote storage never load the
@@ -1 +1 @@
1
- {"version":3,"file":"storage-resolver.d.mts","names":[],"sources":["../src/storage-resolver.ts"],"mappings":";;;;;;;;;;;;;cA6Ca,sBAAA,EAAwB,gBAAA"}
1
+ {"version":3,"file":"storage-resolver.d.mts","names":[],"sources":["../src/storage-resolver.ts"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;cA6Da,sBAAA,EAAwB,gBAAA"}
@@ -1,2 +1,2 @@
1
- import{promises as e}from"node:fs";import{tmpdir as t}from"node:os";import{join as n}from"node:path";const r=async r=>{let{createStorageClient:i}=await import(`@murumets-ee/storage`),{getStorageConfig:a}=await import(`@murumets-ee/storage/plugin`),{getApp:o}=await import(`@murumets-ee/core`),{body:s}=await i(a(),{app:o()}).download(r),c=Buffer.isBuffer(s)?s:Buffer.from(await new Response(s).arrayBuffer()),l=r.split(`/`).pop()??`feed`,u=n(t(),`imports-${crypto.randomUUID()}-${l}`);return await e.writeFile(u,c),{localPath:u,cleanup:async()=>{try{await e.unlink(u)}catch(e){if(e.code!==`ENOENT`)throw e}}}};export{r as storageResolveFilePath};
1
+ import{getImportsStorage as e}from"./storage.mjs";import{promises as t}from"node:fs";import{tmpdir as n}from"node:os";import{join as r}from"node:path";const i=async i=>{let a=await e(),o=await a.getAdapterLocalPath(i);if(o)return{localPath:o};let{body:s}=await a.download(i),c=Buffer.isBuffer(s)?s:Buffer.from(await new Response(s).arrayBuffer()),l=i.split(`/`).pop()??`feed`,u=r(n(),`imports-${crypto.randomUUID()}-${l}`);return await t.writeFile(u,c),{localPath:u,cleanup:async()=>{try{await t.unlink(u)}catch(e){if(e.code!==`ENOENT`)throw e}}}};export{i as storageResolveFilePath};
2
2
  //# sourceMappingURL=storage-resolver.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"storage-resolver.mjs","names":["fs"],"sources":["../src/storage-resolver.ts"],"sourcesContent":["/**\n * Storage-backed `resolveFilePath` for the imports plugin. Downloads\n * an object out of `@murumets-ee/storage` to a tmpfile so the queue\n * worker can stream it via `node:fs.createReadStream`.\n *\n * Wire into the imports plugin like this:\n *\n * ```ts\n * import { imports } from '@murumets-ee/imports/plugin'\n * import { storageResolveFilePath } from '@murumets-ee/imports/storage-resolver'\n *\n * imports({\n * esClient: () => esClient,\n * resolveFilePath: storageResolveFilePath,\n * })\n * ```\n *\n * Why this lives here rather than in the route or in the storage\n * package:\n * - The route shouldn't know about the consumer's runner; it just\n * persists a key on `import_run.filePath`.\n * - The storage package is generic (no opinion on imports).\n * - The imports worker is the natural download point: it owns the\n * run's lifecycle and can guarantee tmpfile cleanup.\n *\n * The helper is in a separate subpath so consumers that use a\n * different upload sink (local disk, S3 with a custom adapter, etc.)\n * don't pull in `@murumets-ee/storage` transitively.\n */\n\nimport { promises as fs } from 'node:fs'\nimport { tmpdir } from 'node:os'\nimport { join } from 'node:path'\nimport type { FilePathResolver } from './worker.js'\n\n/**\n * Read the storage object at `key` into a tmpfile and return the\n * path. Cleans up via `fs.unlink` after the run finishesbest-\n * effort, swallows `ENOENT` (the file may already be gone if the\n * worker crashed and the OS cleaned `/tmp`).\n *\n * Lazy-imports `@murumets-ee/storage` + `@murumets-ee/core` so\n * deployments that don't ingest from remote storage never load the\n * R2 client.\n */\nexport const storageResolveFilePath: FilePathResolver = async (storageKey) => {\n const { createStorageClient } = await import('@murumets-ee/storage')\n const { getStorageConfig } = await import('@murumets-ee/storage/plugin')\n const { getApp } = await import('@murumets-ee/core')\n\n const storage = createStorageClient(getStorageConfig(), { app: getApp() })\n const { body } = await storage.download(storageKey)\n\n // `DownloadResult.body` is `Buffer | ReadableStream<Uint8Array>` per\n // storage's adapter contract. Normalise to Buffer for the simple\n // tmpfile-write path; the carmaker feeds top out at ~tens of MB,\n // well within memory.\n const buffer = Buffer.isBuffer(body)\n ? body\n : Buffer.from(await new Response(body).arrayBuffer())\n\n // Suffix carries the basename hint (last storage-key segment) so\n // operator-readable temp paths help debugging without leaking the\n // full original filename through the FS.\n const suffix = storageKey.split('/').pop() ?? 'feed'\n const localPath = join(tmpdir(), `imports-${crypto.randomUUID()}-${suffix}`)\n await fs.writeFile(localPath, buffer)\n\n return {\n localPath,\n cleanup: async () => {\n try {\n await fs.unlink(localPath)\n } catch (err) {\n // ENOENT is fine — file already gone (OS cleanup, manual\n // intervention, etc.). 
Anything else, rethrow so the worker's\n // best-effort cleanup logs a warning.\n if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err\n }\n },\n }\n}\n"],"mappings":"qGA6CA,MAAa,EAA2C,KAAO,IAAe,CAC5E,GAAM,CAAE,uBAAwB,MAAM,OAAO,wBACvC,CAAE,oBAAqB,MAAM,OAAO,+BACpC,CAAE,UAAW,MAAM,OAAO,qBAG1B,CAAE,QAAS,MADD,EAAoB,GAAkB,CAAE,CAAE,IAAK,GAAQ,CAAE,CAC3C,CAAC,SAAS,EAAW,CAM7C,EAAS,OAAO,SAAS,EAAK,CAChC,EACA,OAAO,KAAK,MAAM,IAAI,SAAS,EAAK,CAAC,aAAa,CAAC,CAKjD,EAAS,EAAW,MAAM,IAAI,CAAC,KAAK,EAAI,OACxC,EAAY,EAAK,GAAQ,CAAE,WAAW,OAAO,YAAY,CAAC,GAAG,IAAS,CAG5E,OAFA,MAAMA,EAAG,UAAU,EAAW,EAAO,CAE9B,CACL,YACA,QAAS,SAAY,CACnB,GAAI,CACF,MAAMA,EAAG,OAAO,EAAU,OACnB,EAAK,CAIZ,GAAK,EAA8B,OAAS,SAAU,MAAM,IAGjE"}
1
+ {"version":3,"file":"storage-resolver.mjs","names":["fs"],"sources":["../src/storage-resolver.ts"],"sourcesContent":["/**\n * Storage-backed `resolveFilePath` for the imports plugin. Downloads\n * an object out of `@murumets-ee/storage` to a tmpfile so the queue\n * worker can stream it via `node:fs.createReadStream`.\n *\n * Wire into the imports plugin like this:\n *\n * ```ts\n * import { imports } from '@murumets-ee/imports/plugin'\n * import { storageResolveFilePath } from '@murumets-ee/imports/storage-resolver'\n *\n * imports({\n * esClient: () => esClient,\n * resolveFilePath: storageResolveFilePath,\n * })\n * ```\n *\n * Why this lives here rather than in the route or in the storage\n * package:\n * - The route shouldn't know about the consumer's runner; it just\n * persists a key on `import_run.filePath`.\n * - The storage package is generic (no opinion on imports).\n * - The imports worker is the natural download point: it owns the\n * run's lifecycle and can guarantee tmpfile cleanup.\n *\n * The helper is in a separate subpath so consumers that use a\n * different upload sink (local disk, S3 with a custom adapter, etc.)\n * don't pull in `@murumets-ee/storage` transitively.\n */\n\nimport { promises as fs } from 'node:fs'\nimport { tmpdir } from 'node:os'\nimport { join } from 'node:path'\nimport { getImportsStorage } from './storage.js'\nimport type { FilePathResolver } from './worker.js'\n\n/**\n * Resolve a storage key to a path the streaming reader can open.\n *\n * Two paths:\n *\n * 1. **Local-disk shortcut** — when the configured adapter implements\n * the optional `getLocalPath(key)` method (LocalDiskAdapter does;\n * R2Adapter does not), the bytes are already on a filesystem\n * visible to this process. Return that path directly with **no\n * cleanup** (the file is the canonical storage object — the\n * cleanup callback's job is to remove the *copy* we made into\n * /tmp; with no copy there's nothing to remove). This eliminates\n * the upload→R2→download→tmpfile round trip on single-VPS / dev\n * setups where it's pure overhead a 137 MB feed that took ~25s\n * to round-trip completes in the time it takes to `fs.access` the\n * path.\n *\n * 2. **Remote-storage path** (R2/S3/etc.) — download the object into\n * a tmpfile and return that. The caller is required to await the\n * `cleanup` so the tmpfile doesn't outlive the run.\n *\n * Lazy-imports `@murumets-ee/storage` + `@murumets-ee/core` so\n * deployments that don't ingest from remote storage never load the\n * R2 client.\n */\nexport const storageResolveFilePath: FilePathResolver = async (storageKey) => {\n // Consult the imports-specific storage handle (registered by\n // `imports({ storage: ... })`). 
Falls back to the global\n // `getStorageConfig()` client when no per-imports factory was\n // wired — preserves the original single-storage behaviour for\n // consumers that haven't migrated yet.\n const storage = await getImportsStorage()\n\n // Local-disk shortcut: the adapter is on the same filesystem.\n // `getAdapterLocalPath` is the public hook on `StorageClient` that\n // forwards to `adapter.getLocalPath(key)` when the adapter\n // implements it; absence of that method is the signal to fall\n // through to the remote-download path below.\n const directPath = await storage.getAdapterLocalPath(storageKey)\n if (directPath) {\n return { localPath: directPath }\n }\n\n const { body } = await storage.download(storageKey)\n\n // `DownloadResult.body` is `Buffer | ReadableStream<Uint8Array>` per\n // storage's adapter contract. Normalise to Buffer for the simple\n // tmpfile-write path; the carmaker feeds top out at ~tens of MB,\n // well within memory.\n const buffer = Buffer.isBuffer(body)\n ? body\n : Buffer.from(await new Response(body).arrayBuffer())\n\n // Suffix carries the basename hint (last storage-key segment) so\n // operator-readable temp paths help debugging without leaking the\n // full original filename through the FS.\n const suffix = storageKey.split('/').pop() ?? 'feed'\n const localPath = join(tmpdir(), `imports-${crypto.randomUUID()}-${suffix}`)\n await fs.writeFile(localPath, buffer)\n\n return {\n localPath,\n cleanup: async () => {\n try {\n await fs.unlink(localPath)\n } catch (err) {\n // ENOENT is fine — file already gone (OS cleanup, manual\n // intervention, etc.). Anything else, rethrow so the worker's\n // best-effort cleanup logs a warning.\n if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err\n }\n },\n }\n}\n"],"mappings":"uJA6DA,MAAa,EAA2C,KAAO,IAAe,CAM5E,IAAM,EAAU,MAAM,GAAmB,CAOnC,EAAa,MAAM,EAAQ,oBAAoB,EAAW,CAChE,GAAI,EACF,MAAO,CAAE,UAAW,EAAY,CAGlC,GAAM,CAAE,QAAS,MAAM,EAAQ,SAAS,EAAW,CAM7C,EAAS,OAAO,SAAS,EAAK,CAChC,EACA,OAAO,KAAK,MAAM,IAAI,SAAS,EAAK,CAAC,aAAa,CAAC,CAKjD,EAAS,EAAW,MAAM,IAAI,CAAC,KAAK,EAAI,OACxC,EAAY,EAAK,GAAQ,CAAE,WAAW,OAAO,YAAY,CAAC,GAAG,IAAS,CAG5E,OAFA,MAAMA,EAAG,UAAU,EAAW,EAAO,CAE9B,CACL,YACA,QAAS,SAAY,CACnB,GAAI,CACF,MAAMA,EAAG,OAAO,EAAU,OACnB,EAAK,CAIZ,GAAK,EAA8B,OAAS,SAAU,MAAM,IAGjE"}
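From the caller's side, the two resolution paths described above differ only in whether a `cleanup` callback comes back. A sketch of the consuming pattern, with a made-up storage key; the try/finally shape is an illustration, not code from this package:

```ts
import { storageResolveFilePath } from '@murumets-ee/imports/storage-resolver'

// 'imports/feeds/example.txt' is a placeholder key.
const { localPath, cleanup } = await storageResolveFilePath('imports/feeds/example.txt')
try {
  // stream localPath here (streamFeed / runImport)
} finally {
  // Local-disk shortcut returns no cleanup (the file IS the storage object);
  // the remote-download path returns one that unlinks the tmpfile.
  await cleanup?.()
}
```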
@@ -0,0 +1,31 @@
1
+ import { StorageClient } from "@murumets-ee/storage";
2
+
3
+ //#region src/storage.d.ts
4
+ /** Lazy storage-client factory. May be sync or async. */
5
+ type ImportsStorageFactory = () => StorageClient | Promise<StorageClient>;
6
+ /**
7
+ * Register a factory for the imports-only `StorageClient`. Called by
8
+ * `imports({ storage: ... })` during plugin init. Subsequent calls
9
+ * overwrite — the last `imports()` plugin instance wins, which
10
+ * matches every other plugin's "second registration replaces first"
11
+ * behaviour.
12
+ *
13
+ * Resets the resolved-client cache so a re-registration in dev (HMR
14
+ * re-evaluating `lumi.config.ts`) doesn't keep handing out a stale
15
+ * client built from the previous factory.
16
+ */
17
+ declare function registerImportsStorageFactory(factory: ImportsStorageFactory): void;
18
+ /**
19
+ * Resolve the imports `StorageClient`. Returns a cached promise after
20
+ * the first call within a process. When no factory was registered (no
21
+ * `imports({ storage })` config), falls back to the global storage
22
+ * default — keeps existing consumers working without a config bump.
23
+ */
24
+ declare function getImportsStorage(): Promise<StorageClient>;
25
+ /**
26
+ * @internal — tests reset the registration to a clean state.
27
+ */
28
+ declare function _resetImportsStorageRegistration(): void;
29
+ //#endregion
30
+ export { ImportsStorageFactory, _resetImportsStorageRegistration, getImportsStorage, registerImportsStorageFactory };
31
+ //# sourceMappingURL=storage.d.mts.map
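For orientation, the new storage handle declared above could be exercised directly as in the sketch below. The `@murumets-ee/imports/storage` subpath is an assumption (only `dist/storage.mjs` is visible in this diff), and in normal use the register call is made for you by `imports({ storage: ... })` during plugin init rather than by application code.

```ts
import { createStorageClient } from '@murumets-ee/storage'
import { getStorageConfig } from '@murumets-ee/storage/plugin'
import { getApp } from '@murumets-ee/core'
// Subpath assumed; not confirmed by this diff.
import { registerImportsStorageFactory, getImportsStorage } from '@murumets-ee/imports/storage'

// Register an imports-only factory. Here it simply mirrors the global
// default, which is what getImportsStorage() would fall back to anyway.
registerImportsStorageFactory(() =>
  createStorageClient(getStorageConfig(), { app: getApp() }),
)

// Cached promise after the first call within the process.
const storage = await getImportsStorage()
void storage
```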
@@ -0,0 +1 @@
1
+ {"version":3,"file":"storage.d.mts","names":[],"sources":["../src/storage.ts"],"mappings":";;;;KA2BY,qBAAA,SAA8B,aAAA,GAAgB,OAAA,CAAQ,aAAA;AA+DlE;;;;;;;;;;;AAAA,iBAhCgB,6BAAA,CAA8B,OAAA,EAAS,qBAAA;;;;;;;iBAWvC,iBAAA,CAAA,GAAqB,OAAA,CAAQ,aAAA;;;;iBAqB7B,gCAAA,CAAA"}
@@ -0,0 +1,2 @@
1
+ const e=Symbol.for(`@murumets-ee/imports:storage-factory`),t=Symbol.for(`@murumets-ee/imports:storage-promise`);function n(e){return globalThis[e]}function r(e,t){globalThis[e]=t}function i(n){r(e,n),r(t,void 0)}function a(){let i=n(t);if(i)return i;let a=n(e),o=Promise.resolve(a?a():s()).catch(e=>{throw r(t,void 0),e});return r(t,o),o}function o(){r(e,void 0),r(t,void 0)}async function s(){let{createStorageClient:e}=await import(`@murumets-ee/storage`),{getStorageConfig:t}=await import(`@murumets-ee/storage/plugin`),{getApp:n}=await import(`@murumets-ee/core`);return e(t(),{app:n()})}export{o as _resetImportsStorageRegistration,a as getImportsStorage,i as registerImportsStorageFactory};
2
+ //# sourceMappingURL=storage.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"storage.mjs","names":[],"sources":["../src/storage.ts"],"sourcesContent":["/**\n * Per-feature storage handle for the imports flow.\n *\n * Imports lets the consumer plug in a dedicated `StorageClient` (e.g.\n * a local-disk adapter for single-VPS deployments where worker + web\n * share a filesystem and the R2 round-trip is pure overhead) without\n * disturbing the global `getStorageConfig()` default that media,\n * ticketing attachments, and friends keep using.\n *\n * Wiring: the imports plugin's `init` hook stashes the consumer's\n * `storage` factory on a `globalThis` Symbol; `getImportsStorage()`\n * reads it back and caches the resolved client. When no factory is\n * registered (the legacy back-compat path), the function falls\n * through to `createStorageClient(getStorageConfig())` — so existing\n * consumers that only ever wired one storage layer continue to work\n * unchanged.\n *\n * Why a Symbol on `globalThis` and not module state: matches the\n * `@murumets-ee/storage/plugin` pattern. Turbopack can duplicate the\n * same module across chunks, so module-level singletons can leak into\n * separate-instance pairs that disagree about which adapter to use.\n * The Symbol survives that.\n */\n\nimport type { StorageClient } from '@murumets-ee/storage'\n\n/** Lazy storage-client factory. May be sync or async. */\nexport type ImportsStorageFactory = () => StorageClient | Promise<StorageClient>\n\nconst IMPORTS_STORAGE_FACTORY_KEY = Symbol.for('@murumets-ee/imports:storage-factory')\n// Cache the in-flight Promise rather than the resolved client. Two\n// concurrent first-callers both miss the cache, but only the first\n// creates a Promise; the others await the same one. PR #262 review\n// (M-9): caching the resolved value has a TOCTOU window where two\n// callers each invoke the factory and the loser's client is\n// silently discarded — but any in-flight use that captured the\n// loser keeps reading/writing through it.\nconst IMPORTS_STORAGE_PROMISE_KEY = Symbol.for('@murumets-ee/imports:storage-promise')\n\nfunction readGlobal<T>(key: symbol): T | undefined {\n return (globalThis as Record<symbol, unknown>)[key] as T | undefined\n}\n\nfunction writeGlobal(key: symbol, value: unknown): void {\n ;(globalThis as Record<symbol, unknown>)[key] = value\n}\n\n/**\n * Register a factory for the imports-only `StorageClient`. Called by\n * `imports({ storage: ... })` during plugin init. Subsequent calls\n * overwrite — the last `imports()` plugin instance wins, which\n * matches every other plugin's \"second registration replaces first\"\n * behaviour.\n *\n * Resets the resolved-client cache so a re-registration in dev (HMR\n * re-evaluating `lumi.config.ts`) doesn't keep handing out a stale\n * client built from the previous factory.\n */\nexport function registerImportsStorageFactory(factory: ImportsStorageFactory): void {\n writeGlobal(IMPORTS_STORAGE_FACTORY_KEY, factory)\n writeGlobal(IMPORTS_STORAGE_PROMISE_KEY, undefined)\n}\n\n/**\n * Resolve the imports `StorageClient`. Returns a cached promise after\n * the first call within a process. 
When no factory was registered (no\n * `imports({ storage })` config), falls back to the global storage\n * default — keeps existing consumers working without a config bump.\n */\nexport function getImportsStorage(): Promise<StorageClient> {\n const cached = readGlobal<Promise<StorageClient>>(IMPORTS_STORAGE_PROMISE_KEY)\n if (cached) return cached\n\n const factory = readGlobal<ImportsStorageFactory>(IMPORTS_STORAGE_FACTORY_KEY)\n const promise = Promise.resolve(factory ? factory() : defaultStorage()).catch(\n (err: unknown): never => {\n // Allow a retry on the next call if the first build failed (e.g.\n // missing env, transient app-not-ready). Without this, a one-off\n // failure pins a rejected promise into the cache forever.\n writeGlobal(IMPORTS_STORAGE_PROMISE_KEY, undefined)\n throw err\n },\n )\n writeGlobal(IMPORTS_STORAGE_PROMISE_KEY, promise)\n return promise\n}\n\n/**\n * @internal — tests reset the registration to a clean state.\n */\nexport function _resetImportsStorageRegistration(): void {\n writeGlobal(IMPORTS_STORAGE_FACTORY_KEY, undefined)\n writeGlobal(IMPORTS_STORAGE_PROMISE_KEY, undefined)\n}\n\n/**\n * Back-compat fallback — build a storage client from the global\n * `getStorageConfig()` (the old single-storage shape). Lazy-imports so\n * deployments that fully wire imports to a different adapter never\n * load the default factory's transitive dependencies.\n */\nasync function defaultStorage(): Promise<StorageClient> {\n const { createStorageClient } = await import('@murumets-ee/storage')\n const { getStorageConfig } = await import('@murumets-ee/storage/plugin')\n const { getApp } = await import('@murumets-ee/core')\n return createStorageClient(getStorageConfig(), { app: getApp() })\n}\n"],"mappings":"AA6BA,MAAM,EAA8B,OAAO,IAAI,uCAAuC,CAQhF,EAA8B,OAAO,IAAI,uCAAuC,CAEtF,SAAS,EAAc,EAA4B,CACjD,OAAQ,WAAuC,GAGjD,SAAS,EAAY,EAAa,EAAsB,CACpD,WAAuC,GAAO,EAclD,SAAgB,EAA8B,EAAsC,CAClF,EAAY,EAA6B,EAAQ,CACjD,EAAY,EAA6B,IAAA,GAAU,CASrD,SAAgB,GAA4C,CAC1D,IAAM,EAAS,EAAmC,EAA4B,CAC9E,GAAI,EAAQ,OAAO,EAEnB,IAAM,EAAU,EAAkC,EAA4B,CACxE,EAAU,QAAQ,QAAQ,EAAU,GAAS,CAAG,GAAgB,CAAC,CAAC,MACrE,GAAwB,CAKvB,MADA,EAAY,EAA6B,IAAA,GAAU,CAC7C,GAET,CAED,OADA,EAAY,EAA6B,EAAQ,CAC1C,EAMT,SAAgB,GAAyC,CACvD,EAAY,EAA6B,IAAA,GAAU,CACnD,EAAY,EAA6B,IAAA,GAAU,CASrD,eAAe,GAAyC,CACtD,GAAM,CAAE,uBAAwB,MAAM,OAAO,wBACvC,CAAE,oBAAqB,MAAM,OAAO,+BACpC,CAAE,UAAW,MAAM,OAAO,qBAChC,OAAO,EAAoB,GAAkB,CAAE,CAAE,IAAK,GAAQ,CAAE,CAAC"}
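The "cache the in-flight promise, not the resolved value" decision called out in the comments above (PR #262 review, M-9) generalises beyond this package. A minimal standalone sketch of the same shape, with generic names that are not part of this package's API:

```ts
// Generic illustration only, not this package's code.
type Client = { close(): Promise<void> }

let cached: Promise<Client> | undefined

async function buildClient(): Promise<Client> {
  // Stand-in for the real factory / fallback.
  return { close: async () => {} }
}

function getClient(): Promise<Client> {
  if (cached) return cached
  // Concurrent first callers all receive this same promise, so only one
  // factory invocation can ever "win"; no client is silently discarded.
  cached = buildClient().catch((err) => {
    cached = undefined // transient failure: let the next caller retry
    throw err
  })
  return cached
}
```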
@@ -116,4 +116,4 @@ declare function getTransformRegistry(): TransformRegistry;
116
116
  declare function registerImportTransform<TDoc>(name: TransformName, transform: RowTransform<TDoc>): void;
117
117
  //#endregion
118
118
  export { TransformName as a, registerImportTransform as c, TransformContext as i, RowResult as n, TransformRegistry as o, RowTransform as r, getTransformRegistry as s, RowError as t };
119
- //# sourceMappingURL=transform-D_uhdLeo.d.mts.map
119
+ //# sourceMappingURL=transform-CqCKV88O.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"transform-D_uhdLeo.d.mts","names":[],"sources":["../src/transform.ts"],"mappings":";;AA0BA;;;;;AAGA;;;;;;;;;;;AAqBA;;;;;;;;KAxBY,aAAA;;UAGK,gBAAA;EAwBX;EAtBJ,WAAA;EAsB0B;EApB1B,MAAA,EAAQ,MAAA;EAoB0B;EAlBlC,QAAA;EAqBuB;EAnBvB,SAAA;AAAA;;;;;;AA8BF;;;;;KAjBY,SAAA;EACN,IAAA;EAAiB,EAAA;EAAY,GAAA,EAAK,IAAA;AAAA;EAClC,IAAA;EAAc,MAAA;AAAA;EACd,IAAA;EAAe,KAAA,EAAO,QAAA;AAAA;;UAGX,QAAA;EACf,SAAA;EACA,OAAA;EACA,KAAA;AAAA;;;;;;UAQe,YAAA;EAAA,CACd,MAAA,EAAQ,MAAA,kBAAwB,GAAA,EAAK,gBAAA,GAAmB,OAAA,CAAQ,SAAA,CAAU,IAAA;AAAA;;;;;;;;;;;;;;;;;;;cAqBhE,iBAAA;EAAA,iBACM,OAAA;EAgCZ;;AAyBP;;;EAlDE,QAAA,MAAA,CAAe,IAAA,EAAM,aAAA,EAAe,SAAA,EAAW,YAAA,CAAa,IAAA;EAkDL;EAxCvD,GAAA,CAAI,IAAA,EAAM,aAAA,GAAgB,YAAA;EAuDW;EAlDrC,GAAA,CAAI,IAAA,EAAM,aAAA;EAmDJ;EA9CN,IAAA,CAAA,GAAQ,aAAA;EA+CG;EA1CX,KAAA,CAAA;AAAA;;iBAyBc,oBAAA,CAAA,GAAwB,iBAAA;;;;;;iBAexB,uBAAA,MAAA,CACd,IAAA,EAAM,aAAA,EACN,SAAA,EAAW,YAAA,CAAa,IAAA"}
1
+ {"version":3,"file":"transform-CqCKV88O.d.mts","names":[],"sources":["../src/transform.ts"],"mappings":";;AA0BA;;;;;AAGA;;;;;;;;;;;AAqBA;;;;;;;;KAxBY,aAAA;;UAGK,gBAAA;EAwBX;EAtBJ,WAAA;EAsB0B;EApB1B,MAAA,EAAQ,MAAA;EAoB0B;EAlBlC,QAAA;EAqBuB;EAnBvB,SAAA;AAAA;;;;;;AA8BF;;;;;KAjBY,SAAA;EACN,IAAA;EAAiB,EAAA;EAAY,GAAA,EAAK,IAAA;AAAA;EAClC,IAAA;EAAc,MAAA;AAAA;EACd,IAAA;EAAe,KAAA,EAAO,QAAA;AAAA;;UAGX,QAAA;EACf,SAAA;EACA,OAAA;EACA,KAAA;AAAA;;;;;;UAQe,YAAA;EAAA,CACd,MAAA,EAAQ,MAAA,kBAAwB,GAAA,EAAK,gBAAA,GAAmB,OAAA,CAAQ,SAAA,CAAU,IAAA;AAAA;;;;;;;;;;;;;;;;;;;cAqBhE,iBAAA;EAAA,iBACM,OAAA;EAgCZ;;AAyBP;;;EAlDE,QAAA,MAAA,CAAe,IAAA,EAAM,aAAA,EAAe,SAAA,EAAW,YAAA,CAAa,IAAA;EAkDL;EAxCvD,GAAA,CAAI,IAAA,EAAM,aAAA,GAAgB,YAAA;EAuDW;EAlDrC,GAAA,CAAI,IAAA,EAAM,aAAA;EAmDJ;EA9CN,IAAA,CAAA,GAAQ,aAAA;EA+CG;EA1CX,KAAA,CAAA;AAAA;;iBAyBc,oBAAA,CAAA,GAAwB,iBAAA;;;;;;iBAexB,uBAAA,MAAA,CACd,IAAA,EAAM,aAAA,EACN,SAAA,EAAW,YAAA,CAAa,IAAA"}
@@ -1,4 +1,4 @@
1
- import { r as RowTransform } from "./transform-D_uhdLeo.mjs";
1
+ import { r as RowTransform } from "./transform-CqCKV88O.mjs";
2
2
  import { PartsDocument } from "@murumets-ee/search-elasticsearch";
3
3
  import { z } from "zod";
4
4
 
@@ -1,8 +1,8 @@
1
- import { o as TransformRegistry, r as RowTransform } from "./transform-D_uhdLeo.mjs";
1
+ import { o as TransformRegistry, r as RowTransform } from "./transform-CqCKV88O.mjs";
2
2
  import * as _$_murumets_ee_entity0 from "@murumets-ee/entity";
3
3
  import { EsClientLike } from "@murumets-ee/search-elasticsearch";
4
4
  import { Logger } from "@murumets-ee/core";
5
- import { JobDefinition } from "@murumets-ee/queue/client";
5
+ import { JobDefinition, QueueTerminalNotifier } from "@murumets-ee/queue/client";
6
6
  import { z } from "zod";
7
7
  import { AdminClient } from "@murumets-ee/entity/admin";
8
8
 
@@ -214,6 +214,30 @@ interface ErrorTrackerSnapshot {
214
214
  * the file. That keeps transform implementations free of I/O concerns
215
215
  * and makes them trivially unit-testable with a fixture row map.
216
216
  */
217
+ /**
218
+ * Pre-pass: count data rows in a delimited file (excluding the header
219
+ * row when `hasHeader` is true; default `true` to match `streamFeed`).
220
+ *
221
+ * Implementation: stream the file as raw bytes and tally `\n`. NOT a
222
+ * full parse — escaped newlines inside quoted fields would over-count.
223
+ * That's acceptable for the supplier-feed use case because we default
224
+ * `quote: false` (every `\n` IS a row boundary). For genuine RFC-4180
225
+ * CSV inputs with embedded newlines, treat the result as an upper
226
+ * bound; the actual parsed row count from `streamFeed` is authoritative.
227
+ *
228
+ * Why bytes-only and not csv-parse: a 16 MB feed counts in ~50 ms with
229
+ * `\n` tally vs ~500 ms with csv-parse parsing. For 500 MB the gap
230
+ * matters — pre-pass needs to be a small fraction of the actual import
231
+ * or the operator perceives the run as "stuck on pre-pass" with no UI.
232
+ *
233
+ * Returns the number of DATA rows (header excluded when present).
234
+ * Empty trailing lines (no `\n` after the final row, or a single
235
+ * trailing `\n`) are handled identically to how csv-parse treats them
236
+ * with `skip_empty_lines: true`.
237
+ */
238
+ declare function countDataRows(filePath: string, options?: {
239
+ hasHeader?: boolean;
240
+ }): Promise<number>;
217
241
  interface StreamFeedOptions {
218
242
  /** Path to the file on disk. The PoC uploads land on local disk; S3-keyed reads come later. */
219
243
  filePath: string;
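A short sketch of the pre-pass wiring the `countDataRows` JSDoc above describes: count first, treat the result as an optional upper bound, and never let a pre-pass failure sink the run. The file path is a placeholder and the root-export import path is assumed.

```ts
import { countDataRows } from '@murumets-ee/imports'

let totalRows: number | undefined
try {
  // './feed.txt' is a placeholder path.
  totalRows = await countDataRows('./feed.txt', { hasHeader: true })
} catch {
  // Pre-pass failure is non-fatal; the run just proceeds without a percent bar.
  totalRows = undefined
}
// With quote: false every `\n` is a row boundary, so the count is exact;
// for quoted CSV with embedded newlines it is only an upper bound and the
// parsed count from streamFeed stays authoritative.
```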
@@ -242,6 +266,21 @@ interface StreamFeedOptions {
242
266
  * parser error so the transform isn't silently fed truncated data.
243
267
  */
244
268
  relaxColumnCount?: boolean;
269
+ /**
270
+ * Forward to `csv-parse` `quote`. The character (or `false`) that
271
+ * encloses fields containing the delimiter. Default `false` — most
272
+ * supplier feeds in this package's target use case are tab-delimited
273
+ * exports that contain literal `"` characters in description fields
274
+ * (e.g. `LIVRE "INVERSION D'IMAGE"` from the Mercedes carmaker feed)
275
+ * with NO quote-as-field-wrapper convention. csv-parse's library
276
+ * default `'"'` then misreads the literal `"` as an opening quote
277
+ * and rejects the row with `Invalid Opening Quote`.
278
+ *
279
+ * Pass `'"'` (or any character) to opt back into RFC-4180-style
280
+ * CSV parsing for genuine quoted-CSV inputs. Pass `'\''` for
281
+ * single-quoted dialects.
282
+ */
283
+ quote?: string | false;
245
284
  }
246
285
  interface StreamFeedRow {
247
286
  /** 1-based row number. Header (when present) is row 0; first data row is row 1. */
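The two quoting modes contrasted in the `quote` JSDoc above, as compact option objects (file paths are placeholders, root-export import path assumed):

```ts
import type { StreamFeedOptions } from '@murumets-ee/imports'

// Default mode: tab-delimited supplier export where `"` is literal data.
const supplierFeed: StreamFeedOptions = { filePath: './carmaker.txt' } // quote defaults to false

// Genuine RFC-4180 CSV: opt back into quote handling explicitly.
const rfcCsvFeed: StreamFeedOptions = { filePath: './export.csv', delimiter: ',', quote: '"' }
```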
@@ -305,6 +344,25 @@ interface RunImportOptions<TDoc> {
305
344
  signal?: AbortSignal;
306
345
  /** Optional ErrorTracker config (caps). Default: top-50 patterns × 5 samples. */
307
346
  errorTracker?: ErrorTracker;
347
+ /**
348
+ * Optional structural logger for per-batch telemetry. Receives
349
+ * `{ batch, ms, succeeded, failed, rowsRead, rowsPerSecond }` after
350
+ * every flush, plus a one-shot `started` and `finished` event. The
351
+ * worker forwards `app.logger.child({ pkg: 'imports' })`; tests
352
+ * leave it undefined.
353
+ *
354
+ * Why: without per-batch visibility, a stalled run looks identical
355
+ * to "first batch in progress" forever — the only feedback is the
356
+ * queue's `progress` jsonb (which writes after each batch) and the
357
+ * eventual `succeeded`/`failed` job event. Adding this lets the
358
+ * operator tail the worker log and immediately see whether ES is
359
+ * rate-limiting, the transform is rejecting every row, or the
360
+ * stream is actually progressing.
361
+ */
362
+ logger?: {
363
+ info: (data: Record<string, unknown>, msg: string) => void;
364
+ warn: (data: Record<string, unknown>, msg: string) => void;
365
+ };
308
366
  }
309
367
  /**
310
368
  * Progress payload written to `toolkit_jobs.progress` after every batch.
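The `logger` option documented above only requires the two-method shape; a console-backed stand-in of the kind a test might pass (this is not the `app.logger.child({ pkg: 'imports' })` instance the worker forwards):

```ts
// Minimal object satisfying RunImportOptions['logger'].
const testLogger = {
  info: (data: Record<string, unknown>, msg: string) => console.log(msg, data),
  warn: (data: Record<string, unknown>, msg: string) => console.warn(msg, data),
}
// Pass as `logger: testLogger` in the RunImportOptions given to runImport.
```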
@@ -323,6 +381,13 @@ interface ImportRunProgress {
323
381
  rowsPerSecond: number;
324
382
  /** Distinct error patterns currently held by the tracker. Saturates at the cap. */
325
383
  distinctErrorPatterns: number;
384
+ /**
385
+ * Total data-row count from a pre-pass scan, when the worker computed one.
386
+ * Lets the UI render `processed / total` percent. Absent when the worker
387
+ * skipped the count (file too large for the count budget, count failed,
388
+ * etc.) — UI must treat it as optional.
389
+ */
390
+ totalRows?: number;
326
391
  }
327
392
  /**
328
393
  * Final result returned by {@link runImport}. The handler writes these
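Because `totalRows` is optional, a consuming UI has to guard it before rendering a percentage. A small illustrative helper, not part of this package:

```ts
import type { ImportRunProgress } from '@murumets-ee/imports'

// Returns undefined when the worker skipped or failed the pre-pass count.
function percentComplete(progress: ImportRunProgress): number | undefined {
  if (progress.totalRows === undefined || progress.totalRows === 0) return undefined
  // Clamp defensively; totalRows is an upper bound, not a promise.
  return Math.min(100, Math.round((progress.rowsRead / progress.totalRows) * 100))
}
```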
@@ -462,6 +527,49 @@ interface RunImportHandlerConfig {
462
527
  * `@murumets-ee/core`).
463
528
  */
464
529
  declare function createRunImportHandler(config: RunImportHandlerConfig): (job: ImportsRunJobContext) => Promise<void>;
530
+ interface ImportsRunDeadListenerConfig {
531
+ importRuns: ImportRunClient;
532
+ /** Optional structured logger. */
533
+ logger?: Logger;
534
+ }
535
+ /**
536
+ * Build a {@link QueueTerminalNotifier} that mirrors `imports:run`
537
+ * dead-job events into `import_run.status='failed'`.
538
+ *
539
+ * **Why this exists.** The `imports:run` handler's normal failure path
540
+ * already calls `markFailed` before re-throwing, so a synchronous
541
+ * handler error reaches the operator-visible `failed` status without
542
+ * help from anyone. The case THIS listener catches is the one
543
+ * `markFailed` never gets to: the worker process is killed mid-handler
544
+ * (OOM, container restart, kill -9), the queue's lock-recovery sweep
545
+ * eventually reclaims the row, and the queue ultimately marks the job
546
+ * `dead` (defaultRetries: 0 + repeated process death = dead-letter
547
+ * with `import_run.status` still `running`). Without this listener
548
+ * the row sits in the operator's Active panel forever.
549
+ *
550
+ * **Generic primitive.** This is a thin subscriber over the queue's
551
+ * multi-subscriber `QueueTerminalNotifier` registry — the same shape
552
+ * graphile-worker (`job:failed`) and BullMQ (`failed`) expose. Other
553
+ * domain plugins that mirror queue state (ticketing-reply, future
554
+ * exports/reports/...) can register their own listeners exactly the
555
+ * same way without coordinating via a per-domain sweep job.
556
+ *
557
+ * **Filtering.** The listener is invoked for EVERY dead event in the
558
+ * process. We early-return on any `jobType` other than `imports:run`
559
+ * so the per-event cost on unrelated jobs is a single property read.
560
+ *
561
+ * **Idempotency.** A dead event delivered after the row has already
562
+ * reached its own terminal state (succeeded / failed / cancelled) is
563
+ * ignored — the only transition we make is `running → failed`. This
564
+ * keeps a late replay or out-of-order delivery from clobbering a
565
+ * row whose worker reached `markFailed` faster than the queue's
566
+ * dead-letter dispatch.
567
+ *
568
+ * The handler is wrapped in `runAsCli` so AdminClient calls inside
569
+ * have a synthetic CLI admin context — same pattern as
570
+ * {@link createRunImportHandler}.
571
+ */
572
+ declare function createImportsRunDeadListener(config: ImportsRunDeadListenerConfig): QueueTerminalNotifier;
465
573
  //#endregion
466
- export { IMPORT_RUN_STATUSES as C, ImportRunStatus as E, ErrorTrackerConfig as S, ImportRunClient as T, DEFAULT_MAX_PATTERNS as _, RunImportHandlerConfig as a, ErrorSample as b, importsRunJobPayloadSchema as c, RunImportOptions as d, RunImportResult as f, streamFeed as g, StreamFeedRow as h, ImportsRunJobPayload as i, DEFAULT_BATCH_SIZE as l, StreamFeedOptions as m, FilePathResolver as n, createRunImportHandler as o, runImport as p, ImportsRunJobContext as r, importsRunJob as s, EsClientResolver as t, ImportRunProgress as u, DEFAULT_MAX_SAMPLES_PER_PATTERN as v, ImportRun as w, ErrorTracker as x, ErrorPattern as y };
467
- //# sourceMappingURL=worker-DerGVTSI.d.mts.map
574
+ export { ErrorSample as C, ImportRun as D, IMPORT_RUN_STATUSES as E, ImportRunClient as O, ErrorPattern as S, ErrorTrackerConfig as T, StreamFeedRow as _, ImportsRunJobPayload as a, DEFAULT_MAX_PATTERNS as b, createRunImportHandler as c, DEFAULT_BATCH_SIZE as d, ImportRunProgress as f, StreamFeedOptions as g, runImport as h, ImportsRunJobContext as i, ImportRunStatus as k, importsRunJob as l, RunImportResult as m, FilePathResolver as n, RunImportHandlerConfig as o, RunImportOptions as p, ImportsRunDeadListenerConfig as r, createImportsRunDeadListener as s, EsClientResolver as t, importsRunJobPayloadSchema as u, countDataRows as v, ErrorTracker as w, DEFAULT_MAX_SAMPLES_PER_PATTERN as x, streamFeed as y };
575
+ //# sourceMappingURL=worker-B7ADOFEV.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"worker-B7ADOFEV.d.mts","names":[],"sources":["../src/entities/import-run.ts","../src/error-tracker.ts","../src/streaming.ts","../src/runner.ts","../src/worker.ts"],"mappings":";;;;;;;;;;;AAgCA;;;;;;;cAHa,mBAAA;AAAA,KACD,eAAA,WAA0B,mBAAA;AAAA,cAEzB,SAAA,yBAAS,MAAA;MAgEpB,sBAAA,CAAA,OAAA;AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAEU,eAAA,GAAkB,WAAA,QAAmB,SAAA,CAAU,SAAA;;;;;;;;;;;;AArE3D;;;;;AACA;;;;;AAEA;;;;;;;;;;;;KCAY,eAAA,sCAKR,eAAA;EAAA,CACG,GAAA,WAAc,eAAA;AAAA;;UAGJ,WAAA;EACf,SAAA;EACA,OAAA,EAAS,eAAA;AAAA;;UAIM,YAAA;EACf,SAAA;EACA,KAAA;EACA,OAAA;EACA,KAAA;EACA,eAAA;EACA,cAAA;EACA,OAAA,EAAS,aAAA,CAAc,WAAA;;EAEvB,UAAA;AAAA;AAAA,UAGe,kBAAA;;EAEf,WAAA;;EAEA,oBAAA;AAAA;;cAIW,oBAAA;AAAA,cACA,+BAAA;AAAA,cAYA,YAAA;EAAA,iBACM,QAAA;EAAA,iBACA,WAAA;EAAA,iBACA,oBAAA;EAAA,QACT,iBAAA;cAEI,MAAA,GAAQ,kBAAA;;;;;;;;;;EAcpB,QAAA,CACE,SAAA,UACA,SAAA,UACA,OAAA,UACA,KAAA,sBACA,OAAA,EAAS,eAAA;;EA+BX,kBAAA,CAAA;;EAOA,uBAAA,CAAA;;;;;;EASA,wBAAA,CAAA;;;;;;EASA,cAAA,CAAA,GAAkB,YAAA;;;;;AD/DpB;ECmFE,QAAA,CAAA,GAAY,oBAAA;AAAA;AAAA,UAUG,oBAAA;EACf,WAAA;EACA,gBAAA;EACA,iBAAA;EACA,QAAA,EAAU,YAAA;AAAA;;;;;;;;;;;;ADtKZ;;;;;AACA;;;;;AAEA;;;;;;;;;;;;;;;;;;;;;;iBEasB,aAAA,CACpB,QAAA,UACA,OAAA;EAAW,SAAA;AAAA,IACV,OAAA;AAAA,UAkBc,iBAAA;EFlCK;EEoCpB,QAAA;EFpCoB;;;;EEyCpB,SAAA;;;;;;;EAOA,SAAA;;;;;;;EAOA,OAAA,GAAU,aAAA;;;;;;EAMV,gBAAA;;;;;;;;;;;;;;;EAeA,KAAA;AAAA;AAAA,UAGe,aAAA;;EAEf,SAAA;;;;;;AFfF;;;;;;;;;EE8BE,GAAA,EAAK,MAAA;AAAA;;ADhGP;;;;;;;iBC2GuB,UAAA,CAAW,OAAA,EAAS,iBAAA,GAAoB,aAAA,CAAc,aAAA;;;;cC5GhE,kBAAA;AAAA,UAEI,gBAAA;;EAEf,WAAA;;EAEA,QAAA;;EAEA,MAAA,EAAQ,MAAA;;EAER,SAAA,EAAW,YAAA,CAAa,IAAA;;EAExB,IAAA,EAAM,iBAAA;;EAEN,QAAA,EAAU,YAAA;;EAEV,OAAA;EHfoB;EGiBpB,SAAA;EHjBoB;;;;;;EGwBpB,UAAA,IAAc,QAAA,EAAU,iBAAA;;EAExB,QAAA;;EAEA,MAAA,GAAS,WAAA;;EAET,YAAA,GAAe,YAAA;;;;;;;;;;;;;;;;EAgBf,MAAA;IACE,IAAA,GAAO,IAAA,EAAM,MAAA,mBAAyB,GAAA;IACtC,IAAA,GAAO,IAAA,EAAM,MAAA,mBAAyB,GAAA;EAAA;AAAA;;;;;;UASzB,iBAAA;EACf,QAAA;EACA,aAAA;EACA,UAAA;EACA,WAAA;EACA,gBAAA;;EAEA,cAAA;;EAEA,aAAA;;EAEA,qBAAA;EHFyB;;;;;;EGSzB,SAAA;AAAA;;;;AF3EF;UEkFiB,eAAA;;EAEf,QAAA;EF/EE;EEiFF,aAAA;EFhFmB;;;AAGrB;;EEmFE,UAAA;EFjFwB;EEmFxB,WAAA;EFnFA;EEqFA,gBAAA;EFrFwB;EEuFxB,MAAA,EAAQ,UAAA,CAAW,YAAA;AAAA;;;;;;;;iBAUC,SAAA,MAAA,CAAgB,OAAA,EAAS,gBAAA,CAAiB,IAAA,IAAQ,OAAA,CAAQ,eAAA;;;;;;;;;;;;;UC9F/D,oBAAA;EACf,EAAA;EACA,OAAA,EAAS,oBAAA;EACT,cAAA,CAAe,IAAA,EAAM,iBAAA;AAAA;;;;;;;cASV,0BAAA,EAA0B,CAAA,CAAA,SAAA;;;;;;;KAG3B,oBAAA,GAAuB,CAAA,CAAE,KAAA,QAAa,0BAAA;;;;;;;;;;;;cAarC,aAAA,EAAe,aAAA,CAAc,oBAAA;;;;;;;KAa9B,gBAAA,SAAyB,YAAA,GAAe,OAAA,CAAQ,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;KA2BhD,gBAAA,IACV,UAAA,aACG,OAAA;EAAU,SAAA;EAAmB,OAAA,SAAgB,OAAA;AAAA;AAAA,UAEjC,sBAAA;EJpBwB;EIsBvC,UAAA,EAAY,eAAA;EJtBmC;EIwB/C,UAAA,EAAY,iBAAA;EJxBsD;EI0BlE,QAAA,EAAU,gBAAA;;EAEV,OAAA;;AH9FF;;;;EGoGE,eAAA,GAAkB,gBAAA;EH9Fb;EGgGL,MAAA,GAAS,MAAA;AAAA;;AH7FX;;;;;;;;;iBG0GgB,sBAAA,CACd,MAAA,EAAQ,sBAAA,IACN,GAAA,EAAK,oBAAA,KAAyB,OAAA;AAAA,UAoNjB,4BAAA;EACf,UAAA,EAAY,eAAA;EHpTU;EGsTtB,MAAA,GAAS,MAAA;AAAA;;;;;;;;;;;AHjTX;;;;;AAQA;;;;;AACA;;;;;AAYA;;;;;;;;;;;;iBGoUgB,4BAAA,CACd,MAAA,EAAQ,4BAAA,GACP,qBAAA"}
package/dist/worker.d.mts CHANGED
@@ -1,2 +1,2 @@
1
- import { a as RunImportHandlerConfig, c as importsRunJobPayloadSchema, i as ImportsRunJobPayload, n as FilePathResolver, o as createRunImportHandler, r as ImportsRunJobContext, s as importsRunJob, t as EsClientResolver } from "./worker-DerGVTSI.mjs";
2
- export { EsClientResolver, FilePathResolver, ImportsRunJobContext, ImportsRunJobPayload, RunImportHandlerConfig, createRunImportHandler, importsRunJob, importsRunJobPayloadSchema };
1
+ import { a as ImportsRunJobPayload, c as createRunImportHandler, i as ImportsRunJobContext, l as importsRunJob, n as FilePathResolver, o as RunImportHandlerConfig, r as ImportsRunDeadListenerConfig, s as createImportsRunDeadListener, t as EsClientResolver, u as importsRunJobPayloadSchema } from "./worker-B7ADOFEV.mjs";
2
+ export { EsClientResolver, FilePathResolver, ImportsRunDeadListenerConfig, ImportsRunJobContext, ImportsRunJobPayload, RunImportHandlerConfig, createImportsRunDeadListener, createRunImportHandler, importsRunJob, importsRunJobPayloadSchema };
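The worker type surface above gains a second factory: createImportsRunDeadListener and its ImportsRunDeadListenerConfig, exported alongside createRunImportHandler. Below is a minimal wiring sketch, under two assumptions: the names are importable from a `@murumets-ee/imports/worker` subpath (only dist/worker.mjs and worker.d.mts are visible here), and registerJob / registerTerminalNotifier are placeholders for whatever the consuming app's queue actually uses to register a job handler and a QueueTerminalNotifier; neither registration API is part of this package diff.

    // Sketch only: importRuns and esClient come from the consumer's plugin
    // init and are declared (not constructed) so the snippet stands alone.
    import {
      createImportsRunDeadListener,
      createRunImportHandler,
      importsRunJob,
      type EsClientResolver,
    } from '@murumets-ee/imports/worker' // assumed subpath
    import { getTransformRegistry, type ImportRunClient } from '@murumets-ee/imports'

    declare const importRuns: ImportRunClient
    declare const esClient: EsClientResolver
    // Placeholders: the queue's registration surface is not in this diff.
    declare function registerJob(definition: unknown, handler: unknown): void
    declare function registerTerminalNotifier(notifier: unknown): void

    // Normal failure path: the handler marks the row failed itself before re-throwing.
    registerJob(
      importsRunJob,
      createRunImportHandler({
        importRuns,
        transforms: getTransformRegistry(),
        esClient,
        esIndex: 'products', // example value; callers pass an alias, never a physical index
      }),
    )

    // New in 0.13.x: mirror queue.job.dead events into import_run.status = 'failed'
    // for the one case the handler never reaches (worker process killed mid-import).
    registerTerminalNotifier(createImportsRunDeadListener({ importRuns }))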
package/dist/worker.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import{n as e}from"./runner-DdhiNybk.mjs";import{runAsCli as t}from"@murumets-ee/core";import{defineJob as n}from"@murumets-ee/queue/client";import{z as r}from"zod";const i=r.object({importRunId:r.string().uuid()}),a=n({name:`imports:run`,description:`Stream a feed file, transform rows, bulk-write to Elasticsearch.`,schema:i,defaultRetries:0});function o(n){let{importRuns:r,transforms:i,esClient:a,esIndex:o,resolveFilePath:c,logger:l}=n;return async n=>t(async()=>{let{importRunId:t}=n.payload,u=l?.child({jobId:n.id,importRunId:t,type:`imports:run`}),d=await r.findById(t);if(!d){u?.warn(`import_run row not found — skipping`);return}let f=i.get(d.transformName);if(!f)throw await s(r,t,`Unknown transform "${d.transformName}"`),Error(`No transform registered for name "${d.transformName}"`);let p=d.params&&typeof d.params==`object`&&!Array.isArray(d.params)?d.params:{};await r.update(t,{status:`running`,queueJobId:n.id,startedAt:new Date});let m;try{m=await a()}catch(e){throw await s(r,t,`Could not resolve ES client: ${e instanceof Error?e.message:String(e)}`),e}let h,g;if(c)try{let e=await c(d.filePath);h=e.localPath,g=e.cleanup}catch(e){throw await s(r,t,`Could not resolve filePath: ${e instanceof Error?e.message:String(e)}`),e}else h=d.filePath;let _;try{_=await e({importRunId:t,runLabel:d.label,params:p,transform:f,feed:{filePath:h},esClient:m,esIndex:o,onProgress:e=>n.updateProgress(e)})}catch(e){throw await s(r,t,e instanceof Error?e.message:String(e)),g&&await g().catch(e=>{u?.warn({err:e},`filePath cleanup after failed run threw — ignoring`)}),e}g&&await g().catch(e=>{u?.warn({err:e},`filePath cleanup after successful run threw — ignoring`)}),await r.update(t,{status:`succeeded`,finishedAt:new Date,totals:{rowsRead:_.rowsRead,rowsSucceeded:_.rowsSucceeded,rowsFailed:_.rowsFailed,rowsSkipped:_.rowsSkipped,batchesCompleted:_.batchesCompleted},errorSummary:JSON.parse(JSON.stringify(_.errors))}),u?.info({rowsRead:_.rowsRead,rowsSucceeded:_.rowsSucceeded,rowsFailed:_.rowsFailed,rowsSkipped:_.rowsSkipped,batches:_.batchesCompleted},`import_run completed`)})}async function s(e,t,n){try{await e.update(t,{status:`failed`,finishedAt:new Date,errorSummary:{fatal:n}})}catch{}}export{o as createRunImportHandler,a as importsRunJob,i as importsRunJobPayloadSchema};
1
+ import{n as e,r as t}from"./runner-D9FtnIBn.mjs";import{a as n,i as r,r as i}from"./publish-Dw2vnCoo.mjs";import{runAsCli as a}from"@murumets-ee/core";import{defineJob as o}from"@murumets-ee/queue/client";import{eq as s}from"drizzle-orm";import{z as c}from"zod";const l=c.object({importRunId:c.string().uuid()}),u=o({name:`imports:run`,description:`Stream a feed file, transform rows, bulk-write to Elasticsearch.`,schema:l,defaultRetries:0});function d(i){let{importRuns:o,transforms:s,esClient:c,esIndex:l,resolveFilePath:u,logger:d}=i;return async i=>a(async()=>{let{importRunId:a}=i.payload,p=d?.child({jobId:i.id,importRunId:a,type:`imports:run`}),m=await o.findById(a);if(!m){p?.warn(`import_run row not found — skipping`);return}m.status===`running`&&p?.warn({previousQueueJobId:m.queueJobId,previousStartedAt:m.startedAt},`imports: re-claiming a row left in running by a prior worker — likely a crash recovered by queue lock-recovery`);let h=s.get(m.transformName);if(!h)throw await f(o,a,`Unknown transform "${m.transformName}"`),Error(`No transform registered for name "${m.transformName}"`);let g=m.params&&typeof m.params==`object`&&!Array.isArray(m.params)?m.params:{};await o.update(a,{status:`running`,queueJobId:i.id,startedAt:new Date}),r(a);let _;try{_=await c()}catch(e){throw await f(o,a,`Could not resolve ES client: ${e instanceof Error?e.message:String(e)}`),e}let v,y;if(u)try{let e=await u(m.filePath);v=e.localPath,y=e.cleanup}catch(e){throw await f(o,a,`Could not resolve filePath: ${e instanceof Error?e.message:String(e)}`),e}else v=m.filePath;let b;try{b=await t(v),p?.info({importRunId:a,totalRows:b},`imports: pre-pass row count`)}catch(e){p?.warn({err:e},`imports: pre-pass row count failed — continuing without total`)}let x;try{x=await e({importRunId:a,runLabel:m.label,params:g,transform:h,feed:{filePath:v},esClient:_,esIndex:l,onProgress:e=>i.updateProgress(b===void 0?e:{...e,totalRows:b}),...p!==void 0&&{logger:p}})}catch(e){throw await f(o,a,e instanceof Error?e.message:String(e)),y&&await y().catch(e=>{p?.warn({err:e},`filePath cleanup after failed run threw — ignoring`)}),e}y&&await y().catch(e=>{p?.warn({err:e},`filePath cleanup after successful run threw — ignoring`)}),await o.update(a,{status:`succeeded`,finishedAt:new Date,totals:{rowsRead:x.rowsRead,rowsSucceeded:x.rowsSucceeded,rowsFailed:x.rowsFailed,rowsSkipped:x.rowsSkipped,batchesCompleted:x.batchesCompleted},errorSummary:JSON.parse(JSON.stringify(x.errors))}),n(a,{rowsRead:x.rowsRead,rowsSucceeded:x.rowsSucceeded,rowsFailed:x.rowsFailed}),p?.info({rowsRead:x.rowsRead,rowsSucceeded:x.rowsSucceeded,rowsFailed:x.rowsFailed,rowsSkipped:x.rowsSkipped,batches:x.batchesCompleted},`import_run completed`)})}async function f(e,t,n){try{await e.update(t,{status:`failed`,finishedAt:new Date,errorSummary:{fatal:n}}),i(t,n)}catch{}}function p(e){let{importRuns:t,logger:n}=e;return{onJobDead:async e=>{e.jobType===`imports:run`&&await a(async()=>{let r=n?.child({event:`imports.run.dead`,jobId:e.jobId,type:`imports:run`}),i=t.getTable(),a=(await t.findMany({where:s(i.queueJobId,e.jobId),limit:1}))[0];if(!a){r?.warn(`queue.job.dead with no matching import_run — skipping`);return}if(a.status!==`running`&&a.status!==`pending`){r?.info({importRunId:a.id,status:a.status},`queue.job.dead arrived for a row already in a terminal state — leaving as-is`);return}a.status===`pending`&&r?.warn({importRunId:a.id,attempts:e.attempts},`queue.job.dead arrived for an import_run still in pending — worker crashed BEFORE claim/lookup`),await 
f(t,a.id,e.error),r?.info({importRunId:a.id,attempts:e.attempts,priorStatus:a.status},`queue.job.dead → import_run marked failed`)})}}}export{p as createImportsRunDeadListener,d as createRunImportHandler,u as importsRunJob,l as importsRunJobPayloadSchema};
2
2
  //# sourceMappingURL=worker.mjs.map
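Three behavioural additions are visible in the rebuilt handler above: a warning when it re-claims a row left in 'running' by a crashed worker, a pre-pass countDataRows(localPath) whose result is spread into every progress update as totalRows, and realtime publishes around the started / succeeded / failed transitions. The sketch below shows one way a reader of that progress payload could derive a percentage; it assumes only the documented ImportRunProgress fields plus the optional totalRows key the handler adds (the base type does not declare it), and formatImportProgress itself is illustrative.

    // Sketch: rendering the progress payload the 0.13 handler writes.
    // totalRows is present only when the pre-pass row count succeeded,
    // so the percentage path has to stay optional.
    import type { ImportRunProgress } from '@murumets-ee/imports'

    // The handler spreads totalRows into the payload; it is not part of the
    // ImportRunProgress interface itself.
    type ProgressWithTotal = ImportRunProgress & { totalRows?: number }

    function formatImportProgress(p: ProgressWithTotal): string {
      if (p.totalRows === undefined || p.totalRows === 0) {
        // Pre-pass count failed; the handler logs a warning and continues,
        // so the reader falls back to "rows so far".
        return `${p.rowsRead} rows read (${Math.round(p.rowsPerSecond)} rows/s)`
      }
      const percent = Math.min(100, (p.rowsRead / p.totalRows) * 100)
      return `${percent.toFixed(1)}% (${p.rowsRead} of ${p.totalRows} rows)`
    }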
package/dist/worker.mjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"worker.mjs","names":[],"sources":["../src/worker.ts"],"sourcesContent":["/**\n * Queue handler for `imports:run`. The route handler that creates the\n * `import_run` row enqueues this job carrying `{ importRunId }`; the\n * worker module here resolves the run, the transform, and the ES\n * client, then runs `runImport` and writes results back.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope): one job type, no resumability.\n * A failed handler attempt re-runs from row 1 on retry — fine for the\n * tens-of-MB feed sizes the PoC exercises.\n *\n * Per CLAUDE.md \"leaf packages\" boundary rule: this handler reads the\n * import_run row via an injected `AdminClient`, NOT a direct `db.select`.\n * Construction happens at plugin init in `plugin.ts`.\n */\n\nimport { type Logger, runAsCli } from '@murumets-ee/core'\nimport { defineJob, type JobDefinition } from '@murumets-ee/queue/client'\nimport type { EsClientLike } from '@murumets-ee/search-elasticsearch'\nimport { z } from 'zod'\nimport { type ImportRunClient, type ImportRunStatus } from './entities/import-run.js'\nimport { type ImportRunProgress, runImport } from './runner.js'\nimport { TransformRegistry } from './transform.js'\n\n/**\n * Structural subset of `JobContext` the handler reads. Declared\n * locally so we don't depend on a `JobHandler` import — the queue\n * package doesn't re-export it from any subpath today (matches the\n * `SendEmailJobContext` pattern in `@murumets-ee/notifications`).\n *\n * Function-param contravariance lets a handler returned with this\n * narrower shape be assigned to the wider `JobHandler<TPayload>` that\n * `registerJob` expects.\n */\nexport interface ImportsRunJobContext {\n id: string\n payload: ImportsRunJobPayload\n updateProgress(data: ImportRunProgress): void\n}\n\n/**\n * Payload schema. `importRunId` is the `import_run.id` UUID — the worker\n * looks up everything else (filePath, transformName, params, …) from\n * that row. Keeping the payload tiny means the queue's progress JSON\n * column never bloats with feed metadata duplicated across `toolkit_jobs`.\n */\nexport const importsRunJobPayloadSchema = z.object({\n importRunId: z.string().uuid(),\n})\nexport type ImportsRunJobPayload = z.infer<typeof importsRunJobPayloadSchema>\n\n/**\n * Job definition. Consumers register their handler against this with\n * `registerJob(importsRunJob, createRunImportHandler({...}))`.\n *\n * `defaultRetries: 0` — re-running a multi-batch import against the\n * same `import_run.id` while the previous handler may still be writing\n * is a footgun (duplicate batches, double-counted progress). The\n * operator handles retries explicitly via PR 8a's \"retry\" button by\n * creating a new `import_run` row. When the resumable design lands\n * (post-PoC), retries become safe to enable.\n */\nexport const importsRunJob: JobDefinition<ImportsRunJobPayload> = defineJob({\n name: 'imports:run',\n description: 'Stream a feed file, transform rows, bulk-write to Elasticsearch.',\n schema: importsRunJobPayloadSchema,\n defaultRetries: 0,\n})\n\n/**\n * Resolves the ES client at handler-invocation time. 
A function (rather\n * than the bare `EsClientLike`) so the consumer can lazy-construct the\n * client — typical Next.js setups create the ES connection in a\n * route-handler initialiser, not at plugin-init time.\n */\nexport type EsClientResolver = () => EsClientLike | Promise<EsClientLike>\n\n/**\n * Resolves the value of `import_run.filePath` (whatever the upload\n * route persisted there — typically a storage adapter key) to a\n * readable LOCAL filesystem path that {@link runImport} can hand to\n * `node:fs.createReadStream`.\n *\n * **Why this exists:** the streaming reader (`streamFeed`) reads from\n * local disk via `createReadStream`. The upload route may persist a\n * remote-storage object key (R2, S3, …) on `import_run.filePath`\n * because that's what `@murumets-ee/storage` returns. Without a\n * resolver, `createReadStream('uploads/2026/05/<uuid>/feed.txt')`\n * crashes with `ENOENT`. The resolver is the documented integration\n * point — typical wiring downloads the storage object to a tmpfile\n * and returns its path.\n *\n * **`cleanup`** runs after the run finishes (success OR failure). The\n * worker awaits it best-effort — a failed cleanup logs but does not\n * crash the run.\n *\n * **No-resolver fallback:** when `resolveFilePath` is unset, the\n * worker treats `import_run.filePath` as already a local FS path\n * (back-compat with the original PoC design where uploads landed on\n * local disk). This stays valid for fixture-driven tests + on-disk\n * deployments.\n */\nexport type FilePathResolver = (\n storageKey: string,\n) => Promise<{ localPath: string; cleanup?: () => Promise<void> }>\n\nexport interface RunImportHandlerConfig {\n /** AdminClient over the `import_run` entity. */\n importRuns: ImportRunClient\n /** Transform registry to dispatch against. Defaults to the process-global singleton. */\n transforms: TransformRegistry\n /** Resolver for the ES client. */\n esClient: EsClientResolver\n /** ES alias / index to bulk-write into. Per D6 callers always pass an alias. */\n esIndex: string\n /**\n * Optional resolver for `import_run.filePath`. See {@link FilePathResolver}.\n * Required when uploads land in remote storage (R2/S3); optional for\n * on-disk PoC setups.\n */\n resolveFilePath?: FilePathResolver\n /** Optional structured logger. Defaults to silent. */\n logger?: Logger\n}\n\n/**\n * Build the `JobHandler` for {@link importsRunJob}. The returned\n * handler is what gets passed to `registerJob`.\n *\n * The body is wrapped in `runAsCli` so AdminClient calls inside have a\n * synthetic `cli` admin context — `auditable()` records `updatedBy:\n * 'cli'` rather than NULL, and the firewall checker passes. This is\n * the documented worker entry-point pattern (see `runAsCli` JSDoc in\n * `@murumets-ee/core`).\n */\nexport function createRunImportHandler(\n config: RunImportHandlerConfig,\n): (job: ImportsRunJobContext) => Promise<void> {\n const { importRuns, transforms, esClient, esIndex, resolveFilePath, logger } = config\n\n return async (job: ImportsRunJobContext): Promise<void> =>\n runAsCli(async () => {\n const { importRunId } = job.payload\n const log = logger?.child({ jobId: job.id, importRunId, type: 'imports:run' })\n\n const run = await importRuns.findById(importRunId)\n if (!run) {\n // Row was deleted between enqueue and dispatch — nothing to do.\n // Don't throw: a thrown handler retries with no payoff because\n // the row stays gone. 
Log + return so the queue marks the job\n // completed.\n log?.warn('import_run row not found — skipping')\n return\n }\n\n // `transformName`, `filePath`, `label` are declared `field.text({\n // required: true })` so the inferred DTO already types them as\n // `string` — no cast needed. `params` is `field.json()` which\n // infers as `JsonValue | undefined`; narrow to a record so the\n // transform receives an object.\n const transform = transforms.get(run.transformName)\n if (!transform) {\n await markFailed(importRuns, importRunId, `Unknown transform \"${run.transformName}\"`)\n throw new Error(`No transform registered for name \"${run.transformName}\"`)\n }\n const params: Record<string, unknown> =\n run.params && typeof run.params === 'object' && !Array.isArray(run.params)\n ? (run.params as Record<string, unknown>)\n : {}\n\n await importRuns.update(importRunId, {\n status: 'running' satisfies ImportRunStatus,\n queueJobId: job.id,\n startedAt: new Date(),\n })\n\n let resolvedClient: EsClientLike\n try {\n resolvedClient = await esClient()\n } catch (err) {\n const reason = err instanceof Error ? err.message : String(err)\n await markFailed(importRuns, importRunId, `Could not resolve ES client: ${reason}`)\n throw err\n }\n\n // Materialise the file locally if the consumer wired a resolver.\n // The `cleanup` callback (when present) runs in the `finally`\n // below regardless of run outcome so a tmpfile created here\n // doesn't outlive the handler.\n let localPath: string\n let fileCleanup: (() => Promise<void>) | undefined\n if (resolveFilePath) {\n try {\n const resolved = await resolveFilePath(run.filePath)\n localPath = resolved.localPath\n fileCleanup = resolved.cleanup\n } catch (err) {\n const reason = err instanceof Error ? err.message : String(err)\n await markFailed(importRuns, importRunId, `Could not resolve filePath: ${reason}`)\n throw err\n }\n } else {\n // Back-compat: no resolver wired → treat `filePath` as already\n // a local FS path. Fine for fixture-driven tests + the original\n // on-disk PoC design.\n localPath = run.filePath\n }\n\n let result: Awaited<ReturnType<typeof runImport>>\n try {\n result = await runImport({\n importRunId,\n runLabel: run.label,\n params,\n transform,\n feed: { filePath: localPath },\n esClient: resolvedClient,\n esIndex,\n onProgress: (progress) => job.updateProgress(progress),\n })\n } catch (err) {\n const reason = err instanceof Error ? 
err.message : String(err)\n await markFailed(importRuns, importRunId, reason)\n if (fileCleanup) {\n await fileCleanup().catch((cleanupErr: unknown) => {\n log?.warn({ err: cleanupErr }, 'filePath cleanup after failed run threw — ignoring')\n })\n }\n throw err\n }\n\n // Cleanup runs before the success-update so a cleanup throw\n // surfaces as a logged warning rather than a crashed handler at\n // the worst possible moment (just after the row was about to be\n // marked succeeded).\n if (fileCleanup) {\n await fileCleanup().catch((cleanupErr: unknown) => {\n log?.warn({ err: cleanupErr }, 'filePath cleanup after successful run threw — ignoring')\n })\n }\n\n await importRuns.update(importRunId, {\n status: 'succeeded' satisfies ImportRunStatus,\n finishedAt: new Date(),\n totals: {\n rowsRead: result.rowsRead,\n rowsSucceeded: result.rowsSucceeded,\n rowsFailed: result.rowsFailed,\n rowsSkipped: result.rowsSkipped,\n batchesCompleted: result.batchesCompleted,\n },\n // The typed `ErrorTrackerSnapshot` is fully JSON-serialisable but\n // doesn't structurally satisfy entity's recursive `JsonValue`\n // (objects without an index signature aren't `Record<string,\n // JsonValue>`). Round-trip through `JSON.parse(JSON.stringify(...))`\n // to land as the recursive JSON shape — the runtime cost is one\n // serialise per import_run completion, the type-cast cost is zero.\n errorSummary: JSON.parse(JSON.stringify(result.errors)),\n })\n\n log?.info(\n {\n rowsRead: result.rowsRead,\n rowsSucceeded: result.rowsSucceeded,\n rowsFailed: result.rowsFailed,\n rowsSkipped: result.rowsSkipped,\n batches: result.batchesCompleted,\n },\n 'import_run completed',\n )\n })\n}\n\n/** Write a `failed` row with a fatal error reason. Best-effort — logs but does not throw on DB failure. */\nasync function markFailed(\n importRuns: ImportRunClient,\n id: string,\n reason: string,\n): Promise<void> {\n try {\n await importRuns.update(id, {\n status: 'failed' satisfies ImportRunStatus,\n finishedAt: new Date(),\n errorSummary: { fatal: reason },\n })\n } catch {\n // The handler already threw / will throw — the queue will surface\n // the original error. 
A second failure here just logs noise.\n }\n}\n"],"mappings":"qKA6CA,MAAa,EAA6B,EAAE,OAAO,CACjD,YAAa,EAAE,QAAQ,CAAC,MAAM,CAC/B,CAAC,CAcW,EAAqD,EAAU,CAC1E,KAAM,cACN,YAAa,mEACb,OAAQ,EACR,eAAgB,EACjB,CAAC,CAoEF,SAAgB,EACd,EAC8C,CAC9C,GAAM,CAAE,aAAY,aAAY,WAAU,UAAS,kBAAiB,UAAW,EAE/E,OAAO,KAAO,IACZ,EAAS,SAAY,CACnB,GAAM,CAAE,eAAgB,EAAI,QACtB,EAAM,GAAQ,MAAM,CAAE,MAAO,EAAI,GAAI,cAAa,KAAM,cAAe,CAAC,CAExE,EAAM,MAAM,EAAW,SAAS,EAAY,CAClD,GAAI,CAAC,EAAK,CAKR,GAAK,KAAK,sCAAsC,CAChD,OAQF,IAAM,EAAY,EAAW,IAAI,EAAI,cAAc,CACnD,GAAI,CAAC,EAEH,MADA,MAAM,EAAW,EAAY,EAAa,sBAAsB,EAAI,cAAc,GAAG,CAC3E,MAAM,qCAAqC,EAAI,cAAc,GAAG,CAE5E,IAAM,EACJ,EAAI,QAAU,OAAO,EAAI,QAAW,UAAY,CAAC,MAAM,QAAQ,EAAI,OAAO,CACrE,EAAI,OACL,EAAE,CAER,MAAM,EAAW,OAAO,EAAa,CACnC,OAAQ,UACR,WAAY,EAAI,GAChB,UAAW,IAAI,KAChB,CAAC,CAEF,IAAI,EACJ,GAAI,CACF,EAAiB,MAAM,GAAU,OAC1B,EAAK,CAGZ,MADA,MAAM,EAAW,EAAY,EAAa,gCAD3B,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,GACoB,CAC7E,EAOR,IAAI,EACA,EACJ,GAAI,EACF,GAAI,CACF,IAAM,EAAW,MAAM,EAAgB,EAAI,SAAS,CACpD,EAAY,EAAS,UACrB,EAAc,EAAS,cAChB,EAAK,CAGZ,MADA,MAAM,EAAW,EAAY,EAAa,+BAD3B,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,GACmB,CAC5E,OAMR,EAAY,EAAI,SAGlB,IAAI,EACJ,GAAI,CACF,EAAS,MAAM,EAAU,CACvB,cACA,SAAU,EAAI,MACd,SACA,YACA,KAAM,CAAE,SAAU,EAAW,CAC7B,SAAU,EACV,UACA,WAAa,GAAa,EAAI,eAAe,EAAS,CACvD,CAAC,OACK,EAAK,CAQZ,MANA,MAAM,EAAW,EAAY,EADd,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CACd,CAC7C,GACF,MAAM,GAAa,CAAC,MAAO,GAAwB,CACjD,GAAK,KAAK,CAAE,IAAK,EAAY,CAAE,qDAAqD,EACpF,CAEE,EAOJ,GACF,MAAM,GAAa,CAAC,MAAO,GAAwB,CACjD,GAAK,KAAK,CAAE,IAAK,EAAY,CAAE,yDAAyD,EACxF,CAGJ,MAAM,EAAW,OAAO,EAAa,CACnC,OAAQ,YACR,WAAY,IAAI,KAChB,OAAQ,CACN,SAAU,EAAO,SACjB,cAAe,EAAO,cACtB,WAAY,EAAO,WACnB,YAAa,EAAO,YACpB,iBAAkB,EAAO,iBAC1B,CAOD,aAAc,KAAK,MAAM,KAAK,UAAU,EAAO,OAAO,CAAC,CACxD,CAAC,CAEF,GAAK,KACH,CACE,SAAU,EAAO,SACjB,cAAe,EAAO,cACtB,WAAY,EAAO,WACnB,YAAa,EAAO,YACpB,QAAS,EAAO,iBACjB,CACD,uBACD,EACD,CAIN,eAAe,EACb,EACA,EACA,EACe,CACf,GAAI,CACF,MAAM,EAAW,OAAO,EAAI,CAC1B,OAAQ,SACR,WAAY,IAAI,KAChB,aAAc,CAAE,MAAO,EAAQ,CAChC,CAAC,MACI"}
1
+ {"version":3,"file":"worker.mjs","names":[],"sources":["../src/worker.ts"],"sourcesContent":["/**\n * Queue handler for `imports:run`. The route handler that creates the\n * `import_run` row enqueues this job carrying `{ importRunId }`; the\n * worker module here resolves the run, the transform, and the ES\n * client, then runs `runImport` and writes results back.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope): one job type, no resumability.\n * A failed handler attempt re-runs from row 1 on retry — fine for the\n * tens-of-MB feed sizes the PoC exercises.\n *\n * Per CLAUDE.md \"leaf packages\" boundary rule: this handler reads the\n * import_run row via an injected `AdminClient`, NOT a direct `db.select`.\n * Construction happens at plugin init in `plugin.ts`.\n */\n\nimport { type Logger, runAsCli } from '@murumets-ee/core'\nimport {\n defineJob,\n type JobDefinition,\n type QueueJobDeadEvent,\n type QueueTerminalNotifier,\n} from '@murumets-ee/queue/client'\nimport type { EsClientLike } from '@murumets-ee/search-elasticsearch'\nimport { eq } from 'drizzle-orm'\nimport type { PgColumn } from 'drizzle-orm/pg-core'\nimport { z } from 'zod'\nimport { type ImportRunClient, type ImportRunStatus } from './entities/import-run.js'\nimport {\n publishImportRunFailed,\n publishImportRunStarted,\n publishImportRunSucceeded,\n} from './realtime/publish.js'\nimport { type ImportRunProgress, runImport } from './runner.js'\nimport { countDataRows } from './streaming.js'\nimport { TransformRegistry } from './transform.js'\n\n/**\n * Structural subset of `JobContext` the handler reads. Declared\n * locally so we don't depend on a `JobHandler` import — the queue\n * package doesn't re-export it from any subpath today (matches the\n * `SendEmailJobContext` pattern in `@murumets-ee/notifications`).\n *\n * Function-param contravariance lets a handler returned with this\n * narrower shape be assigned to the wider `JobHandler<TPayload>` that\n * `registerJob` expects.\n */\nexport interface ImportsRunJobContext {\n id: string\n payload: ImportsRunJobPayload\n updateProgress(data: ImportRunProgress): void\n}\n\n/**\n * Payload schema. `importRunId` is the `import_run.id` UUID — the worker\n * looks up everything else (filePath, transformName, params, …) from\n * that row. Keeping the payload tiny means the queue's progress JSON\n * column never bloats with feed metadata duplicated across `toolkit_jobs`.\n */\nexport const importsRunJobPayloadSchema = z.object({\n importRunId: z.string().uuid(),\n})\nexport type ImportsRunJobPayload = z.infer<typeof importsRunJobPayloadSchema>\n\n/**\n * Job definition. Consumers register their handler against this with\n * `registerJob(importsRunJob, createRunImportHandler({...}))`.\n *\n * `defaultRetries: 0` — re-running a multi-batch import against the\n * same `import_run.id` while the previous handler may still be writing\n * is a footgun (duplicate batches, double-counted progress). The\n * operator handles retries explicitly via PR 8a's \"retry\" button by\n * creating a new `import_run` row. When the resumable design lands\n * (post-PoC), retries become safe to enable.\n */\nexport const importsRunJob: JobDefinition<ImportsRunJobPayload> = defineJob({\n name: 'imports:run',\n description: 'Stream a feed file, transform rows, bulk-write to Elasticsearch.',\n schema: importsRunJobPayloadSchema,\n defaultRetries: 0,\n})\n\n/**\n * Resolves the ES client at handler-invocation time. 
A function (rather\n * than the bare `EsClientLike`) so the consumer can lazy-construct the\n * client — typical Next.js setups create the ES connection in a\n * route-handler initialiser, not at plugin-init time.\n */\nexport type EsClientResolver = () => EsClientLike | Promise<EsClientLike>\n\n/**\n * Resolves the value of `import_run.filePath` (whatever the upload\n * route persisted there — typically a storage adapter key) to a\n * readable LOCAL filesystem path that {@link runImport} can hand to\n * `node:fs.createReadStream`.\n *\n * **Why this exists:** the streaming reader (`streamFeed`) reads from\n * local disk via `createReadStream`. The upload route may persist a\n * remote-storage object key (R2, S3, …) on `import_run.filePath`\n * because that's what `@murumets-ee/storage` returns. Without a\n * resolver, `createReadStream('uploads/2026/05/<uuid>/feed.txt')`\n * crashes with `ENOENT`. The resolver is the documented integration\n * point — typical wiring downloads the storage object to a tmpfile\n * and returns its path.\n *\n * **`cleanup`** runs after the run finishes (success OR failure). The\n * worker awaits it best-effort — a failed cleanup logs but does not\n * crash the run.\n *\n * **No-resolver fallback:** when `resolveFilePath` is unset, the\n * worker treats `import_run.filePath` as already a local FS path\n * (back-compat with the original PoC design where uploads landed on\n * local disk). This stays valid for fixture-driven tests + on-disk\n * deployments.\n */\nexport type FilePathResolver = (\n storageKey: string,\n) => Promise<{ localPath: string; cleanup?: () => Promise<void> }>\n\nexport interface RunImportHandlerConfig {\n /** AdminClient over the `import_run` entity. */\n importRuns: ImportRunClient\n /** Transform registry to dispatch against. Defaults to the process-global singleton. */\n transforms: TransformRegistry\n /** Resolver for the ES client. */\n esClient: EsClientResolver\n /** ES alias / index to bulk-write into. Per D6 callers always pass an alias. */\n esIndex: string\n /**\n * Optional resolver for `import_run.filePath`. See {@link FilePathResolver}.\n * Required when uploads land in remote storage (R2/S3); optional for\n * on-disk PoC setups.\n */\n resolveFilePath?: FilePathResolver\n /** Optional structured logger. Defaults to silent. */\n logger?: Logger\n}\n\n/**\n * Build the `JobHandler` for {@link importsRunJob}. The returned\n * handler is what gets passed to `registerJob`.\n *\n * The body is wrapped in `runAsCli` so AdminClient calls inside have a\n * synthetic `cli` admin context — `auditable()` records `updatedBy:\n * 'cli'` rather than NULL, and the firewall checker passes. This is\n * the documented worker entry-point pattern (see `runAsCli` JSDoc in\n * `@murumets-ee/core`).\n */\nexport function createRunImportHandler(\n config: RunImportHandlerConfig,\n): (job: ImportsRunJobContext) => Promise<void> {\n const { importRuns, transforms, esClient, esIndex, resolveFilePath, logger } = config\n\n return async (job: ImportsRunJobContext): Promise<void> =>\n runAsCli(async () => {\n const { importRunId } = job.payload\n const log = logger?.child({ jobId: job.id, importRunId, type: 'imports:run' })\n\n const run = await importRuns.findById(importRunId)\n if (!run) {\n // Row was deleted between enqueue and dispatch — nothing to do.\n // Don't throw: a thrown handler retries with no payoff because\n // the row stays gone. 
Log + return so the queue marks the job\n // completed.\n log?.warn('import_run row not found — skipping')\n return\n }\n\n // Re-claim detection. The queue's `recoverStuckJobs` sweep flips a\n // crashed worker's `toolkit_jobs` row from `processing` back to\n // `pending` — but the queue layer has no knowledge of `import_run`,\n // so this row stays at `status='running'` from the prior attempt.\n // When the queue re-dispatches the job, we land here with a row\n // that the operator-facing Active panel has been showing as live\n // for however long lock-recovery took (default 30 min). The\n // existing `update` below overwrites `queueJobId` + `startedAt` so\n // there is no STATE corruption — but emit a structured warning so\n // ops can correlate this re-run with the worker crash that\n // produced it. The sibling `sweepStaleImportRuns` job below catches\n // the case where re-dispatch never happens (queue marks the job\n // dead, or the queue worker is offline long-term).\n if (run.status === 'running') {\n log?.warn(\n {\n previousQueueJobId: run.queueJobId,\n previousStartedAt: run.startedAt,\n },\n 'imports: re-claiming a row left in running by a prior worker — likely a crash recovered by queue lock-recovery',\n )\n }\n\n // `transformName`, `filePath`, `label` are declared `field.text({\n // required: true })` so the inferred DTO already types them as\n // `string` — no cast needed. `params` is `field.json()` which\n // infers as `JsonValue | undefined`; narrow to a record so the\n // transform receives an object.\n const transform = transforms.get(run.transformName)\n if (!transform) {\n await markFailed(importRuns, importRunId, `Unknown transform \"${run.transformName}\"`)\n throw new Error(`No transform registered for name \"${run.transformName}\"`)\n }\n const params: Record<string, unknown> =\n run.params && typeof run.params === 'object' && !Array.isArray(run.params)\n ? (run.params as Record<string, unknown>)\n : {}\n\n await importRuns.update(importRunId, {\n status: 'running' satisfies ImportRunStatus,\n queueJobId: job.id,\n startedAt: new Date(),\n })\n // Realtime: row transitioned pending → running. Subscribers\n // refetch to surface the change in the imports admin page.\n // Best-effort — a publish failure is logged inside `publishSafe`\n // and does NOT roll back the row update.\n publishImportRunStarted(importRunId)\n\n let resolvedClient: EsClientLike\n try {\n resolvedClient = await esClient()\n } catch (err) {\n const reason = err instanceof Error ? err.message : String(err)\n await markFailed(importRuns, importRunId, `Could not resolve ES client: ${reason}`)\n throw err\n }\n\n // Materialise the file locally if the consumer wired a resolver.\n // The `cleanup` callback (when present) runs in the `finally`\n // below regardless of run outcome so a tmpfile created here\n // doesn't outlive the handler.\n let localPath: string\n let fileCleanup: (() => Promise<void>) | undefined\n if (resolveFilePath) {\n try {\n const resolved = await resolveFilePath(run.filePath)\n localPath = resolved.localPath\n fileCleanup = resolved.cleanup\n } catch (err) {\n const reason = err instanceof Error ? err.message : String(err)\n await markFailed(importRuns, importRunId, `Could not resolve filePath: ${reason}`)\n throw err\n }\n } else {\n // Back-compat: no resolver wired → treat `filePath` as already\n // a local FS path. 
Fine for fixture-driven tests + the original\n // on-disk PoC design.\n localPath = run.filePath\n }\n\n // Pre-pass line count: a 16 MB tab-delimited feed counts in\n // ~50ms; a 500 MB feed in <2s. Cheap relative to the full\n // import. Gives the operator a denominator (`processed / total`)\n // so the imports-page panel can render an actual percent\n // instead of just \"rows so far\".\n let totalRows: number | undefined\n try {\n totalRows = await countDataRows(localPath)\n log?.info({ importRunId, totalRows }, 'imports: pre-pass row count')\n } catch (err) {\n // Don't fail the run on a count miss — the actual import is\n // unaffected; the operator just loses the percent denominator.\n log?.warn({ err }, 'imports: pre-pass row count failed — continuing without total')\n }\n\n let result: Awaited<ReturnType<typeof runImport>>\n try {\n result = await runImport({\n importRunId,\n runLabel: run.label,\n params,\n transform,\n feed: { filePath: localPath },\n esClient: resolvedClient,\n esIndex,\n onProgress: (progress) =>\n job.updateProgress(totalRows !== undefined ? { ...progress, totalRows } : progress),\n ...(log !== undefined && { logger: log }),\n })\n } catch (err) {\n const reason = err instanceof Error ? err.message : String(err)\n await markFailed(importRuns, importRunId, reason)\n if (fileCleanup) {\n await fileCleanup().catch((cleanupErr: unknown) => {\n log?.warn({ err: cleanupErr }, 'filePath cleanup after failed run threw — ignoring')\n })\n }\n throw err\n }\n\n // Cleanup runs before the success-update so a cleanup throw\n // surfaces as a logged warning rather than a crashed handler at\n // the worst possible moment (just after the row was about to be\n // marked succeeded).\n if (fileCleanup) {\n await fileCleanup().catch((cleanupErr: unknown) => {\n log?.warn({ err: cleanupErr }, 'filePath cleanup after successful run threw — ignoring')\n })\n }\n\n await importRuns.update(importRunId, {\n status: 'succeeded' satisfies ImportRunStatus,\n finishedAt: new Date(),\n totals: {\n rowsRead: result.rowsRead,\n rowsSucceeded: result.rowsSucceeded,\n rowsFailed: result.rowsFailed,\n rowsSkipped: result.rowsSkipped,\n batchesCompleted: result.batchesCompleted,\n },\n // The typed `ErrorTrackerSnapshot` is fully JSON-serialisable but\n // doesn't structurally satisfy entity's recursive `JsonValue`\n // (objects without an index signature aren't `Record<string,\n // JsonValue>`). Round-trip through `JSON.parse(JSON.stringify(...))`\n // to land as the recursive JSON shape — the runtime cost is one\n // serialise per import_run completion, the type-cast cost is zero.\n errorSummary: JSON.parse(JSON.stringify(result.errors)),\n })\n // Realtime: row transitioned to terminal-success. Subscribers\n // refetch to move the row from Active → Recent.\n publishImportRunSucceeded(importRunId, {\n rowsRead: result.rowsRead,\n rowsSucceeded: result.rowsSucceeded,\n rowsFailed: result.rowsFailed,\n })\n\n log?.info(\n {\n rowsRead: result.rowsRead,\n rowsSucceeded: result.rowsSucceeded,\n rowsFailed: result.rowsFailed,\n rowsSkipped: result.rowsSkipped,\n batches: result.batchesCompleted,\n },\n 'import_run completed',\n )\n })\n}\n\n/** Write a `failed` row with a fatal error reason. Best-effort — logs but does not throw on DB failure. 
*/\nasync function markFailed(\n importRuns: ImportRunClient,\n id: string,\n reason: string,\n): Promise<void> {\n try {\n await importRuns.update(id, {\n status: 'failed' satisfies ImportRunStatus,\n finishedAt: new Date(),\n errorSummary: { fatal: reason },\n })\n // Realtime: row transitioned to terminal-failure. Subscribers\n // refetch to surface the failure in the Recent panel. Publish\n // is best-effort and only fires when the persist succeeds —\n // skipping it on persist failure keeps subscribers consistent\n // with the DB (no event for a row that didn't transition).\n publishImportRunFailed(id, reason)\n } catch {\n // The handler already threw / will throw — the queue will surface\n // the original error. A second failure here just logs noise.\n }\n}\n\nexport interface ImportsRunDeadListenerConfig {\n importRuns: ImportRunClient\n /** Optional structured logger. */\n logger?: Logger\n}\n\n/**\n * Build a {@link QueueTerminalNotifier} that mirrors `imports:run`\n * dead-job events into `import_run.status='failed'`.\n *\n * **Why this exists.** The `imports:run` handler's normal failure path\n * already calls `markFailed` before re-throwing, so a synchronous\n * handler error reaches the operator-visible `failed` status without\n * help from anyone. The case THIS listener catches is the one\n * `markFailed` never gets to: the worker process is killed mid-handler\n * (OOM, container restart, kill -9), the queue's lock-recovery sweep\n * eventually reclaims the row, and the queue ultimately marks the job\n * `dead` (defaultRetries: 0 + repeated process death = dead-letter\n * with `import_run.status` still `running`). Without this listener\n * the row sits in the operator's Active panel forever.\n *\n * **Generic primitive.** This is a thin subscriber over the queue's\n * multi-subscriber `QueueTerminalNotifier` registry — the same shape\n * graphile-worker (`job:failed`) and BullMQ (`failed`) expose. Other\n * domain plugins that mirror queue state (ticketing-reply, future\n * exports/reports/...) can register their own listeners exactly the\n * same way without coordinating via a per-domain sweep job.\n *\n * **Filtering.** The listener is invoked for EVERY dead event in the\n * process. We early-return on any `jobType` other than `imports:run`\n * so the per-event cost on unrelated jobs is a single property read.\n *\n * **Idempotency.** A dead event delivered after the row has already\n * reached its own terminal state (succeeded / failed / cancelled) is\n * ignored — the only transition we make is `running → failed`. This\n * keeps a late replay or out-of-order delivery from clobbering a\n * row whose worker reached `markFailed` faster than the queue's\n * dead-letter dispatch.\n *\n * The handler is wrapped in `runAsCli` so AdminClient calls inside\n * have a synthetic CLI admin context — same pattern as\n * {@link createRunImportHandler}.\n */\nexport function createImportsRunDeadListener(\n config: ImportsRunDeadListenerConfig,\n): QueueTerminalNotifier {\n const { importRuns, logger } = config\n\n return {\n onJobDead: async (event: QueueJobDeadEvent): Promise<void> => {\n if (event.jobType !== 'imports:run') return\n\n await runAsCli(async () => {\n const log = logger?.child({\n event: 'imports.run.dead',\n jobId: event.jobId,\n type: 'imports:run',\n })\n\n // Look up the mirror row by `queueJobId`. 
Cap to 1 — the\n // upload route writes one `import_run` per enqueued job.\n //\n // `AdminClient.getTable()` returns `PgTableWithColumns<any>`\n // because entity tables have dynamic column shapes (see the\n // `getTable()` JSDoc in `packages/entity/src/admin/client.ts`\n // and the documented-`any`-cases list in CLAUDE.md). Cast\n // the table to a structural shape covering only the column\n // we touch, so both `eq()` and type-coverage see a typed\n // operand instead of an `any`-derived property access.\n const table = importRuns.getTable() as unknown as { queueJobId: PgColumn }\n const matches = await importRuns.findMany({\n where: eq(table.queueJobId, event.jobId),\n limit: 1,\n })\n const row = matches[0]\n if (!row) {\n // No mirror row for this dead job. Either it was deleted\n // by the operator, or the dead event is for a job\n // unrelated to imports despite the type filter\n // (shouldn't happen, but is cheap to log).\n log?.warn('queue.job.dead with no matching import_run — skipping')\n return\n }\n\n // Only transition rows that haven't already reached their\n // own terminal state. A late dead-event must not overwrite\n // a `succeeded` row that the worker reached via the normal\n // path before the queue's dead-letter dispatch fired.\n if (row.status !== 'running' && row.status !== 'pending') {\n log?.info(\n { importRunId: row.id, status: row.status },\n 'queue.job.dead arrived for a row already in a terminal state — leaving as-is',\n )\n return\n }\n\n // Dead-from-`pending` is the rarer + more interesting case:\n // the worker process died (or never claimed) BEFORE it could\n // flip the row to `running`. Log it at warn so ops can spot\n // a queue+worker-startup pathology that the standard `running\n // → failed` transition wouldn't surface. 
Dead-from-`running`\n // is the documented \"crash mid-import\" case — info is enough.\n if (row.status === 'pending') {\n log?.warn(\n { importRunId: row.id, attempts: event.attempts },\n 'queue.job.dead arrived for an import_run still in pending — worker crashed BEFORE claim/lookup',\n )\n }\n\n await markFailed(importRuns, row.id, event.error)\n log?.info(\n { importRunId: row.id, attempts: event.attempts, priorStatus: row.status },\n 'queue.job.dead → import_run marked failed',\n )\n })\n },\n }\n}\n"],"mappings":"sQA0DA,MAAa,EAA6B,EAAE,OAAO,CACjD,YAAa,EAAE,QAAQ,CAAC,MAAM,CAC/B,CAAC,CAcW,EAAqD,EAAU,CAC1E,KAAM,cACN,YAAa,mEACb,OAAQ,EACR,eAAgB,EACjB,CAAC,CAoEF,SAAgB,EACd,EAC8C,CAC9C,GAAM,CAAE,aAAY,aAAY,WAAU,UAAS,kBAAiB,UAAW,EAE/E,OAAO,KAAO,IACZ,EAAS,SAAY,CACnB,GAAM,CAAE,eAAgB,EAAI,QACtB,EAAM,GAAQ,MAAM,CAAE,MAAO,EAAI,GAAI,cAAa,KAAM,cAAe,CAAC,CAExE,EAAM,MAAM,EAAW,SAAS,EAAY,CAClD,GAAI,CAAC,EAAK,CAKR,GAAK,KAAK,sCAAsC,CAChD,OAgBE,EAAI,SAAW,WACjB,GAAK,KACH,CACE,mBAAoB,EAAI,WACxB,kBAAmB,EAAI,UACxB,CACD,iHACD,CAQH,IAAM,EAAY,EAAW,IAAI,EAAI,cAAc,CACnD,GAAI,CAAC,EAEH,MADA,MAAM,EAAW,EAAY,EAAa,sBAAsB,EAAI,cAAc,GAAG,CAC3E,MAAM,qCAAqC,EAAI,cAAc,GAAG,CAE5E,IAAM,EACJ,EAAI,QAAU,OAAO,EAAI,QAAW,UAAY,CAAC,MAAM,QAAQ,EAAI,OAAO,CACrE,EAAI,OACL,EAAE,CAER,MAAM,EAAW,OAAO,EAAa,CACnC,OAAQ,UACR,WAAY,EAAI,GAChB,UAAW,IAAI,KAChB,CAAC,CAKF,EAAwB,EAAY,CAEpC,IAAI,EACJ,GAAI,CACF,EAAiB,MAAM,GAAU,OAC1B,EAAK,CAGZ,MADA,MAAM,EAAW,EAAY,EAAa,gCAD3B,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,GACoB,CAC7E,EAOR,IAAI,EACA,EACJ,GAAI,EACF,GAAI,CACF,IAAM,EAAW,MAAM,EAAgB,EAAI,SAAS,CACpD,EAAY,EAAS,UACrB,EAAc,EAAS,cAChB,EAAK,CAGZ,MADA,MAAM,EAAW,EAAY,EAAa,+BAD3B,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,GACmB,CAC5E,OAMR,EAAY,EAAI,SAQlB,IAAI,EACJ,GAAI,CACF,EAAY,MAAM,EAAc,EAAU,CAC1C,GAAK,KAAK,CAAE,cAAa,YAAW,CAAE,8BAA8B,OAC7D,EAAK,CAGZ,GAAK,KAAK,CAAE,MAAK,CAAE,gEAAgE,CAGrF,IAAI,EACJ,GAAI,CACF,EAAS,MAAM,EAAU,CACvB,cACA,SAAU,EAAI,MACd,SACA,YACA,KAAM,CAAE,SAAU,EAAW,CAC7B,SAAU,EACV,UACA,WAAa,GACX,EAAI,eAAe,IAAc,IAAA,GAAyC,EAA7B,CAAE,GAAG,EAAU,YAAW,CAAY,CACrF,GAAI,IAAQ,IAAA,IAAa,CAAE,OAAQ,EAAK,CACzC,CAAC,OACK,EAAK,CAQZ,MANA,MAAM,EAAW,EAAY,EADd,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CACd,CAC7C,GACF,MAAM,GAAa,CAAC,MAAO,GAAwB,CACjD,GAAK,KAAK,CAAE,IAAK,EAAY,CAAE,qDAAqD,EACpF,CAEE,EAOJ,GACF,MAAM,GAAa,CAAC,MAAO,GAAwB,CACjD,GAAK,KAAK,CAAE,IAAK,EAAY,CAAE,yDAAyD,EACxF,CAGJ,MAAM,EAAW,OAAO,EAAa,CACnC,OAAQ,YACR,WAAY,IAAI,KAChB,OAAQ,CACN,SAAU,EAAO,SACjB,cAAe,EAAO,cACtB,WAAY,EAAO,WACnB,YAAa,EAAO,YACpB,iBAAkB,EAAO,iBAC1B,CAOD,aAAc,KAAK,MAAM,KAAK,UAAU,EAAO,OAAO,CAAC,CACxD,CAAC,CAGF,EAA0B,EAAa,CACrC,SAAU,EAAO,SACjB,cAAe,EAAO,cACtB,WAAY,EAAO,WACpB,CAAC,CAEF,GAAK,KACH,CACE,SAAU,EAAO,SACjB,cAAe,EAAO,cACtB,WAAY,EAAO,WACnB,YAAa,EAAO,YACpB,QAAS,EAAO,iBACjB,CACD,uBACD,EACD,CAIN,eAAe,EACb,EACA,EACA,EACe,CACf,GAAI,CACF,MAAM,EAAW,OAAO,EAAI,CAC1B,OAAQ,SACR,WAAY,IAAI,KAChB,aAAc,CAAE,MAAO,EAAQ,CAChC,CAAC,CAMF,EAAuB,EAAI,EAAO,MAC5B,GAiDV,SAAgB,EACd,EACuB,CACvB,GAAM,CAAE,aAAY,UAAW,EAE/B,MAAO,CACL,UAAW,KAAO,IAA4C,CACxD,EAAM,UAAY,eAEtB,MAAM,EAAS,SAAY,CACzB,IAAM,EAAM,GAAQ,MAAM,CACxB,MAAO,mBACP,MAAO,EAAM,MACb,KAAM,cACP,CAAC,CAYI,EAAQ,EAAW,UAAU,CAK7B,GAAM,MAJU,EAAW,SAAS,CACxC,MAAO,EAAG,EAAM,WAAY,EAAM,MAAM,CACxC,MAAO,EACR,CAAC,EACkB,GACpB,GAAI,CAAC,EAAK,CAKR,GAAK,KAAK,wDAAwD,CAClE,OAOF,GAAI,EAAI,SAAW,WAAa,EAAI,SAAW,UAAW,CACxD,GAAK,KACH,CAAE,YAAa,EAAI,GAAI,OAAQ,EAAI,OAAQ,CAC3C,+EACD,CACD,OASE,EAAI,SAAW,WACjB,GAAK,KACH,CAAE,YAAa,EAAI,GAAI,SAAU,EAAM,SAAU,CACjD,iGACD,CAGH,MAAM,EAAW,EAAY,EAAI,GAAI,EAAM,MAAM,CACjD,GAAK,KACH,CAAE,YAAa,EAAI,GAAI,SAAU,EAAM,SAAU,YAAa,EAAI,OAAQ,CAC1E,4CACD,EACD,EAEL"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@murumets-ee/imports",
3
- "version": "0.12.0",
3
+ "version": "0.13.1",
4
4
  "license": "Elastic-2.0",
5
5
  "type": "module",
6
6
  "exports": {
@@ -23,6 +23,14 @@
23
23
  "./storage-resolver": {
24
24
  "types": "./dist/storage-resolver.d.mts",
25
25
  "import": "./dist/storage-resolver.mjs"
26
+ },
27
+ "./storage": {
28
+ "types": "./dist/storage.d.mts",
29
+ "import": "./dist/storage.mjs"
30
+ },
31
+ "./realtime": {
32
+ "types": "./dist/realtime/index.d.mts",
33
+ "import": "./dist/realtime/index.mjs"
26
34
  }
27
35
  },
28
36
  "files": [
@@ -32,13 +40,13 @@
32
40
  "csv-parse": "^5.5.3",
33
41
  "drizzle-orm": "^0.45.2",
34
42
  "zod": "^3.24.1",
35
- "@murumets-ee/core": "0.12.0",
36
- "@murumets-ee/db": "0.12.0",
37
- "@murumets-ee/entity": "0.12.0",
38
- "@murumets-ee/logging": "0.12.0",
39
- "@murumets-ee/queue": "0.12.0",
40
- "@murumets-ee/search-elasticsearch": "0.12.0",
41
- "@murumets-ee/storage": "0.12.0"
43
+ "@murumets-ee/core": "0.13.1",
44
+ "@murumets-ee/db": "0.13.1",
45
+ "@murumets-ee/entity": "0.13.1",
46
+ "@murumets-ee/logging": "0.13.1",
47
+ "@murumets-ee/queue": "0.13.1",
48
+ "@murumets-ee/search-elasticsearch": "0.13.1",
49
+ "@murumets-ee/storage": "0.13.1"
42
50
  },
43
51
  "devDependencies": {
44
52
  "@types/node": "^20.19.39",
@@ -1,2 +0,0 @@
1
- import{bulkUpsert as e}from"@murumets-ee/search-elasticsearch";import{createReadStream as t}from"node:fs";import{parse as n}from"csv-parse";const r=50,i=5;var a=class{patterns=new Map;maxPatterns;maxSamplesPerPattern;droppedSignatures=0;constructor(e={}){this.maxPatterns=e.maxPatterns??50,this.maxSamplesPerPattern=e.maxSamplesPerPattern??5}addError(e,t,n,r,i){let a=`${t}:${r??`GENERAL`}:${n}`,o=this.patterns.get(a);if(!o){if(this.patterns.size>=this.maxPatterns){this.droppedSignatures+=1;return}o={errorType:t,field:r??null,message:n,count:0,firstOccurrence:e,lastOccurrence:e,samples:[]},this.patterns.set(a,o)}o.count+=1,o.lastOccurrence=e,o.samples.length<this.maxSamplesPerPattern&&o.samples.push({rowNumber:e,rowData:i})}getTotalErrorCount(){let e=0;for(let t of this.patterns.values())e+=t.count;return e}getDistinctPatternCount(){return this.patterns.size}getDroppedSignatureCount(){return this.droppedSignatures}getTopPatterns(){let e=Array.from(this.patterns.values()).sort((e,t)=>t.count-e.count),t=e.reduce((e,t)=>e+t.count,0);return e.map(e=>({errorType:e.errorType,field:e.field,message:e.message,count:e.count,firstOccurrence:e.firstOccurrence,lastOccurrence:e.lastOccurrence,samples:e.samples.slice(),percentage:t>0?e.count/t*100:0}))}snapshot(){return{totalErrors:this.getTotalErrorCount(),distinctPatterns:this.getDistinctPatternCount(),droppedSignatures:this.droppedSignatures,patterns:this.getTopPatterns()}}};async function*o(e){let{filePath:r,delimiter:i=` `,hasHeader:a=!0,columns:o,relaxColumnCount:s=!1}=e,c=o?Array.from(o):a,l=t(r),u=l.pipe(n({delimiter:i,columns:c,bom:!0,skip_empty_lines:!0,relax_column_count:s})),d=0;try{for await(let e of u){d+=1;let t={};if(Array.isArray(e)){let n=e;for(let e=0;e<n.length;e+=1)t[String(e)]=n[e]??``}else for(let[n,r]of Object.entries(e))t[n]=r??``;yield{rowNumber:d,row:t}}}finally{l.destroy()}}const s=1e3;async function c(t){let{importRunId:n,runLabel:r,params:i,transform:c,feed:l,esClient:u,esIndex:d,batchSize:f=s,onProgress:p,rowLimit:m,signal:h,errorTracker:g=new a}=t;if(f<1)throw Error(`batchSize must be >= 1 (got ${f})`);let _=Date.now(),v=0,y=0,b=0,x=0,S=0,C=[],w=async()=>{if(C.length===0)return;let t=C;C=[];let n;try{n=await e(u,{index:d,docs:t.map(({id:e,doc:t})=>({id:e,doc:t})),...h!==void 0&&{signal:h}})}catch(e){let n=h?.aborted??(e instanceof Error&&(e.name===`AbortError`||/abort/i.test(e.message)))?`aborted`:`bulk_request_failed`,r=e instanceof Error?e.message:String(e);for(let{rowNumber:e}of t)g.addError(e,n,r,void 0,null);throw b+=t.length,e}if(y+=n.succeeded,b+=n.failures.length,S+=1,n.failures.length>0){let e=new Map(t.map(e=>[e.id,e.rowNumber]));for(let t of n.failures){let n=e.get(t.id)??-1;g.addError(n,t.type,t.reason,void 0,{id:t.id})}}if(p){let e=(Date.now()-_)/1e3;p({rowsRead:v,rowsSucceeded:y,rowsFailed:b,rowsSkipped:x,batchesCompleted:S,elapsedSeconds:e,rowsPerSecond:e>0?v/e:0,distinctErrorPatterns:g.getDistinctPatternCount()})}};for await(let{rowNumber:e,row:t}of o(l)){if(h?.aborted||m!==void 0&&v>=m)break;v+=1;let a={importRunId:n,params:i,runLabel:r,rowNumber:e},o;try{o=await c(t,a)}catch(n){let r=n instanceof Error?n.message:String(n);g.addError(e,`transform_threw`,r,void 0,t),b+=1;continue}if(o.kind===`skip`){x+=1;continue}if(o.kind===`error`){g.addError(e,o.error.errorType,o.error.message,o.error.field,t),b+=1;continue}C.push({id:o.id,doc:o.doc,rowNumber:e}),C.length>=f&&await w()}return await w(),{rowsRead:v,rowsSucceeded:y,rowsFailed:b,rowsSkipped:x,batchesCompleted:S,errors:g.snapshot()}}export{i as a,r as i,c as 
n,a as o,o as r,s as t};
2
- //# sourceMappingURL=runner-DdhiNybk.mjs.map
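The removed runner-DdhiNybk.mjs is the old content-hashed runner chunk; the rebuilt worker.mjs earlier in this diff imports runner-D9FtnIBn.mjs instead, so the ErrorTracker, streamFeed and runImport code in it is renamed rather than dropped. A short usage sketch of the ErrorTracker API as it appears in the chunk: addError(rowNumber, errorType, message, field, rowData) buckets identical (errorType, field, message) triples, and snapshot() returns the JSON-serialisable summary that ends up in import_run.errorSummary. The 'NetPrice' field and row values are illustrative only.

    // ErrorTracker usage matching the chunk above. Defaults are 50 patterns
    // with 5 retained samples each; tighter caps are passed here only to make
    // the bounds explicit.
    import { ErrorTracker } from '@murumets-ee/imports'

    const tracker = new ErrorTracker({ maxPatterns: 10, maxSamplesPerPattern: 2 })

    // Two rows failing with the same (errorType, field, message) triple share one bucket.
    tracker.addError(42, 'transform_threw', "invalid number 'NA,5'", 'NetPrice', { NetPrice: 'NA,5' })
    tracker.addError(43, 'transform_threw', "invalid number 'NA,5'", 'NetPrice', { NetPrice: 'NA,5' })

    const summary = tracker.snapshot()
    // summary.totalErrors === 2, summary.distinctPatterns === 1,
    // summary.patterns[0].count === 2 with both sample rows retained.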
@@ -1 +0,0 @@
1
- {"version":3,"file":"runner-DdhiNybk.mjs","names":[],"sources":["../src/error-tracker.ts","../src/streaming.ts","../src/runner.ts"],"sourcesContent":["/**\n * Aggregates per-row errors into top-N pattern buckets so a feed of 1M\n * malformed rows surfaces as a handful of actionable signatures rather\n * than a million identical strings.\n *\n * Lifted from giga-test (`backend/src/workers/csv-importer.ts`) and\n * generalized:\n * - Configurable caps so the importer can tune memory bounds per run.\n * - Pure data — no logging side effects, no I/O.\n * - JSON-serialisable output via {@link ErrorTracker.snapshot} for the\n * `import_run.errorSummary` column.\n *\n * Pattern signature shape: `${errorType}:${field || 'GENERAL'}:${message}`.\n * Same `errorType + field + message` collapses to one bucket; differing\n * messages stay separate. This is intentional: a parser error on column\n * `NetPrice/Discount` (\"invalid number 'NA,5'\") and the same on column\n * `GrossPrice` are operationally distinct even if the parser is the same.\n *\n * Memory bounds: the patterns map is capped at `maxPatterns`. Once full,\n * additional NEW signatures are dropped — known patterns keep accumulating\n * counts. This is the \"top-N most common\" model: rare-but-novel errors\n * past the cap are invisible, but the cap protects against a runaway\n * adversarial feed exploding the map. Sample arrays are independently\n * capped at `maxSamplesPerPattern`.\n */\n\n/**\n * Recursively-defined JSON-serialisable value. Mirrors the shape of\n * `JsonValue` in `@murumets-ee/entity` without taking a dependency on\n * that package — error-tracker is otherwise standalone, and the\n * `ImportRun.errorSummary` JSONB column accepts anything in this shape.\n */\nexport type ImportJsonValue =\n | string\n | number\n | boolean\n | null\n | ImportJsonValue[]\n | { [key: string]: ImportJsonValue }\n\n/** One sample row attached to a pattern. `rowData` is the originally-parsed row. */\nexport interface ErrorSample {\n rowNumber: number\n rowData: ImportJsonValue\n}\n\n/** Public shape of an aggregated pattern as returned by {@link ErrorTracker.getTopPatterns}. */\nexport interface ErrorPattern {\n errorType: string\n field: string | null\n message: string\n count: number\n firstOccurrence: number\n lastOccurrence: number\n samples: ReadonlyArray<ErrorSample>\n /** `count / totalErrors`, scaled 0..100. `0` when there are zero errors total. */\n percentage: number\n}\n\nexport interface ErrorTrackerConfig {\n /** Hard cap on distinct signatures. Defaults to 50. New signatures past the cap are dropped. */\n maxPatterns?: number\n /** Hard cap on samples retained per pattern. Defaults to 5. Excess samples are dropped. */\n maxSamplesPerPattern?: number\n}\n\n/** Default caps — match giga-test for compatibility. */\nexport const DEFAULT_MAX_PATTERNS = 50\nexport const DEFAULT_MAX_SAMPLES_PER_PATTERN = 5\n\ninterface InternalPattern {\n errorType: string\n field: string | null\n message: string\n count: number\n firstOccurrence: number\n lastOccurrence: number\n samples: ErrorSample[]\n}\n\nexport class ErrorTracker {\n private readonly patterns = new Map<string, InternalPattern>()\n private readonly maxPatterns: number\n private readonly maxSamplesPerPattern: number\n private droppedSignatures = 0\n\n constructor(config: ErrorTrackerConfig = {}) {\n this.maxPatterns = config.maxPatterns ?? DEFAULT_MAX_PATTERNS\n this.maxSamplesPerPattern = config.maxSamplesPerPattern ?? 
DEFAULT_MAX_SAMPLES_PER_PATTERN\n }\n\n /**\n * Record one error. Same `(errorType, field, message)` triple bumps the\n * existing bucket; a new triple opens a new one (subject to {@link maxPatterns}).\n *\n * `field` is optional — pass `undefined` for errors not tied to a single\n * column (e.g. parse errors at row level). Internally normalised to the\n * literal string `'GENERAL'` so it shares a bucket with other genericised\n * errors of the same type+message.\n */\n addError(\n rowNumber: number,\n errorType: string,\n message: string,\n field: string | undefined,\n rowData: ImportJsonValue,\n ): void {\n const fieldKey = field ?? 'GENERAL'\n const signature = `${errorType}:${fieldKey}:${message}`\n\n let pattern = this.patterns.get(signature)\n if (!pattern) {\n if (this.patterns.size >= this.maxPatterns) {\n this.droppedSignatures += 1\n return\n }\n pattern = {\n errorType,\n field: field ?? null,\n message,\n count: 0,\n firstOccurrence: rowNumber,\n lastOccurrence: rowNumber,\n samples: [],\n }\n this.patterns.set(signature, pattern)\n }\n\n pattern.count += 1\n pattern.lastOccurrence = rowNumber\n if (pattern.samples.length < this.maxSamplesPerPattern) {\n pattern.samples.push({ rowNumber, rowData })\n }\n }\n\n /** Total count across every pattern. Counts errors, not patterns. */\n getTotalErrorCount(): number {\n let sum = 0\n for (const p of this.patterns.values()) sum += p.count\n return sum\n }\n\n /** Number of distinct signatures retained in the map (≤ `maxPatterns`). */\n getDistinctPatternCount(): number {\n return this.patterns.size\n }\n\n /**\n * Number of NEW signatures dropped because the map was already at\n * capacity. Surfacing this in the import_run summary tells the operator\n * \"the top-N was saturated — there's a long tail you're not seeing\".\n */\n getDroppedSignatureCount(): number {\n return this.droppedSignatures\n }\n\n /**\n * Top patterns sorted by descending count, capped at `maxPatterns`.\n * Stable secondary order is insertion order (Map iteration order is\n * insertion order; Array.sort is stable in V8).\n */\n getTopPatterns(): ErrorPattern[] {\n const all = Array.from(this.patterns.values()).sort((a, b) => b.count - a.count)\n const totalErrors = all.reduce((sum, p) => sum + p.count, 0)\n return all.map((p) => ({\n errorType: p.errorType,\n field: p.field,\n message: p.message,\n count: p.count,\n firstOccurrence: p.firstOccurrence,\n lastOccurrence: p.lastOccurrence,\n samples: p.samples.slice(),\n percentage: totalErrors > 0 ? (p.count / totalErrors) * 100 : 0,\n }))\n }\n\n /**\n * Compact JSON-serialisable snapshot for `import_run.errorSummary`.\n * Aside from the patterns array, includes the totals so a reader of\n * just this column doesn't have to re-derive them.\n */\n snapshot(): ErrorTrackerSnapshot {\n return {\n totalErrors: this.getTotalErrorCount(),\n distinctPatterns: this.getDistinctPatternCount(),\n droppedSignatures: this.droppedSignatures,\n patterns: this.getTopPatterns(),\n }\n }\n}\n\nexport interface ErrorTrackerSnapshot {\n totalErrors: number\n distinctPatterns: number\n droppedSignatures: number\n patterns: ErrorPattern[]\n}\n","/**\n * Tab-delimited / CSV streaming reader. 
Generic over delimiter so the\n * same path handles `.txt` (tab), `.csv` (comma), and the rare `;`\n * European export dialect.\n *\n * Built on `csv-parse` per giga-test precedent — node-stream-based, low\n * memory, handles UTF-8 BOM (the carmaker feeds are Windows-exported\n * and ship with a BOM that breaks naive split-on-tab parsers).\n *\n * The reader yields `{ rowNumber, row }` pairs where:\n * - `rowNumber` is 1-based and counts the header as row 0.\n * - `row` is `Record<string, string>` keyed by header name. Empty cells\n * are the empty string, NOT `undefined` — feed transforms test with\n * `value === ''` consistently.\n *\n * Why this lives in a streaming reader and not inside the transform\n * itself: the transform sees one already-parsed row at a time, never\n * the file. That keeps transform implementations free of I/O concerns\n * and makes them trivially unit-testable with a fixture row map.\n */\n\nimport { createReadStream } from 'node:fs'\nimport { parse } from 'csv-parse'\n\nexport interface StreamFeedOptions {\n /** Path to the file on disk. The PoC uploads land on local disk; S3-keyed reads come later. */\n filePath: string\n /**\n * Single-character field delimiter. Default `\\t` (the carmaker feed\n * format). Pass `,` for CSV, `;` for some European dialects.\n */\n delimiter?: string\n /**\n * `true` (default): the first row is the header and column names come\n * from it. `false`: rows are emitted as positional `{ \"0\": ..., \"1\": ... }`\n * and the transform reads by index — useful for headerless feeds that\n * commit to a documented column order.\n */\n hasHeader?: boolean\n /**\n * Optional explicit column-name list. When provided, takes precedence\n * over `hasHeader` (header row, if present, is skipped but its values\n * are ignored). Useful when the upstream header is unstable but the\n * positional shape isn't.\n */\n columns?: ReadonlyArray<string>\n /**\n * Forward to `csv-parse` `relax_column_count`. Default `false` —\n * a row whose column count doesn't match the header surfaces as a\n * parser error so the transform isn't silently fed truncated data.\n */\n relaxColumnCount?: boolean\n}\n\nexport interface StreamFeedRow {\n /** 1-based row number. Header (when present) is row 0; first data row is row 1. */\n rowNumber: number\n /**\n * Cell values keyed by column name (or string-position when\n * `hasHeader: false` AND no `columns`).\n *\n * **Cell-value invariants:**\n * - Empty cells (`A\\t\\tC`) → `''` (empty string).\n * - Missing TRAILING cells in `relaxColumnCount: true` mode → the\n * key is **absent** from the object, not present-with-`''`. csv-parse\n * does not emit keys for short rows. Transforms reading those\n * columns get `undefined` from `row['col']` and must handle it\n * (`row['col'] ?? ''` is the canonical idiom).\n * - With the default `relaxColumnCount: false`, short rows reject at\n * the parser, so this case never reaches the transform.\n */\n row: Record<string, string>\n}\n\n/**\n * Async-iterable over the parsed rows of a delimited file. Use with\n * `for await (const { rowNumber, row } of streamFeed({ filePath, ... }))`.\n *\n * The iterator owns its file descriptor — the `for await` loop closes\n * the underlying stream when it returns or breaks. 
Aborting mid-stream\n * (`break`, `throw`, signal) is safe; csv-parse propagates the close.\n */\nexport async function* streamFeed(options: StreamFeedOptions): AsyncIterable<StreamFeedRow> {\n const { filePath, delimiter = '\\t', hasHeader = true, columns, relaxColumnCount = false } = options\n\n // When the caller provides explicit `columns`, prefer them. When the\n // file has a header but no explicit `columns`, csv-parse takes the\n // first row as the column source. When neither is true, rows are\n // emitted with string-position keys.\n //\n // Typed against csv-parse's actual `columns?: ColumnOption[] | boolean`\n // signature — `string[]` satisfies `ColumnOption[]` since\n // `ColumnOption = string | undefined | null | false | { name: string }`.\n const columnConfig: string[] | boolean = columns ? Array.from(columns) : hasHeader\n\n const stream = createReadStream(filePath)\n const parser = stream.pipe(\n parse({\n delimiter,\n columns: columnConfig,\n bom: true,\n skip_empty_lines: true,\n // csv-parse defaults to strict column count; opt-in relaxation only.\n relax_column_count: relaxColumnCount,\n }),\n )\n\n let rowNumber = 0\n try {\n for await (const rawRow of parser as AsyncIterable<\n ReadonlyArray<string> | Record<string, string | undefined>\n >) {\n rowNumber += 1\n // csv-parse emits records with string keys when `columns` is\n // truthy, otherwise an Array. Normalise both shapes to\n // `Record<string, string>` so transforms can rely on `value === ''`\n // for missing cells (csv-parse leaves trailing missing cells as\n // `undefined` when `relax_column_count: true`; this collapses\n // them to `''` to keep the contract uniform).\n const row: Record<string, string> = {}\n if (Array.isArray(rawRow)) {\n const arr = rawRow as ReadonlyArray<string | undefined>\n for (let i = 0; i < arr.length; i += 1) {\n row[String(i)] = arr[i] ?? ''\n }\n } else {\n for (const [k, v] of Object.entries(rawRow)) {\n row[k] = v ?? ''\n }\n }\n yield { rowNumber, row }\n }\n } finally {\n // Safety: ensure the underlying file descriptor closes even if the\n // consumer breaks mid-iteration. Node closes streams on\n // garbage-collection but this makes it deterministic under tests.\n stream.destroy()\n }\n}\n","/**\n * One-shot importer: stream rows → transform → batched bulk-write to ES,\n * accumulating per-row errors into `ErrorTracker` and reporting progress\n * to the queue every batch.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope):\n * - **Batch size 1000.** Matches giga-test precedent. Configurable for\n * integration tests that don't want a 1k row floor.\n * - **No resumability and no automatic retries.** The `imports:run` queue\n * job is registered with `defaultRetries: 0` (see `worker.ts`) so a\n * failed handler does NOT re-enqueue itself — re-running a multi-batch\n * import against the same `import_run.id` while the previous attempt\n * may still be writing is a footgun (duplicate batches, double-counted\n * progress). Operator retries by creating a NEW `import_run` row.\n * - **No per-supplier transform plugin.** The runner takes a single\n * `RowTransform<TDoc>` from the registry and applies it to every row;\n * PR 8 may diverge but only by registering a different transform name.\n * - **Direct `bulkUpsert` into the live aliased index** (D6 alias is\n * set up by PR 4's `ensureAliasedIndex`; the importer doesn't reindex).\n *\n * Per D21 (sanctioned bulk path): this runner intentionally bypasses\n * AdminClient and entity hooks. 
Per-batch audit / observability lives\n * on the surrounding `import_run` row + queue progress, NOT per-row.\n */\n\nimport { bulkUpsert, type BulkIndexResult, type EsClientLike } from '@murumets-ee/search-elasticsearch'\nimport { ErrorTracker } from './error-tracker.js'\nimport { streamFeed, type StreamFeedOptions } from './streaming.js'\nimport type { RowTransform, TransformContext } from './transform.js'\n\n/** Soft default; chosen to match giga-test. ES bulk requests over ~5MB get split server-side anyway. */\nexport const DEFAULT_BATCH_SIZE = 1000\n\nexport interface RunImportOptions<TDoc> {\n /** UUID of the `import_run` row driving this run. Forwarded to every transform invocation. */\n importRunId: string\n /** Operator-supplied label for the run. Forwarded to the transform context. */\n runLabel: string\n /** Opaque per-run params copied from `import_run.params`. */\n params: Record<string, unknown>\n /** Transform applied to every parsed row. */\n transform: RowTransform<TDoc>\n /** Streaming reader options — file path, delimiter, header config. */\n feed: StreamFeedOptions\n /** ES client (low-level shape from `@murumets-ee/search-elasticsearch`). */\n esClient: EsClientLike\n /** Index alias to write to. Per D6, callers always pass an alias, never a physical index. */\n esIndex: string\n /** Rows per `bulkUpsert` call. Default {@link DEFAULT_BATCH_SIZE}. */\n batchSize?: number\n /**\n * Callback invoked after every batch. The handler in `worker.ts`\n * forwards this to `ctx.updateProgress` for the queue UI; tests\n * inspect it directly. Synchronous + cheap so a slow callback can't\n * back-pressure the importer.\n */\n onProgress?: (progress: ImportRunProgress) => void\n /** Optional: stop processing after this many rows. Tests use it; production passes `undefined`. */\n rowLimit?: number\n /** Abort signal threaded into the underlying ES client request — cooperative cancel. */\n signal?: AbortSignal\n /** Optional ErrorTracker config (caps). Default: top-50 patterns × 5 samples. */\n errorTracker?: ErrorTracker\n}\n\n/**\n * Progress payload written to `toolkit_jobs.progress` after every batch.\n * Caps + flush rules live on the queue's `updateProgress` debounce —\n * callers don't need to throttle.\n */\nexport interface ImportRunProgress {\n rowsRead: number\n rowsSucceeded: number\n rowsFailed: number\n rowsSkipped: number\n batchesCompleted: number\n /** Wall-clock seconds since the runner started. */\n elapsedSeconds: number\n /** Rows / second, computed at every batch. */\n rowsPerSecond: number\n /** Distinct error patterns currently held by the tracker. Saturates at the cap. */\n distinctErrorPatterns: number\n}\n\n/**\n * Final result returned by {@link runImport}. The handler writes these\n * onto the `import_run` row alongside the ErrorTracker snapshot.\n */\nexport interface RunImportResult {\n /** Total rows read from the file (excludes skipped empty lines). */\n rowsRead: number\n /** Rows the transform turned into a successful doc AND the ES cluster acknowledged. */\n rowsSucceeded: number\n /**\n * Rows that the transform rejected (`{ kind: 'error' }`) OR that ES\n * rejected on bulk-write (per-doc failure). Both are aggregated by\n * `errorTracker` for the import_run summary.\n */\n rowsFailed: number\n /** Rows that the transform skipped (`{ kind: 'skip' }`) — header noise, blank lines, intentional drop. */\n rowsSkipped: number\n /** Number of `bulkUpsert` calls made. */\n batchesCompleted: number\n /** Final value of {@link ErrorTracker.snapshot}. 
*/\n errors: ReturnType<ErrorTracker['snapshot']>\n}\n\n/**\n * Apply the runner against a feed file. Stops on rowLimit OR end-of-file\n * OR if `signal` aborts. Throws if the streaming reader / ES client\n * throws — caller (the queue handler) catches that and writes\n * `import_run.status = 'failed'` with the error message in\n * `errorSummary.fatal`.\n */\nexport async function runImport<TDoc>(options: RunImportOptions<TDoc>): Promise<RunImportResult> {\n const {\n importRunId,\n runLabel,\n params,\n transform,\n feed,\n esClient,\n esIndex,\n batchSize = DEFAULT_BATCH_SIZE,\n onProgress,\n rowLimit,\n signal,\n errorTracker = new ErrorTracker(),\n } = options\n\n if (batchSize < 1) {\n throw new Error(`batchSize must be >= 1 (got ${batchSize})`)\n }\n\n const startedAt = Date.now()\n let rowsRead = 0\n let rowsSucceeded = 0\n let rowsFailed = 0\n let rowsSkipped = 0\n let batchesCompleted = 0\n\n let pending: Array<{ id: string; doc: TDoc; rowNumber: number }> = []\n\n const flush = async (): Promise<void> => {\n if (pending.length === 0) return\n const batch = pending\n pending = []\n let result: BulkIndexResult\n try {\n result = await bulkUpsert<TDoc>(esClient, {\n index: esIndex,\n docs: batch.map(({ id, doc }) => ({ id, doc })),\n ...(signal !== undefined && { signal }),\n })\n } catch (err) {\n // Cluster- or transport-level failure — the whole batch is\n // unaccounted for. Distinguish abort (operator-driven cancel) from\n // a real cluster failure so the errorSummary doesn't mislabel a\n // cancelled run as broken cluster connectivity.\n const isAbort =\n signal?.aborted ??\n (err instanceof Error && (err.name === 'AbortError' || /abort/i.test(err.message)))\n const errorType = isAbort ? 'aborted' : 'bulk_request_failed'\n const reason = err instanceof Error ? err.message : String(err)\n for (const { rowNumber } of batch) {\n errorTracker.addError(rowNumber, errorType, reason, undefined, null)\n }\n rowsFailed += batch.length\n throw err\n }\n\n rowsSucceeded += result.succeeded\n rowsFailed += result.failures.length\n batchesCompleted += 1\n\n if (result.failures.length > 0) {\n // Map each ES failure back to its source row via `id`. The bulk\n // response order matches the request order, but ES doesn't promise\n // that; matching by `id` is the safe path. PoC volume is small\n // enough that the O(failures × batch) cost is irrelevant.\n const byId = new Map(batch.map((b) => [b.id, b.rowNumber]))\n for (const fail of result.failures) {\n const rowNumber = byId.get(fail.id) ?? -1\n errorTracker.addError(rowNumber, fail.type, fail.reason, undefined, { id: fail.id })\n }\n }\n\n if (onProgress) {\n const elapsedSeconds = (Date.now() - startedAt) / 1000\n onProgress({\n rowsRead,\n rowsSucceeded,\n rowsFailed,\n rowsSkipped,\n batchesCompleted,\n elapsedSeconds,\n rowsPerSecond: elapsedSeconds > 0 ? rowsRead / elapsedSeconds : 0,\n distinctErrorPatterns: errorTracker.getDistinctPatternCount(),\n })\n }\n }\n\n for await (const { rowNumber, row } of streamFeed(feed)) {\n if (signal?.aborted) break\n if (rowLimit !== undefined && rowsRead >= rowLimit) break\n rowsRead += 1\n\n const ctx: TransformContext = { importRunId, params, runLabel, rowNumber }\n let result: Awaited<ReturnType<typeof transform>>\n try {\n result = await transform(row, ctx)\n } catch (err) {\n // A throw from the transform is a programmer error — surface it as\n // a row-level error so the run can continue. 
(If the bug is\n // catastrophic, the operator sees the same message repeated and\n // can stop the run.)\n const reason = err instanceof Error ? err.message : String(err)\n errorTracker.addError(rowNumber, 'transform_threw', reason, undefined, row)\n rowsFailed += 1\n continue\n }\n\n if (result.kind === 'skip') {\n rowsSkipped += 1\n continue\n }\n if (result.kind === 'error') {\n errorTracker.addError(\n rowNumber,\n result.error.errorType,\n result.error.message,\n result.error.field,\n row,\n )\n rowsFailed += 1\n continue\n }\n\n pending.push({ id: result.id, doc: result.doc, rowNumber })\n if (pending.length >= batchSize) {\n await flush()\n }\n }\n\n await flush()\n\n return {\n rowsRead,\n rowsSucceeded,\n rowsFailed,\n rowsSkipped,\n batchesCompleted,\n errors: errorTracker.snapshot(),\n }\n}\n"],"mappings":"4IAmEA,MAAa,EAAuB,GACvB,EAAkC,EAY/C,IAAa,EAAb,KAA0B,CACxB,SAA4B,IAAI,IAChC,YACA,qBACA,kBAA4B,EAE5B,YAAY,EAA6B,EAAE,CAAE,CAC3C,KAAK,YAAc,EAAO,aAAA,GAC1B,KAAK,qBAAuB,EAAO,sBAAA,EAYrC,SACE,EACA,EACA,EACA,EACA,EACM,CAEN,IAAM,EAAY,GAAG,EAAU,GADd,GAAS,UACiB,GAAG,IAE1C,EAAU,KAAK,SAAS,IAAI,EAAU,CAC1C,GAAI,CAAC,EAAS,CACZ,GAAI,KAAK,SAAS,MAAQ,KAAK,YAAa,CAC1C,KAAK,mBAAqB,EAC1B,OAEF,EAAU,CACR,YACA,MAAO,GAAS,KAChB,UACA,MAAO,EACP,gBAAiB,EACjB,eAAgB,EAChB,QAAS,EAAE,CACZ,CACD,KAAK,SAAS,IAAI,EAAW,EAAQ,CAGvC,EAAQ,OAAS,EACjB,EAAQ,eAAiB,EACrB,EAAQ,QAAQ,OAAS,KAAK,sBAChC,EAAQ,QAAQ,KAAK,CAAE,YAAW,UAAS,CAAC,CAKhD,oBAA6B,CAC3B,IAAI,EAAM,EACV,IAAK,IAAM,KAAK,KAAK,SAAS,QAAQ,CAAE,GAAO,EAAE,MACjD,OAAO,EAIT,yBAAkC,CAChC,OAAO,KAAK,SAAS,KAQvB,0BAAmC,CACjC,OAAO,KAAK,kBAQd,gBAAiC,CAC/B,IAAM,EAAM,MAAM,KAAK,KAAK,SAAS,QAAQ,CAAC,CAAC,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAC1E,EAAc,EAAI,QAAQ,EAAK,IAAM,EAAM,EAAE,MAAO,EAAE,CAC5D,OAAO,EAAI,IAAK,IAAO,CACrB,UAAW,EAAE,UACb,MAAO,EAAE,MACT,QAAS,EAAE,QACX,MAAO,EAAE,MACT,gBAAiB,EAAE,gBACnB,eAAgB,EAAE,eAClB,QAAS,EAAE,QAAQ,OAAO,CAC1B,WAAY,EAAc,EAAK,EAAE,MAAQ,EAAe,IAAM,EAC/D,EAAE,CAQL,UAAiC,CAC/B,MAAO,CACL,YAAa,KAAK,oBAAoB,CACtC,iBAAkB,KAAK,yBAAyB,CAChD,kBAAmB,KAAK,kBACxB,SAAU,KAAK,gBAAgB,CAChC,GCzGL,eAAuB,EAAW,EAA0D,CAC1F,GAAM,CAAE,WAAU,YAAY,IAAM,YAAY,GAAM,UAAS,mBAAmB,IAAU,EAUtF,EAAmC,EAAU,MAAM,KAAK,EAAQ,CAAG,EAEnE,EAAS,EAAiB,EAAS,CACnC,EAAS,EAAO,KACpB,EAAM,CACJ,YACA,QAAS,EACT,IAAK,GACL,iBAAkB,GAElB,mBAAoB,EACrB,CAAC,CACH,CAEG,EAAY,EAChB,GAAI,CACF,UAAW,IAAM,KAAU,EAExB,CACD,GAAa,EAOb,IAAM,EAA8B,EAAE,CACtC,GAAI,MAAM,QAAQ,EAAO,CAAE,CACzB,IAAM,EAAM,EACZ,IAAK,IAAI,EAAI,EAAG,EAAI,EAAI,OAAQ,GAAK,EACnC,EAAI,OAAO,EAAE,EAAI,EAAI,IAAM,QAG7B,IAAK,GAAM,CAAC,EAAG,KAAM,OAAO,QAAQ,EAAO,CACzC,EAAI,GAAK,GAAK,GAGlB,KAAM,CAAE,YAAW,MAAK,SAElB,CAIR,EAAO,SAAS,ECzGpB,MAAa,EAAqB,IAmFlC,eAAsB,EAAgB,EAA2D,CAC/F,GAAM,CACJ,cACA,WACA,SACA,YACA,OACA,WACA,UACA,YAAY,EACZ,aACA,WACA,SACA,eAAe,IAAI,GACjB,EAEJ,GAAI,EAAY,EACd,MAAU,MAAM,+BAA+B,EAAU,GAAG,CAG9D,IAAM,EAAY,KAAK,KAAK,CACxB,EAAW,EACX,EAAgB,EAChB,EAAa,EACb,EAAc,EACd,EAAmB,EAEnB,EAA+D,EAAE,CAE/D,EAAQ,SAA2B,CACvC,GAAI,EAAQ,SAAW,EAAG,OAC1B,IAAM,EAAQ,EACd,EAAU,EAAE,CACZ,IAAI,EACJ,GAAI,CACF,EAAS,MAAM,EAAiB,EAAU,CACxC,MAAO,EACP,KAAM,EAAM,KAAK,CAAE,KAAI,UAAW,CAAE,KAAI,MAAK,EAAE,CAC/C,GAAI,IAAW,IAAA,IAAa,CAAE,SAAQ,CACvC,CAAC,OACK,EAAK,CAQZ,IAAM,EAFJ,GAAQ,UACP,aAAe,QAAU,EAAI,OAAS,cAAgB,SAAS,KAAK,EAAI,QAAQ,GACvD,UAAY,sBAClC,EAAS,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CAC/D,IAAK,GAAM,CAAE,eAAe,EAC1B,EAAa,SAAS,EAAW,EAAW,EAAQ,IAAA,GAAW,KAAK,CAGtE,KADA,IAAc,EAAM,OACd,EAOR,GAJA,GAAiB,EAAO,UACxB,GAAc,EAAO,SAAS,OAC9B,GAAoB,EAEhB,EAAO,SAAS,OAAS,EAAG,CAK9B,IAAM,EAAO,IAAI,IAAI,EAAM,IAAK,GAAM,CAAC,EAAE,GAAI,EAAE,UAAU,CAAC,CAAC,CAC3D,IAAK,IAAM,KAAQ,EAAO,SAAU,CAClC,IAAM,EAAY,EAAK,IAAI,EAAK,G
AAG,EAAI,GACvC,EAAa,SAAS,EAAW,EAAK,KAAM,EAAK,OAAQ,IAAA,GAAW,CAAE,GAAI,EAAK,GAAI,CAAC,EAIxF,GAAI,EAAY,CACd,IAAM,GAAkB,KAAK,KAAK,CAAG,GAAa,IAClD,EAAW,CACT,WACA,gBACA,aACA,cACA,mBACA,iBACA,cAAe,EAAiB,EAAI,EAAW,EAAiB,EAChE,sBAAuB,EAAa,yBAAyB,CAC9D,CAAC,GAIN,UAAW,GAAM,CAAE,YAAW,SAAS,EAAW,EAAK,CAAE,CAEvD,GADI,GAAQ,SACR,IAAa,IAAA,IAAa,GAAY,EAAU,MACpD,GAAY,EAEZ,IAAM,EAAwB,CAAE,cAAa,SAAQ,WAAU,YAAW,CACtE,EACJ,GAAI,CACF,EAAS,MAAM,EAAU,EAAK,EAAI,OAC3B,EAAK,CAKZ,IAAM,EAAS,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CAC/D,EAAa,SAAS,EAAW,kBAAmB,EAAQ,IAAA,GAAW,EAAI,CAC3E,GAAc,EACd,SAGF,GAAI,EAAO,OAAS,OAAQ,CAC1B,GAAe,EACf,SAEF,GAAI,EAAO,OAAS,QAAS,CAC3B,EAAa,SACX,EACA,EAAO,MAAM,UACb,EAAO,MAAM,QACb,EAAO,MAAM,MACb,EACD,CACD,GAAc,EACd,SAGF,EAAQ,KAAK,CAAE,GAAI,EAAO,GAAI,IAAK,EAAO,IAAK,YAAW,CAAC,CACvD,EAAQ,QAAU,GACpB,MAAM,GAAO,CAMjB,OAFA,MAAM,GAAO,CAEN,CACL,WACA,gBACA,aACA,cACA,mBACA,OAAQ,EAAa,UAAU,CAChC"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"worker-DerGVTSI.d.mts","names":[],"sources":["../src/entities/import-run.ts","../src/error-tracker.ts","../src/streaming.ts","../src/runner.ts","../src/worker.ts"],"mappings":";;;;;;;;;;;AAgCA;;;;;;;cAHa,mBAAA;AAAA,KACD,eAAA,WAA0B,mBAAA;AAAA,cAEzB,SAAA,yBAAS,MAAA;MAgEpB,sBAAA,CAAA,OAAA;AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAEU,eAAA,GAAkB,WAAA,QAAmB,SAAA,CAAU,SAAA;;;;;;;;;;;;AArE3D;;;;;AACA;;;;;AAEA;;;;;;;;;;;;KCAY,eAAA,sCAKR,eAAA;EAAA,CACG,GAAA,WAAc,eAAA;AAAA;;UAGJ,WAAA;EACf,SAAA;EACA,OAAA,EAAS,eAAA;AAAA;;UAIM,YAAA;EACf,SAAA;EACA,KAAA;EACA,OAAA;EACA,KAAA;EACA,eAAA;EACA,cAAA;EACA,OAAA,EAAS,aAAA,CAAc,WAAA;;EAEvB,UAAA;AAAA;AAAA,UAGe,kBAAA;;EAEf,WAAA;;EAEA,oBAAA;AAAA;;cAIW,oBAAA;AAAA,cACA,+BAAA;AAAA,cAYA,YAAA;EAAA,iBACM,QAAA;EAAA,iBACA,WAAA;EAAA,iBACA,oBAAA;EAAA,QACT,iBAAA;cAEI,MAAA,GAAQ,kBAAA;;;;;;;;;;EAcpB,QAAA,CACE,SAAA,UACA,SAAA,UACA,OAAA,UACA,KAAA,sBACA,OAAA,EAAS,eAAA;;EA+BX,kBAAA,CAAA;;EAOA,uBAAA,CAAA;;;;;;EASA,wBAAA,CAAA;;;;;;EASA,cAAA,CAAA,GAAkB,YAAA;;;;;AD/DpB;ECmFE,QAAA,CAAA,GAAY,oBAAA;AAAA;AAAA,UAUG,oBAAA;EACf,WAAA;EACA,gBAAA;EACA,iBAAA;EACA,QAAA,EAAU,YAAA;AAAA;;;;;;;;;;;;ADtKZ;;;;;AACA;;;;;AAEA;UERiB,iBAAA;;EAEf,QAAA;;;;;EAKA,SAAA;;;;;;;EAOA,SAAA;;;;;;;EAOA,OAAA,GAAU,aAAA;;;;;;EAMV,gBAAA;AAAA;AAAA,UAGe,aAAA;;EAEf,SAAA;;;;;;;;;;;;;;;EAeA,GAAA,EAAK,MAAA;AAAA;;;;;;;;;iBAWgB,UAAA,CAAW,OAAA,EAAS,iBAAA,GAAoB,aAAA,CAAc,aAAA;;;;cCnDhE,kBAAA;AAAA,UAEI,gBAAA;;EAEf,WAAA;;EAEA,QAAA;;EAEA,MAAA,EAAQ,MAAA;;EAER,SAAA,EAAW,YAAA,CAAa,IAAA;;EAExB,IAAA,EAAM,iBAAA;;EAEN,QAAA,EAAU,YAAA;;EAEV,OAAA;EHfoB;EGiBpB,SAAA;EHjBoB;;;;;;EGwBpB,UAAA,IAAc,QAAA,EAAU,iBAAA;;EAExB,QAAA;;EAEA,MAAA,GAAS,WAAA;;EAET,YAAA,GAAe,YAAA;AAAA;;;;;;UAQA,iBAAA;EACf,QAAA;EACA,aAAA;EACA,UAAA;EACA,WAAA;EACA,gBAAA;;EAEA,cAAA;;EAEA,aAAA;;EAEA,qBAAA;AAAA;;;;;UAOe,eAAA;;EAEf,QAAA;;EAEA,aAAA;;;;;;EAMA,UAAA;;EAEA,WAAA;EHFU;EGIV,gBAAA;;EAEA,MAAA,EAAQ,UAAA,CAAW,YAAA;AAAA;;;;;;;;iBAUC,SAAA,MAAA,CAAgB,OAAA,EAAS,gBAAA,CAAiB,IAAA,IAAQ,OAAA,CAAQ,eAAA;;;;;;;;;;;;;UCjF/D,oBAAA;EACf,EAAA;EACA,OAAA,EAAS,oBAAA;EACT,cAAA,CAAe,IAAA,EAAM,iBAAA;AAAA;;;;;;;cASV,0BAAA,EAA0B,CAAA,CAAA,SAAA;;;;;;;KAG3B,oBAAA,GAAuB,CAAA,CAAE,KAAA,QAAa,0BAAA;;;;;;;;;;;;cAarC,aAAA,EAAe,aAAA,CAAc,oBAAA;;;;;;;KAa9B,gBAAA,SAAyB,YAAA,GAAe,OAAA,CAAQ,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;KA2BhD,gBAAA,IACV,UAAA,aACG,OAAA;EAAU,SAAA;EAAmB,OAAA,SAAgB,OAAA;AAAA;AAAA,UAEjC,sBAAA;EJPwB;EISvC,UAAA,EAAY,eAAA;EJTmC;EIW/C,UAAA,EAAY,iBAAA;EJXsD;EIalE,QAAA,EAAU,gBAAA;;EAEV,OAAA;;AHjFF;;;;EGuFE,eAAA,GAAkB,gBAAA;EHjFb;EGmFL,MAAA,GAAS,MAAA;AAAA;;AHhFX;;;;;;;;;iBG6FgB,sBAAA,CACd,MAAA,EAAQ,sBAAA,IACN,GAAA,EAAK,oBAAA,KAAyB,OAAA"}