@murumets-ee/imports 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,94 @@
+ Elastic License 2.0 (ELv2)
+
+ URL: https://www.elastic.co/licensing/elastic-license
+
+ ## Acceptance
+
+ By using the software, you agree to all of the terms and conditions below.
+
+ ## Copyright License
+
+ The licensor grants you a non-exclusive, royalty-free, worldwide,
+ non-sublicensable, non-transferable license to use, copy, distribute, make
+ available, and prepare derivative works of the software, in each case subject
+ to the limitations and conditions below.
+
+ ## Limitations
+
+ You may not provide the software to third parties as a hosted or managed
+ service, where the service provides users with access to any substantial set
+ of the features or functionality of the software.
+
+ You may not move, change, disable, or circumvent the license key functionality
+ in the software, and you may not remove or obscure any functionality in the
+ software that is protected by the license key.
+
+ You may not alter, remove, or obscure any licensing, copyright, or other
+ notices of the licensor in the software. Any use of the licensor's trademarks
+ is subject to applicable law.
+
+ ## Patents
+
+ The licensor grants you a license, under any patent claims the licensor can
+ license, or becomes able to license, to make, have made, use, sell, offer for
+ sale, import and have imported the software, in each case subject to the
+ limitations and conditions in this license. This license does not cover any
+ patent claims that you cause to be infringed by modifications or additions to
+ the software. If you or your company make any written claim that the software
+ infringes or contributes to infringement of any patent, your patent license
+ for the software granted under these terms ends immediately. If your company
+ makes such a claim, your patent license ends immediately for work on behalf
+ of your company.
+
+ ## Notices
+
+ You must ensure that anyone who gets a copy of any part of the software from
+ you also gets a copy of these terms.
+
+ If you modify the software, you must include in any modified copies of the
+ software prominent notices stating that you have modified the software.
+
+ ## No Other Rights
+
+ These terms do not imply any licenses other than those expressly granted in
+ these terms.
+
+ ## Termination
+
+ If you use the software in violation of these terms, such use is not licensed,
+ and your licenses will automatically terminate. If the licensor provides you
+ with a notice of your violation, and you cease all violation of this license
+ no later than 30 days after you receive that notice, your licenses will be
+ reinstated retroactively. However, if you violate these terms after such
+ reinstatement, any additional violation of these terms will cause your
+ licenses to terminate automatically and permanently.
+
+ ## No Liability
+
+ As far as the law allows, the software comes as is, without any warranty or
+ condition, and the licensor will not be liable to you for any damages arising
+ out of these terms or the use or nature of the software, under any kind of
+ legal claim.
+
+ ## Definitions
+
+ The **licensor** is the entity offering these terms, and the **software** is
+ the software the licensor makes available under these terms, including any
+ portion of it.
+
+ **you** refers to the individual or entity agreeing to these terms.
+
+ **your company** is any legal entity, sole proprietorship, or other kind of
+ organization that you work for, plus all organizations that have control over,
+ are under the control of, or are under common control with that organization.
+ **control** means ownership of substantially all the assets of an entity, or
+ the power to direct the management and policies of an entity (for example, by
+ voting right, contract, or otherwise). Control can be direct or indirect.
+
+ **your licenses** are all the licenses granted to you for the software under
+ these terms.
+
+ **use** means anything you do with the software requiring one of your
+ licenses.
+
+ **trademark** means trademarks, service marks, and similar rights.
@@ -0,0 +1,3 @@
+ import { C as IMPORT_RUN_STATUSES, E as ImportRunStatus, S as ErrorTrackerConfig, T as ImportRunClient, _ as DEFAULT_MAX_PATTERNS, b as ErrorSample, d as RunImportOptions, f as RunImportResult, g as streamFeed, h as StreamFeedRow, l as DEFAULT_BATCH_SIZE, m as StreamFeedOptions, n as FilePathResolver, p as runImport, t as EsClientResolver, u as ImportRunProgress, v as DEFAULT_MAX_SAMPLES_PER_PATTERN, w as ImportRun, x as ErrorTracker, y as ErrorPattern } from "./worker-DerGVTSI.mjs";
+ import { a as TransformName, c as registerImportTransform, i as TransformContext, n as RowResult, o as TransformRegistry, r as RowTransform, s as getTransformRegistry, t as RowError } from "./transform-D_uhdLeo.mjs";
+ export { DEFAULT_BATCH_SIZE, DEFAULT_MAX_PATTERNS, DEFAULT_MAX_SAMPLES_PER_PATTERN, type ErrorPattern, type ErrorSample, ErrorTracker, type ErrorTrackerConfig, type EsClientResolver, type FilePathResolver, IMPORT_RUN_STATUSES, ImportRun, type ImportRunClient, type ImportRunProgress, type ImportRunStatus, type RowError, type RowResult, type RowTransform, type RunImportOptions, type RunImportResult, type StreamFeedOptions, type StreamFeedRow, type TransformContext, type TransformName, TransformRegistry, getTransformRegistry, registerImportTransform, runImport, streamFeed };
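The entry above re-exports the whole public surface. As a quick orientation, here is a minimal sketch of driving the exported `streamFeed` reader on its own; the `for await` idiom, the tab-delimiter default and the empty-cell contract come from the `streaming.ts` doc comments embedded in the source maps further down, while the file path and the `SupplierCode` column are made-up placeholders.

```ts
import { streamFeed } from '@murumets-ee/imports'

// Placeholder path; the reader defaults to tab-delimited input with a header row.
const feed = { filePath: '/tmp/example-feed.txt' }

for await (const { rowNumber, row } of streamFeed(feed)) {
  // Empty cells arrive as '' (never undefined) when the column count matches the header.
  // 'SupplierCode' is an illustrative column name, not a real feed column.
  if (row['SupplierCode'] === '') {
    console.warn(`row ${rowNumber}: missing SupplierCode`)
  }
}
```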
package/dist/index.mjs ADDED
@@ -0,0 +1 @@
+ import{a as e,i as t,n,r,t as i}from"./transform-BUGBTotp.mjs";import{a,i as o,n as s,o as c,r as l,t as u}from"./runner-DdhiNybk.mjs";export{u as DEFAULT_BATCH_SIZE,o as DEFAULT_MAX_PATTERNS,a as DEFAULT_MAX_SAMPLES_PER_PATTERN,c as ErrorTracker,t as IMPORT_RUN_STATUSES,e as ImportRun,i as TransformRegistry,n as getTransformRegistry,r as registerImportTransform,s as runImport,l as streamFeed};
@@ -0,0 +1,38 @@
+ import { n as FilePathResolver, t as EsClientResolver } from "./worker-DerGVTSI.mjs";
+ import { Plugin } from "@murumets-ee/core";
+
+ //#region src/plugin.d.ts
+ interface ImportsPluginOptions {
+ /**
+ * Elasticsearch client resolver. Lazy so consumers can construct
+ * the client inside a route initialiser instead of at plugin-init
+ * time. Required — without it, the worker can't bulk-write.
+ */
+ esClient: EsClientResolver;
+ /**
+ * ES index alias to write to. Defaults to `'parts'` (the
+ * `PARTS_INDEX_ALIAS` re-exported from `@murumets-ee/search-elasticsearch`,
+ * inlined here to avoid a runtime import for a one-line constant).
+ * Per D6 always pass an alias, never a physical index.
+ */
+ esIndex?: string;
+ /**
+ * Resolve `import_run.filePath` to a local FS path before streaming.
+ * **Required when uploads land in remote storage** (R2/S3/etc.) —
+ * without it, the worker hands the storage key to `createReadStream`
+ * and crashes with `ENOENT`.
+ *
+ * Typical wiring downloads the storage object to a tmpfile and
+ * returns its path; the optional `cleanup` callback runs after the
+ * run finishes (success or failure).
+ *
+ * Omit this option for the original on-disk PoC setup where the
+ * upload route writes directly to a local directory and persists
+ * the absolute path on `import_run.filePath`.
+ */
+ resolveFilePath?: FilePathResolver;
+ }
+ declare function imports(options: ImportsPluginOptions): Plugin;
+ //#endregion
+ export { ImportsPluginOptions, imports };
+ //# sourceMappingURL=plugin.d.mts.map
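For context, the package's own wiring example, reproduced from the `@example` block in `src/plugin.ts` (visible verbatim in the `plugin.mjs` source map further down this diff). Note the original elides the import for `queue()`:

```ts
import { defineLumiConfig } from '@murumets-ee/core'
import { imports } from '@murumets-ee/imports/plugin'
import { Client } from '@elastic/elasticsearch'

const es = new Client({ node: process.env.ES_URL })

export default defineLumiConfig({
  plugins: [
    // queue() and the carmaker-transform plugin must be present too.
    queue(),
    imports({ esClient: () => es, esIndex: 'parts' }),
  ],
})
```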
@@ -0,0 +1 @@
+ {"version":3,"file":"plugin.d.mts","names":[],"sources":["../src/plugin.ts"],"mappings":";;;;UAuCiB,oBAAA;;;;;;EAMf,QAAA,EAAU,gBAAA;;;;;;;EAOV,OAAA;;;;;;;;;;;;;;;EAeA,eAAA,GAAkB,gBAAA;AAAA;AAAA,iBAGJ,OAAA,CAAQ,OAAA,EAAS,oBAAA,GAAuB,MAAA"}
@@ -0,0 +1,2 @@
+ import{a as e,n as t}from"./transform-BUGBTotp.mjs";import{createRunImportHandler as n,importsRunJob as r}from"./worker.mjs";import{createAdminClient as i}from"@murumets-ee/core/clients";function a(a){let o=a.esIndex??`parts`;return{name:`@murumets-ee/imports`,server:{entities:[e],init:async s=>{if(!s.plugins.all().some(e=>e.name===`@murumets-ee/queue`)){s.logger.warn(`imports: queue() plugin not in plugins array — imports:run jobs will not be processed`);return}let c=await import(`@murumets-ee/queue/client`),l=i(e,s),u=t();c.registerJob(r,n({importRuns:l,transforms:u,esClient:a.esClient,esIndex:o,...a.resolveFilePath!==void 0&&{resolveFilePath:a.resolveFilePath},logger:s.logger.child({pkg:`imports`})})),s.logger.info({esIndex:o},`Imports plugin initialized`)}}}}export{a as imports};
+ //# sourceMappingURL=plugin.mjs.map
@@ -0,0 +1 @@
+ {"version":3,"file":"plugin.mjs","names":[],"sources":["../src/plugin.ts"],"sourcesContent":["/**\n * Imports plugin — registers the `import_run` entity and (when ES + queue\n * are wired) registers the `imports:run` job handler.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope): plumbing only. The bespoke\n * admin pages (upload form, parts search) live in PR 8a. Per-feed\n * transforms (carmaker, …) live in PR 8 and self-register against the\n * process-global transform registry — this plugin's init does NOT\n * import any transform.\n *\n * @example\n * ```ts\n * import { defineLumiConfig } from '@murumets-ee/core'\n * import { imports } from '@murumets-ee/imports/plugin'\n * import { Client } from '@elastic/elasticsearch'\n *\n * const es = new Client({ node: process.env.ES_URL })\n *\n * export default defineLumiConfig({\n * plugins: [\n * // queue() and the carmaker-transform plugin must be present too.\n * queue(),\n * imports({ esClient: () => es, esIndex: 'parts' }),\n * ],\n * })\n * ```\n */\n\nimport type { Plugin } from '@murumets-ee/core'\nimport { createAdminClient } from '@murumets-ee/core/clients'\nimport { ImportRun } from './entities/import-run.js'\nimport {\n type EsClientResolver,\n type FilePathResolver,\n createRunImportHandler,\n importsRunJob,\n} from './worker.js'\nimport { getTransformRegistry } from './transform.js'\n\nexport interface ImportsPluginOptions {\n /**\n * Elasticsearch client resolver. Lazy so consumers can construct\n * the client inside a route initialiser instead of at plugin-init\n * time. Required — without it, the worker can't bulk-write.\n */\n esClient: EsClientResolver\n /**\n * ES index alias to write to. Defaults to `'parts'` (the\n * `PARTS_INDEX_ALIAS` re-exported from `@murumets-ee/search-elasticsearch`,\n * inlined here to avoid a runtime import for a one-line constant).\n * Per D6 always pass an alias, never a physical index.\n */\n esIndex?: string\n /**\n * Resolve `import_run.filePath` to a local FS path before streaming.\n * **Required when uploads land in remote storage** (R2/S3/etc.) —\n * without it, the worker hands the storage key to `createReadStream`\n * and crashes with `ENOENT`.\n *\n * Typical wiring downloads the storage object to a tmpfile and\n * returns its path; the optional `cleanup` callback runs after the\n * run finishes (success or failure).\n *\n * Omit this option for the original on-disk PoC setup where the\n * upload route writes directly to a local directory and persists\n * the absolute path on `import_run.filePath`.\n */\n resolveFilePath?: FilePathResolver\n}\n\nexport function imports(options: ImportsPluginOptions): Plugin {\n const esIndex = options.esIndex ?? 'parts'\n\n return {\n name: '@murumets-ee/imports',\n server: {\n entities: [ImportRun],\n init: async (app) => {\n // Queue PACKAGE being importable doesn't mean the queue() PLUGIN\n // is in the consumer's plugins array. Without it, registerJob()\n // succeeds but no worker ever picks up the job. 
Probe and warn.\n const queuePluginPresent = app.plugins\n .all()\n .some((p) => p.name === '@murumets-ee/queue')\n if (!queuePluginPresent) {\n app.logger.warn(\n 'imports: queue() plugin not in plugins array — imports:run jobs will not be processed',\n )\n return\n }\n\n // Queue client stays dynamic-imported: we already early-returned\n // above when the queue() plugin isn't in the consumer's plugins\n // array, so static-importing here would force-load the queue\n // client module in deployments that don't enable the worker.\n const queueClientModule = await import('@murumets-ee/queue/client')\n\n const importRuns = createAdminClient(ImportRun, app)\n const transforms = getTransformRegistry()\n\n queueClientModule.registerJob(\n importsRunJob,\n createRunImportHandler({\n importRuns,\n transforms,\n esClient: options.esClient,\n esIndex,\n ...(options.resolveFilePath !== undefined && {\n resolveFilePath: options.resolveFilePath,\n }),\n logger: app.logger.child({ pkg: 'imports' }),\n }),\n )\n\n app.logger.info({ esIndex }, 'Imports plugin initialized')\n },\n },\n }\n}\n"],"mappings":"2LAsEA,SAAgB,EAAQ,EAAuC,CAC7D,IAAM,EAAU,EAAQ,SAAW,QAEnC,MAAO,CACL,KAAM,uBACN,OAAQ,CACN,SAAU,CAAC,EAAU,CACrB,KAAM,KAAO,IAAQ,CAOnB,GAAI,CAHuB,EAAI,QAC5B,KAAK,CACL,KAAM,GAAM,EAAE,OAAS,qBACH,CAAE,CACvB,EAAI,OAAO,KACT,wFACD,CACD,OAOF,IAAM,EAAoB,MAAM,OAAO,6BAEjC,EAAa,EAAkB,EAAW,EAAI,CAC9C,EAAa,GAAsB,CAEzC,EAAkB,YAChB,EACA,EAAuB,CACrB,aACA,aACA,SAAU,EAAQ,SAClB,UACA,GAAI,EAAQ,kBAAoB,IAAA,IAAa,CAC3C,gBAAiB,EAAQ,gBAC1B,CACD,OAAQ,EAAI,OAAO,MAAM,CAAE,IAAK,UAAW,CAAC,CAC7C,CAAC,CACH,CAED,EAAI,OAAO,KAAK,CAAE,UAAS,CAAE,6BAA6B,EAE7D,CACF"}
@@ -0,0 +1,2 @@
+ import{bulkUpsert as e}from"@murumets-ee/search-elasticsearch";import{createReadStream as t}from"node:fs";import{parse as n}from"csv-parse";const r=50,i=5;var a=class{patterns=new Map;maxPatterns;maxSamplesPerPattern;droppedSignatures=0;constructor(e={}){this.maxPatterns=e.maxPatterns??50,this.maxSamplesPerPattern=e.maxSamplesPerPattern??5}addError(e,t,n,r,i){let a=`${t}:${r??`GENERAL`}:${n}`,o=this.patterns.get(a);if(!o){if(this.patterns.size>=this.maxPatterns){this.droppedSignatures+=1;return}o={errorType:t,field:r??null,message:n,count:0,firstOccurrence:e,lastOccurrence:e,samples:[]},this.patterns.set(a,o)}o.count+=1,o.lastOccurrence=e,o.samples.length<this.maxSamplesPerPattern&&o.samples.push({rowNumber:e,rowData:i})}getTotalErrorCount(){let e=0;for(let t of this.patterns.values())e+=t.count;return e}getDistinctPatternCount(){return this.patterns.size}getDroppedSignatureCount(){return this.droppedSignatures}getTopPatterns(){let e=Array.from(this.patterns.values()).sort((e,t)=>t.count-e.count),t=e.reduce((e,t)=>e+t.count,0);return e.map(e=>({errorType:e.errorType,field:e.field,message:e.message,count:e.count,firstOccurrence:e.firstOccurrence,lastOccurrence:e.lastOccurrence,samples:e.samples.slice(),percentage:t>0?e.count/t*100:0}))}snapshot(){return{totalErrors:this.getTotalErrorCount(),distinctPatterns:this.getDistinctPatternCount(),droppedSignatures:this.droppedSignatures,patterns:this.getTopPatterns()}}};async function*o(e){let{filePath:r,delimiter:i=` `,hasHeader:a=!0,columns:o,relaxColumnCount:s=!1}=e,c=o?Array.from(o):a,l=t(r),u=l.pipe(n({delimiter:i,columns:c,bom:!0,skip_empty_lines:!0,relax_column_count:s})),d=0;try{for await(let e of u){d+=1;let t={};if(Array.isArray(e)){let n=e;for(let e=0;e<n.length;e+=1)t[String(e)]=n[e]??``}else for(let[n,r]of Object.entries(e))t[n]=r??``;yield{rowNumber:d,row:t}}}finally{l.destroy()}}const s=1e3;async function c(t){let{importRunId:n,runLabel:r,params:i,transform:c,feed:l,esClient:u,esIndex:d,batchSize:f=s,onProgress:p,rowLimit:m,signal:h,errorTracker:g=new a}=t;if(f<1)throw Error(`batchSize must be >= 1 (got ${f})`);let _=Date.now(),v=0,y=0,b=0,x=0,S=0,C=[],w=async()=>{if(C.length===0)return;let t=C;C=[];let n;try{n=await e(u,{index:d,docs:t.map(({id:e,doc:t})=>({id:e,doc:t})),...h!==void 0&&{signal:h}})}catch(e){let n=h?.aborted??(e instanceof Error&&(e.name===`AbortError`||/abort/i.test(e.message)))?`aborted`:`bulk_request_failed`,r=e instanceof Error?e.message:String(e);for(let{rowNumber:e}of t)g.addError(e,n,r,void 0,null);throw b+=t.length,e}if(y+=n.succeeded,b+=n.failures.length,S+=1,n.failures.length>0){let e=new Map(t.map(e=>[e.id,e.rowNumber]));for(let t of n.failures){let n=e.get(t.id)??-1;g.addError(n,t.type,t.reason,void 0,{id:t.id})}}if(p){let e=(Date.now()-_)/1e3;p({rowsRead:v,rowsSucceeded:y,rowsFailed:b,rowsSkipped:x,batchesCompleted:S,elapsedSeconds:e,rowsPerSecond:e>0?v/e:0,distinctErrorPatterns:g.getDistinctPatternCount()})}};for await(let{rowNumber:e,row:t}of o(l)){if(h?.aborted||m!==void 0&&v>=m)break;v+=1;let a={importRunId:n,params:i,runLabel:r,rowNumber:e},o;try{o=await c(t,a)}catch(n){let r=n instanceof Error?n.message:String(n);g.addError(e,`transform_threw`,r,void 0,t),b+=1;continue}if(o.kind===`skip`){x+=1;continue}if(o.kind===`error`){g.addError(e,o.error.errorType,o.error.message,o.error.field,t),b+=1;continue}C.push({id:o.id,doc:o.doc,rowNumber:e}),C.length>=f&&await w()}return await w(),{rowsRead:v,rowsSucceeded:y,rowsFailed:b,rowsSkipped:x,batchesCompleted:S,errors:g.snapshot()}}export{i as a,r as i,c as 
n,a as o,o as r,s as t};
+ //# sourceMappingURL=runner-DdhiNybk.mjs.map
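The chunk above bundles `ErrorTracker`, `streamFeed` and `runImport`; the readable sources sit in the source map that follows. A small sketch of the `ErrorTracker` aggregation behaviour, using only the API shown in those sources; the row numbers, error type and messages are illustrative:

```ts
import { ErrorTracker } from '@murumets-ee/imports'

const tracker = new ErrorTracker({ maxPatterns: 10, maxSamplesPerPattern: 2 })

// The same (errorType, field, message) triple collapses into one pattern bucket.
tracker.addError(12, 'parse_error', "invalid number 'NA,5'", 'NetPrice', { NetPrice: 'NA,5' })
tracker.addError(98, 'parse_error', "invalid number 'NA,5'", 'NetPrice', { NetPrice: 'NA,5' })
// A different field opens a separate bucket, even with the same message.
tracker.addError(99, 'parse_error', "invalid number 'NA,5'", 'GrossPrice', { GrossPrice: 'NA,5' })

const summary = tracker.snapshot()
// totalErrors === 3, distinctPatterns === 2, and the top pattern keeps at most 2 sample rows.
console.log(summary.totalErrors, summary.distinctPatterns, summary.patterns[0].count)
```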
@@ -0,0 +1 @@
+ {"version":3,"file":"runner-DdhiNybk.mjs","names":[],"sources":["../src/error-tracker.ts","../src/streaming.ts","../src/runner.ts"],"sourcesContent":["/**\n * Aggregates per-row errors into top-N pattern buckets so a feed of 1M\n * malformed rows surfaces as a handful of actionable signatures rather\n * than a million identical strings.\n *\n * Lifted from giga-test (`backend/src/workers/csv-importer.ts`) and\n * generalized:\n * - Configurable caps so the importer can tune memory bounds per run.\n * - Pure data — no logging side effects, no I/O.\n * - JSON-serialisable output via {@link ErrorTracker.snapshot} for the\n * `import_run.errorSummary` column.\n *\n * Pattern signature shape: `${errorType}:${field || 'GENERAL'}:${message}`.\n * Same `errorType + field + message` collapses to one bucket; differing\n * messages stay separate. This is intentional: a parser error on column\n * `NetPrice/Discount` (\"invalid number 'NA,5'\") and the same on column\n * `GrossPrice` are operationally distinct even if the parser is the same.\n *\n * Memory bounds: the patterns map is capped at `maxPatterns`. Once full,\n * additional NEW signatures are dropped — known patterns keep accumulating\n * counts. This is the \"top-N most common\" model: rare-but-novel errors\n * past the cap are invisible, but the cap protects against a runaway\n * adversarial feed exploding the map. Sample arrays are independently\n * capped at `maxSamplesPerPattern`.\n */\n\n/**\n * Recursively-defined JSON-serialisable value. Mirrors the shape of\n * `JsonValue` in `@murumets-ee/entity` without taking a dependency on\n * that package — error-tracker is otherwise standalone, and the\n * `ImportRun.errorSummary` JSONB column accepts anything in this shape.\n */\nexport type ImportJsonValue =\n | string\n | number\n | boolean\n | null\n | ImportJsonValue[]\n | { [key: string]: ImportJsonValue }\n\n/** One sample row attached to a pattern. `rowData` is the originally-parsed row. */\nexport interface ErrorSample {\n rowNumber: number\n rowData: ImportJsonValue\n}\n\n/** Public shape of an aggregated pattern as returned by {@link ErrorTracker.getTopPatterns}. */\nexport interface ErrorPattern {\n errorType: string\n field: string | null\n message: string\n count: number\n firstOccurrence: number\n lastOccurrence: number\n samples: ReadonlyArray<ErrorSample>\n /** `count / totalErrors`, scaled 0..100. `0` when there are zero errors total. */\n percentage: number\n}\n\nexport interface ErrorTrackerConfig {\n /** Hard cap on distinct signatures. Defaults to 50. New signatures past the cap are dropped. */\n maxPatterns?: number\n /** Hard cap on samples retained per pattern. Defaults to 5. Excess samples are dropped. */\n maxSamplesPerPattern?: number\n}\n\n/** Default caps — match giga-test for compatibility. */\nexport const DEFAULT_MAX_PATTERNS = 50\nexport const DEFAULT_MAX_SAMPLES_PER_PATTERN = 5\n\ninterface InternalPattern {\n errorType: string\n field: string | null\n message: string\n count: number\n firstOccurrence: number\n lastOccurrence: number\n samples: ErrorSample[]\n}\n\nexport class ErrorTracker {\n private readonly patterns = new Map<string, InternalPattern>()\n private readonly maxPatterns: number\n private readonly maxSamplesPerPattern: number\n private droppedSignatures = 0\n\n constructor(config: ErrorTrackerConfig = {}) {\n this.maxPatterns = config.maxPatterns ?? DEFAULT_MAX_PATTERNS\n this.maxSamplesPerPattern = config.maxSamplesPerPattern ?? 
DEFAULT_MAX_SAMPLES_PER_PATTERN\n }\n\n /**\n * Record one error. Same `(errorType, field, message)` triple bumps the\n * existing bucket; a new triple opens a new one (subject to {@link maxPatterns}).\n *\n * `field` is optional — pass `undefined` for errors not tied to a single\n * column (e.g. parse errors at row level). Internally normalised to the\n * literal string `'GENERAL'` so it shares a bucket with other genericised\n * errors of the same type+message.\n */\n addError(\n rowNumber: number,\n errorType: string,\n message: string,\n field: string | undefined,\n rowData: ImportJsonValue,\n ): void {\n const fieldKey = field ?? 'GENERAL'\n const signature = `${errorType}:${fieldKey}:${message}`\n\n let pattern = this.patterns.get(signature)\n if (!pattern) {\n if (this.patterns.size >= this.maxPatterns) {\n this.droppedSignatures += 1\n return\n }\n pattern = {\n errorType,\n field: field ?? null,\n message,\n count: 0,\n firstOccurrence: rowNumber,\n lastOccurrence: rowNumber,\n samples: [],\n }\n this.patterns.set(signature, pattern)\n }\n\n pattern.count += 1\n pattern.lastOccurrence = rowNumber\n if (pattern.samples.length < this.maxSamplesPerPattern) {\n pattern.samples.push({ rowNumber, rowData })\n }\n }\n\n /** Total count across every pattern. Counts errors, not patterns. */\n getTotalErrorCount(): number {\n let sum = 0\n for (const p of this.patterns.values()) sum += p.count\n return sum\n }\n\n /** Number of distinct signatures retained in the map (≤ `maxPatterns`). */\n getDistinctPatternCount(): number {\n return this.patterns.size\n }\n\n /**\n * Number of NEW signatures dropped because the map was already at\n * capacity. Surfacing this in the import_run summary tells the operator\n * \"the top-N was saturated — there's a long tail you're not seeing\".\n */\n getDroppedSignatureCount(): number {\n return this.droppedSignatures\n }\n\n /**\n * Top patterns sorted by descending count, capped at `maxPatterns`.\n * Stable secondary order is insertion order (Map iteration order is\n * insertion order; Array.sort is stable in V8).\n */\n getTopPatterns(): ErrorPattern[] {\n const all = Array.from(this.patterns.values()).sort((a, b) => b.count - a.count)\n const totalErrors = all.reduce((sum, p) => sum + p.count, 0)\n return all.map((p) => ({\n errorType: p.errorType,\n field: p.field,\n message: p.message,\n count: p.count,\n firstOccurrence: p.firstOccurrence,\n lastOccurrence: p.lastOccurrence,\n samples: p.samples.slice(),\n percentage: totalErrors > 0 ? (p.count / totalErrors) * 100 : 0,\n }))\n }\n\n /**\n * Compact JSON-serialisable snapshot for `import_run.errorSummary`.\n * Aside from the patterns array, includes the totals so a reader of\n * just this column doesn't have to re-derive them.\n */\n snapshot(): ErrorTrackerSnapshot {\n return {\n totalErrors: this.getTotalErrorCount(),\n distinctPatterns: this.getDistinctPatternCount(),\n droppedSignatures: this.droppedSignatures,\n patterns: this.getTopPatterns(),\n }\n }\n}\n\nexport interface ErrorTrackerSnapshot {\n totalErrors: number\n distinctPatterns: number\n droppedSignatures: number\n patterns: ErrorPattern[]\n}\n","/**\n * Tab-delimited / CSV streaming reader. 
Generic over delimiter so the\n * same path handles `.txt` (tab), `.csv` (comma), and the rare `;`\n * European export dialect.\n *\n * Built on `csv-parse` per giga-test precedent — node-stream-based, low\n * memory, handles UTF-8 BOM (the carmaker feeds are Windows-exported\n * and ship with a BOM that breaks naive split-on-tab parsers).\n *\n * The reader yields `{ rowNumber, row }` pairs where:\n * - `rowNumber` is 1-based and counts the header as row 0.\n * - `row` is `Record<string, string>` keyed by header name. Empty cells\n * are the empty string, NOT `undefined` — feed transforms test with\n * `value === ''` consistently.\n *\n * Why this lives in a streaming reader and not inside the transform\n * itself: the transform sees one already-parsed row at a time, never\n * the file. That keeps transform implementations free of I/O concerns\n * and makes them trivially unit-testable with a fixture row map.\n */\n\nimport { createReadStream } from 'node:fs'\nimport { parse } from 'csv-parse'\n\nexport interface StreamFeedOptions {\n /** Path to the file on disk. The PoC uploads land on local disk; S3-keyed reads come later. */\n filePath: string\n /**\n * Single-character field delimiter. Default `\\t` (the carmaker feed\n * format). Pass `,` for CSV, `;` for some European dialects.\n */\n delimiter?: string\n /**\n * `true` (default): the first row is the header and column names come\n * from it. `false`: rows are emitted as positional `{ \"0\": ..., \"1\": ... }`\n * and the transform reads by index — useful for headerless feeds that\n * commit to a documented column order.\n */\n hasHeader?: boolean\n /**\n * Optional explicit column-name list. When provided, takes precedence\n * over `hasHeader` (header row, if present, is skipped but its values\n * are ignored). Useful when the upstream header is unstable but the\n * positional shape isn't.\n */\n columns?: ReadonlyArray<string>\n /**\n * Forward to `csv-parse` `relax_column_count`. Default `false` —\n * a row whose column count doesn't match the header surfaces as a\n * parser error so the transform isn't silently fed truncated data.\n */\n relaxColumnCount?: boolean\n}\n\nexport interface StreamFeedRow {\n /** 1-based row number. Header (when present) is row 0; first data row is row 1. */\n rowNumber: number\n /**\n * Cell values keyed by column name (or string-position when\n * `hasHeader: false` AND no `columns`).\n *\n * **Cell-value invariants:**\n * - Empty cells (`A\\t\\tC`) → `''` (empty string).\n * - Missing TRAILING cells in `relaxColumnCount: true` mode → the\n * key is **absent** from the object, not present-with-`''`. csv-parse\n * does not emit keys for short rows. Transforms reading those\n * columns get `undefined` from `row['col']` and must handle it\n * (`row['col'] ?? ''` is the canonical idiom).\n * - With the default `relaxColumnCount: false`, short rows reject at\n * the parser, so this case never reaches the transform.\n */\n row: Record<string, string>\n}\n\n/**\n * Async-iterable over the parsed rows of a delimited file. Use with\n * `for await (const { rowNumber, row } of streamFeed({ filePath, ... }))`.\n *\n * The iterator owns its file descriptor — the `for await` loop closes\n * the underlying stream when it returns or breaks. 
Aborting mid-stream\n * (`break`, `throw`, signal) is safe; csv-parse propagates the close.\n */\nexport async function* streamFeed(options: StreamFeedOptions): AsyncIterable<StreamFeedRow> {\n const { filePath, delimiter = '\\t', hasHeader = true, columns, relaxColumnCount = false } = options\n\n // When the caller provides explicit `columns`, prefer them. When the\n // file has a header but no explicit `columns`, csv-parse takes the\n // first row as the column source. When neither is true, rows are\n // emitted with string-position keys.\n //\n // Typed against csv-parse's actual `columns?: ColumnOption[] | boolean`\n // signature — `string[]` satisfies `ColumnOption[]` since\n // `ColumnOption = string | undefined | null | false | { name: string }`.\n const columnConfig: string[] | boolean = columns ? Array.from(columns) : hasHeader\n\n const stream = createReadStream(filePath)\n const parser = stream.pipe(\n parse({\n delimiter,\n columns: columnConfig,\n bom: true,\n skip_empty_lines: true,\n // csv-parse defaults to strict column count; opt-in relaxation only.\n relax_column_count: relaxColumnCount,\n }),\n )\n\n let rowNumber = 0\n try {\n for await (const rawRow of parser as AsyncIterable<\n ReadonlyArray<string> | Record<string, string | undefined>\n >) {\n rowNumber += 1\n // csv-parse emits records with string keys when `columns` is\n // truthy, otherwise an Array. Normalise both shapes to\n // `Record<string, string>` so transforms can rely on `value === ''`\n // for missing cells (csv-parse leaves trailing missing cells as\n // `undefined` when `relax_column_count: true`; this collapses\n // them to `''` to keep the contract uniform).\n const row: Record<string, string> = {}\n if (Array.isArray(rawRow)) {\n const arr = rawRow as ReadonlyArray<string | undefined>\n for (let i = 0; i < arr.length; i += 1) {\n row[String(i)] = arr[i] ?? ''\n }\n } else {\n for (const [k, v] of Object.entries(rawRow)) {\n row[k] = v ?? ''\n }\n }\n yield { rowNumber, row }\n }\n } finally {\n // Safety: ensure the underlying file descriptor closes even if the\n // consumer breaks mid-iteration. Node closes streams on\n // garbage-collection but this makes it deterministic under tests.\n stream.destroy()\n }\n}\n","/**\n * One-shot importer: stream rows → transform → batched bulk-write to ES,\n * accumulating per-row errors into `ErrorTracker` and reporting progress\n * to the queue every batch.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope):\n * - **Batch size 1000.** Matches giga-test precedent. Configurable for\n * integration tests that don't want a 1k row floor.\n * - **No resumability and no automatic retries.** The `imports:run` queue\n * job is registered with `defaultRetries: 0` (see `worker.ts`) so a\n * failed handler does NOT re-enqueue itself — re-running a multi-batch\n * import against the same `import_run.id` while the previous attempt\n * may still be writing is a footgun (duplicate batches, double-counted\n * progress). Operator retries by creating a NEW `import_run` row.\n * - **No per-supplier transform plugin.** The runner takes a single\n * `RowTransform<TDoc>` from the registry and applies it to every row;\n * PR 8 may diverge but only by registering a different transform name.\n * - **Direct `bulkUpsert` into the live aliased index** (D6 alias is\n * set up by PR 4's `ensureAliasedIndex`; the importer doesn't reindex).\n *\n * Per D21 (sanctioned bulk path): this runner intentionally bypasses\n * AdminClient and entity hooks. 
Per-batch audit / observability lives\n * on the surrounding `import_run` row + queue progress, NOT per-row.\n */\n\nimport { bulkUpsert, type BulkIndexResult, type EsClientLike } from '@murumets-ee/search-elasticsearch'\nimport { ErrorTracker } from './error-tracker.js'\nimport { streamFeed, type StreamFeedOptions } from './streaming.js'\nimport type { RowTransform, TransformContext } from './transform.js'\n\n/** Soft default; chosen to match giga-test. ES bulk requests over ~5MB get split server-side anyway. */\nexport const DEFAULT_BATCH_SIZE = 1000\n\nexport interface RunImportOptions<TDoc> {\n /** UUID of the `import_run` row driving this run. Forwarded to every transform invocation. */\n importRunId: string\n /** Operator-supplied label for the run. Forwarded to the transform context. */\n runLabel: string\n /** Opaque per-run params copied from `import_run.params`. */\n params: Record<string, unknown>\n /** Transform applied to every parsed row. */\n transform: RowTransform<TDoc>\n /** Streaming reader options — file path, delimiter, header config. */\n feed: StreamFeedOptions\n /** ES client (low-level shape from `@murumets-ee/search-elasticsearch`). */\n esClient: EsClientLike\n /** Index alias to write to. Per D6, callers always pass an alias, never a physical index. */\n esIndex: string\n /** Rows per `bulkUpsert` call. Default {@link DEFAULT_BATCH_SIZE}. */\n batchSize?: number\n /**\n * Callback invoked after every batch. The handler in `worker.ts`\n * forwards this to `ctx.updateProgress` for the queue UI; tests\n * inspect it directly. Synchronous + cheap so a slow callback can't\n * back-pressure the importer.\n */\n onProgress?: (progress: ImportRunProgress) => void\n /** Optional: stop processing after this many rows. Tests use it; production passes `undefined`. */\n rowLimit?: number\n /** Abort signal threaded into the underlying ES client request — cooperative cancel. */\n signal?: AbortSignal\n /** Optional ErrorTracker config (caps). Default: top-50 patterns × 5 samples. */\n errorTracker?: ErrorTracker\n}\n\n/**\n * Progress payload written to `toolkit_jobs.progress` after every batch.\n * Caps + flush rules live on the queue's `updateProgress` debounce —\n * callers don't need to throttle.\n */\nexport interface ImportRunProgress {\n rowsRead: number\n rowsSucceeded: number\n rowsFailed: number\n rowsSkipped: number\n batchesCompleted: number\n /** Wall-clock seconds since the runner started. */\n elapsedSeconds: number\n /** Rows / second, computed at every batch. */\n rowsPerSecond: number\n /** Distinct error patterns currently held by the tracker. Saturates at the cap. */\n distinctErrorPatterns: number\n}\n\n/**\n * Final result returned by {@link runImport}. The handler writes these\n * onto the `import_run` row alongside the ErrorTracker snapshot.\n */\nexport interface RunImportResult {\n /** Total rows read from the file (excludes skipped empty lines). */\n rowsRead: number\n /** Rows the transform turned into a successful doc AND the ES cluster acknowledged. */\n rowsSucceeded: number\n /**\n * Rows that the transform rejected (`{ kind: 'error' }`) OR that ES\n * rejected on bulk-write (per-doc failure). Both are aggregated by\n * `errorTracker` for the import_run summary.\n */\n rowsFailed: number\n /** Rows that the transform skipped (`{ kind: 'skip' }`) — header noise, blank lines, intentional drop. */\n rowsSkipped: number\n /** Number of `bulkUpsert` calls made. */\n batchesCompleted: number\n /** Final value of {@link ErrorTracker.snapshot}. 
*/\n errors: ReturnType<ErrorTracker['snapshot']>\n}\n\n/**\n * Apply the runner against a feed file. Stops on rowLimit OR end-of-file\n * OR if `signal` aborts. Throws if the streaming reader / ES client\n * throws — caller (the queue handler) catches that and writes\n * `import_run.status = 'failed'` with the error message in\n * `errorSummary.fatal`.\n */\nexport async function runImport<TDoc>(options: RunImportOptions<TDoc>): Promise<RunImportResult> {\n const {\n importRunId,\n runLabel,\n params,\n transform,\n feed,\n esClient,\n esIndex,\n batchSize = DEFAULT_BATCH_SIZE,\n onProgress,\n rowLimit,\n signal,\n errorTracker = new ErrorTracker(),\n } = options\n\n if (batchSize < 1) {\n throw new Error(`batchSize must be >= 1 (got ${batchSize})`)\n }\n\n const startedAt = Date.now()\n let rowsRead = 0\n let rowsSucceeded = 0\n let rowsFailed = 0\n let rowsSkipped = 0\n let batchesCompleted = 0\n\n let pending: Array<{ id: string; doc: TDoc; rowNumber: number }> = []\n\n const flush = async (): Promise<void> => {\n if (pending.length === 0) return\n const batch = pending\n pending = []\n let result: BulkIndexResult\n try {\n result = await bulkUpsert<TDoc>(esClient, {\n index: esIndex,\n docs: batch.map(({ id, doc }) => ({ id, doc })),\n ...(signal !== undefined && { signal }),\n })\n } catch (err) {\n // Cluster- or transport-level failure — the whole batch is\n // unaccounted for. Distinguish abort (operator-driven cancel) from\n // a real cluster failure so the errorSummary doesn't mislabel a\n // cancelled run as broken cluster connectivity.\n const isAbort =\n signal?.aborted ??\n (err instanceof Error && (err.name === 'AbortError' || /abort/i.test(err.message)))\n const errorType = isAbort ? 'aborted' : 'bulk_request_failed'\n const reason = err instanceof Error ? err.message : String(err)\n for (const { rowNumber } of batch) {\n errorTracker.addError(rowNumber, errorType, reason, undefined, null)\n }\n rowsFailed += batch.length\n throw err\n }\n\n rowsSucceeded += result.succeeded\n rowsFailed += result.failures.length\n batchesCompleted += 1\n\n if (result.failures.length > 0) {\n // Map each ES failure back to its source row via `id`. The bulk\n // response order matches the request order, but ES doesn't promise\n // that; matching by `id` is the safe path. PoC volume is small\n // enough that the O(failures × batch) cost is irrelevant.\n const byId = new Map(batch.map((b) => [b.id, b.rowNumber]))\n for (const fail of result.failures) {\n const rowNumber = byId.get(fail.id) ?? -1\n errorTracker.addError(rowNumber, fail.type, fail.reason, undefined, { id: fail.id })\n }\n }\n\n if (onProgress) {\n const elapsedSeconds = (Date.now() - startedAt) / 1000\n onProgress({\n rowsRead,\n rowsSucceeded,\n rowsFailed,\n rowsSkipped,\n batchesCompleted,\n elapsedSeconds,\n rowsPerSecond: elapsedSeconds > 0 ? rowsRead / elapsedSeconds : 0,\n distinctErrorPatterns: errorTracker.getDistinctPatternCount(),\n })\n }\n }\n\n for await (const { rowNumber, row } of streamFeed(feed)) {\n if (signal?.aborted) break\n if (rowLimit !== undefined && rowsRead >= rowLimit) break\n rowsRead += 1\n\n const ctx: TransformContext = { importRunId, params, runLabel, rowNumber }\n let result: Awaited<ReturnType<typeof transform>>\n try {\n result = await transform(row, ctx)\n } catch (err) {\n // A throw from the transform is a programmer error — surface it as\n // a row-level error so the run can continue. 
(If the bug is\n // catastrophic, the operator sees the same message repeated and\n // can stop the run.)\n const reason = err instanceof Error ? err.message : String(err)\n errorTracker.addError(rowNumber, 'transform_threw', reason, undefined, row)\n rowsFailed += 1\n continue\n }\n\n if (result.kind === 'skip') {\n rowsSkipped += 1\n continue\n }\n if (result.kind === 'error') {\n errorTracker.addError(\n rowNumber,\n result.error.errorType,\n result.error.message,\n result.error.field,\n row,\n )\n rowsFailed += 1\n continue\n }\n\n pending.push({ id: result.id, doc: result.doc, rowNumber })\n if (pending.length >= batchSize) {\n await flush()\n }\n }\n\n await flush()\n\n return {\n rowsRead,\n rowsSucceeded,\n rowsFailed,\n rowsSkipped,\n batchesCompleted,\n errors: errorTracker.snapshot(),\n }\n}\n"],"mappings":"4IAmEA,MAAa,EAAuB,GACvB,EAAkC,EAY/C,IAAa,EAAb,KAA0B,CACxB,SAA4B,IAAI,IAChC,YACA,qBACA,kBAA4B,EAE5B,YAAY,EAA6B,EAAE,CAAE,CAC3C,KAAK,YAAc,EAAO,aAAA,GAC1B,KAAK,qBAAuB,EAAO,sBAAA,EAYrC,SACE,EACA,EACA,EACA,EACA,EACM,CAEN,IAAM,EAAY,GAAG,EAAU,GADd,GAAS,UACiB,GAAG,IAE1C,EAAU,KAAK,SAAS,IAAI,EAAU,CAC1C,GAAI,CAAC,EAAS,CACZ,GAAI,KAAK,SAAS,MAAQ,KAAK,YAAa,CAC1C,KAAK,mBAAqB,EAC1B,OAEF,EAAU,CACR,YACA,MAAO,GAAS,KAChB,UACA,MAAO,EACP,gBAAiB,EACjB,eAAgB,EAChB,QAAS,EAAE,CACZ,CACD,KAAK,SAAS,IAAI,EAAW,EAAQ,CAGvC,EAAQ,OAAS,EACjB,EAAQ,eAAiB,EACrB,EAAQ,QAAQ,OAAS,KAAK,sBAChC,EAAQ,QAAQ,KAAK,CAAE,YAAW,UAAS,CAAC,CAKhD,oBAA6B,CAC3B,IAAI,EAAM,EACV,IAAK,IAAM,KAAK,KAAK,SAAS,QAAQ,CAAE,GAAO,EAAE,MACjD,OAAO,EAIT,yBAAkC,CAChC,OAAO,KAAK,SAAS,KAQvB,0BAAmC,CACjC,OAAO,KAAK,kBAQd,gBAAiC,CAC/B,IAAM,EAAM,MAAM,KAAK,KAAK,SAAS,QAAQ,CAAC,CAAC,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAC1E,EAAc,EAAI,QAAQ,EAAK,IAAM,EAAM,EAAE,MAAO,EAAE,CAC5D,OAAO,EAAI,IAAK,IAAO,CACrB,UAAW,EAAE,UACb,MAAO,EAAE,MACT,QAAS,EAAE,QACX,MAAO,EAAE,MACT,gBAAiB,EAAE,gBACnB,eAAgB,EAAE,eAClB,QAAS,EAAE,QAAQ,OAAO,CAC1B,WAAY,EAAc,EAAK,EAAE,MAAQ,EAAe,IAAM,EAC/D,EAAE,CAQL,UAAiC,CAC/B,MAAO,CACL,YAAa,KAAK,oBAAoB,CACtC,iBAAkB,KAAK,yBAAyB,CAChD,kBAAmB,KAAK,kBACxB,SAAU,KAAK,gBAAgB,CAChC,GCzGL,eAAuB,EAAW,EAA0D,CAC1F,GAAM,CAAE,WAAU,YAAY,IAAM,YAAY,GAAM,UAAS,mBAAmB,IAAU,EAUtF,EAAmC,EAAU,MAAM,KAAK,EAAQ,CAAG,EAEnE,EAAS,EAAiB,EAAS,CACnC,EAAS,EAAO,KACpB,EAAM,CACJ,YACA,QAAS,EACT,IAAK,GACL,iBAAkB,GAElB,mBAAoB,EACrB,CAAC,CACH,CAEG,EAAY,EAChB,GAAI,CACF,UAAW,IAAM,KAAU,EAExB,CACD,GAAa,EAOb,IAAM,EAA8B,EAAE,CACtC,GAAI,MAAM,QAAQ,EAAO,CAAE,CACzB,IAAM,EAAM,EACZ,IAAK,IAAI,EAAI,EAAG,EAAI,EAAI,OAAQ,GAAK,EACnC,EAAI,OAAO,EAAE,EAAI,EAAI,IAAM,QAG7B,IAAK,GAAM,CAAC,EAAG,KAAM,OAAO,QAAQ,EAAO,CACzC,EAAI,GAAK,GAAK,GAGlB,KAAM,CAAE,YAAW,MAAK,SAElB,CAIR,EAAO,SAAS,ECzGpB,MAAa,EAAqB,IAmFlC,eAAsB,EAAgB,EAA2D,CAC/F,GAAM,CACJ,cACA,WACA,SACA,YACA,OACA,WACA,UACA,YAAY,EACZ,aACA,WACA,SACA,eAAe,IAAI,GACjB,EAEJ,GAAI,EAAY,EACd,MAAU,MAAM,+BAA+B,EAAU,GAAG,CAG9D,IAAM,EAAY,KAAK,KAAK,CACxB,EAAW,EACX,EAAgB,EAChB,EAAa,EACb,EAAc,EACd,EAAmB,EAEnB,EAA+D,EAAE,CAE/D,EAAQ,SAA2B,CACvC,GAAI,EAAQ,SAAW,EAAG,OAC1B,IAAM,EAAQ,EACd,EAAU,EAAE,CACZ,IAAI,EACJ,GAAI,CACF,EAAS,MAAM,EAAiB,EAAU,CACxC,MAAO,EACP,KAAM,EAAM,KAAK,CAAE,KAAI,UAAW,CAAE,KAAI,MAAK,EAAE,CAC/C,GAAI,IAAW,IAAA,IAAa,CAAE,SAAQ,CACvC,CAAC,OACK,EAAK,CAQZ,IAAM,EAFJ,GAAQ,UACP,aAAe,QAAU,EAAI,OAAS,cAAgB,SAAS,KAAK,EAAI,QAAQ,GACvD,UAAY,sBAClC,EAAS,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CAC/D,IAAK,GAAM,CAAE,eAAe,EAC1B,EAAa,SAAS,EAAW,EAAW,EAAQ,IAAA,GAAW,KAAK,CAGtE,KADA,IAAc,EAAM,OACd,EAOR,GAJA,GAAiB,EAAO,UACxB,GAAc,EAAO,SAAS,OAC9B,GAAoB,EAEhB,EAAO,SAAS,OAAS,EAAG,CAK9B,IAAM,EAAO,IAAI,IAAI,EAAM,IAAK,GAAM,CAAC,EAAE,GAAI,EAAE,UAAU,CAAC,CAAC,CAC3D,IAAK,IAAM,KAAQ,EAAO,SAAU,CAClC,IAAM,EAAY,EAAK,IAAI,EAAK,G
AAG,EAAI,GACvC,EAAa,SAAS,EAAW,EAAK,KAAM,EAAK,OAAQ,IAAA,GAAW,CAAE,GAAI,EAAK,GAAI,CAAC,EAIxF,GAAI,EAAY,CACd,IAAM,GAAkB,KAAK,KAAK,CAAG,GAAa,IAClD,EAAW,CACT,WACA,gBACA,aACA,cACA,mBACA,iBACA,cAAe,EAAiB,EAAI,EAAW,EAAiB,EAChE,sBAAuB,EAAa,yBAAyB,CAC9D,CAAC,GAIN,UAAW,GAAM,CAAE,YAAW,SAAS,EAAW,EAAK,CAAE,CAEvD,GADI,GAAQ,SACR,IAAa,IAAA,IAAa,GAAY,EAAU,MACpD,GAAY,EAEZ,IAAM,EAAwB,CAAE,cAAa,SAAQ,WAAU,YAAW,CACtE,EACJ,GAAI,CACF,EAAS,MAAM,EAAU,EAAK,EAAI,OAC3B,EAAK,CAKZ,IAAM,EAAS,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CAC/D,EAAa,SAAS,EAAW,kBAAmB,EAAQ,IAAA,GAAW,EAAI,CAC3E,GAAc,EACd,SAGF,GAAI,EAAO,OAAS,OAAQ,CAC1B,GAAe,EACf,SAEF,GAAI,EAAO,OAAS,QAAS,CAC3B,EAAa,SACX,EACA,EAAO,MAAM,UACb,EAAO,MAAM,QACb,EAAO,MAAM,MACb,EACD,CACD,GAAc,EACd,SAGF,EAAQ,KAAK,CAAE,GAAI,EAAO,GAAI,IAAK,EAAO,IAAK,YAAW,CAAC,CACvD,EAAQ,QAAU,GACpB,MAAM,GAAO,CAMjB,OAFA,MAAM,GAAO,CAEN,CACL,WACA,gBACA,aACA,cACA,mBACA,OAAQ,EAAa,UAAU,CAChC"}
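The source map above carries the readable `error-tracker.ts`, `streaming.ts` and `runner.ts` sources. Below is a compact sketch of calling `runImport` with a toy transform, assembled from the `RunImportOptions` and `RowResult` shapes documented there; the column names, document shape, id scheme and `esClient` declaration are assumptions for illustration, not the PR 8 carmaker mapping:

```ts
import { runImport, type RowTransform } from '@murumets-ee/imports'
import type { EsClientLike } from '@murumets-ee/search-elasticsearch'

// Assumed to be constructed elsewhere (e.g. an @elastic/elasticsearch Client).
declare const esClient: EsClientLike

// Toy document shape; 'Code' and 'Name' are placeholder feed columns.
interface PartDoc {
  code: string
  name: string
  import_batch_id: string
}

const toyTransform: RowTransform<PartDoc> = async (row, ctx) => {
  const code = (row['Code'] ?? '').trim()
  if (code === '') return { kind: 'skip', reason: 'blank code' }
  if (!row['Name']) {
    return { kind: 'error', error: { errorType: 'missing_field', field: 'Name', message: 'Name is empty' } }
  }
  return {
    kind: 'success',
    // Stable id so re-running the same feed upserts instead of duplicating.
    id: `${code.toLowerCase()}__${String(ctx.params['supplierId'])}`,
    doc: { code, name: row['Name'], import_batch_id: ctx.importRunId },
  }
}

const result = await runImport({
  importRunId: '00000000-0000-0000-0000-000000000000',
  runLabel: 'example run',
  params: { supplierId: 'sup-1' },
  transform: toyTransform,
  feed: { filePath: '/tmp/example-feed.txt' }, // tab-delimited with a header row by default
  esClient,
  esIndex: 'parts', // always an alias, never a physical index (D6)
  onProgress: (p) => console.log(p.rowsRead, p.rowsPerSecond),
})
console.log(result.rowsSucceeded, result.rowsFailed, result.errors.totalErrors)
```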
@@ -0,0 +1,17 @@
+ import { n as FilePathResolver } from "./worker-DerGVTSI.mjs";
+
+ //#region src/storage-resolver.d.ts
+ /**
+ * Read the storage object at `key` into a tmpfile and return the
+ * path. Cleans up via `fs.unlink` after the run finishes — best-
+ * effort, swallows `ENOENT` (the file may already be gone if the
+ * worker crashed and the OS cleaned `/tmp`).
+ *
+ * Lazy-imports `@murumets-ee/storage` + `@murumets-ee/core` so
+ * deployments that don't ingest from remote storage never load the
+ * R2 client.
+ */
+ declare const storageResolveFilePath: FilePathResolver;
+ //#endregion
+ export { storageResolveFilePath };
+ //# sourceMappingURL=storage-resolver.d.mts.map
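The intended wiring for this helper, reproduced from the doc comment in `src/storage-resolver.ts` (embedded in the `storage-resolver.mjs` source map further down); as in the original snippet, `esClient` is assumed to be in scope:

```ts
import { imports } from '@murumets-ee/imports/plugin'
import { storageResolveFilePath } from '@murumets-ee/imports/storage-resolver'

imports({
  esClient: () => esClient,
  resolveFilePath: storageResolveFilePath,
})
```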
@@ -0,0 +1 @@
+ {"version":3,"file":"storage-resolver.d.mts","names":[],"sources":["../src/storage-resolver.ts"],"mappings":";;;;;;;;;;;;;cA6Ca,sBAAA,EAAwB,gBAAA"}
@@ -0,0 +1,2 @@
+ import{promises as e}from"node:fs";import{tmpdir as t}from"node:os";import{join as n}from"node:path";const r=async r=>{let{createStorageClient:i}=await import(`@murumets-ee/storage`),{getStorageConfig:a}=await import(`@murumets-ee/storage/plugin`),{getApp:o}=await import(`@murumets-ee/core`),{body:s}=await i(a(),{app:o()}).download(r),c=Buffer.isBuffer(s)?s:Buffer.from(await new Response(s).arrayBuffer()),l=r.split(`/`).pop()??`feed`,u=n(t(),`imports-${crypto.randomUUID()}-${l}`);return await e.writeFile(u,c),{localPath:u,cleanup:async()=>{try{await e.unlink(u)}catch(e){if(e.code!==`ENOENT`)throw e}}}};export{r as storageResolveFilePath};
+ //# sourceMappingURL=storage-resolver.mjs.map
@@ -0,0 +1 @@
+ {"version":3,"file":"storage-resolver.mjs","names":["fs"],"sources":["../src/storage-resolver.ts"],"sourcesContent":["/**\n * Storage-backed `resolveFilePath` for the imports plugin. Downloads\n * an object out of `@murumets-ee/storage` to a tmpfile so the queue\n * worker can stream it via `node:fs.createReadStream`.\n *\n * Wire into the imports plugin like this:\n *\n * ```ts\n * import { imports } from '@murumets-ee/imports/plugin'\n * import { storageResolveFilePath } from '@murumets-ee/imports/storage-resolver'\n *\n * imports({\n * esClient: () => esClient,\n * resolveFilePath: storageResolveFilePath,\n * })\n * ```\n *\n * Why this lives here rather than in the route or in the storage\n * package:\n * - The route shouldn't know about the consumer's runner; it just\n * persists a key on `import_run.filePath`.\n * - The storage package is generic (no opinion on imports).\n * - The imports worker is the natural download point: it owns the\n * run's lifecycle and can guarantee tmpfile cleanup.\n *\n * The helper is in a separate subpath so consumers that use a\n * different upload sink (local disk, S3 with a custom adapter, etc.)\n * don't pull in `@murumets-ee/storage` transitively.\n */\n\nimport { promises as fs } from 'node:fs'\nimport { tmpdir } from 'node:os'\nimport { join } from 'node:path'\nimport type { FilePathResolver } from './worker.js'\n\n/**\n * Read the storage object at `key` into a tmpfile and return the\n * path. Cleans up via `fs.unlink` after the run finishes — best-\n * effort, swallows `ENOENT` (the file may already be gone if the\n * worker crashed and the OS cleaned `/tmp`).\n *\n * Lazy-imports `@murumets-ee/storage` + `@murumets-ee/core` so\n * deployments that don't ingest from remote storage never load the\n * R2 client.\n */\nexport const storageResolveFilePath: FilePathResolver = async (storageKey) => {\n const { createStorageClient } = await import('@murumets-ee/storage')\n const { getStorageConfig } = await import('@murumets-ee/storage/plugin')\n const { getApp } = await import('@murumets-ee/core')\n\n const storage = createStorageClient(getStorageConfig(), { app: getApp() })\n const { body } = await storage.download(storageKey)\n\n // `DownloadResult.body` is `Buffer | ReadableStream<Uint8Array>` per\n // storage's adapter contract. Normalise to Buffer for the simple\n // tmpfile-write path; the carmaker feeds top out at ~tens of MB,\n // well within memory.\n const buffer = Buffer.isBuffer(body)\n ? body\n : Buffer.from(await new Response(body).arrayBuffer())\n\n // Suffix carries the basename hint (last storage-key segment) so\n // operator-readable temp paths help debugging without leaking the\n // full original filename through the FS.\n const suffix = storageKey.split('/').pop() ?? 'feed'\n const localPath = join(tmpdir(), `imports-${crypto.randomUUID()}-${suffix}`)\n await fs.writeFile(localPath, buffer)\n\n return {\n localPath,\n cleanup: async () => {\n try {\n await fs.unlink(localPath)\n } catch (err) {\n // ENOENT is fine — file already gone (OS cleanup, manual\n // intervention, etc.). 
Anything else, rethrow so the worker's\n // best-effort cleanup logs a warning.\n if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err\n }\n },\n }\n}\n"],"mappings":"qGA6CA,MAAa,EAA2C,KAAO,IAAe,CAC5E,GAAM,CAAE,uBAAwB,MAAM,OAAO,wBACvC,CAAE,oBAAqB,MAAM,OAAO,+BACpC,CAAE,UAAW,MAAM,OAAO,qBAG1B,CAAE,QAAS,MADD,EAAoB,GAAkB,CAAE,CAAE,IAAK,GAAQ,CAAE,CAC3C,CAAC,SAAS,EAAW,CAM7C,EAAS,OAAO,SAAS,EAAK,CAChC,EACA,OAAO,KAAK,MAAM,IAAI,SAAS,EAAK,CAAC,aAAa,CAAC,CAKjD,EAAS,EAAW,MAAM,IAAI,CAAC,KAAK,EAAI,OACxC,EAAY,EAAK,GAAQ,CAAE,WAAW,OAAO,YAAY,CAAC,GAAG,IAAS,CAG5E,OAFA,MAAMA,EAAG,UAAU,EAAW,EAAO,CAE9B,CACL,YACA,QAAS,SAAY,CACnB,GAAI,CACF,MAAMA,EAAG,OAAO,EAAU,OACnB,EAAK,CAIZ,GAAK,EAA8B,OAAS,SAAU,MAAM,IAGjE"}
@@ -0,0 +1,2 @@
+ import{behavior as e,defineEntity as t,field as n}from"@murumets-ee/entity";const r=[`pending`,`running`,`succeeded`,`failed`,`cancelled`],i=t({name:`import_run`,fields:{label:n.text({required:!0,maxLength:255}),status:n.select({options:r,default:`pending`,indexed:!0}),filePath:n.text({required:!0,maxLength:2048}),transformName:n.text({required:!0,maxLength:128,indexed:!0}),params:n.json(),totals:n.json(),errorSummary:n.json(),startedAt:n.date(),finishedAt:n.date(),queueJobId:n.text({maxLength:64,indexed:!0})},behaviors:[e.auditable()],scope:`global`,admin:{group:`imports`,label:`Import runs`,labelSingular:`Import run`,icon:`upload`,hideFromMenu:!0}});var a=class{entries=new Map;register(e,t){if(this.entries.has(e))throw Error(`Transform "${e}" is already registered — two plugins cannot register the same transform name.`);this.entries.set(e,t)}get(e){return this.entries.get(e)}has(e){return this.entries.has(e)}list(){return Array.from(this.entries.keys()).sort()}clear(){this.entries.clear()}};const o=Symbol.for(`@murumets-ee/imports:transforms`);function s(){let e=globalThis,t=e[o];return t||(t=new a,e[o]=t),t}function c(e,t){s().register(e,t)}export{i as a,r as i,s as n,c as r,a as t};
+ //# sourceMappingURL=transform-BUGBTotp.mjs.map
@@ -0,0 +1 @@
+ {"version":3,"file":"transform-BUGBTotp.mjs","names":[],"sources":["../src/entities/import-run.ts","../src/transform.ts"],"sourcesContent":["/**\n * ImportRun — one feed-import attempt. Tracks file, transform, status,\n * row totals, and the top error patterns surfaced by `ErrorTracker`.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope): generic plumbing entity. The\n * commerce-specific transform parameters (brandId, supplierId, supplier\n * code prefix) live in the opaque `params` JSONB so the imports package\n * stays free of commerce-domain dependencies. PR 8 wires a concrete\n * carmaker-feed transform that reads those params; PR 8a renders a\n * bespoke `/admin/commerce/imports` page over this entity's history.\n *\n * `hideFromMenu: true` because PR 7 ships only the entity + plumbing —\n * the operator-facing surface is PR 8a. The auto-EntityListPage at\n * `/admin/import_run` still resolves for direct lookups, but no sidebar\n * entry points to it.\n */\n\nimport { behavior, defineEntity, field } from '@murumets-ee/entity'\nimport type { AdminClient } from '@murumets-ee/entity/admin'\n\n/**\n * Lifecycle stages.\n *\n * - `pending` — row exists, queue job has been enqueued, worker hasn't picked it up yet.\n * - `running` — worker is streaming + bulk-writing.\n * - `succeeded` — worker finished cleanly (note: per-row failures still possible — see `totals.failed`).\n * - `failed` — worker threw and the queue marked the job dead. `errorSummary.fatal` carries the cause.\n * - `cancelled` — manual operator action via PR 8a (out of scope for PR 7).\n */\nexport const IMPORT_RUN_STATUSES = ['pending', 'running', 'succeeded', 'failed', 'cancelled'] as const\nexport type ImportRunStatus = (typeof IMPORT_RUN_STATUSES)[number]\n\nexport const ImportRun = defineEntity({\n name: 'import_run',\n fields: {\n /** Operator-visible label, e.g. `\"MERCEDES — ME_20251027075918.txt\"`. Free-form. */\n label: field.text({ required: true, maxLength: 255 }),\n /** See {@link IMPORT_RUN_STATUSES}. */\n status: field.select({\n options: IMPORT_RUN_STATUSES,\n default: 'pending',\n indexed: true,\n }),\n /**\n * Path or storage key of the uploaded feed file. Generic string —\n * could be `/var/lumi/uploads/<id>.txt` for the local-disk PoC or an\n * S3 object key once storage adapter integration lands. The worker\n * reads this with the configured `readFeed` resolver.\n */\n filePath: field.text({ required: true, maxLength: 2048 }),\n /**\n * Name of the registered transform applied to each row. Resolved at\n * worker-dispatch time against the transform registry contributed by\n * the consumer. PoC: `'commerce:carmaker-feed'`.\n */\n transformName: field.text({ required: true, maxLength: 128, indexed: true }),\n /**\n * Opaque per-transform parameters. The carmaker transform expects\n * `{ brandId, supplierId, codePrefix? }`. Validation happens inside\n * the transform — the imports package never inspects this shape so\n * a new transform with different params doesn't require a schema\n * migration.\n */\n params: field.json(),\n /**\n * Row counters: `{ submitted, succeeded, failed, skipped, batches }`.\n * Updated by the worker as each batch completes. Final values are\n * what the operator reads; intermediate progress comes from\n * `toolkit_jobs.progress` via the queue UI.\n */\n totals: field.json(),\n /**\n * Output of `ErrorTracker.getTopPatterns(totalRows)` — the top-50\n * error signatures with up to 5 sample rows each. 
Empty `[]` until\n * the worker writes it on completion.\n */\n errorSummary: field.json(),\n /** Set when the worker picks up the job. */\n startedAt: field.date(),\n /** Set when the worker finishes (success OR fatal failure). */\n finishedAt: field.date(),\n /**\n * `toolkit_jobs.id` of the queue job processing this run — link\n * back so PR 8a can show live progress without a second lookup.\n */\n queueJobId: field.text({ maxLength: 64, indexed: true }),\n },\n behaviors: [behavior.auditable()],\n scope: 'global',\n admin: {\n group: 'imports',\n label: 'Import runs',\n labelSingular: 'Import run',\n icon: 'upload',\n hideFromMenu: true,\n },\n})\n\nexport type ImportRunClient = AdminClient<typeof ImportRun.allFields>\n","/**\n * Per-feed transform plugin interface (PLAN-ECOMMERCE.md PR 7 / D14).\n *\n * The streaming reader reads raw rows out of the file (column key →\n * string). The transform turns one row into a typed `OutputDoc` ready\n * for the bulk-write surface (PoC: `PartsDocument` for the parts ES\n * index). The transform is what makes a generic feed-importer\n * commerce-aware — without it, the importer has no opinion on what a\n * row \"means\".\n *\n * Why an interface, not a function:\n * - The transform may need to reject a row (`RowSkip`) without that\n * counting as an error in `ErrorTracker` — e.g. blank lines, header\n * rows mistakenly retained, or \"this is a replacement-code marker\n * row, handled out-of-band\" decisions.\n * - The transform may need to fail a row (`RowError`) with a typed\n * error class so `ErrorTracker` collapses them by `errorType`.\n * - The transform may need access to the run-level params (brand /\n * supplier IDs, code prefix, batch ID) without those being\n * re-derived per row.\n *\n * PR 7 ships only the interface + a registry. PR 8 ships the first\n * concrete carmaker-feed transform.\n */\n\n/** Identifies which transform a registered handler implements. Stable across deploys. */\nexport type TransformName = string\n\n/** Per-run context provided to every row call. Kept narrow on purpose. */\nexport interface TransformContext {\n /** UUID of the `import_run` row. Forward to the output doc as `import_batch_id` for source attribution (D20). */\n importRunId: string\n /** Opaque per-run params copied from `import_run.params`. The transform validates the shape it expects. */\n params: Record<string, unknown>\n /** Operator-supplied label, useful for logging / debugging. */\n runLabel: string\n /** 1-based row number across the whole feed (header counts as row 0). Forward to `RowError.rowNumber`. */\n rowNumber: number\n}\n\n/**\n * One of: a successful `OutputDoc` keyed by a stable `id`, an\n * intentional `skip` (counted in `totals.skipped`), or a row-level\n * error (counted in `totals.failed` AND aggregated by `ErrorTracker`).\n *\n * The `id` field on `success` is used as the bulk-upsert primary key\n * (`_id` in ES). Per D4 the parts index uses\n * `<code_normalized>__<supplier_id>` so a re-run of the same feed\n * idempotently overwrites instead of duplicating.\n */\nexport type RowResult<TDoc> =\n | { kind: 'success'; id: string; doc: TDoc }\n | { kind: 'skip'; reason: string }\n | { kind: 'error'; error: RowError }\n\n/** Row-level error. The aggregator uses `(errorType, field, message)` as the dedup signature. */\nexport interface RowError {\n errorType: string\n message: string\n field?: string | undefined\n}\n\n/**\n * The contract every per-feed transform implements. Pure function of\n * `(row, ctx) → RowResult` — no I/O. 
Async only because the future\n * carmaker variant might consult an in-memory taxonomy lookup.\n */\nexport interface RowTransform<TDoc> {\n (rawRow: Record<string, string>, ctx: TransformContext): Promise<RowResult<TDoc>>\n}\n\n/**\n * Holds the registered transforms for the current process. Each plugin\n * that ships a transform calls {@link TransformRegistry.register} from\n * its `init` hook.\n *\n * Per CLAUDE.md \"Whitelist, don't blacklist\": the worker dispatches\n * by `import_run.transformName` against this registry. An unregistered\n * name fails the run rather than running with a default transform.\n *\n * **Production callers MUST go through {@link getTransformRegistry}**, not\n * `new TransformRegistry()`. The constructor is exposed only so tests\n * (and rare custom-worker embeds) can build an isolated instance and\n * inject it into {@link createRunImportHandler} directly. Two `new`\n * instances do NOT share state — registering a transform on one will\n * NOT make it visible to a worker resolving against the other. The\n * package's queue handler (registered by `imports()` plugin) always\n * resolves against the singleton.\n */\nexport class TransformRegistry {\n private readonly entries = new Map<TransformName, RowTransform<unknown>>()\n\n /**\n * Register a transform under a stable name. Throws on duplicate\n * names so two plugins can't silently overwrite each other (matches\n * the `SearchRegistry` pattern in `@murumets-ee/search`).\n */\n register<TDoc>(name: TransformName, transform: RowTransform<TDoc>): void {\n if (this.entries.has(name)) {\n throw new Error(\n `Transform \"${name}\" is already registered — two plugins cannot register the same transform name.`,\n )\n }\n this.entries.set(name, transform as RowTransform<unknown>)\n }\n\n /** Look up a transform by name. Returns `undefined` if no plugin has registered one. */\n get(name: TransformName): RowTransform<unknown> | undefined {\n return this.entries.get(name)\n }\n\n /** True iff a transform is registered for the given name. */\n has(name: TransformName): boolean {\n return this.entries.has(name)\n }\n\n /** All registered names. Useful for the admin Catalog tab and for tests. */\n list(): TransformName[] {\n return Array.from(this.entries.keys()).sort()\n }\n\n /** Drop all registrations. Tests only. */\n clear(): void {\n this.entries.clear()\n }\n}\n\n// ---------------------------------------------------------------------------\n// Process-global singleton (mirrors `@murumets-ee/queue`'s handler registry).\n// ---------------------------------------------------------------------------\n//\n// Why a singleton: PR 8's carmaker-feed transform lives in a different\n// plugin from `@murumets-ee/imports`. Under Next.js HMR, ONE of those\n// plugins might re-evaluate while the other does not. Holding the\n// registry on `globalThis` via `Symbol.for` means the same `Map`\n// instance is shared across module evaluations — the carmaker plugin's\n// `register` and the imports worker's `get` see each other regardless\n// of evaluation order. Same fix shape as the queue's handler-registry\n// HMR bug (#186).\n\nconst REGISTRY_KEY = Symbol.for('@murumets-ee/imports:transforms')\n\ninterface GlobalThisWithTransforms {\n [REGISTRY_KEY]?: TransformRegistry\n}\n\n/** Returns the singleton registry, creating it on first access. 
*/\nexport function getTransformRegistry(): TransformRegistry {\n const g = globalThis as GlobalThisWithTransforms\n let reg = g[REGISTRY_KEY]\n if (!reg) {\n reg = new TransformRegistry()\n g[REGISTRY_KEY] = reg\n }\n return reg\n}\n\n/**\n * Convenience wrapper around `getTransformRegistry().register(name, fn)`.\n * Plugins that ship a transform call this from their `init` hook — same\n * shape as `@murumets-ee/queue/client`'s `registerJob`.\n */\nexport function registerImportTransform<TDoc>(\n name: TransformName,\n transform: RowTransform<TDoc>,\n): void {\n getTransformRegistry().register(name, transform)\n}\n"],"mappings":"4EA6BA,MAAa,EAAsB,CAAC,UAAW,UAAW,YAAa,SAAU,YAAY,CAGhF,EAAY,EAAa,CACpC,KAAM,aACN,OAAQ,CAEN,MAAO,EAAM,KAAK,CAAE,SAAU,GAAM,UAAW,IAAK,CAAC,CAErD,OAAQ,EAAM,OAAO,CACnB,QAAS,EACT,QAAS,UACT,QAAS,GACV,CAAC,CAOF,SAAU,EAAM,KAAK,CAAE,SAAU,GAAM,UAAW,KAAM,CAAC,CAMzD,cAAe,EAAM,KAAK,CAAE,SAAU,GAAM,UAAW,IAAK,QAAS,GAAM,CAAC,CAQ5E,OAAQ,EAAM,MAAM,CAOpB,OAAQ,EAAM,MAAM,CAMpB,aAAc,EAAM,MAAM,CAE1B,UAAW,EAAM,MAAM,CAEvB,WAAY,EAAM,MAAM,CAKxB,WAAY,EAAM,KAAK,CAAE,UAAW,GAAI,QAAS,GAAM,CAAC,CACzD,CACD,UAAW,CAAC,EAAS,WAAW,CAAC,CACjC,MAAO,SACP,MAAO,CACL,MAAO,UACP,MAAO,cACP,cAAe,aACf,KAAM,SACN,aAAc,GACf,CACF,CAAC,CCPF,IAAa,EAAb,KAA+B,CAC7B,QAA2B,IAAI,IAO/B,SAAe,EAAqB,EAAqC,CACvE,GAAI,KAAK,QAAQ,IAAI,EAAK,CACxB,MAAU,MACR,cAAc,EAAK,gFACpB,CAEH,KAAK,QAAQ,IAAI,EAAM,EAAmC,CAI5D,IAAI,EAAwD,CAC1D,OAAO,KAAK,QAAQ,IAAI,EAAK,CAI/B,IAAI,EAA8B,CAChC,OAAO,KAAK,QAAQ,IAAI,EAAK,CAI/B,MAAwB,CACtB,OAAO,MAAM,KAAK,KAAK,QAAQ,MAAM,CAAC,CAAC,MAAM,CAI/C,OAAc,CACZ,KAAK,QAAQ,OAAO,GAiBxB,MAAM,EAAe,OAAO,IAAI,kCAAkC,CAOlE,SAAgB,GAA0C,CACxD,IAAM,EAAI,WACN,EAAM,EAAE,GAKZ,OAJK,IACH,EAAM,IAAI,EACV,EAAE,GAAgB,GAEb,EAQT,SAAgB,EACd,EACA,EACM,CACN,GAAsB,CAAC,SAAS,EAAM,EAAU"}
@@ -0,0 +1,119 @@
1
+ //#region src/transform.d.ts
2
+ /**
3
+ * Per-feed transform plugin interface (PLAN-ECOMMERCE.md PR 7 / D14).
4
+ *
5
+ * The streaming reader reads raw rows out of the file (column key →
6
+ * string). The transform turns one row into a typed `OutputDoc` ready
7
+ * for the bulk-write surface (PoC: `PartsDocument` for the parts ES
8
+ * index). The transform is what makes a generic feed-importer
9
+ * commerce-aware — without it, the importer has no opinion on what a
10
+ * row "means".
11
+ *
12
+ * Why an interface, not a function:
13
+ * - The transform may need to reject a row (`RowSkip`) without that
14
+ * counting as an error in `ErrorTracker` — e.g. blank lines, header
15
+ * rows mistakenly retained, or "this is a replacement-code marker
16
+ * row, handled out-of-band" decisions.
17
+ * - The transform may need to fail a row (`RowError`) with a typed
18
+ * error class so `ErrorTracker` collapses them by `errorType`.
19
+ * - The transform may need access to the run-level params (brand /
20
+ * supplier IDs, code prefix, batch ID) without those being
21
+ * re-derived per row.
22
+ *
23
+ * PR 7 ships only the interface + a registry. PR 8 ships the first
24
+ * concrete carmaker-feed transform.
25
+ */
26
+ /** Identifies which transform a registered handler implements. Stable across deploys. */
27
+ type TransformName = string;
28
+ /** Per-run context provided to every row call. Kept narrow on purpose. */
29
+ interface TransformContext {
30
+ /** UUID of the `import_run` row. Forward to the output doc as `import_batch_id` for source attribution (D20). */
31
+ importRunId: string;
32
+ /** Opaque per-run params copied from `import_run.params`. The transform validates the shape it expects. */
33
+ params: Record<string, unknown>;
34
+ /** Operator-supplied label, useful for logging / debugging. */
35
+ runLabel: string;
36
+ /** 1-based row number across the whole feed (header counts as row 0). Forward to `RowError.rowNumber`. */
37
+ rowNumber: number;
38
+ }
39
+ /**
40
+ * One of: a successful `OutputDoc` keyed by a stable `id`, an
41
+ * intentional `skip` (counted in `totals.skipped`), or a row-level
42
+ * error (counted in `totals.failed` AND aggregated by `ErrorTracker`).
43
+ *
44
+ * The `id` field on `success` is used as the bulk-upsert primary key
45
+ * (`_id` in ES). Per D4 the parts index uses
46
+ * `<code_normalized>__<supplier_id>` so a re-run of the same feed
47
+ * idempotently overwrites instead of duplicating.
48
+ */
49
+ type RowResult<TDoc> = {
50
+ kind: 'success';
51
+ id: string;
52
+ doc: TDoc;
53
+ } | {
54
+ kind: 'skip';
55
+ reason: string;
56
+ } | {
57
+ kind: 'error';
58
+ error: RowError;
59
+ };
60
+ /** Row-level error. The aggregator uses `(errorType, field, message)` as the dedup signature. */
61
+ interface RowError {
62
+ errorType: string;
63
+ message: string;
64
+ field?: string | undefined;
65
+ }
66
+ /**
67
+ * The contract every per-feed transform implements. Pure function of
68
+ * `(row, ctx) → RowResult` — no I/O. Async only because the future
69
+ * carmaker variant might consult an in-memory taxonomy lookup.
70
+ */
71
+ interface RowTransform<TDoc> {
72
+ (rawRow: Record<string, string>, ctx: TransformContext): Promise<RowResult<TDoc>>;
73
+ }
74
+ /**
75
+ * Holds the registered transforms for the current process. Each plugin
76
+ * that ships a transform calls {@link TransformRegistry.register} from
77
+ * its `init` hook.
78
+ *
79
+ * Per CLAUDE.md "Whitelist, don't blacklist": the worker dispatches
80
+ * by `import_run.transformName` against this registry. An unregistered
81
+ * name fails the run rather than running with a default transform.
82
+ *
83
+ * **Production callers MUST go through {@link getTransformRegistry}**, not
84
+ * `new TransformRegistry()`. The constructor is exposed only so tests
85
+ * (and rare custom-worker embeds) can build an isolated instance and
86
+ * inject it into {@link createRunImportHandler} directly. Two `new`
87
+ * instances do NOT share state — registering a transform on one will
88
+ * NOT make it visible to a worker resolving against the other. The
89
+ * package's queue handler (registered by `imports()` plugin) always
90
+ * resolves against the singleton.
91
+ */
92
+ declare class TransformRegistry {
93
+ private readonly entries;
94
+ /**
95
+ * Register a transform under a stable name. Throws on duplicate
96
+ * names so two plugins can't silently overwrite each other (matches
97
+ * the `SearchRegistry` pattern in `@murumets-ee/search`).
98
+ */
99
+ register<TDoc>(name: TransformName, transform: RowTransform<TDoc>): void;
100
+ /** Look up a transform by name. Returns `undefined` if no plugin has registered one. */
101
+ get(name: TransformName): RowTransform<unknown> | undefined;
102
+ /** True iff a transform is registered for the given name. */
103
+ has(name: TransformName): boolean;
104
+ /** All registered names. Useful for the admin Catalog tab and for tests. */
105
+ list(): TransformName[];
106
+ /** Drop all registrations. Tests only. */
107
+ clear(): void;
108
+ }
109
+ /** Returns the singleton registry, creating it on first access. */
110
+ declare function getTransformRegistry(): TransformRegistry;
111
+ /**
112
+ * Convenience wrapper around `getTransformRegistry().register(name, fn)`.
113
+ * Plugins that ship a transform call this from their `init` hook — same
114
+ * shape as `@murumets-ee/queue/client`'s `registerJob`.
115
+ */
116
+ declare function registerImportTransform<TDoc>(name: TransformName, transform: RowTransform<TDoc>): void;
117
+ //#endregion
118
+ export { TransformName as a, registerImportTransform as c, TransformContext as i, RowResult as n, TransformRegistry as o, RowTransform as r, getTransformRegistry as s, RowError as t };
119
+ //# sourceMappingURL=transform-D_uhdLeo.d.mts.map
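For orientation, a minimal consumer-side transform that exercises the `RowTransform` / `RowResult` contract declared above and registers it from a plugin `init` hook. The entry-point specifier, the `demo:parts-feed` name and the `DemoDoc` shape are illustrative assumptions, not part of this package.

// Sketch only: import subpath, transform name and doc shape are assumed.
import {
  registerImportTransform,
  type RowResult,
  type RowTransform,
  type TransformContext,
} from '@murumets-ee/imports'

interface DemoDoc {
  doc_id: string
  sku: string
  import_batch_id: string
}

// Pure per-row function: no I/O, returns success, skip, or error.
const demoTransform: RowTransform<DemoDoc> = async (
  row: Record<string, string>,
  ctx: TransformContext,
): Promise<RowResult<DemoDoc>> => {
  const sku = (row['SKU'] ?? '').trim()
  if (!sku) {
    // Intentional drop: counted under totals.skipped, not as an error.
    return { kind: 'skip', reason: 'blank SKU' }
  }
  if (sku.length > 64) {
    // Row-level error: ErrorTracker buckets it by (errorType, field, message).
    return {
      kind: 'error',
      error: { errorType: 'sku_too_long', message: 'SKU exceeds 64 characters', field: 'SKU' },
    }
  }
  // `id` becomes the bulk-upsert _id, so re-running the same feed overwrites idempotently.
  return {
    kind: 'success',
    id: sku.toUpperCase(),
    doc: { doc_id: sku.toUpperCase(), sku, import_batch_id: ctx.importRunId },
  }
}

// Called from the consuming plugin's init hook; a duplicate name throws.
registerImportTransform('demo:parts-feed', demoTransform)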
@@ -0,0 +1 @@
1
+ {"version":3,"file":"transform-D_uhdLeo.d.mts","names":[],"sources":["../src/transform.ts"],"mappings":";;AA0BA;;;;;AAGA;;;;;;;;;;;AAqBA;;;;;;;;KAxBY,aAAA;;UAGK,gBAAA;EAwBX;EAtBJ,WAAA;EAsB0B;EApB1B,MAAA,EAAQ,MAAA;EAoB0B;EAlBlC,QAAA;EAqBuB;EAnBvB,SAAA;AAAA;;;;;;AA8BF;;;;;KAjBY,SAAA;EACN,IAAA;EAAiB,EAAA;EAAY,GAAA,EAAK,IAAA;AAAA;EAClC,IAAA;EAAc,MAAA;AAAA;EACd,IAAA;EAAe,KAAA,EAAO,QAAA;AAAA;;UAGX,QAAA;EACf,SAAA;EACA,OAAA;EACA,KAAA;AAAA;;;;;;UAQe,YAAA;EAAA,CACd,MAAA,EAAQ,MAAA,kBAAwB,GAAA,EAAK,gBAAA,GAAmB,OAAA,CAAQ,SAAA,CAAU,IAAA;AAAA;;;;;;;;;;;;;;;;;;;cAqBhE,iBAAA;EAAA,iBACM,OAAA;EAgCZ;;AAyBP;;;EAlDE,QAAA,MAAA,CAAe,IAAA,EAAM,aAAA,EAAe,SAAA,EAAW,YAAA,CAAa,IAAA;EAkDL;EAxCvD,GAAA,CAAI,IAAA,EAAM,aAAA,GAAgB,YAAA;EAuDW;EAlDrC,GAAA,CAAI,IAAA,EAAM,aAAA;EAmDJ;EA9CN,IAAA,CAAA,GAAQ,aAAA;EA+CG;EA1CX,KAAA,CAAA;AAAA;;iBAyBc,oBAAA,CAAA,GAAwB,iBAAA;;;;;;iBAexB,uBAAA,MAAA,CACd,IAAA,EAAM,aAAA,EACN,SAAA,EAAW,YAAA,CAAa,IAAA"}
@@ -0,0 +1,57 @@
1
+ import { r as RowTransform } from "./transform-D_uhdLeo.mjs";
2
+ import { PartsDocument } from "@murumets-ee/search-elasticsearch";
3
+ import { z } from "zod";
4
+
5
+ //#region src/transforms/carmaker-feed.d.ts
6
+ /** Stable name under which this transform is registered. */
7
+ declare const CARMAKER_FEED_TRANSFORM_NAME = "commerce:carmaker-feed";
8
+ /**
9
+ * Per-job configuration. PR 8a's upload route validates the
10
+ * brandId/supplierId UUIDs, looks up the brand slug + supplier
11
+ * display_name from the commerce entities, and writes the resolved
12
+ * shape into `import_run.params` before enqueuing — that keeps
13
+ * this transform free of DB I/O.
14
+ */
15
+ declare const carmakerFeedParamsSchema: z.ZodObject<{
16
+ /** UUID of the brand. Forwarded to ES doc per D24 (typed FK). */brandId: z.ZodString; /** Brand slug for facet labels — pre-resolved by the route. */
17
+ brandSlug: z.ZodString; /** UUID of the supplier. */
18
+ supplierId: z.ZodString;
19
+ /**
20
+ * Customer-facing supplier alias (D24 anti-disintermediation) —
21
+ * pre-resolved. NEVER `supplier.name` (the legal name).
22
+ */
23
+ supplierDisplayName: z.ZodString;
24
+ /**
25
+ * Optional supplier-specific code prefix to strip from
26
+ * `ArticleName` before normalization. e.g. `'ME-'` for Mercedes,
27
+ * `'TO-'` for Toyota. When absent, the full ArticleName is
28
+ * normalized as-is.
29
+ */
30
+ supplierCodePrefix: z.ZodOptional<z.ZodString>;
31
+ /**
32
+ * Stable identifier for this run's batch — written verbatim to
33
+ * `PartsDocument.import_batch_id` so D20 source-attribution holds
34
+ * for the rare cases an operator needs to roll back a single feed
35
+ * import in ES.
36
+ */
37
+ importBatchId: z.ZodString;
38
+ }, "strip", z.ZodTypeAny, {
39
+ brandId: string;
40
+ brandSlug: string;
41
+ supplierId: string;
42
+ supplierDisplayName: string;
43
+ importBatchId: string;
44
+ supplierCodePrefix?: string | undefined;
45
+ }, {
46
+ brandId: string;
47
+ brandSlug: string;
48
+ supplierId: string;
49
+ supplierDisplayName: string;
50
+ importBatchId: string;
51
+ supplierCodePrefix?: string | undefined;
52
+ }>;
53
+ type CarmakerFeedParams = z.infer<typeof carmakerFeedParamsSchema>;
54
+ declare const carmakerFeedTransform: RowTransform<PartsDocument>;
55
+ //#endregion
56
+ export { CARMAKER_FEED_TRANSFORM_NAME, type CarmakerFeedParams, carmakerFeedParamsSchema, carmakerFeedTransform };
57
+ //# sourceMappingURL=transforms.d.mts.map
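A single-row sketch of how the params schema and transform above fit together, using made-up UUIDs and one fixture row shaped like the documented feed header; the `/transforms` subpath is assumed from the build output.

import { carmakerFeedParamsSchema, carmakerFeedTransform } from '@murumets-ee/imports/transforms'

// PR 8a's route resolves these from the commerce entities; here they are fixtures.
const params = carmakerFeedParamsSchema.parse({
  brandId: 'b6a7c9d0-1111-4222-8333-444455556666',
  brandSlug: 'mercedes',
  supplierId: 'f0e1d2c3-7777-4888-9999-aaaabbbbcccc',
  supplierDisplayName: 'Supplier A',
  supplierCodePrefix: 'ME-',
  importBatchId: '0a1b2c3d-4444-4555-8666-777788889999',
})

// Inside an async test:
const result = await carmakerFeedTransform(
  {
    ArticleName: 'ME-A 000 989 76 03',
    CurrencyCd: 'EUR',
    'NetPrice/Discount': '12,50',
    GrossPrice: 'NA',
  },
  { importRunId: '7c1d2e3f-4a5b-4c6d-8e7f-012345678901', params, runLabel: 'fixture', rowNumber: 1 },
)

// Expected: result.kind === 'success' and
// result.id === 'A0009897603__f0e1d2c3-7777-4888-9999-aaaabbbbcccc'
// (prefix stripped, uppercased, spaces removed, suffixed with the supplier UUID per D4);
// net_price_eur === 12.5 and gross_price_eur === null ('NA' collapses to null).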
@@ -0,0 +1 @@
1
+ {"version":3,"file":"transforms.d.mts","names":[],"sources":["../src/transforms/carmaker-feed.ts"],"mappings":";;;;;;cAiCa,4BAAA;;;;;;;;cASA,wBAAA,EAAwB,CAAA,CAAA,SAAA;;;;;;;;;;;;;;;;;;;;AA+BrC;;;;;;;;;;;;;;;;;;KAAY,kBAAA,GAAqB,CAAA,CAAE,KAAA,QAAa,wBAAA;AAAA,cAoFnC,qBAAA,EAAuB,YAAA,CAAa,aAAA"}
@@ -0,0 +1,2 @@
1
+ import{z as e}from"zod";const t=`commerce:carmaker-feed`,n=e.object({brandId:e.string().uuid(),brandSlug:e.string().min(1).max(64),supplierId:e.string().uuid(),supplierDisplayName:e.string().min(1).max(255),supplierCodePrefix:e.string().max(8).regex(/^[A-Z0-9-]+$/,`must be uppercase alphanumeric or "-"`).optional(),importBatchId:e.string().uuid()}),r=new WeakMap;function i(e){let t=r.get(e);if(t)return t;let i=n.parse(e);return r.set(e,i),i}const a=/[\s-]/g;function o(e){return e.toUpperCase().replace(a,``)}const s=/^-?\d+(\.\d+)?$/;function c(e){let t=e.includes(`,`),n=e.includes(`.`),r;if(t&&n)r=e.replace(/\./g,``).replace(`,`,`.`);else if(t)r=e.replace(`,`,`.`);else if(n)return NaN;else r=e;return s.test(r)?Number.parseFloat(r):NaN}function l(e){if(e===void 0)return null;let t=e.trim();return t===``?null:t}const u=async(e,t)=>{let n=i(t.params),r=e.ArticleName?.trim();if(!r)return{kind:`error`,error:{errorType:`missing_article_name`,message:`ArticleName is empty`,field:`ArticleName`}};let a=(e.CurrencyCd??``).trim();if(a!==`EUR`)return{kind:`error`,error:{errorType:`unsupported_currency`,message:`Currency "${a}" not supported (PoC is EUR-only)`,field:`CurrencyCd`}};let s=o(n.supplierCodePrefix&&r.startsWith(n.supplierCodePrefix)?r.slice(n.supplierCodePrefix.length):r);if(!s)return{kind:`error`,error:{errorType:`empty_normalized_code`,message:`Code "${r}" is empty after normalization`,field:`ArticleName`}};let u=e[`NetPrice/Discount`]??``,d=c(u);if(Number.isNaN(d))return{kind:`error`,error:{errorType:`invalid_net_price`,message:`Could not parse "${u}" as a number`,field:`NetPrice/Discount`}};let f=(e.GrossPrice??``).trim(),p=null;if(f!==``&&f!==`NA`){let e=c(f);if(Number.isNaN(e))return{kind:`error`,error:{errorType:`invalid_gross_price`,message:`Could not parse "${f}" as a number`,field:`GrossPrice`}};p=e}let m=`${s}__${n.supplierId}`;return{kind:`success`,id:m,doc:{doc_id:m,code:r,code_normalized:s,brand_id:n.brandId,brand_slug:n.brandSlug,supplier_id:n.supplierId,supplier_display_name:n.supplierDisplayName,net_price_eur:d,gross_price_eur:p,currency:`EUR`,barcode:l(e.Barcode),name_de:l(e.Description_DE),name_en:l(e.Description_EN),name_es:l(e.Description_ES),name_fr:l(e.Description_FR),name_it:l(e.Description_IT),name_nl:l(e.Description_NL),name_pt:l(e.Description_PT),description1:l(e.Description1),description2:l(e.Description2),import_batch_id:n.importBatchId,imported_at:new Date().toISOString()}}};export{t as CARMAKER_FEED_TRANSFORM_NAME,n as carmakerFeedParamsSchema,u as carmakerFeedTransform};
2
+ //# sourceMappingURL=transforms.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"transforms.mjs","names":[],"sources":["../src/transforms/carmaker-feed.ts"],"sourcesContent":["/**\n * Carmaker-feed transform — converts the giga-test 14-column tab-\n * delimited row into a `PartsDocument` for the parts ES alias.\n *\n * Per PLAN-ECOMMERCE.md PR 8 (PoC scope): ONE generic transform\n * covering every supplier whose feed matches this header. Per-job\n * `(brandId, brandSlug, supplierId, supplierDisplayName, supplierCodePrefix?)`\n * config is fully resolved at enqueue time (PR 8a's route looks up\n * the slug + display_name from the UUIDs before enqueuing) so the\n * transform is a pure function with no DB lookups per row.\n *\n * Decisions enforced:\n * - **D2** Code = bare manufacturer code, normalized. Strip the\n * supplier prefix (e.g. `ME-` for Mercedes, `TO-` for Toyota) if\n * present, then uppercase + dash/space-stripped.\n * - **D4** ES `_id` = `<code_normalized>__<supplierId>` so re-running\n * the same feed idempotently overwrites instead of duplicating.\n * - **D9** Single currency (EUR). Non-EUR rows are rejected as\n * row-level errors — at the PoC stage, only EUR data is loaded.\n * - **D21** Bulk-write path bypasses entity hooks. Per-batch audit\n * lives on the surrounding `import_run`, NOT per-row.\n *\n * **`*S` rows pass through unchanged.** The replacement-code side\n * effect (R1 / D11) is deferred to PR 5e — the row goes into ES\n * with its `*S` suffix preserved, no out-of-band write to\n * `replacement_group` / `replacement_member`.\n */\n\nimport type { PartsDocument } from '@murumets-ee/search-elasticsearch'\nimport { z } from 'zod'\nimport type { RowTransform } from '../transform.js'\n\n/** Stable name under which this transform is registered. */\nexport const CARMAKER_FEED_TRANSFORM_NAME = 'commerce:carmaker-feed'\n\n/**\n * Per-job configuration. PR 8a's upload route validates the\n * brandId/supplierId UUIDs, looks up the brand slug + supplier\n * display_name from the commerce entities, and writes the resolved\n * shape into `import_run.params` before enqueuing — that keeps\n * this transform free of DB I/O.\n */\nexport const carmakerFeedParamsSchema = z.object({\n /** UUID of the brand. Forwarded to ES doc per D24 (typed FK). */\n brandId: z.string().uuid(),\n /** Brand slug for facet labels — pre-resolved by the route. */\n brandSlug: z.string().min(1).max(64),\n /** UUID of the supplier. */\n supplierId: z.string().uuid(),\n /**\n * Customer-facing supplier alias (D24 anti-disintermediation) —\n * pre-resolved. NEVER `supplier.name` (the legal name).\n */\n supplierDisplayName: z.string().min(1).max(255),\n /**\n * Optional supplier-specific code prefix to strip from\n * `ArticleName` before normalization. e.g. `'ME-'` for Mercedes,\n * `'TO-'` for Toyota. When absent, the full ArticleName is\n * normalized as-is.\n */\n supplierCodePrefix: z\n .string()\n .max(8)\n .regex(/^[A-Z0-9-]+$/, 'must be uppercase alphanumeric or \"-\"')\n .optional(),\n /**\n * Stable identifier for this run's batch — written verbatim to\n * `PartsDocument.import_batch_id` so D20 source-attribution holds\n * for the rare cases an operator needs to roll back a single feed\n * import in ES.\n */\n importBatchId: z.string().uuid(),\n})\nexport type CarmakerFeedParams = z.infer<typeof carmakerFeedParamsSchema>\n\n/**\n * Run-level memoization of the parsed params. The runner passes the\n * same `ctx.params` reference for every row of a run, so this caches\n * the Zod parse result by reference identity. 
Garbage collection\n * clears the entry naturally once the run finishes and the params\n * object goes out of scope.\n *\n * Failed parses are NOT cached — a malformed-params run throws on\n * every row, but the ErrorTracker collapses identical Zod errors\n * into one bucket so the noise is bounded.\n */\nconst paramsCache = new WeakMap<object, CarmakerFeedParams>()\n\nfunction resolveParams(raw: Record<string, unknown>): CarmakerFeedParams {\n const cached = paramsCache.get(raw)\n if (cached) return cached\n const parsed = carmakerFeedParamsSchema.parse(raw)\n paramsCache.set(raw, parsed)\n return parsed\n}\n\nconst NORMALIZE_CODE_RE = /[\\s-]/g\n\n/** Uppercase + dash/whitespace-stripped (per D2). */\nfunction normalizeCode(s: string): string {\n return s.toUpperCase().replace(NORMALIZE_CODE_RE, '')\n}\n\n/** Validates the post-cleanup numeric form before accepting it. */\nconst STRICT_NUMBER_RE = /^-?\\d+(\\.\\d+)?$/\n\n/**\n * Parse a European-formatted decimal: `,` is the decimal separator, `.`\n * is the (optional) thousands separator. Returns `NaN` on unparseable\n * input — the caller surfaces that as a row-level error rather than\n * silently coercing to `0` or to a wrong value.\n *\n * Cases handled:\n * - `'1,98'` → `1.98` (decimal only)\n * - `'1.234,56'` → `1234.56` (thousands + decimal)\n * - `'5'` → `5` (integer)\n * - `'-5,00'` → `-5` (signed)\n *\n * Cases REJECTED (return `NaN`):\n * - `'1.98'` — bare-dot decimal is ambiguous in this format. The\n * carmaker feeds are documented as comma-decimal; a `.` without a\n * `,` could mean US-format decimal (`1.98`) OR a thousands marker\n * without decimal part. Rather than guess, reject. If a future\n * supplier sends US-format prices, that's a per-supplier transform\n * decision, not a quiet promotion of every dot.\n * - `'1,98EUR'`, `'1,98 '`, `'1,98abc'` — trailing junk. `parseFloat`\n * would silently accept the leading numeric prefix; the strict regex\n * post-check ensures the entire string is a clean number.\n * - `'1,2,3'` — multiple commas.\n * - `''`, `'NA'`, anything else non-numeric.\n */\nfunction parseEuropeanNumber(s: string): number {\n const hasComma = s.includes(',')\n const hasDot = s.includes('.')\n let cleaned: string\n if (hasComma && hasDot) {\n // European thousands+decimal: strip every `.`, then swap the (sole)\n // `,` for `.`. The strict regex below catches multi-comma input.\n cleaned = s.replace(/\\./g, '').replace(',', '.')\n } else if (hasComma) {\n cleaned = s.replace(',', '.')\n } else if (hasDot) {\n return Number.NaN\n } else {\n cleaned = s\n }\n if (!STRICT_NUMBER_RE.test(cleaned)) return Number.NaN\n return Number.parseFloat(cleaned)\n}\n\n/** Trim + collapse `''` and `undefined` to `null` (matches the `PartsDocument.* | null` shape). */\nfunction emptyToNull(s: string | undefined): string | null {\n if (s === undefined) return null\n const trimmed = s.trim()\n return trimmed === '' ? null : trimmed\n}\n\nexport const carmakerFeedTransform: RowTransform<PartsDocument> = async (row, ctx) => {\n const params = resolveParams(ctx.params)\n\n // 1. ArticleName is the only required cell that drives both `code`\n // and the ES `_id`. An empty value cannot be normalized; reject\n // rather than emit a doc with `_id = '__<supplierId>'`.\n const articleName = row['ArticleName']?.trim()\n if (!articleName) {\n return {\n kind: 'error',\n error: {\n errorType: 'missing_article_name',\n message: 'ArticleName is empty',\n field: 'ArticleName',\n },\n }\n }\n\n // 2. Currency gate (D9). 
The PoC is EUR-only — non-EUR data is not\n // converted, it's rejected. When PR 5c lands the price-application\n // layer with FxRate, this gate widens (or moves to the indexer).\n const currency = (row['CurrencyCd'] ?? '').trim()\n if (currency !== 'EUR') {\n return {\n kind: 'error',\n error: {\n errorType: 'unsupported_currency',\n message: `Currency \"${currency}\" not supported (PoC is EUR-only)`,\n field: 'CurrencyCd',\n },\n }\n }\n\n // 3. Strip supplier prefix if configured + present, then normalize.\n // A row whose ArticleName doesn't start with the configured prefix\n // is normalized as-is — this matches feeds that mix prefixed and\n // bare codes (rare but observed).\n const stripped =\n params.supplierCodePrefix && articleName.startsWith(params.supplierCodePrefix)\n ? articleName.slice(params.supplierCodePrefix.length)\n : articleName\n const codeNormalized = normalizeCode(stripped)\n if (!codeNormalized) {\n return {\n kind: 'error',\n error: {\n errorType: 'empty_normalized_code',\n message: `Code \"${articleName}\" is empty after normalization`,\n field: 'ArticleName',\n },\n }\n }\n\n // 4. Parse prices. Asymmetric: NetPrice is required (empty / unparseable\n // fails as `invalid_net_price`); GrossPrice is optional (`'NA'` or\n // empty → null, only a non-empty unparseable value fails). Required\n // because every parts row carries a wholesale price; gross is a\n // derived retail figure the supplier doesn't always populate.\n const netPriceRaw = row['NetPrice/Discount'] ?? ''\n const netPrice = parseEuropeanNumber(netPriceRaw)\n if (Number.isNaN(netPrice)) {\n return {\n kind: 'error',\n error: {\n errorType: 'invalid_net_price',\n message: `Could not parse \"${netPriceRaw}\" as a number`,\n field: 'NetPrice/Discount',\n },\n }\n }\n\n const grossRaw = (row['GrossPrice'] ?? '').trim()\n let grossPrice: number | null = null\n if (grossRaw !== '' && grossRaw !== 'NA') {\n const g = parseEuropeanNumber(grossRaw)\n if (Number.isNaN(g)) {\n return {\n kind: 'error',\n error: {\n errorType: 'invalid_gross_price',\n message: `Could not parse \"${grossRaw}\" as a number`,\n field: 'GrossPrice',\n },\n }\n }\n grossPrice = g\n }\n\n // 5. Build the ES doc. 
`imported_at` is generated per-row rather\n // than per-batch so the document carries a usable timestamp even\n // when the batch boundary is invisible to the reader.\n const docId = `${codeNormalized}__${params.supplierId}`\n const doc: PartsDocument = {\n doc_id: docId,\n code: articleName,\n code_normalized: codeNormalized,\n brand_id: params.brandId,\n brand_slug: params.brandSlug,\n supplier_id: params.supplierId,\n supplier_display_name: params.supplierDisplayName,\n net_price_eur: netPrice,\n gross_price_eur: grossPrice,\n currency: 'EUR',\n barcode: emptyToNull(row['Barcode']),\n name_de: emptyToNull(row['Description_DE']),\n name_en: emptyToNull(row['Description_EN']),\n name_es: emptyToNull(row['Description_ES']),\n name_fr: emptyToNull(row['Description_FR']),\n name_it: emptyToNull(row['Description_IT']),\n name_nl: emptyToNull(row['Description_NL']),\n name_pt: emptyToNull(row['Description_PT']),\n description1: emptyToNull(row['Description1']),\n description2: emptyToNull(row['Description2']),\n import_batch_id: params.importBatchId,\n imported_at: new Date().toISOString(),\n }\n\n return { kind: 'success', id: docId, doc }\n}\n"],"mappings":"wBAiCA,MAAa,EAA+B,yBAS/B,EAA2B,EAAE,OAAO,CAE/C,QAAS,EAAE,QAAQ,CAAC,MAAM,CAE1B,UAAW,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,GAAG,CAEpC,WAAY,EAAE,QAAQ,CAAC,MAAM,CAK7B,oBAAqB,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,IAAI,CAO/C,mBAAoB,EACjB,QAAQ,CACR,IAAI,EAAE,CACN,MAAM,eAAgB,wCAAwC,CAC9D,UAAU,CAOb,cAAe,EAAE,QAAQ,CAAC,MAAM,CACjC,CAAC,CAcI,EAAc,IAAI,QAExB,SAAS,EAAc,EAAkD,CACvE,IAAM,EAAS,EAAY,IAAI,EAAI,CACnC,GAAI,EAAQ,OAAO,EACnB,IAAM,EAAS,EAAyB,MAAM,EAAI,CAElD,OADA,EAAY,IAAI,EAAK,EAAO,CACrB,EAGT,MAAM,EAAoB,SAG1B,SAAS,EAAc,EAAmB,CACxC,OAAO,EAAE,aAAa,CAAC,QAAQ,EAAmB,GAAG,CAIvD,MAAM,EAAmB,kBA2BzB,SAAS,EAAoB,EAAmB,CAC9C,IAAM,EAAW,EAAE,SAAS,IAAI,CAC1B,EAAS,EAAE,SAAS,IAAI,CAC1B,EACJ,GAAI,GAAY,EAGd,EAAU,EAAE,QAAQ,MAAO,GAAG,CAAC,QAAQ,IAAK,IAAI,SACvC,EACT,EAAU,EAAE,QAAQ,IAAK,IAAI,SACpB,EACT,MAAO,UAEP,EAAU,EAGZ,OADK,EAAiB,KAAK,EAAQ,CAC5B,OAAO,WAAW,EAAQ,CADW,IAK9C,SAAS,EAAY,EAAsC,CACzD,GAAI,IAAM,IAAA,GAAW,OAAO,KAC5B,IAAM,EAAU,EAAE,MAAM,CACxB,OAAO,IAAY,GAAK,KAAO,EAGjC,MAAa,EAAqD,MAAO,EAAK,IAAQ,CACpF,IAAM,EAAS,EAAc,EAAI,OAAO,CAKlC,EAAc,EAAI,aAAgB,MAAM,CAC9C,GAAI,CAAC,EACH,MAAO,CACL,KAAM,QACN,MAAO,CACL,UAAW,uBACX,QAAS,uBACT,MAAO,cACR,CACF,CAMH,IAAM,GAAY,EAAI,YAAiB,IAAI,MAAM,CACjD,GAAI,IAAa,MACf,MAAO,CACL,KAAM,QACN,MAAO,CACL,UAAW,uBACX,QAAS,aAAa,EAAS,mCAC/B,MAAO,aACR,CACF,CAWH,IAAM,EAAiB,EAHrB,EAAO,oBAAsB,EAAY,WAAW,EAAO,mBAAmB,CAC1E,EAAY,MAAM,EAAO,mBAAmB,OAAO,CACnD,EACwC,CAC9C,GAAI,CAAC,EACH,MAAO,CACL,KAAM,QACN,MAAO,CACL,UAAW,wBACX,QAAS,SAAS,EAAY,gCAC9B,MAAO,cACR,CACF,CAQH,IAAM,EAAc,EAAI,sBAAwB,GAC1C,EAAW,EAAoB,EAAY,CACjD,GAAI,OAAO,MAAM,EAAS,CACxB,MAAO,CACL,KAAM,QACN,MAAO,CACL,UAAW,oBACX,QAAS,oBAAoB,EAAY,eACzC,MAAO,oBACR,CACF,CAGH,IAAM,GAAY,EAAI,YAAiB,IAAI,MAAM,CAC7C,EAA4B,KAChC,GAAI,IAAa,IAAM,IAAa,KAAM,CACxC,IAAM,EAAI,EAAoB,EAAS,CACvC,GAAI,OAAO,MAAM,EAAE,CACjB,MAAO,CACL,KAAM,QACN,MAAO,CACL,UAAW,sBACX,QAAS,oBAAoB,EAAS,eACtC,MAAO,aACR,CACF,CAEH,EAAa,EAMf,IAAM,EAAQ,GAAG,EAAe,IAAI,EAAO,aA0B3C,MAAO,CAAE,KAAM,UAAW,GAAI,EAAO,IAAA,CAxBnC,OAAQ,EACR,KAAM,EACN,gBAAiB,EACjB,SAAU,EAAO,QACjB,WAAY,EAAO,UACnB,YAAa,EAAO,WACpB,sBAAuB,EAAO,oBAC9B,cAAe,EACf,gBAAiB,EACjB,SAAU,MACV,QAAS,EAAY,EAAI,QAAW,CACpC,QAAS,EAAY,EAAI,eAAkB,CAC3C,QAAS,EAAY,EAAI,eAAkB,CAC3C,QAAS,EAAY,EAAI,eAAkB,CAC3C,QAAS,EAAY,EAAI,eAAkB,CAC3C,QAAS,EAAY,EAAI,eAAkB,CAC3C,QAAS,EAAY,EAAI,eAAkB,CAC3C,QAAS,EAAY,EAAI,eAAkB,CAC3C,aAAc,EAAY,EAAI,aAAgB,CAC9C,aAAc,EAAY,EAAI,aAAgB,CAC9C,gBAAiB,EAAO,cACxB,YAAa,IAAI,MAAM,CAAC,aAAa,CAG
C,CAAE"}
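The embedded source above documents the comma-decimal rules (`'1,98'` and `'1.234,56'` accepted, bare-dot `'1.98'` and trailing junk rejected). The internal `parseEuropeanNumber` is not exported; the sketch below re-implements those documented rules for callers who need the same behaviour outside the transform, and is not the package's own function.

// Re-implementation sketch, not the package's exported API.
const STRICT_NUMBER = /^-?\d+(\.\d+)?$/

function parseCommaDecimal(raw: string): number {
  const hasComma = raw.includes(',')
  const hasDot = raw.includes('.')
  let cleaned: string
  if (hasComma && hasDot) cleaned = raw.replace(/\./g, '').replace(',', '.') // '1.234,56' -> '1234.56'
  else if (hasComma) cleaned = raw.replace(',', '.')                         // '1,98' -> '1.98'
  else if (hasDot) return Number.NaN                                         // bare-dot is ambiguous: reject
  else cleaned = raw                                                         // plain integer
  return STRICT_NUMBER.test(cleaned) ? Number.parseFloat(cleaned) : Number.NaN // trailing junk: NaN
}

parseCommaDecimal('1,98')      // 1.98
parseCommaDecimal('1.234,56')  // 1234.56
parseCommaDecimal('-5,00')     // -5
parseCommaDecimal('1.98')      // NaN, rejected rather than guessed
parseCommaDecimal('1,98 EUR')  // NaN, trailing junk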
@@ -0,0 +1,467 @@
1
+ import { o as TransformRegistry, r as RowTransform } from "./transform-D_uhdLeo.mjs";
2
+ import * as _$_murumets_ee_entity0 from "@murumets-ee/entity";
3
+ import { EsClientLike } from "@murumets-ee/search-elasticsearch";
4
+ import { Logger } from "@murumets-ee/core";
5
+ import { JobDefinition } from "@murumets-ee/queue/client";
6
+ import { z } from "zod";
7
+ import { AdminClient } from "@murumets-ee/entity/admin";
8
+
9
+ //#region src/entities/import-run.d.ts
10
+ /**
11
+ * Lifecycle stages.
12
+ *
13
+ * - `pending` — row exists, queue job has been enqueued, worker hasn't picked it up yet.
14
+ * - `running` — worker is streaming + bulk-writing.
15
+ * - `succeeded` — worker finished cleanly (note: per-row failures still possible — see `totals.failed`).
16
+ * - `failed` — worker threw and the queue marked the job dead. `errorSummary.fatal` carries the cause.
17
+ * - `cancelled` — manual operator action via PR 8a (out of scope for PR 7).
18
+ */
19
+ declare const IMPORT_RUN_STATUSES: readonly ["pending", "running", "succeeded", "failed", "cancelled"];
20
+ type ImportRunStatus = (typeof IMPORT_RUN_STATUSES)[number];
21
+ declare const ImportRun: _$_murumets_ee_entity0.Entity<{
22
+ id: _$_murumets_ee_entity0.IdField;
23
+ } & _$_murumets_ee_entity0.AuditableFields & {
24
+ /** Operator-visible label, e.g. `"MERCEDES — ME_20251027075918.txt"`. Free-form. */label: _$_murumets_ee_entity0.TextField & {
25
+ readonly required: true;
26
+ readonly maxLength: 255;
27
+ }; /** See {@link IMPORT_RUN_STATUSES}. */
28
+ status: _$_murumets_ee_entity0.SelectField & {
29
+ options: readonly ["pending", "running", "succeeded", "failed", "cancelled"];
30
+ } & {
31
+ readonly options: readonly ["pending", "running", "succeeded", "failed", "cancelled"];
32
+ readonly default: "pending";
33
+ readonly indexed: true;
34
+ };
35
+ /**
36
+ * Path or storage key of the uploaded feed file. Generic string —
37
+ * could be `/var/lumi/uploads/<id>.txt` for the local-disk PoC or an
38
+ * S3 object key once storage adapter integration lands. The worker
39
+ * reads this with the configured `readFeed` resolver.
40
+ */
41
+ filePath: _$_murumets_ee_entity0.TextField & {
42
+ readonly required: true;
43
+ readonly maxLength: 2048;
44
+ };
45
+ /**
46
+ * Name of the registered transform applied to each row. Resolved at
47
+ * worker-dispatch time against the transform registry contributed by
48
+ * the consumer. PoC: `'commerce:carmaker-feed'`.
49
+ */
50
+ transformName: _$_murumets_ee_entity0.TextField & {
51
+ readonly required: true;
52
+ readonly maxLength: 128;
53
+ readonly indexed: true;
54
+ };
55
+ /**
56
+ * Opaque per-transform parameters. The carmaker transform expects
57
+ * `{ brandId, supplierId, codePrefix? }`. Validation happens inside
58
+ * the transform — the imports package never inspects this shape so
59
+ * a new transform with different params doesn't require a schema
60
+ * migration.
61
+ */
62
+ params: _$_murumets_ee_entity0.JsonField & Partial<_$_murumets_ee_entity0.JsonField>;
63
+ /**
64
+ * Row counters: `{ submitted, succeeded, failed, skipped, batches }`.
65
+ * Updated by the worker as each batch completes. Final values are
66
+ * what the operator reads; intermediate progress comes from
67
+ * `toolkit_jobs.progress` via the queue UI.
68
+ */
69
+ totals: _$_murumets_ee_entity0.JsonField & Partial<_$_murumets_ee_entity0.JsonField>;
70
+ /**
71
+ * Output of `ErrorTracker.getTopPatterns(totalRows)` — the top-50
72
+ * error signatures with up to 5 sample rows each. Empty `[]` until
73
+ * the worker writes it on completion.
74
+ */
75
+ errorSummary: _$_murumets_ee_entity0.JsonField & Partial<_$_murumets_ee_entity0.JsonField>; /** Set when the worker picks up the job. */
76
+ startedAt: _$_murumets_ee_entity0.DateField & Partial<_$_murumets_ee_entity0.DateField>; /** Set when the worker finishes (success OR fatal failure). */
77
+ finishedAt: _$_murumets_ee_entity0.DateField & Partial<_$_murumets_ee_entity0.DateField>;
78
+ /**
79
+ * `toolkit_jobs.id` of the queue job processing this run — link
80
+ * back so PR 8a can show live progress without a second lookup.
81
+ */
82
+ queueJobId: _$_murumets_ee_entity0.TextField & {
83
+ readonly maxLength: 64;
84
+ readonly indexed: true;
85
+ };
86
+ }>;
87
+ type ImportRunClient = AdminClient<typeof ImportRun.allFields>;
88
+ //#endregion
89
+ //#region src/error-tracker.d.ts
90
+ /**
91
+ * Aggregates per-row errors into top-N pattern buckets so a feed of 1M
92
+ * malformed rows surfaces as a handful of actionable signatures rather
93
+ * than a million identical strings.
94
+ *
95
+ * Lifted from giga-test (`backend/src/workers/csv-importer.ts`) and
96
+ * generalized:
97
+ * - Configurable caps so the importer can tune memory bounds per run.
98
+ * - Pure data — no logging side effects, no I/O.
99
+ * - JSON-serialisable output via {@link ErrorTracker.snapshot} for the
100
+ * `import_run.errorSummary` column.
101
+ *
102
+ * Pattern signature shape: `${errorType}:${field || 'GENERAL'}:${message}`.
103
+ * Same `errorType + field + message` collapses to one bucket; differing
104
+ * messages stay separate. This is intentional: a parser error on column
105
+ * `NetPrice/Discount` ("invalid number 'NA,5'") and the same on column
106
+ * `GrossPrice` are operationally distinct even if the parser is the same.
107
+ *
108
+ * Memory bounds: the patterns map is capped at `maxPatterns`. Once full,
109
+ * additional NEW signatures are dropped — known patterns keep accumulating
110
+ * counts. This is the "top-N most common" model: rare-but-novel errors
111
+ * past the cap are invisible, but the cap protects against a runaway
112
+ * adversarial feed exploding the map. Sample arrays are independently
113
+ * capped at `maxSamplesPerPattern`.
114
+ */
115
+ /**
116
+ * Recursively-defined JSON-serialisable value. Mirrors the shape of
117
+ * `JsonValue` in `@murumets-ee/entity` without taking a dependency on
118
+ * that package — error-tracker is otherwise standalone, and the
119
+ * `ImportRun.errorSummary` JSONB column accepts anything in this shape.
120
+ */
121
+ type ImportJsonValue = string | number | boolean | null | ImportJsonValue[] | {
122
+ [key: string]: ImportJsonValue;
123
+ };
124
+ /** One sample row attached to a pattern. `rowData` is the originally-parsed row. */
125
+ interface ErrorSample {
126
+ rowNumber: number;
127
+ rowData: ImportJsonValue;
128
+ }
129
+ /** Public shape of an aggregated pattern as returned by {@link ErrorTracker.getTopPatterns}. */
130
+ interface ErrorPattern {
131
+ errorType: string;
132
+ field: string | null;
133
+ message: string;
134
+ count: number;
135
+ firstOccurrence: number;
136
+ lastOccurrence: number;
137
+ samples: ReadonlyArray<ErrorSample>;
138
+ /** `count / totalErrors`, scaled 0..100. `0` when there are zero errors total. */
139
+ percentage: number;
140
+ }
141
+ interface ErrorTrackerConfig {
142
+ /** Hard cap on distinct signatures. Defaults to 50. New signatures past the cap are dropped. */
143
+ maxPatterns?: number;
144
+ /** Hard cap on samples retained per pattern. Defaults to 5. Excess samples are dropped. */
145
+ maxSamplesPerPattern?: number;
146
+ }
147
+ /** Default caps — match giga-test for compatibility. */
148
+ declare const DEFAULT_MAX_PATTERNS = 50;
149
+ declare const DEFAULT_MAX_SAMPLES_PER_PATTERN = 5;
150
+ declare class ErrorTracker {
151
+ private readonly patterns;
152
+ private readonly maxPatterns;
153
+ private readonly maxSamplesPerPattern;
154
+ private droppedSignatures;
155
+ constructor(config?: ErrorTrackerConfig);
156
+ /**
157
+ * Record one error. Same `(errorType, field, message)` triple bumps the
158
+ * existing bucket; a new triple opens a new one (subject to {@link maxPatterns}).
159
+ *
160
+ * `field` is optional — pass `undefined` for errors not tied to a single
161
+ * column (e.g. parse errors at row level). Internally normalised to the
162
+ * literal string `'GENERAL'` so it shares a bucket with other genericised
163
+ * errors of the same type+message.
164
+ */
165
+ addError(rowNumber: number, errorType: string, message: string, field: string | undefined, rowData: ImportJsonValue): void;
166
+ /** Total count across every pattern. Counts errors, not patterns. */
167
+ getTotalErrorCount(): number;
168
+ /** Number of distinct signatures retained in the map (≤ `maxPatterns`). */
169
+ getDistinctPatternCount(): number;
170
+ /**
171
+ * Number of NEW signatures dropped because the map was already at
172
+ * capacity. Surfacing this in the import_run summary tells the operator
173
+ * "the top-N was saturated — there's a long tail you're not seeing".
174
+ */
175
+ getDroppedSignatureCount(): number;
176
+ /**
177
+ * Top patterns sorted by descending count, capped at `maxPatterns`.
178
+ * Stable secondary order is insertion order (Map iteration order is
179
+ * insertion order; Array.sort is stable in V8).
180
+ */
181
+ getTopPatterns(): ErrorPattern[];
182
+ /**
183
+ * Compact JSON-serialisable snapshot for `import_run.errorSummary`.
184
+ * Aside from the patterns array, includes the totals so a reader of
185
+ * just this column doesn't have to re-derive them.
186
+ */
187
+ snapshot(): ErrorTrackerSnapshot;
188
+ }
189
+ interface ErrorTrackerSnapshot {
190
+ totalErrors: number;
191
+ distinctPatterns: number;
192
+ droppedSignatures: number;
193
+ patterns: ErrorPattern[];
194
+ }
195
+ //#endregion
196
+ //#region src/streaming.d.ts
197
+ /**
198
+ * Tab-delimited / CSV streaming reader. Generic over delimiter so the
199
+ * same path handles `.txt` (tab), `.csv` (comma), and the rare `;`
200
+ * European export dialect.
201
+ *
202
+ * Built on `csv-parse` per giga-test precedent — node-stream-based, low
203
+ * memory, handles UTF-8 BOM (the carmaker feeds are Windows-exported
204
+ * and ship with a BOM that breaks naive split-on-tab parsers).
205
+ *
206
+ * The reader yields `{ rowNumber, row }` pairs where:
207
+ * - `rowNumber` is 1-based and counts the header as row 0.
208
+ * - `row` is `Record<string, string>` keyed by header name. Empty cells
209
+ * are the empty string, NOT `undefined` — feed transforms test with
210
+ * `value === ''` consistently.
211
+ *
212
+ * Why this lives in a streaming reader and not inside the transform
213
+ * itself: the transform sees one already-parsed row at a time, never
214
+ * the file. That keeps transform implementations free of I/O concerns
215
+ * and makes them trivially unit-testable with a fixture row map.
216
+ */
217
+ interface StreamFeedOptions {
218
+ /** Path to the file on disk. The PoC uploads land on local disk; S3-keyed reads come later. */
219
+ filePath: string;
220
+ /**
221
+ * Single-character field delimiter. Default `\t` (the carmaker feed
222
+ * format). Pass `,` for CSV, `;` for some European dialects.
223
+ */
224
+ delimiter?: string;
225
+ /**
226
+ * `true` (default): the first row is the header and column names come
227
+ * from it. `false`: rows are emitted as positional `{ "0": ..., "1": ... }`
228
+ * and the transform reads by index — useful for headerless feeds that
229
+ * commit to a documented column order.
230
+ */
231
+ hasHeader?: boolean;
232
+ /**
233
+ * Optional explicit column-name list. When provided, takes precedence
234
+ * over `hasHeader` (header row, if present, is skipped but its values
235
+ * are ignored). Useful when the upstream header is unstable but the
236
+ * positional shape isn't.
237
+ */
238
+ columns?: ReadonlyArray<string>;
239
+ /**
240
+ * Forward to `csv-parse` `relax_column_count`. Default `false` —
241
+ * a row whose column count doesn't match the header surfaces as a
242
+ * parser error so the transform isn't silently fed truncated data.
243
+ */
244
+ relaxColumnCount?: boolean;
245
+ }
246
+ interface StreamFeedRow {
247
+ /** 1-based row number. Header (when present) is row 0; first data row is row 1. */
248
+ rowNumber: number;
249
+ /**
250
+ * Cell values keyed by column name (or string-position when
251
+ * `hasHeader: false` AND no `columns`).
252
+ *
253
+ * **Cell-value invariants:**
254
+ * - Empty cells (`A\t\tC`) → `''` (empty string).
255
+ * - Missing TRAILING cells in `relaxColumnCount: true` mode → the
256
+ * key is **absent** from the object, not present-with-`''`. csv-parse
257
+ * does not emit keys for short rows. Transforms reading those
258
+ * columns get `undefined` from `row['col']` and must handle it
259
+ * (`row['col'] ?? ''` is the canonical idiom).
260
+ * - With the default `relaxColumnCount: false`, short rows reject at
261
+ * the parser, so this case never reaches the transform.
262
+ */
263
+ row: Record<string, string>;
264
+ }
265
+ /**
266
+ * Async-iterable over the parsed rows of a delimited file. Use with
267
+ * `for await (const { rowNumber, row } of streamFeed({ filePath, ... }))`.
268
+ *
269
+ * The iterator owns its file descriptor — the `for await` loop closes
270
+ * the underlying stream when it returns or breaks. Aborting mid-stream
271
+ * (`break`, `throw`, signal) is safe; csv-parse propagates the close.
272
+ */
273
+ declare function streamFeed(options: StreamFeedOptions): AsyncIterable<StreamFeedRow>;
274
+ //#endregion
275
+ //#region src/runner.d.ts
276
+ /** Soft default; chosen to match giga-test. ES bulk requests over ~5MB get split server-side anyway. */
277
+ declare const DEFAULT_BATCH_SIZE = 1000;
278
+ interface RunImportOptions<TDoc> {
279
+ /** UUID of the `import_run` row driving this run. Forwarded to every transform invocation. */
280
+ importRunId: string;
281
+ /** Operator-supplied label for the run. Forwarded to the transform context. */
282
+ runLabel: string;
283
+ /** Opaque per-run params copied from `import_run.params`. */
284
+ params: Record<string, unknown>;
285
+ /** Transform applied to every parsed row. */
286
+ transform: RowTransform<TDoc>;
287
+ /** Streaming reader options — file path, delimiter, header config. */
288
+ feed: StreamFeedOptions;
289
+ /** ES client (low-level shape from `@murumets-ee/search-elasticsearch`). */
290
+ esClient: EsClientLike;
291
+ /** Index alias to write to. Per D6, callers always pass an alias, never a physical index. */
292
+ esIndex: string;
293
+ /** Rows per `bulkUpsert` call. Default {@link DEFAULT_BATCH_SIZE}. */
294
+ batchSize?: number;
295
+ /**
296
+ * Callback invoked after every batch. The handler in `worker.ts`
297
+ * forwards this to `ctx.updateProgress` for the queue UI; tests
298
+ * inspect it directly. Synchronous + cheap so a slow callback can't
299
+ * back-pressure the importer.
300
+ */
301
+ onProgress?: (progress: ImportRunProgress) => void;
302
+ /** Optional: stop processing after this many rows. Tests use it; production passes `undefined`. */
303
+ rowLimit?: number;
304
+ /** Abort signal threaded into the underlying ES client request — cooperative cancel. */
305
+ signal?: AbortSignal;
306
+ /** Optional ErrorTracker config (caps). Default: top-50 patterns × 5 samples. */
307
+ errorTracker?: ErrorTracker;
308
+ }
309
+ /**
310
+ * Progress payload written to `toolkit_jobs.progress` after every batch.
311
+ * Caps + flush rules live on the queue's `updateProgress` debounce —
312
+ * callers don't need to throttle.
313
+ */
314
+ interface ImportRunProgress {
315
+ rowsRead: number;
316
+ rowsSucceeded: number;
317
+ rowsFailed: number;
318
+ rowsSkipped: number;
319
+ batchesCompleted: number;
320
+ /** Wall-clock seconds since the runner started. */
321
+ elapsedSeconds: number;
322
+ /** Rows / second, computed at every batch. */
323
+ rowsPerSecond: number;
324
+ /** Distinct error patterns currently held by the tracker. Saturates at the cap. */
325
+ distinctErrorPatterns: number;
326
+ }
327
+ /**
328
+ * Final result returned by {@link runImport}. The handler writes these
329
+ * onto the `import_run` row alongside the ErrorTracker snapshot.
330
+ */
331
+ interface RunImportResult {
332
+ /** Total rows read from the file (excludes skipped empty lines). */
333
+ rowsRead: number;
334
+ /** Rows the transform turned into a successful doc AND the ES cluster acknowledged. */
335
+ rowsSucceeded: number;
336
+ /**
337
+ * Rows that the transform rejected (`{ kind: 'error' }`) OR that ES
338
+ * rejected on bulk-write (per-doc failure). Both are aggregated by
339
+ * `errorTracker` for the import_run summary.
340
+ */
341
+ rowsFailed: number;
342
+ /** Rows that the transform skipped (`{ kind: 'skip' }`) — header noise, blank lines, intentional drop. */
343
+ rowsSkipped: number;
344
+ /** Number of `bulkUpsert` calls made. */
345
+ batchesCompleted: number;
346
+ /** Final value of {@link ErrorTracker.snapshot}. */
347
+ errors: ReturnType<ErrorTracker['snapshot']>;
348
+ }
349
+ /**
350
+ * Apply the runner against a feed file. Stops on rowLimit OR end-of-file
351
+ * OR if `signal` aborts. Throws if the streaming reader / ES client
352
+ * throws — caller (the queue handler) catches that and writes
353
+ * `import_run.status = 'failed'` with the error message in
354
+ * `errorSummary.fatal`.
355
+ */
356
+ declare function runImport<TDoc>(options: RunImportOptions<TDoc>): Promise<RunImportResult>;
357
+ //#endregion
358
+ //#region src/worker.d.ts
359
+ /**
360
+ * Structural subset of `JobContext` the handler reads. Declared
361
+ * locally so we don't depend on a `JobHandler` import — the queue
362
+ * package doesn't re-export it from any subpath today (matches the
363
+ * `SendEmailJobContext` pattern in `@murumets-ee/notifications`).
364
+ *
365
+ * Function-param contravariance lets a handler returned with this
366
+ * narrower shape be assigned to the wider `JobHandler<TPayload>` that
367
+ * `registerJob` expects.
368
+ */
369
+ interface ImportsRunJobContext {
370
+ id: string;
371
+ payload: ImportsRunJobPayload;
372
+ updateProgress(data: ImportRunProgress): void;
373
+ }
374
+ /**
375
+ * Payload schema. `importRunId` is the `import_run.id` UUID — the worker
376
+ * looks up everything else (filePath, transformName, params, …) from
377
+ * that row. Keeping the payload tiny means the queue's progress JSON
378
+ * column never bloats with feed metadata duplicated across `toolkit_jobs`.
379
+ */
380
+ declare const importsRunJobPayloadSchema: z.ZodObject<{
381
+ importRunId: z.ZodString;
382
+ }, "strip", z.ZodTypeAny, {
383
+ importRunId: string;
384
+ }, {
385
+ importRunId: string;
386
+ }>;
387
+ type ImportsRunJobPayload = z.infer<typeof importsRunJobPayloadSchema>;
388
+ /**
389
+ * Job definition. Consumers register their handler against this with
390
+ * `registerJob(importsRunJob, createRunImportHandler({...}))`.
391
+ *
392
+ * `defaultRetries: 0` — re-running a multi-batch import against the
393
+ * same `import_run.id` while the previous handler may still be writing
394
+ * is a footgun (duplicate batches, double-counted progress). The
395
+ * operator handles retries explicitly via PR 8a's "retry" button by
396
+ * creating a new `import_run` row. When the resumable design lands
397
+ * (post-PoC), retries become safe to enable.
398
+ */
399
+ declare const importsRunJob: JobDefinition<ImportsRunJobPayload>;
400
+ /**
401
+ * Resolves the ES client at handler-invocation time. A function (rather
402
+ * than the bare `EsClientLike`) so the consumer can lazy-construct the
403
+ * client — typical Next.js setups create the ES connection in a
404
+ * route-handler initialiser, not at plugin-init time.
405
+ */
406
+ type EsClientResolver = () => EsClientLike | Promise<EsClientLike>;
407
+ /**
408
+ * Resolves the value of `import_run.filePath` (whatever the upload
409
+ * route persisted there — typically a storage adapter key) to a
410
+ * readable LOCAL filesystem path that {@link runImport} can hand to
411
+ * `node:fs.createReadStream`.
412
+ *
413
+ * **Why this exists:** the streaming reader (`streamFeed`) reads from
414
+ * local disk via `createReadStream`. The upload route may persist a
415
+ * remote-storage object key (R2, S3, …) on `import_run.filePath`
416
+ * because that's what `@murumets-ee/storage` returns. Without a
417
+ * resolver, `createReadStream('uploads/2026/05/<uuid>/feed.txt')`
418
+ * crashes with `ENOENT`. The resolver is the documented integration
419
+ * point — typical wiring downloads the storage object to a tmpfile
420
+ * and returns its path.
421
+ *
422
+ * **`cleanup`** runs after the run finishes (success OR failure). The
423
+ * worker awaits it best-effort — a failed cleanup logs but does not
424
+ * crash the run.
425
+ *
426
+ * **No-resolver fallback:** when `resolveFilePath` is unset, the
427
+ * worker treats `import_run.filePath` as already a local FS path
428
+ * (back-compat with the original PoC design where uploads landed on
429
+ * local disk). This stays valid for fixture-driven tests + on-disk
430
+ * deployments.
431
+ */
432
+ type FilePathResolver = (storageKey: string) => Promise<{
433
+ localPath: string;
434
+ cleanup?: () => Promise<void>;
435
+ }>;
436
+ interface RunImportHandlerConfig {
437
+ /** AdminClient over the `import_run` entity. */
438
+ importRuns: ImportRunClient;
439
+ /** Transform registry to dispatch against. Defaults to the process-global singleton. */
440
+ transforms: TransformRegistry;
441
+ /** Resolver for the ES client. */
442
+ esClient: EsClientResolver;
443
+ /** ES alias / index to bulk-write into. Per D6 callers always pass an alias. */
444
+ esIndex: string;
445
+ /**
446
+ * Optional resolver for `import_run.filePath`. See {@link FilePathResolver}.
447
+ * Required when uploads land in remote storage (R2/S3); optional for
448
+ * on-disk PoC setups.
449
+ */
450
+ resolveFilePath?: FilePathResolver;
451
+ /** Optional structured logger. Defaults to silent. */
452
+ logger?: Logger;
453
+ }
454
+ /**
455
+ * Build the `JobHandler` for {@link importsRunJob}. The returned
456
+ * handler is what gets passed to `registerJob`.
457
+ *
458
+ * The body is wrapped in `runAsCli` so AdminClient calls inside have a
459
+ * synthetic `cli` admin context — `auditable()` records `updatedBy:
460
+ * 'cli'` rather than NULL, and the firewall checker passes. This is
461
+ * the documented worker entry-point pattern (see `runAsCli` JSDoc in
462
+ * `@murumets-ee/core`).
463
+ */
464
+ declare function createRunImportHandler(config: RunImportHandlerConfig): (job: ImportsRunJobContext) => Promise<void>;
465
+ //#endregion
466
+ export { IMPORT_RUN_STATUSES as C, ImportRunStatus as E, ErrorTrackerConfig as S, ImportRunClient as T, DEFAULT_MAX_PATTERNS as _, RunImportHandlerConfig as a, ErrorSample as b, importsRunJobPayloadSchema as c, RunImportOptions as d, RunImportResult as f, streamFeed as g, StreamFeedRow as h, ImportsRunJobPayload as i, DEFAULT_BATCH_SIZE as l, StreamFeedOptions as m, FilePathResolver as n, createRunImportHandler as o, runImport as p, ImportsRunJobContext as r, importsRunJob as s, EsClientResolver as t, ImportRunProgress as u, DEFAULT_MAX_SAMPLES_PER_PATTERN as v, ImportRun as w, ErrorTracker as x, ErrorPattern as y };
467
+ //# sourceMappingURL=worker-DerGVTSI.d.mts.map
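A wiring sketch matching the `registerJob(importsRunJob, createRunImportHandler({...}))` shape the JSDoc above describes, run once at plugin init. The subpaths and the two `declare`d host-provided values are assumptions.

import { registerJob } from '@murumets-ee/queue/client'
import type { EsClientLike } from '@murumets-ee/search-elasticsearch'
import { getTransformRegistry, type ImportRunClient } from '@murumets-ee/imports'
import { createRunImportHandler, importsRunJob } from '@murumets-ee/imports/worker'

// Provided by the host app (placeholders for the sketch).
declare const importRunsAdminClient: ImportRunClient
declare function createEsClient(): Promise<EsClientLike>

registerJob(
  importsRunJob,
  createRunImportHandler({
    importRuns: importRunsAdminClient,   // AdminClient over the import_run entity
    transforms: getTransformRegistry(),  // process-global singleton
    esClient: () => createEsClient(),    // lazy, resolved at handler-invocation time
    esIndex: 'parts_write',              // alias per D6, never a physical index
  }),
)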
@@ -0,0 +1 @@
1
+ {"version":3,"file":"worker-DerGVTSI.d.mts","names":[],"sources":["../src/entities/import-run.ts","../src/error-tracker.ts","../src/streaming.ts","../src/runner.ts","../src/worker.ts"],"mappings":";;;;;;;;;;;AAgCA;;;;;;;cAHa,mBAAA;AAAA,KACD,eAAA,WAA0B,mBAAA;AAAA,cAEzB,SAAA,yBAAS,MAAA;MAgEpB,sBAAA,CAAA,OAAA;AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAEU,eAAA,GAAkB,WAAA,QAAmB,SAAA,CAAU,SAAA;;;;;;;;;;;;AArE3D;;;;;AACA;;;;;AAEA;;;;;;;;;;;;KCAY,eAAA,sCAKR,eAAA;EAAA,CACG,GAAA,WAAc,eAAA;AAAA;;UAGJ,WAAA;EACf,SAAA;EACA,OAAA,EAAS,eAAA;AAAA;;UAIM,YAAA;EACf,SAAA;EACA,KAAA;EACA,OAAA;EACA,KAAA;EACA,eAAA;EACA,cAAA;EACA,OAAA,EAAS,aAAA,CAAc,WAAA;;EAEvB,UAAA;AAAA;AAAA,UAGe,kBAAA;;EAEf,WAAA;;EAEA,oBAAA;AAAA;;cAIW,oBAAA;AAAA,cACA,+BAAA;AAAA,cAYA,YAAA;EAAA,iBACM,QAAA;EAAA,iBACA,WAAA;EAAA,iBACA,oBAAA;EAAA,QACT,iBAAA;cAEI,MAAA,GAAQ,kBAAA;;;;;;;;;;EAcpB,QAAA,CACE,SAAA,UACA,SAAA,UACA,OAAA,UACA,KAAA,sBACA,OAAA,EAAS,eAAA;;EA+BX,kBAAA,CAAA;;EAOA,uBAAA,CAAA;;;;;;EASA,wBAAA,CAAA;;;;;;EASA,cAAA,CAAA,GAAkB,YAAA;;;;;AD/DpB;ECmFE,QAAA,CAAA,GAAY,oBAAA;AAAA;AAAA,UAUG,oBAAA;EACf,WAAA;EACA,gBAAA;EACA,iBAAA;EACA,QAAA,EAAU,YAAA;AAAA;;;;;;;;;;;;ADtKZ;;;;;AACA;;;;;AAEA;UERiB,iBAAA;;EAEf,QAAA;;;;;EAKA,SAAA;;;;;;;EAOA,SAAA;;;;;;;EAOA,OAAA,GAAU,aAAA;;;;;;EAMV,gBAAA;AAAA;AAAA,UAGe,aAAA;;EAEf,SAAA;;;;;;;;;;;;;;;EAeA,GAAA,EAAK,MAAA;AAAA;;;;;;;;;iBAWgB,UAAA,CAAW,OAAA,EAAS,iBAAA,GAAoB,aAAA,CAAc,aAAA;;;;cCnDhE,kBAAA;AAAA,UAEI,gBAAA;;EAEf,WAAA;;EAEA,QAAA;;EAEA,MAAA,EAAQ,MAAA;;EAER,SAAA,EAAW,YAAA,CAAa,IAAA;;EAExB,IAAA,EAAM,iBAAA;;EAEN,QAAA,EAAU,YAAA;;EAEV,OAAA;EHfoB;EGiBpB,SAAA;EHjBoB;;;;;;EGwBpB,UAAA,IAAc,QAAA,EAAU,iBAAA;;EAExB,QAAA;;EAEA,MAAA,GAAS,WAAA;;EAET,YAAA,GAAe,YAAA;AAAA;;;;;;UAQA,iBAAA;EACf,QAAA;EACA,aAAA;EACA,UAAA;EACA,WAAA;EACA,gBAAA;;EAEA,cAAA;;EAEA,aAAA;;EAEA,qBAAA;AAAA;;;;;UAOe,eAAA;;EAEf,QAAA;;EAEA,aAAA;;;;;;EAMA,UAAA;;EAEA,WAAA;EHFU;EGIV,gBAAA;;EAEA,MAAA,EAAQ,UAAA,CAAW,YAAA;AAAA;;;;;;;;iBAUC,SAAA,MAAA,CAAgB,OAAA,EAAS,gBAAA,CAAiB,IAAA,IAAQ,OAAA,CAAQ,eAAA;;;;;;;;;;;;;UCjF/D,oBAAA;EACf,EAAA;EACA,OAAA,EAAS,oBAAA;EACT,cAAA,CAAe,IAAA,EAAM,iBAAA;AAAA;;;;;;;cASV,0BAAA,EAA0B,CAAA,CAAA,SAAA;;;;;;;KAG3B,oBAAA,GAAuB,CAAA,CAAE,KAAA,QAAa,0BAAA;;;;;;;;;;;;cAarC,aAAA,EAAe,aAAA,CAAc,oBAAA;;;;;;;KAa9B,gBAAA,SAAyB,YAAA,GAAe,OAAA,CAAQ,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;KA2BhD,gBAAA,IACV,UAAA,aACG,OAAA;EAAU,SAAA;EAAmB,OAAA,SAAgB,OAAA;AAAA;AAAA,UAEjC,sBAAA;EJPwB;EISvC,UAAA,EAAY,eAAA;EJTmC;EIW/C,UAAA,EAAY,iBAAA;EJXsD;EIalE,QAAA,EAAU,gBAAA;;EAEV,OAAA;;AHjFF;;;;EGuFE,eAAA,GAAkB,gBAAA;EHjFb;EGmFL,MAAA,GAAS,MAAA;AAAA;;AHhFX;;;;;;;;;iBG6FgB,sBAAA,CACd,MAAA,EAAQ,sBAAA,IACN,GAAA,EAAK,oBAAA,KAAyB,OAAA"}
@@ -0,0 +1,2 @@
1
+ import { a as RunImportHandlerConfig, c as importsRunJobPayloadSchema, i as ImportsRunJobPayload, n as FilePathResolver, o as createRunImportHandler, r as ImportsRunJobContext, s as importsRunJob, t as EsClientResolver } from "./worker-DerGVTSI.mjs";
2
+ export { EsClientResolver, FilePathResolver, ImportsRunJobContext, ImportsRunJobPayload, RunImportHandlerConfig, createRunImportHandler, importsRunJob, importsRunJobPayloadSchema };
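For tests or custom embeds that bypass the queue, the runner can be driven directly against a local fixture, matching `RunImportOptions` from the worker hunk above. Everything `declare`d is host-provided and the entry point is assumed.

import { runImport, type RowTransform } from '@murumets-ee/imports'
import type { EsClientLike } from '@murumets-ee/search-elasticsearch'

declare const esClient: EsClientLike
declare const demoTransform: RowTransform<{ doc_id: string }>

// Inside an async test:
const result = await runImport({
  importRunId: '3f2e1d0c-4a5b-4c6d-8e7f-901234567890',
  runLabel: 'fixture run',
  params: {},                                  // opaque; the transform validates its own shape
  transform: demoTransform,
  feed: { filePath: './fixtures/feed.txt', delimiter: '\t' },
  esClient,
  esIndex: 'parts_write',
  rowLimit: 100,                               // tests cap the row count; production omits it
  onProgress: (p) => console.log(p.rowsRead, p.rowsPerSecond),
})

// result.rowsSucceeded / rowsFailed / rowsSkipped plus result.errors (the tracker snapshot).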
@@ -0,0 +1,2 @@
1
+ import{n as e}from"./runner-DdhiNybk.mjs";import{runAsCli as t}from"@murumets-ee/core";import{defineJob as n}from"@murumets-ee/queue/client";import{z as r}from"zod";const i=r.object({importRunId:r.string().uuid()}),a=n({name:`imports:run`,description:`Stream a feed file, transform rows, bulk-write to Elasticsearch.`,schema:i,defaultRetries:0});function o(n){let{importRuns:r,transforms:i,esClient:a,esIndex:o,resolveFilePath:c,logger:l}=n;return async n=>t(async()=>{let{importRunId:t}=n.payload,u=l?.child({jobId:n.id,importRunId:t,type:`imports:run`}),d=await r.findById(t);if(!d){u?.warn(`import_run row not found — skipping`);return}let f=i.get(d.transformName);if(!f)throw await s(r,t,`Unknown transform "${d.transformName}"`),Error(`No transform registered for name "${d.transformName}"`);let p=d.params&&typeof d.params==`object`&&!Array.isArray(d.params)?d.params:{};await r.update(t,{status:`running`,queueJobId:n.id,startedAt:new Date});let m;try{m=await a()}catch(e){throw await s(r,t,`Could not resolve ES client: ${e instanceof Error?e.message:String(e)}`),e}let h,g;if(c)try{let e=await c(d.filePath);h=e.localPath,g=e.cleanup}catch(e){throw await s(r,t,`Could not resolve filePath: ${e instanceof Error?e.message:String(e)}`),e}else h=d.filePath;let _;try{_=await e({importRunId:t,runLabel:d.label,params:p,transform:f,feed:{filePath:h},esClient:m,esIndex:o,onProgress:e=>n.updateProgress(e)})}catch(e){throw await s(r,t,e instanceof Error?e.message:String(e)),g&&await g().catch(e=>{u?.warn({err:e},`filePath cleanup after failed run threw — ignoring`)}),e}g&&await g().catch(e=>{u?.warn({err:e},`filePath cleanup after successful run threw — ignoring`)}),await r.update(t,{status:`succeeded`,finishedAt:new Date,totals:{rowsRead:_.rowsRead,rowsSucceeded:_.rowsSucceeded,rowsFailed:_.rowsFailed,rowsSkipped:_.rowsSkipped,batchesCompleted:_.batchesCompleted},errorSummary:JSON.parse(JSON.stringify(_.errors))}),u?.info({rowsRead:_.rowsRead,rowsSucceeded:_.rowsSucceeded,rowsFailed:_.rowsFailed,rowsSkipped:_.rowsSkipped,batches:_.batchesCompleted},`import_run completed`)})}async function s(e,t,n){try{await e.update(t,{status:`failed`,finishedAt:new Date,errorSummary:{fatal:n}})}catch{}}export{o as createRunImportHandler,a as importsRunJob,i as importsRunJobPayloadSchema};
2
+ //# sourceMappingURL=worker.mjs.map
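A `FilePathResolver` sketch for the remote-storage case described in the worker JSDoc: download the object to a tmp file so `streamFeed` can `createReadStream` it, return `localPath`, and clean up afterwards. `downloadObjectToStream` is a placeholder for whatever the host storage layer exposes, not a `@murumets-ee/storage` API; the `/worker` subpath is assumed.

import { createWriteStream } from 'node:fs'
import { mkdtemp, rm } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { pipeline } from 'node:stream/promises'
import type { FilePathResolver } from '@murumets-ee/imports/worker'

// Host-provided download helper (placeholder).
declare function downloadObjectToStream(key: string): Promise<NodeJS.ReadableStream>

export const resolveFilePath: FilePathResolver = async (storageKey) => {
  const dir = await mkdtemp(join(tmpdir(), 'imports-'))
  const localPath = join(dir, 'feed.txt')
  await pipeline(await downloadObjectToStream(storageKey), createWriteStream(localPath))
  return {
    localPath,
    // Best-effort: the worker logs, rather than crashes, if cleanup throws.
    cleanup: async () => {
      await rm(dir, { recursive: true, force: true })
    },
  }
}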
@@ -0,0 +1 @@
1
+ {"version":3,"file":"worker.mjs","names":[],"sources":["../src/worker.ts"],"sourcesContent":["/**\n * Queue handler for `imports:run`. The route handler that creates the\n * `import_run` row enqueues this job carrying `{ importRunId }`; the\n * worker module here resolves the run, the transform, and the ES\n * client, then runs `runImport` and writes results back.\n *\n * Per PLAN-ECOMMERCE.md PR 7 (PoC scope): one job type, no resumability.\n * A failed handler attempt re-runs from row 1 on retry — fine for the\n * tens-of-MB feed sizes the PoC exercises.\n *\n * Per CLAUDE.md \"leaf packages\" boundary rule: this handler reads the\n * import_run row via an injected `AdminClient`, NOT a direct `db.select`.\n * Construction happens at plugin init in `plugin.ts`.\n */\n\nimport { type Logger, runAsCli } from '@murumets-ee/core'\nimport { defineJob, type JobDefinition } from '@murumets-ee/queue/client'\nimport type { EsClientLike } from '@murumets-ee/search-elasticsearch'\nimport { z } from 'zod'\nimport { type ImportRunClient, type ImportRunStatus } from './entities/import-run.js'\nimport { type ImportRunProgress, runImport } from './runner.js'\nimport { TransformRegistry } from './transform.js'\n\n/**\n * Structural subset of `JobContext` the handler reads. Declared\n * locally so we don't depend on a `JobHandler` import — the queue\n * package doesn't re-export it from any subpath today (matches the\n * `SendEmailJobContext` pattern in `@murumets-ee/notifications`).\n *\n * Function-param contravariance lets a handler returned with this\n * narrower shape be assigned to the wider `JobHandler<TPayload>` that\n * `registerJob` expects.\n */\nexport interface ImportsRunJobContext {\n id: string\n payload: ImportsRunJobPayload\n updateProgress(data: ImportRunProgress): void\n}\n\n/**\n * Payload schema. `importRunId` is the `import_run.id` UUID — the worker\n * looks up everything else (filePath, transformName, params, …) from\n * that row. Keeping the payload tiny means the queue's progress JSON\n * column never bloats with feed metadata duplicated across `toolkit_jobs`.\n */\nexport const importsRunJobPayloadSchema = z.object({\n importRunId: z.string().uuid(),\n})\nexport type ImportsRunJobPayload = z.infer<typeof importsRunJobPayloadSchema>\n\n/**\n * Job definition. Consumers register their handler against this with\n * `registerJob(importsRunJob, createRunImportHandler({...}))`.\n *\n * `defaultRetries: 0` — re-running a multi-batch import against the\n * same `import_run.id` while the previous handler may still be writing\n * is a footgun (duplicate batches, double-counted progress). The\n * operator handles retries explicitly via PR 8a's \"retry\" button by\n * creating a new `import_run` row. When the resumable design lands\n * (post-PoC), retries become safe to enable.\n */\nexport const importsRunJob: JobDefinition<ImportsRunJobPayload> = defineJob({\n name: 'imports:run',\n description: 'Stream a feed file, transform rows, bulk-write to Elasticsearch.',\n schema: importsRunJobPayloadSchema,\n defaultRetries: 0,\n})\n\n/**\n * Resolves the ES client at handler-invocation time. 
A function (rather\n * than the bare `EsClientLike`) so the consumer can lazy-construct the\n * client — typical Next.js setups create the ES connection in a\n * route-handler initialiser, not at plugin-init time.\n */\nexport type EsClientResolver = () => EsClientLike | Promise<EsClientLike>\n\n/**\n * Resolves the value of `import_run.filePath` (whatever the upload\n * route persisted there — typically a storage adapter key) to a\n * readable LOCAL filesystem path that {@link runImport} can hand to\n * `node:fs.createReadStream`.\n *\n * **Why this exists:** the streaming reader (`streamFeed`) reads from\n * local disk via `createReadStream`. The upload route may persist a\n * remote-storage object key (R2, S3, …) on `import_run.filePath`\n * because that's what `@murumets-ee/storage` returns. Without a\n * resolver, `createReadStream('uploads/2026/05/<uuid>/feed.txt')`\n * crashes with `ENOENT`. The resolver is the documented integration\n * point — typical wiring downloads the storage object to a tmpfile\n * and returns its path.\n *\n * **`cleanup`** runs after the run finishes (success OR failure). The\n * worker awaits it best-effort — a failed cleanup logs but does not\n * crash the run.\n *\n * **No-resolver fallback:** when `resolveFilePath` is unset, the\n * worker treats `import_run.filePath` as already a local FS path\n * (back-compat with the original PoC design where uploads landed on\n * local disk). This stays valid for fixture-driven tests + on-disk\n * deployments.\n */\nexport type FilePathResolver = (\n storageKey: string,\n) => Promise<{ localPath: string; cleanup?: () => Promise<void> }>\n\nexport interface RunImportHandlerConfig {\n /** AdminClient over the `import_run` entity. */\n importRuns: ImportRunClient\n /** Transform registry to dispatch against. Defaults to the process-global singleton. */\n transforms: TransformRegistry\n /** Resolver for the ES client. */\n esClient: EsClientResolver\n /** ES alias / index to bulk-write into. Per D6 callers always pass an alias. */\n esIndex: string\n /**\n * Optional resolver for `import_run.filePath`. See {@link FilePathResolver}.\n * Required when uploads land in remote storage (R2/S3); optional for\n * on-disk PoC setups.\n */\n resolveFilePath?: FilePathResolver\n /** Optional structured logger. Defaults to silent. */\n logger?: Logger\n}\n\n/**\n * Build the `JobHandler` for {@link importsRunJob}. The returned\n * handler is what gets passed to `registerJob`.\n *\n * The body is wrapped in `runAsCli` so AdminClient calls inside have a\n * synthetic `cli` admin context — `auditable()` records `updatedBy:\n * 'cli'` rather than NULL, and the firewall checker passes. This is\n * the documented worker entry-point pattern (see `runAsCli` JSDoc in\n * `@murumets-ee/core`).\n */\nexport function createRunImportHandler(\n config: RunImportHandlerConfig,\n): (job: ImportsRunJobContext) => Promise<void> {\n const { importRuns, transforms, esClient, esIndex, resolveFilePath, logger } = config\n\n return async (job: ImportsRunJobContext): Promise<void> =>\n runAsCli(async () => {\n const { importRunId } = job.payload\n const log = logger?.child({ jobId: job.id, importRunId, type: 'imports:run' })\n\n const run = await importRuns.findById(importRunId)\n if (!run) {\n // Row was deleted between enqueue and dispatch — nothing to do.\n // Don't throw: a thrown handler retries with no payoff because\n // the row stays gone. 
Log + return so the queue marks the job\n // completed.\n log?.warn('import_run row not found — skipping')\n return\n }\n\n // `transformName`, `filePath`, `label` are declared `field.text({\n // required: true })` so the inferred DTO already types them as\n // `string` — no cast needed. `params` is `field.json()` which\n // infers as `JsonValue | undefined`; narrow to a record so the\n // transform receives an object.\n const transform = transforms.get(run.transformName)\n if (!transform) {\n await markFailed(importRuns, importRunId, `Unknown transform \"${run.transformName}\"`)\n throw new Error(`No transform registered for name \"${run.transformName}\"`)\n }\n const params: Record<string, unknown> =\n run.params && typeof run.params === 'object' && !Array.isArray(run.params)\n ? (run.params as Record<string, unknown>)\n : {}\n\n await importRuns.update(importRunId, {\n status: 'running' satisfies ImportRunStatus,\n queueJobId: job.id,\n startedAt: new Date(),\n })\n\n let resolvedClient: EsClientLike\n try {\n resolvedClient = await esClient()\n } catch (err) {\n const reason = err instanceof Error ? err.message : String(err)\n await markFailed(importRuns, importRunId, `Could not resolve ES client: ${reason}`)\n throw err\n }\n\n // Materialise the file locally if the consumer wired a resolver.\n // The `cleanup` callback (when present) runs in the `finally`\n // below regardless of run outcome so a tmpfile created here\n // doesn't outlive the handler.\n let localPath: string\n let fileCleanup: (() => Promise<void>) | undefined\n if (resolveFilePath) {\n try {\n const resolved = await resolveFilePath(run.filePath)\n localPath = resolved.localPath\n fileCleanup = resolved.cleanup\n } catch (err) {\n const reason = err instanceof Error ? err.message : String(err)\n await markFailed(importRuns, importRunId, `Could not resolve filePath: ${reason}`)\n throw err\n }\n } else {\n // Back-compat: no resolver wired → treat `filePath` as already\n // a local FS path. Fine for fixture-driven tests + the original\n // on-disk PoC design.\n localPath = run.filePath\n }\n\n let result: Awaited<ReturnType<typeof runImport>>\n try {\n result = await runImport({\n importRunId,\n runLabel: run.label,\n params,\n transform,\n feed: { filePath: localPath },\n esClient: resolvedClient,\n esIndex,\n onProgress: (progress) => job.updateProgress(progress),\n })\n } catch (err) {\n const reason = err instanceof Error ? 
err.message : String(err)\n await markFailed(importRuns, importRunId, reason)\n if (fileCleanup) {\n await fileCleanup().catch((cleanupErr: unknown) => {\n log?.warn({ err: cleanupErr }, 'filePath cleanup after failed run threw — ignoring')\n })\n }\n throw err\n }\n\n // Cleanup runs before the success-update so a cleanup throw\n // surfaces as a logged warning rather than a crashed handler at\n // the worst possible moment (just after the row was about to be\n // marked succeeded).\n if (fileCleanup) {\n await fileCleanup().catch((cleanupErr: unknown) => {\n log?.warn({ err: cleanupErr }, 'filePath cleanup after successful run threw — ignoring')\n })\n }\n\n await importRuns.update(importRunId, {\n status: 'succeeded' satisfies ImportRunStatus,\n finishedAt: new Date(),\n totals: {\n rowsRead: result.rowsRead,\n rowsSucceeded: result.rowsSucceeded,\n rowsFailed: result.rowsFailed,\n rowsSkipped: result.rowsSkipped,\n batchesCompleted: result.batchesCompleted,\n },\n // The typed `ErrorTrackerSnapshot` is fully JSON-serialisable but\n // doesn't structurally satisfy entity's recursive `JsonValue`\n // (objects without an index signature aren't `Record<string,\n // JsonValue>`). Round-trip through `JSON.parse(JSON.stringify(...))`\n // to land as the recursive JSON shape — the runtime cost is one\n // serialise per import_run completion, the type-cast cost is zero.\n errorSummary: JSON.parse(JSON.stringify(result.errors)),\n })\n\n log?.info(\n {\n rowsRead: result.rowsRead,\n rowsSucceeded: result.rowsSucceeded,\n rowsFailed: result.rowsFailed,\n rowsSkipped: result.rowsSkipped,\n batches: result.batchesCompleted,\n },\n 'import_run completed',\n )\n })\n}\n\n/** Write a `failed` row with a fatal error reason. Best-effort — logs but does not throw on DB failure. */\nasync function markFailed(\n importRuns: ImportRunClient,\n id: string,\n reason: string,\n): Promise<void> {\n try {\n await importRuns.update(id, {\n status: 'failed' satisfies ImportRunStatus,\n finishedAt: new Date(),\n errorSummary: { fatal: reason },\n })\n } catch {\n // The handler already threw / will throw — the queue will surface\n // the original error. 
A second failure here just logs noise.\n }\n}\n"],"mappings":"qKA6CA,MAAa,EAA6B,EAAE,OAAO,CACjD,YAAa,EAAE,QAAQ,CAAC,MAAM,CAC/B,CAAC,CAcW,EAAqD,EAAU,CAC1E,KAAM,cACN,YAAa,mEACb,OAAQ,EACR,eAAgB,EACjB,CAAC,CAoEF,SAAgB,EACd,EAC8C,CAC9C,GAAM,CAAE,aAAY,aAAY,WAAU,UAAS,kBAAiB,UAAW,EAE/E,OAAO,KAAO,IACZ,EAAS,SAAY,CACnB,GAAM,CAAE,eAAgB,EAAI,QACtB,EAAM,GAAQ,MAAM,CAAE,MAAO,EAAI,GAAI,cAAa,KAAM,cAAe,CAAC,CAExE,EAAM,MAAM,EAAW,SAAS,EAAY,CAClD,GAAI,CAAC,EAAK,CAKR,GAAK,KAAK,sCAAsC,CAChD,OAQF,IAAM,EAAY,EAAW,IAAI,EAAI,cAAc,CACnD,GAAI,CAAC,EAEH,MADA,MAAM,EAAW,EAAY,EAAa,sBAAsB,EAAI,cAAc,GAAG,CAC3E,MAAM,qCAAqC,EAAI,cAAc,GAAG,CAE5E,IAAM,EACJ,EAAI,QAAU,OAAO,EAAI,QAAW,UAAY,CAAC,MAAM,QAAQ,EAAI,OAAO,CACrE,EAAI,OACL,EAAE,CAER,MAAM,EAAW,OAAO,EAAa,CACnC,OAAQ,UACR,WAAY,EAAI,GAChB,UAAW,IAAI,KAChB,CAAC,CAEF,IAAI,EACJ,GAAI,CACF,EAAiB,MAAM,GAAU,OAC1B,EAAK,CAGZ,MADA,MAAM,EAAW,EAAY,EAAa,gCAD3B,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,GACoB,CAC7E,EAOR,IAAI,EACA,EACJ,GAAI,EACF,GAAI,CACF,IAAM,EAAW,MAAM,EAAgB,EAAI,SAAS,CACpD,EAAY,EAAS,UACrB,EAAc,EAAS,cAChB,EAAK,CAGZ,MADA,MAAM,EAAW,EAAY,EAAa,+BAD3B,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,GACmB,CAC5E,OAMR,EAAY,EAAI,SAGlB,IAAI,EACJ,GAAI,CACF,EAAS,MAAM,EAAU,CACvB,cACA,SAAU,EAAI,MACd,SACA,YACA,KAAM,CAAE,SAAU,EAAW,CAC7B,SAAU,EACV,UACA,WAAa,GAAa,EAAI,eAAe,EAAS,CACvD,CAAC,OACK,EAAK,CAQZ,MANA,MAAM,EAAW,EAAY,EADd,aAAe,MAAQ,EAAI,QAAU,OAAO,EAAI,CACd,CAC7C,GACF,MAAM,GAAa,CAAC,MAAO,GAAwB,CACjD,GAAK,KAAK,CAAE,IAAK,EAAY,CAAE,qDAAqD,EACpF,CAEE,EAOJ,GACF,MAAM,GAAa,CAAC,MAAO,GAAwB,CACjD,GAAK,KAAK,CAAE,IAAK,EAAY,CAAE,yDAAyD,EACxF,CAGJ,MAAM,EAAW,OAAO,EAAa,CACnC,OAAQ,YACR,WAAY,IAAI,KAChB,OAAQ,CACN,SAAU,EAAO,SACjB,cAAe,EAAO,cACtB,WAAY,EAAO,WACnB,YAAa,EAAO,YACpB,iBAAkB,EAAO,iBAC1B,CAOD,aAAc,KAAK,MAAM,KAAK,UAAU,EAAO,OAAO,CAAC,CACxD,CAAC,CAEF,GAAK,KACH,CACE,SAAU,EAAO,SACjB,cAAe,EAAO,cACtB,WAAY,EAAO,WACnB,YAAa,EAAO,YACpB,QAAS,EAAO,iBACjB,CACD,uBACD,EACD,CAIN,eAAe,EACb,EACA,EACA,EACe,CACf,GAAI,CACF,MAAM,EAAW,OAAO,EAAI,CAC1B,OAAQ,SACR,WAAY,IAAI,KAChB,aAAc,CAAE,MAAO,EAAQ,CAChC,CAAC,MACI"}
package/package.json ADDED
@@ -0,0 +1,58 @@
1
+ {
2
+ "name": "@murumets-ee/imports",
3
+ "version": "0.12.0",
4
+ "license": "Elastic-2.0",
5
+ "type": "module",
6
+ "exports": {
7
+ ".": {
8
+ "types": "./dist/index.d.mts",
9
+ "import": "./dist/index.mjs"
10
+ },
11
+ "./plugin": {
12
+ "types": "./dist/plugin.d.mts",
13
+ "import": "./dist/plugin.mjs"
14
+ },
15
+ "./worker": {
16
+ "types": "./dist/worker.d.mts",
17
+ "import": "./dist/worker.mjs"
18
+ },
19
+ "./transforms": {
20
+ "types": "./dist/transforms.d.mts",
21
+ "import": "./dist/transforms.mjs"
22
+ },
23
+ "./storage-resolver": {
24
+ "types": "./dist/storage-resolver.d.mts",
25
+ "import": "./dist/storage-resolver.mjs"
26
+ }
27
+ },
28
+ "files": [
29
+ "dist"
30
+ ],
31
+ "dependencies": {
32
+ "csv-parse": "^5.5.3",
33
+ "drizzle-orm": "^0.45.2",
34
+ "zod": "^3.24.1",
35
+ "@murumets-ee/core": "0.12.0",
36
+ "@murumets-ee/db": "0.12.0",
37
+ "@murumets-ee/entity": "0.12.0",
38
+ "@murumets-ee/logging": "0.12.0",
39
+ "@murumets-ee/queue": "0.12.0",
40
+ "@murumets-ee/search-elasticsearch": "0.12.0",
41
+ "@murumets-ee/storage": "0.12.0"
42
+ },
43
+ "devDependencies": {
44
+ "@types/node": "^20.19.39",
45
+ "tsdown": "^0.21.10",
46
+ "typescript": "^5.7.3",
47
+ "vitest": "^2.1.8"
48
+ },
49
+ "typeCoverage": {
50
+ "atLeast": 100
51
+ },
52
+ "scripts": {
53
+ "build": "tsdown",
54
+ "dev": "tsdown --watch",
55
+ "test": "vitest",
56
+ "test:integration": "vitest run --config vitest.integration.config.ts"
57
+ }
58
+ }
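The `exports` map above exposes import-only ESM subpaths, so consumers pull each entry point from its subpath rather than the package root. Only the worker symbols are confirmed by the shipped source in this diff; the other subpaths' exports are not shown, so those lines below are illustrative placeholders.

```ts
// Subpath imports matching the "exports" map. ESM-only ("type": "module",
// import-only conditions) — require() of these subpaths will not resolve.
import { createRunImportHandler, importsRunJob } from '@murumets-ee/imports/worker'
// import { ... } from '@murumets-ee/imports/plugin'            // plugin init (symbols not shown in this diff)
// import { ... } from '@murumets-ee/imports/transforms'        // transform definitions (symbols not shown)
// import { ... } from '@murumets-ee/imports/storage-resolver'  // remote-storage FilePathResolver (symbols not shown)
```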