gcp-job-runner 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -44,7 +44,7 @@ Same code, same arguments, same secrets. The cloud command automatically builds
44
44
  - **Cloud Run deployment** — no Terraform or manual GCP config needed, just `job cloud run`
45
45
  - **Smart caching** — a single Docker image contains all jobs; running different jobs or different arguments doesn't rebuild
46
46
  - **GCP Secret Manager** — secrets are loaded transparently for both local and cloud execution
47
- - **Unified file I/O** — `getFileWriter()` writes JSON/CSV/binary to a configured output location and `getInputFilesPath()` exposes the input location for reads, targeting local directories during development and Cloud Storage buckets in production with the same handler code
47
+ - **Unified file I/O** — `getFileWriter()` writes JSON/CSV/binary to a configured output location and `readInputJson()` / `readInputText()` / `readInputBuffer()` read from a configured input location, targeting local directories during development and Cloud Storage buckets in production with the same handler code
48
48
  - **Multi-environment** — configure staging, production, etc. and switch with a single argument
49
49
 
50
50
  ## Install
package/dist/files.d.mts CHANGED
@@ -77,6 +77,34 @@ declare function fileInput(): z.ZodString;
77
77
  * Throws when no input destination is configured.
78
78
  */
79
79
  declare function listInputFiles(): Promise<string[]>;
80
+ /**
81
+ * Read a file from the configured input files destination as raw bytes.
82
+ * Local paths and `gs://` URIs are handled transparently.
83
+ *
84
+ * `relativePath` is sanitized the same way the writer sanitizes output
85
+ * paths — absolute paths and upward traversal (`..`) are rejected so a
86
+ * job can't escape its configured destination.
87
+ *
88
+ * Throws when no input destination is configured.
89
+ */
90
+ declare function readInputBuffer(relativePath: string): Promise<Buffer>;
91
+ /**
92
+ * Read a UTF-8 text file from the configured input files destination.
93
+ * Use this for CSV, JSON, plain text, SVG — anything character-based.
94
+ *
95
+ * See `readInputBuffer()` for path-sanitization and configuration
96
+ * semantics.
97
+ */
98
+ declare function readInputText(relativePath: string): Promise<string>;
99
+ /**
100
+ * Read and `JSON.parse` a file from the configured input files
101
+ * destination. Parse errors from a malformed file propagate as the
102
+ * standard `SyntaxError` thrown by `JSON.parse`.
103
+ *
104
+ * See `readInputBuffer()` for path-sanitization and configuration
105
+ * semantics.
106
+ */
107
+ declare function readInputJson<T = unknown>(relativePath: string): Promise<T>;
80
108
  //#endregion
81
- export { FileWriter, fileInput, getFileWriter, getInputFilesPath, getOutputFilesPath, listInputFiles };
109
+ export { FileWriter, fileInput, getFileWriter, getInputFilesPath, getOutputFilesPath, listInputFiles, readInputBuffer, readInputJson, readInputText };
82
110
  //# sourceMappingURL=files.d.mts.map
package/dist/files.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  import path from "node:path";
2
2
  import { consola } from "consola";
3
3
  import { z } from "zod";
4
- import { mkdir, readdir, writeFile } from "node:fs/promises";
4
+ import { mkdir, readFile, readdir, writeFile } from "node:fs/promises";
5
5
 
6
6
  //#region src/files.ts
7
7
  const GCS_PREFIX = "gs://";
@@ -77,6 +77,52 @@ async function listInputFiles() {
77
77
  if (destination.startsWith(GCS_PREFIX)) return listGcsFiles(destination);
78
78
  return listLocalFiles(path.resolve(destination));
79
79
  }
80
+ /**
81
+ * Read a file from the configured input files destination as raw bytes.
82
+ * Local paths and `gs://` URIs are handled transparently.
83
+ *
84
+ * `relativePath` is sanitized the same way the writer sanitizes output
85
+ * paths — absolute paths and upward traversal (`..`) are rejected so a
86
+ * job can't escape its configured destination.
87
+ *
88
+ * Throws when no input destination is configured.
89
+ */
90
+ async function readInputBuffer(relativePath) {
91
+ return readInputRaw(relativePath);
92
+ }
93
+ /**
94
+ * Read a UTF-8 text file from the configured input files destination.
95
+ * Use this for CSV, JSON, plain text, SVG — anything character-based.
96
+ *
97
+ * See `readInputBuffer()` for path-sanitization and configuration
98
+ * semantics.
99
+ */
100
+ async function readInputText(relativePath) {
101
+ return (await readInputRaw(relativePath)).toString("utf8");
102
+ }
103
+ /**
104
+ * Read and `JSON.parse` a file from the configured input files
105
+ * destination. Parse errors from a malformed file propagate as the
106
+ * standard `SyntaxError` thrown by `JSON.parse`.
107
+ *
108
+ * See `readInputBuffer()` for path-sanitization and configuration
109
+ * semantics.
110
+ */
111
+ async function readInputJson(relativePath) {
112
+ const text = await readInputText(relativePath);
113
+ return JSON.parse(text);
114
+ }
115
+ async function readInputRaw(relativePath) {
116
+ const destination = readInputDestination();
117
+ const safeRelative = sanitizeRelativePath(relativePath);
118
+ if (destination.startsWith(GCS_PREFIX)) return readGcsObject(destination, safeRelative);
119
+ return readFile(path.join(path.resolve(destination), safeRelative));
120
+ }
121
+ async function readGcsObject(uri, safeRelative) {
122
+ const { bucket, prefix } = parseGcsUri(uri);
123
+ const [buf] = await (await getStorageClient()).bucket(bucket).file(joinGcsPath(prefix, safeRelative)).download();
124
+ return buf;
125
+ }
80
126
  async function listLocalFiles(basePath) {
81
127
  return (await readdir(basePath, { withFileTypes: true })).filter((entry) => entry.isFile()).map((entry) => entry.name).sort();
82
128
  }
@@ -231,5 +277,5 @@ function contentTypeFor(relativePath) {
231
277
  }
232
278
 
233
279
  //#endregion
234
- export { fileInput, getFileWriter, getInputFilesPath, getOutputFilesPath, listInputFiles };
280
+ export { fileInput, getFileWriter, getInputFilesPath, getOutputFilesPath, listInputFiles, readInputBuffer, readInputJson, readInputText };
235
281
  //# sourceMappingURL=files.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"files.mjs","names":[],"sources":["../src/files.ts"],"sourcesContent":["import { mkdir, readdir, writeFile } from \"node:fs/promises\";\nimport path from \"node:path\";\nimport { consola } from \"consola\";\nimport { z } from \"zod\";\nimport type { Storage } from \"@google-cloud/storage\";\n\n/**\n * Writer for job output files, produced by `getFileWriter()`. A single\n * writer instance targets the destination configured via the\n * `outputFilesPath` runner option (local directory or `gs://` URI). All\n * methods return the resolved absolute path or `gs://` URI of the written\n * file.\n */\nexport interface FileWriter {\n /**\n * Write a value as pretty-printed JSON (2-space indent, trailing newline).\n * The `.json` extension is added if missing.\n */\n writeJson(relativePath: string, data: unknown): Promise<string>;\n /** Write a UTF-8 string (e.g. CSV, SVG, plain text). */\n writeText(relativePath: string, content: string): Promise<string>;\n /** Write a binary buffer. */\n writeBuffer(\n relativePath: string,\n content: Buffer | Uint8Array,\n ): Promise<string>;\n}\n\nconst GCS_PREFIX = \"gs://\";\n\n/**\n * Return the resolved input files destination — where the job reads from.\n * Local paths are resolved to an absolute filesystem path; `gs://` URIs\n * are validated and canonicalized (trailing slashes stripped). Use this\n * to locate fixtures, reference datasets, or files produced by another\n * job, and read them with `node:fs` or `@google-cloud/storage` directly.\n *\n * Throws when no input destination is configured or the configured\n * `gs://` URI is malformed (e.g. missing bucket).\n */\nexport function getInputFilesPath(): string {\n return resolveDestination(readInputDestination());\n}\n\n/**\n * Return the resolved output files destination — where `getFileWriter()`\n * writes to. Local paths are resolved to an absolute filesystem path;\n * `gs://` URIs are validated and canonicalized (trailing slashes\n * stripped). Useful when a job needs to read back its own artifacts\n * (e.g., to chain steps within a handler) or pass the destination to\n * another tool.\n *\n * Throws when no output destination is configured or the configured\n * `gs://` URI is malformed (e.g. missing bucket).\n */\nexport function getOutputFilesPath(): string {\n return resolveDestination(readOutputDestination());\n}\n\n/**\n * Return a writer that persists files to the destination configured via\n * the `outputFilesPath` runner option. Local paths are used for local\n * execution; `gs://bucket[/prefix]` URIs are used for Cloud Run\n * deployments.\n *\n * Throws when no output destination is configured.\n */\nexport function getFileWriter(): FileWriter {\n const destination = readOutputDestination();\n\n if (destination.startsWith(GCS_PREFIX)) {\n return createGcsWriter(destination);\n }\n\n return createLocalWriter(destination);\n}\n\n/**\n * Mark a Zod string field as a file input. Interactive mode detects this\n * marker and offers a selectable list of files from `getInputFilesPath()`\n * instead of a free-text prompt.\n *\n * ```ts\n * z.object({\n * file: fileInput().describe(\"CSV to process\"),\n * });\n * ```\n *\n * Chains with `.describe()`, `.optional()`, `.default()` as usual.\n */\nexport function fileInput() {\n return z.string().meta({ kind: \"file\" });\n}\n\n/**\n * List filenames available under the configured input files destination.\n * Returns names relative to the destination directory (no leading slash).\n * Results are sorted alphabetically.\n *\n * Local destinations list only top-level files (nested directories are\n * skipped for now). `gs://` destinations list every object under the\n * configured prefix.\n *\n * Throws when no input destination is configured.\n */\nexport async function listInputFiles(): Promise<string[]> {\n const destination = readInputDestination();\n if (destination.startsWith(GCS_PREFIX)) {\n return listGcsFiles(destination);\n }\n return listLocalFiles(path.resolve(destination));\n}\n\nasync function listLocalFiles(basePath: string): Promise<string[]> {\n const entries = await readdir(basePath, { withFileTypes: true });\n return entries\n .filter((entry) => entry.isFile())\n .map((entry) => entry.name)\n .sort();\n}\n\nasync function listGcsFiles(uri: string): Promise<string[]> {\n const { bucket, prefix } = parseGcsUri(uri);\n\n /**\n * Terminate the list prefix with `/` so GCS treats it as a folder and\n * not a bare substring — otherwise a configured `inputs` would also\n * match sibling keys like `inputs2/…` or `inputs-backup/…`.\n */\n const listPrefix = prefix ? `${prefix}/` : undefined;\n const stripLen = listPrefix ? listPrefix.length : 0;\n\n const storage = await getStorageClient();\n const [files] = await storage.bucket(bucket).getFiles({\n prefix: listPrefix,\n });\n\n return files\n .map((file) => file.name.slice(stripLen))\n .filter((name) => name.length > 0 && !name.endsWith(\"/\"))\n .sort();\n}\n\nfunction resolveDestination(destination: string): string {\n if (destination.startsWith(GCS_PREFIX)) {\n /**\n * Validate and canonicalize via the same parser the writer uses, so\n * read and write paths stay consistent — reject `gs://` without a\n * bucket and strip any trailing slash from the prefix.\n */\n const { bucket, prefix } = parseGcsUri(destination);\n return prefix\n ? `${GCS_PREFIX}${bucket}/${prefix}`\n : `${GCS_PREFIX}${bucket}`;\n }\n return path.resolve(destination);\n}\n\nfunction readInputDestination(): string {\n const destination = process.env.JOB_INPUT_FILES_PATH;\n\n if (!destination) {\n throw new Error(\n \"No input files destination configured.\\n\" +\n \"Set `localInputFilesPath` or the current environment's \" +\n \"`inputFilesPath` in your job-runner.config.ts, or set \" +\n \"JOB_INPUT_FILES_PATH directly in the environment.\",\n );\n }\n\n return destination;\n}\n\nfunction readOutputDestination(): string {\n const destination = process.env.JOB_OUTPUT_FILES_PATH;\n\n if (!destination) {\n throw new Error(\n \"No output files destination configured.\\n\" +\n \"Set `localOutputFilesPath` or the current environment's \" +\n \"`outputFilesPath` in your job-runner.config.ts, or set \" +\n \"JOB_OUTPUT_FILES_PATH directly in the environment.\",\n );\n }\n\n return destination;\n}\n\nfunction createLocalWriter(basePath: string): FileWriter {\n const resolvedBase = path.resolve(basePath);\n\n /** Takes an already-sanitized relative path and writes the content. */\n async function write(\n safeRelative: string,\n content: string | Uint8Array,\n ): Promise<string> {\n const fullPath = path.join(resolvedBase, safeRelative);\n\n await mkdir(path.dirname(fullPath), { recursive: true });\n await writeFile(fullPath, content);\n\n consola.info(`File written: ${fullPath}`);\n return fullPath;\n }\n\n return {\n async writeJson(relativePath, data) {\n /** Sanitize before appending the extension so \"\" doesn't become \".json\". */\n const withExtension = ensureExtension(\n sanitizeRelativePath(relativePath),\n \".json\",\n );\n return write(withExtension, formatJson(data));\n },\n async writeText(relativePath, content) {\n return write(sanitizeRelativePath(relativePath), content);\n },\n async writeBuffer(relativePath, content) {\n return write(sanitizeRelativePath(relativePath), content);\n },\n };\n}\n\ninterface GcsTarget {\n bucket: string;\n prefix: string;\n}\n\nfunction createGcsWriter(uri: string): FileWriter {\n const target = parseGcsUri(uri);\n\n /** Takes an already-sanitized relative path and uploads the content. */\n async function write(\n safeRelative: string,\n content: string | Buffer | Uint8Array,\n contentType: string,\n ): Promise<string> {\n const objectName = joinGcsPath(target.prefix, safeRelative);\n const fullUri = `${GCS_PREFIX}${target.bucket}/${objectName}`;\n\n const storage = await getStorageClient();\n const file = storage.bucket(target.bucket).file(objectName);\n\n const body =\n typeof content === \"string\" || Buffer.isBuffer(content)\n ? content\n : Buffer.from(content);\n\n await file.save(body, {\n contentType,\n resumable: false,\n });\n\n consola.info(`File written: ${fullUri}`);\n return fullUri;\n }\n\n return {\n async writeJson(relativePath, data) {\n /** Sanitize before appending the extension so \"\" doesn't become \".json\". */\n const withExtension = ensureExtension(\n sanitizeRelativePath(relativePath),\n \".json\",\n );\n return write(withExtension, formatJson(data), \"application/json\");\n },\n async writeText(relativePath, content) {\n const safe = sanitizeRelativePath(relativePath);\n return write(safe, content, contentTypeFor(safe));\n },\n async writeBuffer(relativePath, content) {\n return write(\n sanitizeRelativePath(relativePath),\n content,\n \"application/octet-stream\",\n );\n },\n };\n}\n\n/**\n * Lazy Storage client singleton. The `@google-cloud/storage` module is only\n * loaded when a `gs://` destination is actually used for a write.\n */\nlet storageClient: Storage | null = null;\nasync function getStorageClient(): Promise<Storage> {\n if (storageClient) return storageClient;\n const { Storage: StorageCtor } = await import(\"@google-cloud/storage\");\n storageClient = new StorageCtor();\n return storageClient;\n}\n\nfunction parseGcsUri(uri: string): GcsTarget {\n const withoutScheme = uri.slice(GCS_PREFIX.length);\n const slashIndex = withoutScheme.indexOf(\"/\");\n\n if (slashIndex === -1) {\n if (!withoutScheme) {\n throw new Error(`Invalid GCS URI: \"${uri}\" (missing bucket name)`);\n }\n return { bucket: withoutScheme, prefix: \"\" };\n }\n\n const bucket = withoutScheme.slice(0, slashIndex);\n const prefix = withoutScheme.slice(slashIndex + 1).replace(/\\/+$/, \"\");\n\n if (!bucket) {\n throw new Error(`Invalid GCS URI: \"${uri}\" (missing bucket name)`);\n }\n\n return { bucket, prefix };\n}\n\nfunction joinGcsPath(prefix: string, relative: string): string {\n const normalizedRelative = relative.replace(/^\\/+/, \"\");\n return prefix ? `${prefix}/${normalizedRelative}` : normalizedRelative;\n}\n\nfunction sanitizeRelativePath(relativePath: string): string {\n if (!relativePath || relativePath.trim() === \"\") {\n throw new Error(\"File path is empty\");\n }\n\n if (path.isAbsolute(relativePath) || relativePath.startsWith(\"/\")) {\n throw new Error(\n `File path must be relative, got \"${relativePath}\". ` +\n \"Absolute paths are not allowed.\",\n );\n }\n\n const normalized = path.posix.normalize(relativePath.replace(/\\\\/g, \"/\"));\n\n /**\n * Reject `..` only as a distinct path segment, not as a prefix. A filename\n * like \"..hidden\" is a legitimate dotfile variant, while \"..\" / \"../x\" /\n * \"a/../../x\" all produce a `..` segment after normalization.\n */\n if (normalized.split(\"/\").includes(\"..\")) {\n throw new Error(\n `File path must not traverse upward, got \"${relativePath}\".`,\n );\n }\n\n return normalized;\n}\n\nfunction ensureExtension(relativePath: string, extension: string): string {\n return relativePath.endsWith(extension)\n ? relativePath\n : `${relativePath}${extension}`;\n}\n\nfunction formatJson(data: unknown): string {\n return `${JSON.stringify(data, null, 2)}\\n`;\n}\n\nconst TEXT_CONTENT_TYPES: Record<string, string> = {\n \".csv\": \"text/csv; charset=utf-8\",\n \".json\": \"application/json\",\n \".svg\": \"image/svg+xml\",\n \".txt\": \"text/plain; charset=utf-8\",\n \".html\": \"text/html; charset=utf-8\",\n \".xml\": \"application/xml\",\n \".yaml\": \"application/yaml\",\n \".yml\": \"application/yaml\",\n \".md\": \"text/markdown; charset=utf-8\",\n};\n\nfunction contentTypeFor(relativePath: string): string {\n const ext = path.posix.extname(relativePath).toLowerCase();\n return TEXT_CONTENT_TYPES[ext] ?? \"text/plain; charset=utf-8\";\n}\n"],"mappings":";;;;;;AA4BA,MAAM,aAAa;;;;;;;;;;;AAYnB,SAAgB,oBAA4B;AAC1C,QAAO,mBAAmB,sBAAsB,CAAC;;;;;;;;;;;;;AAcnD,SAAgB,qBAA6B;AAC3C,QAAO,mBAAmB,uBAAuB,CAAC;;;;;;;;;;AAWpD,SAAgB,gBAA4B;CAC1C,MAAM,cAAc,uBAAuB;AAE3C,KAAI,YAAY,WAAW,WAAW,CACpC,QAAO,gBAAgB,YAAY;AAGrC,QAAO,kBAAkB,YAAY;;;;;;;;;;;;;;;AAgBvC,SAAgB,YAAY;AAC1B,QAAO,EAAE,QAAQ,CAAC,KAAK,EAAE,MAAM,QAAQ,CAAC;;;;;;;;;;;;;AAc1C,eAAsB,iBAAoC;CACxD,MAAM,cAAc,sBAAsB;AAC1C,KAAI,YAAY,WAAW,WAAW,CACpC,QAAO,aAAa,YAAY;AAElC,QAAO,eAAe,KAAK,QAAQ,YAAY,CAAC;;AAGlD,eAAe,eAAe,UAAqC;AAEjE,SADgB,MAAM,QAAQ,UAAU,EAAE,eAAe,MAAM,CAAC,EAE7D,QAAQ,UAAU,MAAM,QAAQ,CAAC,CACjC,KAAK,UAAU,MAAM,KAAK,CAC1B,MAAM;;AAGX,eAAe,aAAa,KAAgC;CAC1D,MAAM,EAAE,QAAQ,WAAW,YAAY,IAAI;;;;;;CAO3C,MAAM,aAAa,SAAS,GAAG,OAAO,KAAK;CAC3C,MAAM,WAAW,aAAa,WAAW,SAAS;CAGlD,MAAM,CAAC,SAAS,OADA,MAAM,kBAAkB,EACV,OAAO,OAAO,CAAC,SAAS,EACpD,QAAQ,YACT,CAAC;AAEF,QAAO,MACJ,KAAK,SAAS,KAAK,KAAK,MAAM,SAAS,CAAC,CACxC,QAAQ,SAAS,KAAK,SAAS,KAAK,CAAC,KAAK,SAAS,IAAI,CAAC,CACxD,MAAM;;AAGX,SAAS,mBAAmB,aAA6B;AACvD,KAAI,YAAY,WAAW,WAAW,EAAE;;;;;;EAMtC,MAAM,EAAE,QAAQ,WAAW,YAAY,YAAY;AACnD,SAAO,SACH,GAAG,aAAa,OAAO,GAAG,WAC1B,GAAG,aAAa;;AAEtB,QAAO,KAAK,QAAQ,YAAY;;AAGlC,SAAS,uBAA+B;CACtC,MAAM,cAAc,QAAQ,IAAI;AAEhC,KAAI,CAAC,YACH,OAAM,IAAI,MACR,yMAID;AAGH,QAAO;;AAGT,SAAS,wBAAgC;CACvC,MAAM,cAAc,QAAQ,IAAI;AAEhC,KAAI,CAAC,YACH,OAAM,IAAI,MACR,6MAID;AAGH,QAAO;;AAGT,SAAS,kBAAkB,UAA8B;CACvD,MAAM,eAAe,KAAK,QAAQ,SAAS;;CAG3C,eAAe,MACb,cACA,SACiB;EACjB,MAAM,WAAW,KAAK,KAAK,cAAc,aAAa;AAEtD,QAAM,MAAM,KAAK,QAAQ,SAAS,EAAE,EAAE,WAAW,MAAM,CAAC;AACxD,QAAM,UAAU,UAAU,QAAQ;AAElC,UAAQ,KAAK,iBAAiB,WAAW;AACzC,SAAO;;AAGT,QAAO;EACL,MAAM,UAAU,cAAc,MAAM;AAMlC,UAAO,MAJe,gBACpB,qBAAqB,aAAa,EAClC,QACD,EAC2B,WAAW,KAAK,CAAC;;EAE/C,MAAM,UAAU,cAAc,SAAS;AACrC,UAAO,MAAM,qBAAqB,aAAa,EAAE,QAAQ;;EAE3D,MAAM,YAAY,cAAc,SAAS;AACvC,UAAO,MAAM,qBAAqB,aAAa,EAAE,QAAQ;;EAE5D;;AAQH,SAAS,gBAAgB,KAAyB;CAChD,MAAM,SAAS,YAAY,IAAI;;CAG/B,eAAe,MACb,cACA,SACA,aACiB;EACjB,MAAM,aAAa,YAAY,OAAO,QAAQ,aAAa;EAC3D,MAAM,UAAU,GAAG,aAAa,OAAO,OAAO,GAAG;EAGjD,MAAM,QADU,MAAM,kBAAkB,EACnB,OAAO,OAAO,OAAO,CAAC,KAAK,WAAW;EAE3D,MAAM,OACJ,OAAO,YAAY,YAAY,OAAO,SAAS,QAAQ,GACnD,UACA,OAAO,KAAK,QAAQ;AAE1B,QAAM,KAAK,KAAK,MAAM;GACpB;GACA,WAAW;GACZ,CAAC;AAEF,UAAQ,KAAK,iBAAiB,UAAU;AACxC,SAAO;;AAGT,QAAO;EACL,MAAM,UAAU,cAAc,MAAM;AAMlC,UAAO,MAJe,gBACpB,qBAAqB,aAAa,EAClC,QACD,EAC2B,WAAW,KAAK,EAAE,mBAAmB;;EAEnE,MAAM,UAAU,cAAc,SAAS;GACrC,MAAM,OAAO,qBAAqB,aAAa;AAC/C,UAAO,MAAM,MAAM,SAAS,eAAe,KAAK,CAAC;;EAEnD,MAAM,YAAY,cAAc,SAAS;AACvC,UAAO,MACL,qBAAqB,aAAa,EAClC,SACA,2BACD;;EAEJ;;;;;;AAOH,IAAI,gBAAgC;AACpC,eAAe,mBAAqC;AAClD,KAAI,cAAe,QAAO;CAC1B,MAAM,EAAE,SAAS,gBAAgB,MAAM,OAAO;AAC9C,iBAAgB,IAAI,aAAa;AACjC,QAAO;;AAGT,SAAS,YAAY,KAAwB;CAC3C,MAAM,gBAAgB,IAAI,MAAM,EAAkB;CAClD,MAAM,aAAa,cAAc,QAAQ,IAAI;AAE7C,KAAI,eAAe,IAAI;AACrB,MAAI,CAAC,cACH,OAAM,IAAI,MAAM,qBAAqB,IAAI,yBAAyB;AAEpE,SAAO;GAAE,QAAQ;GAAe,QAAQ;GAAI;;CAG9C,MAAM,SAAS,cAAc,MAAM,GAAG,WAAW;CACjD,MAAM,SAAS,cAAc,MAAM,aAAa,EAAE,CAAC,QAAQ,QAAQ,GAAG;AAEtE,KAAI,CAAC,OACH,OAAM,IAAI,MAAM,qBAAqB,IAAI,yBAAyB;AAGpE,QAAO;EAAE;EAAQ;EAAQ;;AAG3B,SAAS,YAAY,QAAgB,UAA0B;CAC7D,MAAM,qBAAqB,SAAS,QAAQ,QAAQ,GAAG;AACvD,QAAO,SAAS,GAAG,OAAO,GAAG,uBAAuB;;AAGtD,SAAS,qBAAqB,cAA8B;AAC1D,KAAI,CAAC,gBAAgB,aAAa,MAAM,KAAK,GAC3C,OAAM,IAAI,MAAM,qBAAqB;AAGvC,KAAI,KAAK,WAAW,aAAa,IAAI,aAAa,WAAW,IAAI,CAC/D,OAAM,IAAI,MACR,oCAAoC,aAAa,oCAElD;CAGH,MAAM,aAAa,KAAK,MAAM,UAAU,aAAa,QAAQ,OAAO,IAAI,CAAC;;;;;;AAOzE,KAAI,WAAW,MAAM,IAAI,CAAC,SAAS,KAAK,CACtC,OAAM,IAAI,MACR,4CAA4C,aAAa,IAC1D;AAGH,QAAO;;AAGT,SAAS,gBAAgB,cAAsB,WAA2B;AACxE,QAAO,aAAa,SAAS,UAAU,GACnC,eACA,GAAG,eAAe;;AAGxB,SAAS,WAAW,MAAuB;AACzC,QAAO,GAAG,KAAK,UAAU,MAAM,MAAM,EAAE,CAAC;;AAG1C,MAAM,qBAA6C;CACjD,QAAQ;CACR,SAAS;CACT,QAAQ;CACR,QAAQ;CACR,SAAS;CACT,QAAQ;CACR,SAAS;CACT,QAAQ;CACR,OAAO;CACR;AAED,SAAS,eAAe,cAA8B;AAEpD,QAAO,mBADK,KAAK,MAAM,QAAQ,aAAa,CAAC,aAAa,KACxB"}
1
+ {"version":3,"file":"files.mjs","names":[],"sources":["../src/files.ts"],"sourcesContent":["import { mkdir, readdir, readFile, writeFile } from \"node:fs/promises\";\nimport path from \"node:path\";\nimport { consola } from \"consola\";\nimport { z } from \"zod\";\nimport type { Storage } from \"@google-cloud/storage\";\n\n/**\n * Writer for job output files, produced by `getFileWriter()`. A single\n * writer instance targets the destination configured via the\n * `outputFilesPath` runner option (local directory or `gs://` URI). All\n * methods return the resolved absolute path or `gs://` URI of the written\n * file.\n */\nexport interface FileWriter {\n /**\n * Write a value as pretty-printed JSON (2-space indent, trailing newline).\n * The `.json` extension is added if missing.\n */\n writeJson(relativePath: string, data: unknown): Promise<string>;\n /** Write a UTF-8 string (e.g. CSV, SVG, plain text). */\n writeText(relativePath: string, content: string): Promise<string>;\n /** Write a binary buffer. */\n writeBuffer(\n relativePath: string,\n content: Buffer | Uint8Array,\n ): Promise<string>;\n}\n\nconst GCS_PREFIX = \"gs://\";\n\n/**\n * Return the resolved input files destination — where the job reads from.\n * Local paths are resolved to an absolute filesystem path; `gs://` URIs\n * are validated and canonicalized (trailing slashes stripped). Use this\n * to locate fixtures, reference datasets, or files produced by another\n * job, and read them with `node:fs` or `@google-cloud/storage` directly.\n *\n * Throws when no input destination is configured or the configured\n * `gs://` URI is malformed (e.g. missing bucket).\n */\nexport function getInputFilesPath(): string {\n return resolveDestination(readInputDestination());\n}\n\n/**\n * Return the resolved output files destination — where `getFileWriter()`\n * writes to. Local paths are resolved to an absolute filesystem path;\n * `gs://` URIs are validated and canonicalized (trailing slashes\n * stripped). Useful when a job needs to read back its own artifacts\n * (e.g., to chain steps within a handler) or pass the destination to\n * another tool.\n *\n * Throws when no output destination is configured or the configured\n * `gs://` URI is malformed (e.g. missing bucket).\n */\nexport function getOutputFilesPath(): string {\n return resolveDestination(readOutputDestination());\n}\n\n/**\n * Return a writer that persists files to the destination configured via\n * the `outputFilesPath` runner option. Local paths are used for local\n * execution; `gs://bucket[/prefix]` URIs are used for Cloud Run\n * deployments.\n *\n * Throws when no output destination is configured.\n */\nexport function getFileWriter(): FileWriter {\n const destination = readOutputDestination();\n\n if (destination.startsWith(GCS_PREFIX)) {\n return createGcsWriter(destination);\n }\n\n return createLocalWriter(destination);\n}\n\n/**\n * Mark a Zod string field as a file input. Interactive mode detects this\n * marker and offers a selectable list of files from `getInputFilesPath()`\n * instead of a free-text prompt.\n *\n * ```ts\n * z.object({\n * file: fileInput().describe(\"CSV to process\"),\n * });\n * ```\n *\n * Chains with `.describe()`, `.optional()`, `.default()` as usual.\n */\nexport function fileInput() {\n return z.string().meta({ kind: \"file\" });\n}\n\n/**\n * List filenames available under the configured input files destination.\n * Returns names relative to the destination directory (no leading slash).\n * Results are sorted alphabetically.\n *\n * Local destinations list only top-level files (nested directories are\n * skipped for now). `gs://` destinations list every object under the\n * configured prefix.\n *\n * Throws when no input destination is configured.\n */\nexport async function listInputFiles(): Promise<string[]> {\n const destination = readInputDestination();\n if (destination.startsWith(GCS_PREFIX)) {\n return listGcsFiles(destination);\n }\n return listLocalFiles(path.resolve(destination));\n}\n\n/**\n * Read a file from the configured input files destination as raw bytes.\n * Local paths and `gs://` URIs are handled transparently.\n *\n * `relativePath` is sanitized the same way the writer sanitizes output\n * paths — absolute paths and upward traversal (`..`) are rejected so a\n * job can't escape its configured destination.\n *\n * Throws when no input destination is configured.\n */\nexport async function readInputBuffer(relativePath: string): Promise<Buffer> {\n return readInputRaw(relativePath);\n}\n\n/**\n * Read a UTF-8 text file from the configured input files destination.\n * Use this for CSV, JSON, plain text, SVG — anything character-based.\n *\n * See `readInputBuffer()` for path-sanitization and configuration\n * semantics.\n */\nexport async function readInputText(relativePath: string): Promise<string> {\n const buffer = await readInputRaw(relativePath);\n return buffer.toString(\"utf8\");\n}\n\n/**\n * Read and `JSON.parse` a file from the configured input files\n * destination. Parse errors from a malformed file propagate as the\n * standard `SyntaxError` thrown by `JSON.parse`.\n *\n * See `readInputBuffer()` for path-sanitization and configuration\n * semantics.\n */\nexport async function readInputJson<T = unknown>(\n relativePath: string,\n): Promise<T> {\n const text = await readInputText(relativePath);\n return JSON.parse(text) as T;\n}\n\nasync function readInputRaw(relativePath: string): Promise<Buffer> {\n const destination = readInputDestination();\n const safeRelative = sanitizeRelativePath(relativePath);\n\n if (destination.startsWith(GCS_PREFIX)) {\n return readGcsObject(destination, safeRelative);\n }\n\n return readFile(path.join(path.resolve(destination), safeRelative));\n}\n\nasync function readGcsObject(\n uri: string,\n safeRelative: string,\n): Promise<Buffer> {\n const { bucket, prefix } = parseGcsUri(uri);\n const storage = await getStorageClient();\n const [buf] = await storage\n .bucket(bucket)\n .file(joinGcsPath(prefix, safeRelative))\n .download();\n return buf;\n}\n\nasync function listLocalFiles(basePath: string): Promise<string[]> {\n const entries = await readdir(basePath, { withFileTypes: true });\n return entries\n .filter((entry) => entry.isFile())\n .map((entry) => entry.name)\n .sort();\n}\n\nasync function listGcsFiles(uri: string): Promise<string[]> {\n const { bucket, prefix } = parseGcsUri(uri);\n\n /**\n * Terminate the list prefix with `/` so GCS treats it as a folder and\n * not a bare substring — otherwise a configured `inputs` would also\n * match sibling keys like `inputs2/…` or `inputs-backup/…`.\n */\n const listPrefix = prefix ? `${prefix}/` : undefined;\n const stripLen = listPrefix ? listPrefix.length : 0;\n\n const storage = await getStorageClient();\n const [files] = await storage.bucket(bucket).getFiles({\n prefix: listPrefix,\n });\n\n return files\n .map((file) => file.name.slice(stripLen))\n .filter((name) => name.length > 0 && !name.endsWith(\"/\"))\n .sort();\n}\n\nfunction resolveDestination(destination: string): string {\n if (destination.startsWith(GCS_PREFIX)) {\n /**\n * Validate and canonicalize via the same parser the writer uses, so\n * read and write paths stay consistent — reject `gs://` without a\n * bucket and strip any trailing slash from the prefix.\n */\n const { bucket, prefix } = parseGcsUri(destination);\n return prefix\n ? `${GCS_PREFIX}${bucket}/${prefix}`\n : `${GCS_PREFIX}${bucket}`;\n }\n return path.resolve(destination);\n}\n\nfunction readInputDestination(): string {\n const destination = process.env.JOB_INPUT_FILES_PATH;\n\n if (!destination) {\n throw new Error(\n \"No input files destination configured.\\n\" +\n \"Set `localInputFilesPath` or the current environment's \" +\n \"`inputFilesPath` in your job-runner.config.ts, or set \" +\n \"JOB_INPUT_FILES_PATH directly in the environment.\",\n );\n }\n\n return destination;\n}\n\nfunction readOutputDestination(): string {\n const destination = process.env.JOB_OUTPUT_FILES_PATH;\n\n if (!destination) {\n throw new Error(\n \"No output files destination configured.\\n\" +\n \"Set `localOutputFilesPath` or the current environment's \" +\n \"`outputFilesPath` in your job-runner.config.ts, or set \" +\n \"JOB_OUTPUT_FILES_PATH directly in the environment.\",\n );\n }\n\n return destination;\n}\n\nfunction createLocalWriter(basePath: string): FileWriter {\n const resolvedBase = path.resolve(basePath);\n\n /** Takes an already-sanitized relative path and writes the content. */\n async function write(\n safeRelative: string,\n content: string | Uint8Array,\n ): Promise<string> {\n const fullPath = path.join(resolvedBase, safeRelative);\n\n await mkdir(path.dirname(fullPath), { recursive: true });\n await writeFile(fullPath, content);\n\n consola.info(`File written: ${fullPath}`);\n return fullPath;\n }\n\n return {\n async writeJson(relativePath, data) {\n /** Sanitize before appending the extension so \"\" doesn't become \".json\". */\n const withExtension = ensureExtension(\n sanitizeRelativePath(relativePath),\n \".json\",\n );\n return write(withExtension, formatJson(data));\n },\n async writeText(relativePath, content) {\n return write(sanitizeRelativePath(relativePath), content);\n },\n async writeBuffer(relativePath, content) {\n return write(sanitizeRelativePath(relativePath), content);\n },\n };\n}\n\ninterface GcsTarget {\n bucket: string;\n prefix: string;\n}\n\nfunction createGcsWriter(uri: string): FileWriter {\n const target = parseGcsUri(uri);\n\n /** Takes an already-sanitized relative path and uploads the content. */\n async function write(\n safeRelative: string,\n content: string | Buffer | Uint8Array,\n contentType: string,\n ): Promise<string> {\n const objectName = joinGcsPath(target.prefix, safeRelative);\n const fullUri = `${GCS_PREFIX}${target.bucket}/${objectName}`;\n\n const storage = await getStorageClient();\n const file = storage.bucket(target.bucket).file(objectName);\n\n const body =\n typeof content === \"string\" || Buffer.isBuffer(content)\n ? content\n : Buffer.from(content);\n\n await file.save(body, {\n contentType,\n resumable: false,\n });\n\n consola.info(`File written: ${fullUri}`);\n return fullUri;\n }\n\n return {\n async writeJson(relativePath, data) {\n /** Sanitize before appending the extension so \"\" doesn't become \".json\". */\n const withExtension = ensureExtension(\n sanitizeRelativePath(relativePath),\n \".json\",\n );\n return write(withExtension, formatJson(data), \"application/json\");\n },\n async writeText(relativePath, content) {\n const safe = sanitizeRelativePath(relativePath);\n return write(safe, content, contentTypeFor(safe));\n },\n async writeBuffer(relativePath, content) {\n return write(\n sanitizeRelativePath(relativePath),\n content,\n \"application/octet-stream\",\n );\n },\n };\n}\n\n/**\n * Lazy Storage client singleton. The `@google-cloud/storage` module is only\n * loaded when a `gs://` destination is actually used for a write.\n */\nlet storageClient: Storage | null = null;\nasync function getStorageClient(): Promise<Storage> {\n if (storageClient) return storageClient;\n const { Storage: StorageCtor } = await import(\"@google-cloud/storage\");\n storageClient = new StorageCtor();\n return storageClient;\n}\n\nfunction parseGcsUri(uri: string): GcsTarget {\n const withoutScheme = uri.slice(GCS_PREFIX.length);\n const slashIndex = withoutScheme.indexOf(\"/\");\n\n if (slashIndex === -1) {\n if (!withoutScheme) {\n throw new Error(`Invalid GCS URI: \"${uri}\" (missing bucket name)`);\n }\n return { bucket: withoutScheme, prefix: \"\" };\n }\n\n const bucket = withoutScheme.slice(0, slashIndex);\n const prefix = withoutScheme.slice(slashIndex + 1).replace(/\\/+$/, \"\");\n\n if (!bucket) {\n throw new Error(`Invalid GCS URI: \"${uri}\" (missing bucket name)`);\n }\n\n return { bucket, prefix };\n}\n\nfunction joinGcsPath(prefix: string, relative: string): string {\n const normalizedRelative = relative.replace(/^\\/+/, \"\");\n return prefix ? `${prefix}/${normalizedRelative}` : normalizedRelative;\n}\n\nfunction sanitizeRelativePath(relativePath: string): string {\n if (!relativePath || relativePath.trim() === \"\") {\n throw new Error(\"File path is empty\");\n }\n\n if (path.isAbsolute(relativePath) || relativePath.startsWith(\"/\")) {\n throw new Error(\n `File path must be relative, got \"${relativePath}\". ` +\n \"Absolute paths are not allowed.\",\n );\n }\n\n const normalized = path.posix.normalize(relativePath.replace(/\\\\/g, \"/\"));\n\n /**\n * Reject `..` only as a distinct path segment, not as a prefix. A filename\n * like \"..hidden\" is a legitimate dotfile variant, while \"..\" / \"../x\" /\n * \"a/../../x\" all produce a `..` segment after normalization.\n */\n if (normalized.split(\"/\").includes(\"..\")) {\n throw new Error(\n `File path must not traverse upward, got \"${relativePath}\".`,\n );\n }\n\n return normalized;\n}\n\nfunction ensureExtension(relativePath: string, extension: string): string {\n return relativePath.endsWith(extension)\n ? relativePath\n : `${relativePath}${extension}`;\n}\n\nfunction formatJson(data: unknown): string {\n return `${JSON.stringify(data, null, 2)}\\n`;\n}\n\nconst TEXT_CONTENT_TYPES: Record<string, string> = {\n \".csv\": \"text/csv; charset=utf-8\",\n \".json\": \"application/json\",\n \".svg\": \"image/svg+xml\",\n \".txt\": \"text/plain; charset=utf-8\",\n \".html\": \"text/html; charset=utf-8\",\n \".xml\": \"application/xml\",\n \".yaml\": \"application/yaml\",\n \".yml\": \"application/yaml\",\n \".md\": \"text/markdown; charset=utf-8\",\n};\n\nfunction contentTypeFor(relativePath: string): string {\n const ext = path.posix.extname(relativePath).toLowerCase();\n return TEXT_CONTENT_TYPES[ext] ?? \"text/plain; charset=utf-8\";\n}\n"],"mappings":";;;;;;AA4BA,MAAM,aAAa;;;;;;;;;;;AAYnB,SAAgB,oBAA4B;AAC1C,QAAO,mBAAmB,sBAAsB,CAAC;;;;;;;;;;;;;AAcnD,SAAgB,qBAA6B;AAC3C,QAAO,mBAAmB,uBAAuB,CAAC;;;;;;;;;;AAWpD,SAAgB,gBAA4B;CAC1C,MAAM,cAAc,uBAAuB;AAE3C,KAAI,YAAY,WAAW,WAAW,CACpC,QAAO,gBAAgB,YAAY;AAGrC,QAAO,kBAAkB,YAAY;;;;;;;;;;;;;;;AAgBvC,SAAgB,YAAY;AAC1B,QAAO,EAAE,QAAQ,CAAC,KAAK,EAAE,MAAM,QAAQ,CAAC;;;;;;;;;;;;;AAc1C,eAAsB,iBAAoC;CACxD,MAAM,cAAc,sBAAsB;AAC1C,KAAI,YAAY,WAAW,WAAW,CACpC,QAAO,aAAa,YAAY;AAElC,QAAO,eAAe,KAAK,QAAQ,YAAY,CAAC;;;;;;;;;;;;AAalD,eAAsB,gBAAgB,cAAuC;AAC3E,QAAO,aAAa,aAAa;;;;;;;;;AAUnC,eAAsB,cAAc,cAAuC;AAEzE,SADe,MAAM,aAAa,aAAa,EACjC,SAAS,OAAO;;;;;;;;;;AAWhC,eAAsB,cACpB,cACY;CACZ,MAAM,OAAO,MAAM,cAAc,aAAa;AAC9C,QAAO,KAAK,MAAM,KAAK;;AAGzB,eAAe,aAAa,cAAuC;CACjE,MAAM,cAAc,sBAAsB;CAC1C,MAAM,eAAe,qBAAqB,aAAa;AAEvD,KAAI,YAAY,WAAW,WAAW,CACpC,QAAO,cAAc,aAAa,aAAa;AAGjD,QAAO,SAAS,KAAK,KAAK,KAAK,QAAQ,YAAY,EAAE,aAAa,CAAC;;AAGrE,eAAe,cACb,KACA,cACiB;CACjB,MAAM,EAAE,QAAQ,WAAW,YAAY,IAAI;CAE3C,MAAM,CAAC,OAAO,OADE,MAAM,kBAAkB,EAErC,OAAO,OAAO,CACd,KAAK,YAAY,QAAQ,aAAa,CAAC,CACvC,UAAU;AACb,QAAO;;AAGT,eAAe,eAAe,UAAqC;AAEjE,SADgB,MAAM,QAAQ,UAAU,EAAE,eAAe,MAAM,CAAC,EAE7D,QAAQ,UAAU,MAAM,QAAQ,CAAC,CACjC,KAAK,UAAU,MAAM,KAAK,CAC1B,MAAM;;AAGX,eAAe,aAAa,KAAgC;CAC1D,MAAM,EAAE,QAAQ,WAAW,YAAY,IAAI;;;;;;CAO3C,MAAM,aAAa,SAAS,GAAG,OAAO,KAAK;CAC3C,MAAM,WAAW,aAAa,WAAW,SAAS;CAGlD,MAAM,CAAC,SAAS,OADA,MAAM,kBAAkB,EACV,OAAO,OAAO,CAAC,SAAS,EACpD,QAAQ,YACT,CAAC;AAEF,QAAO,MACJ,KAAK,SAAS,KAAK,KAAK,MAAM,SAAS,CAAC,CACxC,QAAQ,SAAS,KAAK,SAAS,KAAK,CAAC,KAAK,SAAS,IAAI,CAAC,CACxD,MAAM;;AAGX,SAAS,mBAAmB,aAA6B;AACvD,KAAI,YAAY,WAAW,WAAW,EAAE;;;;;;EAMtC,MAAM,EAAE,QAAQ,WAAW,YAAY,YAAY;AACnD,SAAO,SACH,GAAG,aAAa,OAAO,GAAG,WAC1B,GAAG,aAAa;;AAEtB,QAAO,KAAK,QAAQ,YAAY;;AAGlC,SAAS,uBAA+B;CACtC,MAAM,cAAc,QAAQ,IAAI;AAEhC,KAAI,CAAC,YACH,OAAM,IAAI,MACR,yMAID;AAGH,QAAO;;AAGT,SAAS,wBAAgC;CACvC,MAAM,cAAc,QAAQ,IAAI;AAEhC,KAAI,CAAC,YACH,OAAM,IAAI,MACR,6MAID;AAGH,QAAO;;AAGT,SAAS,kBAAkB,UAA8B;CACvD,MAAM,eAAe,KAAK,QAAQ,SAAS;;CAG3C,eAAe,MACb,cACA,SACiB;EACjB,MAAM,WAAW,KAAK,KAAK,cAAc,aAAa;AAEtD,QAAM,MAAM,KAAK,QAAQ,SAAS,EAAE,EAAE,WAAW,MAAM,CAAC;AACxD,QAAM,UAAU,UAAU,QAAQ;AAElC,UAAQ,KAAK,iBAAiB,WAAW;AACzC,SAAO;;AAGT,QAAO;EACL,MAAM,UAAU,cAAc,MAAM;AAMlC,UAAO,MAJe,gBACpB,qBAAqB,aAAa,EAClC,QACD,EAC2B,WAAW,KAAK,CAAC;;EAE/C,MAAM,UAAU,cAAc,SAAS;AACrC,UAAO,MAAM,qBAAqB,aAAa,EAAE,QAAQ;;EAE3D,MAAM,YAAY,cAAc,SAAS;AACvC,UAAO,MAAM,qBAAqB,aAAa,EAAE,QAAQ;;EAE5D;;AAQH,SAAS,gBAAgB,KAAyB;CAChD,MAAM,SAAS,YAAY,IAAI;;CAG/B,eAAe,MACb,cACA,SACA,aACiB;EACjB,MAAM,aAAa,YAAY,OAAO,QAAQ,aAAa;EAC3D,MAAM,UAAU,GAAG,aAAa,OAAO,OAAO,GAAG;EAGjD,MAAM,QADU,MAAM,kBAAkB,EACnB,OAAO,OAAO,OAAO,CAAC,KAAK,WAAW;EAE3D,MAAM,OACJ,OAAO,YAAY,YAAY,OAAO,SAAS,QAAQ,GACnD,UACA,OAAO,KAAK,QAAQ;AAE1B,QAAM,KAAK,KAAK,MAAM;GACpB;GACA,WAAW;GACZ,CAAC;AAEF,UAAQ,KAAK,iBAAiB,UAAU;AACxC,SAAO;;AAGT,QAAO;EACL,MAAM,UAAU,cAAc,MAAM;AAMlC,UAAO,MAJe,gBACpB,qBAAqB,aAAa,EAClC,QACD,EAC2B,WAAW,KAAK,EAAE,mBAAmB;;EAEnE,MAAM,UAAU,cAAc,SAAS;GACrC,MAAM,OAAO,qBAAqB,aAAa;AAC/C,UAAO,MAAM,MAAM,SAAS,eAAe,KAAK,CAAC;;EAEnD,MAAM,YAAY,cAAc,SAAS;AACvC,UAAO,MACL,qBAAqB,aAAa,EAClC,SACA,2BACD;;EAEJ;;;;;;AAOH,IAAI,gBAAgC;AACpC,eAAe,mBAAqC;AAClD,KAAI,cAAe,QAAO;CAC1B,MAAM,EAAE,SAAS,gBAAgB,MAAM,OAAO;AAC9C,iBAAgB,IAAI,aAAa;AACjC,QAAO;;AAGT,SAAS,YAAY,KAAwB;CAC3C,MAAM,gBAAgB,IAAI,MAAM,EAAkB;CAClD,MAAM,aAAa,cAAc,QAAQ,IAAI;AAE7C,KAAI,eAAe,IAAI;AACrB,MAAI,CAAC,cACH,OAAM,IAAI,MAAM,qBAAqB,IAAI,yBAAyB;AAEpE,SAAO;GAAE,QAAQ;GAAe,QAAQ;GAAI;;CAG9C,MAAM,SAAS,cAAc,MAAM,GAAG,WAAW;CACjD,MAAM,SAAS,cAAc,MAAM,aAAa,EAAE,CAAC,QAAQ,QAAQ,GAAG;AAEtE,KAAI,CAAC,OACH,OAAM,IAAI,MAAM,qBAAqB,IAAI,yBAAyB;AAGpE,QAAO;EAAE;EAAQ;EAAQ;;AAG3B,SAAS,YAAY,QAAgB,UAA0B;CAC7D,MAAM,qBAAqB,SAAS,QAAQ,QAAQ,GAAG;AACvD,QAAO,SAAS,GAAG,OAAO,GAAG,uBAAuB;;AAGtD,SAAS,qBAAqB,cAA8B;AAC1D,KAAI,CAAC,gBAAgB,aAAa,MAAM,KAAK,GAC3C,OAAM,IAAI,MAAM,qBAAqB;AAGvC,KAAI,KAAK,WAAW,aAAa,IAAI,aAAa,WAAW,IAAI,CAC/D,OAAM,IAAI,MACR,oCAAoC,aAAa,oCAElD;CAGH,MAAM,aAAa,KAAK,MAAM,UAAU,aAAa,QAAQ,OAAO,IAAI,CAAC;;;;;;AAOzE,KAAI,WAAW,MAAM,IAAI,CAAC,SAAS,KAAK,CACtC,OAAM,IAAI,MACR,4CAA4C,aAAa,IAC1D;AAGH,QAAO;;AAGT,SAAS,gBAAgB,cAAsB,WAA2B;AACxE,QAAO,aAAa,SAAS,UAAU,GACnC,eACA,GAAG,eAAe;;AAGxB,SAAS,WAAW,MAAuB;AACzC,QAAO,GAAG,KAAK,UAAU,MAAM,MAAM,EAAE,CAAC;;AAG1C,MAAM,qBAA6C;CACjD,QAAQ;CACR,SAAS;CACT,QAAQ;CACR,QAAQ;CACR,SAAS;CACT,QAAQ;CACR,SAAS;CACT,QAAQ;CACR,OAAO;CACR;AAED,SAAS,eAAe,cAA8B;AAEpD,QAAO,mBADK,KAAK,MAAM,QAAQ,aAAa,CAAC,aAAa,KACxB"}
package/dist/index.d.mts CHANGED
@@ -2,8 +2,8 @@ import { CloudConfig, CloudNetworkConfig, CloudResources, RunnerConfig, RunnerEn
2
2
  import { FlagAliases, JobFunction, JobInfo, JobMetadata, JobOptions, RunJobOptions } from "./types.mjs";
3
3
  import { defineJob } from "./define-job.mjs";
4
4
  import { discoverJobs } from "./discover-jobs.mjs";
5
- import { FileWriter, fileInput, getFileWriter, getInputFilesPath, getOutputFilesPath, listInputFiles } from "./files.mjs";
5
+ import { FileWriter, fileInput, getFileWriter, getInputFilesPath, getOutputFilesPath, listInputFiles, readInputBuffer, readInputJson, readInputText } from "./files.mjs";
6
6
  import { FieldInfo, extractFieldInfo, formatZodError, generateSchemaHelp, schemaToParseArgsOptions } from "./help.mjs";
7
7
  import { TaskContext, getTaskContext } from "./task-context.mjs";
8
8
  import { runJob } from "./run-job.mjs";
9
- export { type CloudConfig, type CloudNetworkConfig, type CloudResources, type FieldInfo, type FileWriter, type FlagAliases, type JobFunction, type JobInfo, type JobMetadata, type JobOptions, type RunJobOptions, type RunnerConfig, type RunnerEnvOptions, type TaskContext, defineJob, defineRunnerConfig, defineRunnerEnv, discoverJobs, extractFieldInfo, fileInput, formatZodError, generateSchemaHelp, getFileWriter, getInputFilesPath, getOutputFilesPath, getTaskContext, listInputFiles, runJob, schemaToParseArgsOptions };
9
+ export { type CloudConfig, type CloudNetworkConfig, type CloudResources, type FieldInfo, type FileWriter, type FlagAliases, type JobFunction, type JobInfo, type JobMetadata, type JobOptions, type RunJobOptions, type RunnerConfig, type RunnerEnvOptions, type TaskContext, defineJob, defineRunnerConfig, defineRunnerEnv, discoverJobs, extractFieldInfo, fileInput, formatZodError, generateSchemaHelp, getFileWriter, getInputFilesPath, getOutputFilesPath, getTaskContext, listInputFiles, readInputBuffer, readInputJson, readInputText, runJob, schemaToParseArgsOptions };
package/dist/index.mjs CHANGED
@@ -2,8 +2,8 @@ import { defineRunnerConfig, defineRunnerEnv } from "./config.mjs";
2
2
  import { extractFieldInfo, formatZodError, generateSchemaHelp, schemaToParseArgsOptions } from "./help.mjs";
3
3
  import { defineJob } from "./define-job.mjs";
4
4
  import { discoverJobs } from "./discover-jobs.mjs";
5
- import { fileInput, getFileWriter, getInputFilesPath, getOutputFilesPath, listInputFiles } from "./files.mjs";
5
+ import { fileInput, getFileWriter, getInputFilesPath, getOutputFilesPath, listInputFiles, readInputBuffer, readInputJson, readInputText } from "./files.mjs";
6
6
  import { getTaskContext } from "./task-context.mjs";
7
7
  import { runJob } from "./run-job.mjs";
8
8
 
9
- export { defineJob, defineRunnerConfig, defineRunnerEnv, discoverJobs, extractFieldInfo, fileInput, formatZodError, generateSchemaHelp, getFileWriter, getInputFilesPath, getOutputFilesPath, getTaskContext, listInputFiles, runJob, schemaToParseArgsOptions };
9
+ export { defineJob, defineRunnerConfig, defineRunnerEnv, discoverJobs, extractFieldInfo, fileInput, formatZodError, generateSchemaHelp, getFileWriter, getInputFilesPath, getOutputFilesPath, getTaskContext, listInputFiles, readInputBuffer, readInputJson, readInputText, runJob, schemaToParseArgsOptions };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gcp-job-runner",
3
- "version": "1.9.0",
3
+ "version": "1.10.0",
4
4
  "description": "Run schema-driven Cloud Run jobs seamlessly in any environment",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -37,7 +37,7 @@
37
37
  "docs:dev": "vitepress dev docs",
38
38
  "docs:build": "vitepress build docs",
39
39
  "docs:preview": "vitepress preview docs",
40
- "prepare": "husky && tsdown"
40
+ "prepare": "lefthook install && tsdown"
41
41
  },
42
42
  "dependencies": {
43
43
  "@google-cloud/logging": "^11.2.0",
@@ -52,8 +52,7 @@
52
52
  "@codecompose/typescript-config": "3.0.0",
53
53
  "@types/node": "^25.3.0",
54
54
  "del-cli": "^7.0.0",
55
- "husky": "^9.1.7",
56
- "lint-staged": "^16.2.7",
55
+ "lefthook": "^2.1.6",
57
56
  "oxfmt": "^0.34.0",
58
57
  "oxlint": "^1.49.0",
59
58
  "tsdown": "^0.20.1",
@@ -61,15 +60,6 @@
61
60
  "vitepress": "^1.6.4",
62
61
  "vitest": "^4.0.18"
63
62
  },
64
- "lint-staged": {
65
- "*.{js,jsx,ts,tsx,mjs,cjs}": [
66
- "oxfmt",
67
- "oxlint -c .oxlintrc.json --import-plugin"
68
- ],
69
- "*.{json,yaml,yml}": [
70
- "oxfmt"
71
- ]
72
- },
73
63
  "engines": {
74
64
  "node": ">=22.6.0"
75
65
  },
package/src/files.test.ts CHANGED
@@ -9,6 +9,9 @@ import {
9
9
  getInputFilesPath,
10
10
  getOutputFilesPath,
11
11
  listInputFiles,
12
+ readInputBuffer,
13
+ readInputJson,
14
+ readInputText,
12
15
  } from "./files";
13
16
 
14
17
  describe("getFileWriter", () => {
@@ -471,3 +474,184 @@ describe("listInputFiles (gcs)", () => {
471
474
  expect(result).toEqual(["real.txt"]);
472
475
  });
473
476
  });
477
+
478
+ describe("readInputText / readInputJson / readInputBuffer (local)", () => {
479
+ let tempDir: string;
480
+ const originalInputPath = process.env.JOB_INPUT_FILES_PATH;
481
+
482
+ beforeEach(() => {
483
+ tempDir = mkdtempSync(path.join(os.tmpdir(), "read-input-test-"));
484
+ process.env.JOB_INPUT_FILES_PATH = tempDir;
485
+ });
486
+
487
+ afterEach(() => {
488
+ rmSync(tempDir, { recursive: true, force: true });
489
+ if (originalInputPath === undefined) {
490
+ delete process.env.JOB_INPUT_FILES_PATH;
491
+ } else {
492
+ process.env.JOB_INPUT_FILES_PATH = originalInputPath;
493
+ }
494
+ });
495
+
496
+ it("readInputText returns file contents decoded as UTF-8", async () => {
497
+ writeFileSync(path.join(tempDir, "notes.txt"), "héllo\nworld");
498
+ expect(await readInputText("notes.txt")).toBe("héllo\nworld");
499
+ });
500
+
501
+ it("readInputJson parses JSON", async () => {
502
+ writeFileSync(path.join(tempDir, "data.json"), '{"count":42,"ok":true}');
503
+ expect(await readInputJson("data.json")).toEqual({ count: 42, ok: true });
504
+ });
505
+
506
+ it("readInputJson preserves the generic type parameter", async () => {
507
+ interface Row {
508
+ id: number;
509
+ }
510
+ writeFileSync(path.join(tempDir, "rows.json"), '[{"id":1},{"id":2}]');
511
+ const rows = await readInputJson<Row[]>("rows.json");
512
+ expect(rows.map((row) => row.id)).toEqual([1, 2]);
513
+ });
514
+
515
+ it("readInputJson surfaces parse errors from malformed input", async () => {
516
+ writeFileSync(path.join(tempDir, "bad.json"), "{not json");
517
+ await expect(readInputJson("bad.json")).rejects.toThrow(SyntaxError);
518
+ });
519
+
520
+ it("readInputBuffer returns raw bytes unchanged", async () => {
521
+ const bytes = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x00, 0xff]);
522
+ writeFileSync(path.join(tempDir, "blob.bin"), bytes);
523
+ const result = await readInputBuffer("blob.bin");
524
+ expect(result.equals(bytes)).toBe(true);
525
+ });
526
+
527
+ it("resolves nested relative paths under the destination", async () => {
528
+ await mkdir(path.join(tempDir, "db", "airlines"), { recursive: true });
529
+ writeFileSync(
530
+ path.join(tempDir, "db", "airlines", "UA.json"),
531
+ '{"iata":"UA"}',
532
+ );
533
+ expect(await readInputJson("db/airlines/UA.json")).toEqual({ iata: "UA" });
534
+ });
535
+
536
+ it("rejects absolute paths", async () => {
537
+ await expect(readInputText("/etc/passwd")).rejects.toThrow(
538
+ /Absolute paths are not allowed/,
539
+ );
540
+ });
541
+
542
+ it("rejects upward traversal", async () => {
543
+ await expect(readInputText("../escape.txt")).rejects.toThrow(
544
+ /must not traverse upward/,
545
+ );
546
+ await expect(readInputText("a/../../escape.txt")).rejects.toThrow(
547
+ /must not traverse upward/,
548
+ );
549
+ });
550
+
551
+ it("rejects empty paths", async () => {
552
+ await expect(readInputText("")).rejects.toThrow(/empty/);
553
+ await expect(readInputJson("")).rejects.toThrow(/empty/);
554
+ await expect(readInputBuffer("")).rejects.toThrow(/empty/);
555
+ });
556
+
557
+ it("throws the standard unconfigured error when JOB_INPUT_FILES_PATH is unset", async () => {
558
+ delete process.env.JOB_INPUT_FILES_PATH;
559
+ await expect(readInputText("notes.txt")).rejects.toThrow(/inputFilesPath/);
560
+ });
561
+
562
+ it("throws the standard unconfigured error when JOB_INPUT_FILES_PATH is empty", async () => {
563
+ process.env.JOB_INPUT_FILES_PATH = "";
564
+ await expect(readInputText("notes.txt")).rejects.toThrow(/inputFilesPath/);
565
+ await expect(readInputJson("notes.txt")).rejects.toThrow(/inputFilesPath/);
566
+ await expect(readInputBuffer("notes.txt")).rejects.toThrow(
567
+ /inputFilesPath/,
568
+ );
569
+ });
570
+ });
571
+
572
+ describe("readInputText / readInputJson / readInputBuffer (gcs)", () => {
573
+ const originalInputPath = process.env.JOB_INPUT_FILES_PATH;
574
+ const downloadMock = vi.fn();
575
+ const fileMock = vi.fn(() => ({ download: downloadMock }));
576
+ const bucketMock = vi.fn(() => ({ file: fileMock }));
577
+
578
+ beforeEach(() => {
579
+ downloadMock.mockReset();
580
+ fileMock.mockClear();
581
+ bucketMock.mockClear();
582
+ vi.doMock("@google-cloud/storage", () => ({
583
+ Storage: class {
584
+ bucket = bucketMock;
585
+ },
586
+ }));
587
+ });
588
+
589
+ afterEach(() => {
590
+ vi.doUnmock("@google-cloud/storage");
591
+ vi.resetModules();
592
+ if (originalInputPath === undefined) {
593
+ delete process.env.JOB_INPUT_FILES_PATH;
594
+ } else {
595
+ process.env.JOB_INPUT_FILES_PATH = originalInputPath;
596
+ }
597
+ });
598
+
599
+ it("downloads the object and decodes it as text", async () => {
600
+ process.env.JOB_INPUT_FILES_PATH = "gs://my-bucket/inputs";
601
+ downloadMock.mockResolvedValue([Buffer.from("hello cloud", "utf8")]);
602
+
603
+ vi.resetModules();
604
+ const { readInputText: freshText } = await import("./files");
605
+ const result = await freshText("notes.txt");
606
+
607
+ expect(bucketMock).toHaveBeenCalledWith("my-bucket");
608
+ expect(fileMock).toHaveBeenCalledWith("inputs/notes.txt");
609
+ expect(result).toBe("hello cloud");
610
+ });
611
+
612
+ it("parses JSON downloaded from the configured bucket", async () => {
613
+ process.env.JOB_INPUT_FILES_PATH = "gs://my-bucket/inputs";
614
+ downloadMock.mockResolvedValue([Buffer.from('{"count":7}', "utf8")]);
615
+
616
+ vi.resetModules();
617
+ const { readInputJson: freshJson } = await import("./files");
618
+ const result = await freshJson<{ count: number }>("report.json");
619
+
620
+ expect(fileMock).toHaveBeenCalledWith("inputs/report.json");
621
+ expect(result).toEqual({ count: 7 });
622
+ });
623
+
624
+ it("returns the raw buffer without decoding", async () => {
625
+ process.env.JOB_INPUT_FILES_PATH = "gs://my-bucket/inputs";
626
+ const bytes = Buffer.from([0x00, 0xff, 0x10]);
627
+ downloadMock.mockResolvedValue([bytes]);
628
+
629
+ vi.resetModules();
630
+ const { readInputBuffer: freshBuffer } = await import("./files");
631
+ const result = await freshBuffer("blob.bin");
632
+
633
+ expect(result.equals(bytes)).toBe(true);
634
+ });
635
+
636
+ it("handles bucket-only URIs (no prefix)", async () => {
637
+ process.env.JOB_INPUT_FILES_PATH = "gs://my-bucket";
638
+ downloadMock.mockResolvedValue([Buffer.from("x", "utf8")]);
639
+
640
+ vi.resetModules();
641
+ const { readInputText: freshText } = await import("./files");
642
+ await freshText("a.txt");
643
+
644
+ expect(fileMock).toHaveBeenCalledWith("a.txt");
645
+ });
646
+
647
+ it("rejects traversal before issuing the download", async () => {
648
+ process.env.JOB_INPUT_FILES_PATH = "gs://my-bucket/inputs";
649
+
650
+ vi.resetModules();
651
+ const { readInputText: freshText } = await import("./files");
652
+ await expect(freshText("../secret.txt")).rejects.toThrow(
653
+ /must not traverse upward/,
654
+ );
655
+ expect(downloadMock).not.toHaveBeenCalled();
656
+ });
657
+ });
package/src/files.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { mkdir, readdir, writeFile } from "node:fs/promises";
1
+ import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
2
2
  import path from "node:path";
3
3
  import { consola } from "consola";
4
4
  import { z } from "zod";
@@ -111,6 +111,71 @@ export async function listInputFiles(): Promise<string[]> {
111
111
  return listLocalFiles(path.resolve(destination));
112
112
  }
113
113
 
114
+ /**
115
+ * Read a file from the configured input files destination as raw bytes.
116
+ * Local paths and `gs://` URIs are handled transparently.
117
+ *
118
+ * `relativePath` is sanitized the same way the writer sanitizes output
119
+ * paths — absolute paths and upward traversal (`..`) are rejected so a
120
+ * job can't escape its configured destination.
121
+ *
122
+ * Throws when no input destination is configured.
123
+ */
124
+ export async function readInputBuffer(relativePath: string): Promise<Buffer> {
125
+ return readInputRaw(relativePath);
126
+ }
127
+
128
+ /**
129
+ * Read a UTF-8 text file from the configured input files destination.
130
+ * Use this for CSV, JSON, plain text, SVG — anything character-based.
131
+ *
132
+ * See `readInputBuffer()` for path-sanitization and configuration
133
+ * semantics.
134
+ */
135
+ export async function readInputText(relativePath: string): Promise<string> {
136
+ const buffer = await readInputRaw(relativePath);
137
+ return buffer.toString("utf8");
138
+ }
139
+
140
+ /**
141
+ * Read and `JSON.parse` a file from the configured input files
142
+ * destination. Parse errors from a malformed file propagate as the
143
+ * standard `SyntaxError` thrown by `JSON.parse`.
144
+ *
145
+ * See `readInputBuffer()` for path-sanitization and configuration
146
+ * semantics.
147
+ */
148
+ export async function readInputJson<T = unknown>(
149
+ relativePath: string,
150
+ ): Promise<T> {
151
+ const text = await readInputText(relativePath);
152
+ return JSON.parse(text) as T;
153
+ }
154
+
155
+ async function readInputRaw(relativePath: string): Promise<Buffer> {
156
+ const destination = readInputDestination();
157
+ const safeRelative = sanitizeRelativePath(relativePath);
158
+
159
+ if (destination.startsWith(GCS_PREFIX)) {
160
+ return readGcsObject(destination, safeRelative);
161
+ }
162
+
163
+ return readFile(path.join(path.resolve(destination), safeRelative));
164
+ }
165
+
166
+ async function readGcsObject(
167
+ uri: string,
168
+ safeRelative: string,
169
+ ): Promise<Buffer> {
170
+ const { bucket, prefix } = parseGcsUri(uri);
171
+ const storage = await getStorageClient();
172
+ const [buf] = await storage
173
+ .bucket(bucket)
174
+ .file(joinGcsPath(prefix, safeRelative))
175
+ .download();
176
+ return buf;
177
+ }
178
+
114
179
  async function listLocalFiles(basePath: string): Promise<string[]> {
115
180
  const entries = await readdir(basePath, { withFileTypes: true });
116
181
  return entries
package/src/index.ts CHANGED
@@ -7,6 +7,9 @@ export {
7
7
  getInputFilesPath,
8
8
  getOutputFilesPath,
9
9
  listInputFiles,
10
+ readInputBuffer,
11
+ readInputJson,
12
+ readInputText,
10
13
  } from "./files";
11
14
  export type { FileWriter } from "./files";
12
15
  export {