@openneuro/server 4.47.7 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/package.json +10 -7
  2. package/src/app.ts +1 -1
  3. package/src/cache/__tests__/tree.spec.ts +212 -0
  4. package/src/cache/tree.ts +148 -0
  5. package/src/datalad/__tests__/dataRetentionNotifications.spec.ts +11 -0
  6. package/src/datalad/__tests__/files.spec.ts +249 -0
  7. package/src/datalad/dataRetentionNotifications.ts +5 -0
  8. package/src/datalad/dataset.ts +29 -1
  9. package/src/datalad/files.ts +362 -39
  10. package/src/datalad/snapshots.ts +29 -54
  11. package/src/graphql/resolvers/__tests__/response-status.spec.ts +42 -0
  12. package/src/graphql/resolvers/build-search-query.ts +391 -0
  13. package/src/graphql/resolvers/cache.ts +5 -1
  14. package/src/graphql/resolvers/dataset-search.ts +40 -23
  15. package/src/graphql/resolvers/datasetEvents.ts +48 -78
  16. package/src/graphql/resolvers/draft.ts +5 -2
  17. package/src/graphql/resolvers/holdDeletion.ts +21 -0
  18. package/src/graphql/resolvers/index.ts +6 -0
  19. package/src/graphql/resolvers/mutation.ts +2 -0
  20. package/src/graphql/resolvers/response-status.ts +43 -0
  21. package/src/graphql/resolvers/snapshots.ts +9 -18
  22. package/src/graphql/resolvers/summary.ts +17 -0
  23. package/src/graphql/schema.ts +54 -14
  24. package/src/handlers/datalad.ts +4 -0
  25. package/src/handlers/doi.ts +32 -36
  26. package/src/libs/doi/__tests__/doi.spec.ts +50 -12
  27. package/src/libs/doi/__tests__/validate.spec.ts +110 -0
  28. package/src/libs/doi/index.ts +108 -71
  29. package/src/libs/doi/metadata.ts +101 -0
  30. package/src/libs/doi/validate.ts +59 -0
  31. package/src/libs/presign.ts +137 -0
  32. package/src/models/dataset.ts +2 -0
  33. package/src/models/doi.ts +7 -0
  34. package/src/queues/producer-methods.ts +9 -5
  35. package/src/queues/queue-schedule.ts +1 -1
  36. package/src/queues/queues.ts +2 -2
  37. package/src/routes.ts +10 -2
  38. package/src/types/datacite/LICENSE +37 -0
  39. package/src/types/datacite/README.md +3 -0
  40. package/src/types/datacite/datacite-v4.5.json +643 -0
  41. package/src/types/datacite/datacite-v4.5.ts +281 -0
  42. package/src/types/datacite.ts +53 -63
  43. package/src/utils/datacite-mapper.ts +7 -3
  44. package/src/utils/datacite-utils.ts +12 -15
  45. package/src/libs/doi/__tests__/__snapshots__/doi.spec.ts.snap +0 -17
@@ -1,25 +1,63 @@
1
1
  import { vi } from "vitest"
2
- import { formatBasicAuth, template } from "../index.js"
2
+ import { buildPayload, createDOI, formatBasicAuth } from "../index.js"
3
3
 
4
4
  vi.mock("ioredis")
5
5
 
6
6
  describe("DOI minting utils", () => {
7
- describe("auth()", () => {
7
+ describe("formatBasicAuth()", () => {
8
8
  it("returns a base64 basic auth string", () => {
9
9
  const doiConfig = { username: "test", password: "12345" }
10
10
  expect(formatBasicAuth(doiConfig)).toBe("Basic dGVzdDoxMjM0NQ==")
11
11
  })
12
12
  })
13
- describe("template()", () => {
14
- it("accepts expected arguments", () => {
15
- const context = {
16
- doi: "12345",
17
- creators: ["A. User", "B. User"],
18
- title: "Test Dataset",
19
- year: "1999",
20
- resourceType: "fMRI",
21
- }
22
- expect(template(context)).toMatchSnapshot()
13
+
14
+ describe("createDOI()", () => {
15
+ it("creates a DOI without snapshot", () => {
16
+ const doi = createDOI("ds000001")
17
+ expect(doi).toMatch(/\/openneuro\.ds000001$/)
18
+ })
19
+ it("creates a DOI with snapshot", () => {
20
+ const doi = createDOI("ds000001", "1.0.0")
21
+ expect(doi).toMatch(/\/openneuro\.ds000001\.v1\.0\.0$/)
22
+ })
23
+ })
24
+
25
+ describe("buildPayload()", () => {
26
+ const attributes = {
27
+ doi: "10.18112/openneuro.ds000001.v1.0.0",
28
+ url: "https://openneuro.org/datasets/ds000001/versions/1.0.0",
29
+ creators: [{ name: "A. User", nameType: "Personal" as const }],
30
+ titles: [{ title: "Test Dataset" }],
31
+ publisher: { name: "OpenNeuro" },
32
+ publicationYear: "2024",
33
+ types: { resourceTypeGeneral: "Dataset" as const },
34
+ schemaVersion: "http://datacite.org/schema/kernel-4" as const,
35
+ }
36
+
37
+ it("builds a valid Datacite JSON API payload", () => {
38
+ const payload = buildPayload(attributes)
39
+ expect(payload.data.type).toBe("dois")
40
+ expect(payload.data.attributes.doi).toBe(
41
+ "10.18112/openneuro.ds000001.v1.0.0",
42
+ )
43
+ expect(payload.data.attributes.event).toBeUndefined()
44
+ expect(payload.data.attributes.schemaVersion).toBe(
45
+ "http://datacite.org/schema/kernel-4",
46
+ )
47
+ })
48
+
49
+ it("omits event when not provided", () => {
50
+ const payload = buildPayload(attributes)
51
+ expect(payload.data.attributes.event).toBeUndefined()
52
+ })
53
+
54
+ it("preserves all metadata attributes", () => {
55
+ const payload = buildPayload(attributes, "publish")
56
+ expect(payload.data.attributes.creators).toHaveLength(1)
57
+ expect(payload.data.attributes.titles[0].title).toBe("Test Dataset")
58
+ expect(payload.data.attributes.publisher.name).toBe("OpenNeuro")
59
+ expect(payload.data.attributes.publicationYear).toBe("2024")
60
+ expect(payload.data.attributes.types.resourceTypeGeneral).toBe("Dataset")
23
61
  })
24
62
  })
25
63
  })
@@ -0,0 +1,110 @@
1
+ import { vi } from "vitest"
2
+ import { validateDataciteMetadata } from "../validate.js"
3
+ import type { ResourceTypeGeneral } from "../../../types/datacite/datacite-v4.5.ts"
4
+
5
+ vi.mock("ioredis")
6
+
7
+ describe("validateDataciteMetadata", () => {
8
+ const validAttrs = {
9
+ doi: "10.18112/openneuro.ds000001.v1.0.0",
10
+ url: "https://openneuro.org/datasets/ds000001/versions/1.0.0",
11
+ creators: [{ name: "A. User", nameType: "Personal" as const }],
12
+ titles: [{ title: "Test Dataset" }],
13
+ publisher: { name: "OpenNeuro" },
14
+ publicationYear: "2024",
15
+ types: { resourceTypeGeneral: "Dataset" as const },
16
+ schemaVersion: "http://datacite.org/schema/kernel-4" as const,
17
+ }
18
+
19
+ it("returns no errors for valid metadata", () => {
20
+ expect(validateDataciteMetadata(validAttrs)).toEqual([])
21
+ })
22
+
23
+ it("requires at least one creator", () => {
24
+ const errors = validateDataciteMetadata({ ...validAttrs, creators: [] })
25
+ expect(errors).toEqual(
26
+ expect.arrayContaining([
27
+ expect.objectContaining({ field: "creators" }),
28
+ ]),
29
+ )
30
+ })
31
+
32
+ it("requires each creator to have a name", () => {
33
+ const errors = validateDataciteMetadata({
34
+ ...validAttrs,
35
+ creators: [{ name: "", nameType: "Personal" }],
36
+ })
37
+ expect(errors).toEqual(
38
+ expect.arrayContaining([
39
+ expect.objectContaining({ field: "creators" }),
40
+ ]),
41
+ )
42
+ })
43
+
44
+ it("requires at least one title", () => {
45
+ const errors = validateDataciteMetadata({ ...validAttrs, titles: [] })
46
+ expect(errors).toEqual(
47
+ expect.arrayContaining([
48
+ expect.objectContaining({ field: "titles" }),
49
+ ]),
50
+ )
51
+ })
52
+
53
+ it("requires a non-empty title", () => {
54
+ const errors = validateDataciteMetadata({
55
+ ...validAttrs,
56
+ titles: [{ title: "" }],
57
+ })
58
+ expect(errors).toEqual(
59
+ expect.arrayContaining([
60
+ expect.objectContaining({ field: "titles" }),
61
+ ]),
62
+ )
63
+ })
64
+
65
+ it("requires publisher name", () => {
66
+ const errors = validateDataciteMetadata({
67
+ ...validAttrs,
68
+ publisher: { name: "" },
69
+ })
70
+ expect(errors).toEqual(
71
+ expect.arrayContaining([
72
+ expect.objectContaining({ field: "publisher" }),
73
+ ]),
74
+ )
75
+ })
76
+
77
+ it("requires a four-digit year string for publicationYear", () => {
78
+ const errors = validateDataciteMetadata({
79
+ ...validAttrs,
80
+ publicationYear: "0",
81
+ })
82
+ expect(errors).toEqual(
83
+ expect.arrayContaining([
84
+ expect.objectContaining({ field: "publicationYear" }),
85
+ ]),
86
+ )
87
+ })
88
+
89
+ it("requires resourceTypeGeneral", () => {
90
+ const errors = validateDataciteMetadata({
91
+ ...validAttrs,
92
+ types: {
93
+ resourceTypeGeneral: "" as unknown as ResourceTypeGeneral,
94
+ },
95
+ })
96
+ expect(errors).toEqual(
97
+ expect.arrayContaining([
98
+ expect.objectContaining({ field: "types" }),
99
+ ]),
100
+ )
101
+ })
102
+
103
+ it("returns multiple errors when multiple fields are invalid", () => {
104
+ const errors = validateDataciteMetadata({
105
+ doi: "10.18112/test",
106
+ url: "https://example.com",
107
+ })
108
+ expect(errors.length).toBeGreaterThanOrEqual(4)
109
+ })
110
+ })
@@ -1,32 +1,5 @@
1
- import request from "superagent"
2
1
  import config from "../../config"
3
-
4
- export const template = ({
5
- doi,
6
- creators,
7
- title,
8
- year,
9
- resourceType,
10
- }) =>
11
- `<?xml version="1.0" encoding="UTF-8"?>
12
- <resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
13
- <identifier identifierType="DOI">${doi}</identifier>
14
- <creators>
15
- ${
16
- creators
17
- .map((creator) =>
18
- `<creator><creatorName>${creator}</creatorName></creator>`
19
- )
20
- .join("")
21
- }
22
- </creators>
23
- <titles>
24
- <title xml:lang="en-us">${title}</title>
25
- </titles>
26
- <publisher>Openneuro</publisher>
27
- <publicationYear>${year}</publicationYear>
28
- <resourceType resourceTypeGeneral="Dataset">${resourceType}</resourceType>
29
- </resource>`
2
+ import type { DataCite, DataciteDoiRequest } from "../../types/datacite"
30
3
 
31
4
  /**
32
5
  * @param {Object} doiConfig
@@ -37,50 +10,114 @@ export const formatBasicAuth = (doiConfig) =>
37
10
  "Basic " +
38
11
  Buffer.from(doiConfig.username + ":" + doiConfig.password).toString("base64")
39
12
 
40
- export default {
41
- auth: formatBasicAuth(config.doi),
42
- createDOI(accNumber, snapshotId) {
43
- let doi = config.doi.prefix + "/openneuro." + accNumber
44
- if (snapshotId) {
45
- doi = doi + ".v" + snapshotId
46
- }
47
- return doi
48
- },
13
+ /**
14
+ * Build a DOI string from dataset accession number and optional snapshot ID.
15
+ */
16
+ export function createDOI(accNumber: string, snapshotId?: string): string {
17
+ let doi = config.doi.prefix + "/openneuro." + accNumber
18
+ if (snapshotId) {
19
+ doi = doi + ".v" + snapshotId
20
+ }
21
+ return doi
22
+ }
23
+
24
+ /**
25
+ * Build the Datacite JSON API request payload.
26
+ */
27
+ export function buildPayload(
28
+ attributes: DataCite,
29
+ event?: DataCite["event"],
30
+ ): DataciteDoiRequest {
31
+ return {
32
+ data: {
33
+ type: "dois",
34
+ attributes: {
35
+ ...attributes,
36
+ ...(event ? { event } : {}),
37
+ schemaVersion: "http://datacite.org/schema/kernel-4",
38
+ },
39
+ },
40
+ }
41
+ }
42
+
43
+ /**
44
+ * Create or update a DOI via the Datacite JSON REST API.
45
+ * Uses PUT to {baseUrl}dois/{doi} which handles both create and update.
46
+ */
47
+ export async function upsertDoi(
48
+ payload: DataciteDoiRequest,
49
+ ): Promise<Response> {
50
+ const doi = payload.data.attributes.doi
51
+ const url = `${config.doi.url}dois/${encodeURIComponent(doi)}`
52
+ const response = await fetch(url, {
53
+ method: "PUT",
54
+ headers: {
55
+ "Authorization": formatBasicAuth(config.doi),
56
+ "Content-Type": "application/vnd.api+json",
57
+ },
58
+ body: JSON.stringify(payload),
59
+ })
60
+ if (!response.ok) {
61
+ const body = await response.text()
62
+ throw new Error(
63
+ `Datacite API error ${response.status} for ${doi}: ${body}`,
64
+ )
65
+ }
66
+ return response
67
+ }
68
+
69
+ /**
70
+ * Transition a DOI's state without re-sending full metadata.
71
+ */
72
+ export async function updateDoiState(
73
+ doi: string,
74
+ event: DataCite["event"],
75
+ ): Promise<void> {
76
+ const url = `${config.doi.url}dois/${encodeURIComponent(doi)}`
77
+ const payload = {
78
+ data: {
79
+ type: "dois",
80
+ attributes: { event },
81
+ },
82
+ }
83
+ const response = await fetch(url, {
84
+ method: "PUT",
85
+ headers: {
86
+ "Authorization": formatBasicAuth(config.doi),
87
+ "Content-Type": "application/vnd.api+json",
88
+ },
89
+ body: JSON.stringify(payload),
90
+ })
91
+ if (!response.ok) {
92
+ const body = await response.text()
93
+ throw new Error(
94
+ `Datacite API state transition error ${response.status} for ${doi}: ${body}`,
95
+ )
96
+ }
97
+ }
49
98
 
50
- async mintDOI(doi, url) {
51
- return await request
52
- .put(config.doi.url + "doi/" + doi)
53
- .set("Authorization", this.auth)
54
- .set("Content-Type", "text/plain;charset=UTF-8")
55
- .send("doi=" + doi + "\nurl=" + url)
56
- },
99
+ /**
100
+ * Create a draft DOI for a dataset snapshot.
101
+ * Returns the DOI string.
102
+ */
103
+ export async function createDraftDoi(
104
+ attributes: DataCite,
105
+ ): Promise<string> {
106
+ const payload = buildPayload(attributes)
107
+ await upsertDoi(payload)
108
+ return attributes.doi
109
+ }
57
110
 
58
- registerMetadata(context) {
59
- const xml = template(context)
60
- return request
61
- .post(config.doi.url + "metadata/")
62
- .set("Authorization", this.auth)
63
- .set("Content-Type", "application/xml;charset=UTF-8")
64
- .send(xml)
65
- },
111
+ /**
112
+ * Transition a DOI from draft to findable.
113
+ */
114
+ export async function publishDoi(doi: string): Promise<void> {
115
+ await updateDoiState(doi, "publish")
116
+ }
66
117
 
67
- registerSnapshotDoi(datasetId, snapshotId, oldDesc) {
68
- const baseDoi = this.createDOI(datasetId, snapshotId)
69
- const url =
70
- `https://openneuro.org/datasets/${datasetId}/versions/${snapshotId}`
71
- const context = {
72
- doi: baseDoi,
73
- creators: oldDesc.Authors.filter((x) => x),
74
- title: oldDesc.Name,
75
- year: new Date().getFullYear(),
76
- resourceType: "fMRI",
77
- }
78
- return this.registerMetadata(context)
79
- .then(() => {
80
- return this.mintDOI(baseDoi, url)
81
- })
82
- .then(() => {
83
- return baseDoi
84
- })
85
- },
118
+ /**
119
+ * Transition a DOI from findable to registered (hidden but reserved).
120
+ */
121
+ export async function hideDoi(doi: string): Promise<void> {
122
+ await updateDoiState(doi, "hide")
86
123
  }
@@ -0,0 +1,101 @@
1
+ import config from "../../config"
2
+ import { createDOI } from "./index"
3
+ import { validateDataciteMetadata } from "./validate"
4
+ import { getDataciteYml } from "../../utils/datacite-utils"
5
+ import { description } from "../../datalad/description"
6
+ import { getPrimaryModality } from "../../graphql/resolvers/summary"
7
+ import type { Creator, DataCite } from "../../types/datacite"
8
+
9
+ /**
10
+ * Assemble Datacite metadata for a DOI from datacite.yml or BIDS fallback.
11
+ *
12
+ * Priority:
13
+ * 1. If datacite.yml exists and has creators, use its attributes as the base.
14
+ * 2. Otherwise, build minimal metadata from dataset_description.json.
15
+ *
16
+ * Always ensures publisher, publicationYear, types.resourceTypeGeneral,
17
+ * doi, and url are set.
18
+ */
19
+ export async function assembleMetadata(
20
+ datasetId: string,
21
+ snapshotId: string,
22
+ revision?: string,
23
+ ): Promise<DataCite> {
24
+ const doi = createDOI(datasetId, snapshotId)
25
+ const url = `${config.url}/datasets/${datasetId}/versions/${snapshotId}`
26
+
27
+ const dataciteYml = await getDataciteYml(datasetId, revision)
28
+ const ymlAttrs = dataciteYml?.data?.attributes
29
+
30
+ // Check if datacite.yml provided meaningful creator data
31
+ const hasDataciteCreators = Array.isArray(ymlAttrs?.creators) &&
32
+ ymlAttrs.creators.length > 0
33
+
34
+ let creators: Creator[]
35
+ let titles: DataCite["titles"]
36
+ let descriptions: DataCite["descriptions"]
37
+ let contributors: DataCite["contributors"]
38
+ let resourceType: string | undefined
39
+
40
+ if (hasDataciteCreators) {
41
+ // Use datacite.yml metadata
42
+ creators = ymlAttrs.creators
43
+ titles = ymlAttrs.descriptions?.length
44
+ ? [{ title: ymlAttrs.descriptions[0].description }]
45
+ : []
46
+ descriptions = ymlAttrs.descriptions
47
+ contributors = ymlAttrs.contributors
48
+ resourceType = ymlAttrs.types?.resourceType
49
+ } else {
50
+ // Fall back to BIDS dataset_description.json
51
+ const desc = await description({
52
+ id: datasetId,
53
+ revision: revision || "HEAD",
54
+ })
55
+ creators = (desc.Authors || [])
56
+ .filter((author: string) => author)
57
+ .map((author: string) => ({
58
+ name: author,
59
+ nameType: "Personal" as const,
60
+ }))
61
+ titles = [{ title: desc.Name || datasetId }]
62
+ descriptions = desc.Description
63
+ ? [{ description: desc.Description, descriptionType: "Abstract" }]
64
+ : undefined
65
+ contributors = undefined
66
+ resourceType = await getPrimaryModality(datasetId)
67
+ }
68
+
69
+ // If datacite.yml had titles via a different path, use them
70
+ if (hasDataciteCreators && titles.length === 0) {
71
+ const desc = await description({
72
+ id: datasetId,
73
+ revision: revision || "HEAD",
74
+ })
75
+ titles = [{ title: desc.Name || datasetId }]
76
+ }
77
+
78
+ const attributes: DataCite = {
79
+ doi,
80
+ url,
81
+ creators: creators as DataCite["creators"],
82
+ titles: titles as DataCite["titles"],
83
+ publisher: { name: "OpenNeuro" },
84
+ publicationYear: String(new Date().getFullYear()),
85
+ types: {
86
+ resourceTypeGeneral: "Dataset",
87
+ ...(resourceType ? { resourceType } : {}),
88
+ },
89
+ schemaVersion: "http://datacite.org/schema/kernel-4",
90
+ ...(descriptions ? { descriptions } : {}),
91
+ ...(contributors?.length ? { contributors } : {}),
92
+ }
93
+
94
+ const errors = validateDataciteMetadata(attributes)
95
+ if (errors.length > 0) {
96
+ const messages = errors.map((e) => `${e.field}: ${e.message}`).join("; ")
97
+ throw new Error(`DOI metadata validation failed: ${messages}`)
98
+ }
99
+
100
+ return attributes
101
+ }
@@ -0,0 +1,59 @@
1
+ import type { DataCite } from "../../types/datacite"
2
+
3
+ export interface ValidationError {
4
+ field: string
5
+ message: string
6
+ }
7
+
8
+ /**
9
+ * Validate required Datacite metadata fields before submitting to the API.
10
+ * Returns an empty array if valid.
11
+ */
12
+ export function validateDataciteMetadata(
13
+ attrs: Partial<DataCite>,
14
+ ): ValidationError[] {
15
+ const errors: ValidationError[] = []
16
+
17
+ if (!Array.isArray(attrs.creators) || attrs.creators.length === 0) {
18
+ errors.push({
19
+ field: "creators",
20
+ message: "At least one creator is required",
21
+ })
22
+ } else {
23
+ for (const creator of attrs.creators) {
24
+ if (!creator.name) {
25
+ errors.push({
26
+ field: "creators",
27
+ message: "Each creator must have a name",
28
+ })
29
+ break
30
+ }
31
+ }
32
+ }
33
+
34
+ if (!Array.isArray(attrs.titles) || attrs.titles.length === 0) {
35
+ errors.push({ field: "titles", message: "At least one title is required" })
36
+ } else if (!attrs.titles[0].title) {
37
+ errors.push({ field: "titles", message: "Title must not be empty" })
38
+ }
39
+
40
+ if (!attrs.publisher?.name) {
41
+ errors.push({ field: "publisher", message: "Publisher name is required" })
42
+ }
43
+
44
+ if (!attrs.publicationYear || !/^[0-9]{4}$/.test(attrs.publicationYear)) {
45
+ errors.push({
46
+ field: "publicationYear",
47
+ message: "Publication year must be a four-digit year string",
48
+ })
49
+ }
50
+
51
+ if (!attrs.types?.resourceTypeGeneral) {
52
+ errors.push({
53
+ field: "types",
54
+ message: "resourceTypeGeneral is required",
55
+ })
56
+ }
57
+
58
+ return errors
59
+ }
@@ -0,0 +1,137 @@
1
+ import type { Redis } from "ioredis"
2
+ import { createHMAC, createSHA1 } from "hash-wasm"
3
+
4
+ const PRESIGN_TTL = 5 * 24 * 60 * 60 // 5 days in seconds
5
+ const PRESIGN_EXPIRATION = 7 * 24 * 60 * 60 // 7 days for the presigned URL itself
6
+
7
+ const defaultBucket = process.env.AWS_S3_PUBLIC_BUCKET
8
+ const accessKeyId = process.env.AWS_ACCESS_KEY_ID
9
+ const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY
10
+
11
+ /** Resolve bucket name, falling back to the default configured bucket */
12
+ function resolveBucket(bucket: string): string {
13
+ return bucket || defaultBucket
14
+ }
15
+
16
+ function presignKey(bucket: string, s3Key: string, versionId: string): string {
17
+ return `ps:${bucket}:${s3Key}:${versionId}`
18
+ }
19
+
20
+ /**
21
+ * Pre-initialized HMAC-SHA1 signer keyed with the AWS secret.
22
+ */
23
+ let hmacPromise: ReturnType<typeof createHMAC> | null = null
24
+
25
+ function getHMAC(): ReturnType<typeof createHMAC> {
26
+ if (!hmacPromise) {
27
+ if (!secretAccessKey) {
28
+ throw new Error("AWS_SECRET_ACCESS_KEY is required for presigned URLs")
29
+ }
30
+ hmacPromise = createHMAC(
31
+ createSHA1(),
32
+ new TextEncoder().encode(secretAccessKey),
33
+ )
34
+ }
35
+ return hmacPromise
36
+ }
37
+
38
+ /**
39
+ * Generate a V2 query-string presigned URL for an S3 GetObject request.
40
+ * Uses HMAC-SHA1 via hash-wasm (WASM)
41
+ * Replaced V4 signatures for performance reasons
42
+ */
43
+ function presignV2(
44
+ hmac: Awaited<ReturnType<typeof createHMAC>>,
45
+ bucket: string,
46
+ s3Key: string,
47
+ versionId: string,
48
+ expires: number,
49
+ ): string {
50
+ // StringToSign = HTTP-Verb + "\n" + "\n" + "\n" + Expires + "\n" + CanonicalizedResource
51
+ const resource = `/${bucket}/${s3Key}?versionId=${versionId}`
52
+ const stringToSign = `GET\n\n\n${expires}\n${resource}`
53
+ hmac.init()
54
+ hmac.update(stringToSign)
55
+ const signature = Buffer.from(hmac.digest("binary")).toString("base64")
56
+ const encodedSig = encodeURIComponent(signature)
57
+ const encodedKey = encodeURIComponent(accessKeyId!)
58
+ return `https://s3.amazonaws.com/${bucket}/${s3Key}?versionId=${versionId}&AWSAccessKeyId=${encodedKey}&Expires=${expires}&Signature=${encodedSig}`
59
+ }
60
+
61
+ /**
62
+ * Get or generate a presigned URL, caching it in Redis.
63
+ * @param bucket - S3 bucket name, or empty string for the default bucket
64
+ */
65
+ export async function getPresignedUrl(
66
+ redis: Redis,
67
+ bucket: string,
68
+ s3Key: string,
69
+ versionId: string,
70
+ ): Promise<string> {
71
+ const resolvedBucket = resolveBucket(bucket)
72
+ const key = presignKey(resolvedBucket, s3Key, versionId)
73
+ const cached = await redis.get(key)
74
+ if (cached) {
75
+ return cached
76
+ }
77
+ const hmac = await getHMAC()
78
+ const expires = Math.floor(Date.now() / 1000) + PRESIGN_EXPIRATION
79
+ const url = presignV2(hmac, resolvedBucket, s3Key, versionId, expires)
80
+ await redis.setex(key, PRESIGN_TTL, url)
81
+ return url
82
+ }
83
+
84
+ /**
85
+ * Bulk-resolve presigned URLs for many files in two pipelined Redis calls.
86
+ * Returns an array of resolved URLs matching the input order.
87
+ */
88
+ export async function getPresignedUrlsBulk(
89
+ redis: Redis,
90
+ items: { bucket: string; s3Key: string; versionId: string }[],
91
+ ): Promise<string[]> {
92
+ if (items.length === 0) return []
93
+
94
+ const resolved = items.map((item) => ({
95
+ ...item,
96
+ bucket: resolveBucket(item.bucket),
97
+ }))
98
+ const keys = resolved.map((r) => presignKey(r.bucket, r.s3Key, r.versionId))
99
+ const cached = await redis.mget(...keys)
100
+
101
+ // Fill hits from cache, sign misses and queue them for write-back
102
+ const hmac = await getHMAC()
103
+ const expires = Math.floor(Date.now() / 1000) + PRESIGN_EXPIRATION
104
+ const writePipeline = redis.pipeline()
105
+ let misses = 0
106
+
107
+ const results = cached.map((val, i) => {
108
+ if (val) return val
109
+ misses++
110
+ const url = presignV2(
111
+ hmac,
112
+ resolved[i].bucket,
113
+ resolved[i].s3Key,
114
+ resolved[i].versionId,
115
+ expires,
116
+ )
117
+ writePipeline.setex(keys[i], PRESIGN_TTL, url)
118
+ return url
119
+ })
120
+
121
+ if (misses > 0) await writePipeline.exec()
122
+ return results
123
+ }
124
+
125
+ /**
126
+ * Build a public (non-presigned) S3 URL from key and versionId.
127
+ * @param bucket - S3 bucket name, or empty string for the default bucket
128
+ */
129
+ export function publicS3Url(
130
+ bucket: string,
131
+ s3Key: string,
132
+ versionId: string,
133
+ ): string {
134
+ return `https://s3.amazonaws.com/${
135
+ resolveBucket(bucket)
136
+ }/${s3Key}?versionId=${versionId}`
137
+ }
@@ -29,6 +29,7 @@ export interface DatasetDocument extends Document {
29
29
  views: number
30
30
  related: [DatasetRelationDocument]
31
31
  schemaValidator: boolean
32
+ holdDeletion: boolean
32
33
  _conditions: object
33
34
  }
34
35
 
@@ -45,6 +46,7 @@ const datasetSchema = new Schema<DatasetDocument>(
45
46
  views: Number,
46
47
  related: [RelationSchema],
47
48
  schemaValidator: { type: Boolean, default: false },
49
+ holdDeletion: { type: Boolean, default: false },
48
50
  },
49
51
  { toJSON: { virtuals: true }, toObject: { virtuals: true } },
50
52
  )