@openneuro/server 4.47.6 → 5.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +10 -7
  2. package/src/app.ts +1 -1
  3. package/src/cache/__tests__/tree.spec.ts +212 -0
  4. package/src/cache/tree.ts +148 -0
  5. package/src/datalad/__tests__/dataRetentionNotifications.spec.ts +11 -0
  6. package/src/datalad/__tests__/files.spec.ts +249 -0
  7. package/src/datalad/dataRetentionNotifications.ts +5 -0
  8. package/src/datalad/dataset.ts +29 -1
  9. package/src/datalad/files.ts +362 -39
  10. package/src/datalad/snapshots.ts +29 -54
  11. package/src/graphql/resolvers/__tests__/response-status.spec.ts +42 -0
  12. package/src/graphql/resolvers/__tests__/user.spec.ts +55 -1
  13. package/src/graphql/resolvers/build-search-query.ts +391 -0
  14. package/src/graphql/resolvers/cache.ts +5 -1
  15. package/src/graphql/resolvers/dataset-search.ts +40 -23
  16. package/src/graphql/resolvers/datasetEvents.ts +48 -78
  17. package/src/graphql/resolvers/draft.ts +5 -2
  18. package/src/graphql/resolvers/holdDeletion.ts +21 -0
  19. package/src/graphql/resolvers/index.ts +6 -0
  20. package/src/graphql/resolvers/mutation.ts +2 -0
  21. package/src/graphql/resolvers/response-status.ts +43 -0
  22. package/src/graphql/resolvers/snapshots.ts +9 -18
  23. package/src/graphql/resolvers/summary.ts +17 -0
  24. package/src/graphql/resolvers/user.ts +1 -1
  25. package/src/graphql/schema.ts +54 -14
  26. package/src/handlers/datalad.ts +4 -0
  27. package/src/handlers/doi.ts +32 -36
  28. package/src/libs/doi/__tests__/doi.spec.ts +50 -12
  29. package/src/libs/doi/__tests__/validate.spec.ts +110 -0
  30. package/src/libs/doi/index.ts +108 -71
  31. package/src/libs/doi/metadata.ts +101 -0
  32. package/src/libs/doi/validate.ts +59 -0
  33. package/src/libs/presign.ts +137 -0
  34. package/src/models/dataset.ts +2 -0
  35. package/src/models/doi.ts +7 -0
  36. package/src/queues/producer-methods.ts +9 -5
  37. package/src/queues/queue-schedule.ts +1 -1
  38. package/src/queues/queues.ts +2 -2
  39. package/src/routes.ts +10 -2
  40. package/src/types/datacite/LICENSE +37 -0
  41. package/src/types/datacite/README.md +3 -0
  42. package/src/types/datacite/datacite-v4.5.json +643 -0
  43. package/src/types/datacite/datacite-v4.5.ts +281 -0
  44. package/src/types/datacite.ts +53 -63
  45. package/src/utils/datacite-mapper.ts +7 -3
  46. package/src/utils/datacite-utils.ts +12 -15
  47. package/src/libs/doi/__tests__/__snapshots__/doi.spec.ts.snap +0 -17
@@ -0,0 +1,137 @@
1
+ import type { Redis } from "ioredis"
2
+ import { createHMAC, createSHA1 } from "hash-wasm"
3
+
4
+ const PRESIGN_TTL = 5 * 24 * 60 * 60 // 5 days in seconds
5
+ const PRESIGN_EXPIRATION = 7 * 24 * 60 * 60 // 7 days for the presigned URL itself
6
+
7
+ const defaultBucket = process.env.AWS_S3_PUBLIC_BUCKET
8
+ const accessKeyId = process.env.AWS_ACCESS_KEY_ID
9
+ const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY
10
+
11
+ /** Resolve bucket name, falling back to the default configured bucket */
12
+ function resolveBucket(bucket: string): string {
13
+ return bucket || defaultBucket
14
+ }
15
+
16
+ function presignKey(bucket: string, s3Key: string, versionId: string): string {
17
+ return `ps:${bucket}:${s3Key}:${versionId}`
18
+ }
19
+
20
+ /**
21
+ * Pre-initialized HMAC-SHA1 signer keyed with the AWS secret.
22
+ */
23
+ let hmacPromise: ReturnType<typeof createHMAC> | null = null
24
+
25
+ function getHMAC(): ReturnType<typeof createHMAC> {
26
+ if (!hmacPromise) {
27
+ if (!secretAccessKey) {
28
+ throw new Error("AWS_SECRET_ACCESS_KEY is required for presigned URLs")
29
+ }
30
+ hmacPromise = createHMAC(
31
+ createSHA1(),
32
+ new TextEncoder().encode(secretAccessKey),
33
+ )
34
+ }
35
+ return hmacPromise
36
+ }
37
+
38
+ /**
39
+ * Generate a V2 query-string presigned URL for an S3 GetObject request.
40
+ * Uses HMAC-SHA1 via hash-wasm (WASM)
41
+ * Replaced V4 signatures for performance reasons
42
+ */
43
+ function presignV2(
44
+ hmac: Awaited<ReturnType<typeof createHMAC>>,
45
+ bucket: string,
46
+ s3Key: string,
47
+ versionId: string,
48
+ expires: number,
49
+ ): string {
50
+ // StringToSign = HTTP-Verb + "\n" + "\n" + "\n" + Expires + "\n" + CanonicalizedResource
51
+ const resource = `/${bucket}/${s3Key}?versionId=${versionId}`
52
+ const stringToSign = `GET\n\n\n${expires}\n${resource}`
53
+ hmac.init()
54
+ hmac.update(stringToSign)
55
+ const signature = Buffer.from(hmac.digest("binary")).toString("base64")
56
+ const encodedSig = encodeURIComponent(signature)
57
+ const encodedKey = encodeURIComponent(accessKeyId!)
58
+ return `https://s3.amazonaws.com/${bucket}/${s3Key}?versionId=${versionId}&AWSAccessKeyId=${encodedKey}&Expires=${expires}&Signature=${encodedSig}`
59
+ }
60
+
61
+ /**
62
+ * Get or generate a presigned URL, caching it in Redis.
63
+ * @param bucket - S3 bucket name, or empty string for the default bucket
64
+ */
65
+ export async function getPresignedUrl(
66
+ redis: Redis,
67
+ bucket: string,
68
+ s3Key: string,
69
+ versionId: string,
70
+ ): Promise<string> {
71
+ const resolvedBucket = resolveBucket(bucket)
72
+ const key = presignKey(resolvedBucket, s3Key, versionId)
73
+ const cached = await redis.get(key)
74
+ if (cached) {
75
+ return cached
76
+ }
77
+ const hmac = await getHMAC()
78
+ const expires = Math.floor(Date.now() / 1000) + PRESIGN_EXPIRATION
79
+ const url = presignV2(hmac, resolvedBucket, s3Key, versionId, expires)
80
+ await redis.setex(key, PRESIGN_TTL, url)
81
+ return url
82
+ }
83
+
84
+ /**
85
+ * Bulk-resolve presigned URLs for many files in two pipelined Redis calls.
86
+ * Returns an array of resolved URLs matching the input order.
87
+ */
88
+ export async function getPresignedUrlsBulk(
89
+ redis: Redis,
90
+ items: { bucket: string; s3Key: string; versionId: string }[],
91
+ ): Promise<string[]> {
92
+ if (items.length === 0) return []
93
+
94
+ const resolved = items.map((item) => ({
95
+ ...item,
96
+ bucket: resolveBucket(item.bucket),
97
+ }))
98
+ const keys = resolved.map((r) => presignKey(r.bucket, r.s3Key, r.versionId))
99
+ const cached = await redis.mget(...keys)
100
+
101
+ // Fill hits from cache, sign misses and queue them for write-back
102
+ const hmac = await getHMAC()
103
+ const expires = Math.floor(Date.now() / 1000) + PRESIGN_EXPIRATION
104
+ const writePipeline = redis.pipeline()
105
+ let misses = 0
106
+
107
+ const results = cached.map((val, i) => {
108
+ if (val) return val
109
+ misses++
110
+ const url = presignV2(
111
+ hmac,
112
+ resolved[i].bucket,
113
+ resolved[i].s3Key,
114
+ resolved[i].versionId,
115
+ expires,
116
+ )
117
+ writePipeline.setex(keys[i], PRESIGN_TTL, url)
118
+ return url
119
+ })
120
+
121
+ if (misses > 0) await writePipeline.exec()
122
+ return results
123
+ }
124
+
125
+ /**
126
+ * Build a public (non-presigned) S3 URL from key and versionId.
127
+ * @param bucket - S3 bucket name, or empty string for the default bucket
128
+ */
129
+ export function publicS3Url(
130
+ bucket: string,
131
+ s3Key: string,
132
+ versionId: string,
133
+ ): string {
134
+ return `https://s3.amazonaws.com/${
135
+ resolveBucket(bucket)
136
+ }/${s3Key}?versionId=${versionId}`
137
+ }
@@ -29,6 +29,7 @@ export interface DatasetDocument extends Document {
29
29
  views: number
30
30
  related: [DatasetRelationDocument]
31
31
  schemaValidator: boolean
32
+ holdDeletion: boolean
32
33
  _conditions: object
33
34
  }
34
35
 
@@ -45,6 +46,7 @@ const datasetSchema = new Schema<DatasetDocument>(
45
46
  views: Number,
46
47
  related: [RelationSchema],
47
48
  schemaValidator: { type: Boolean, default: false },
49
+ holdDeletion: { type: Boolean, default: false },
48
50
  },
49
51
  { toJSON: { virtuals: true }, toObject: { virtuals: true } },
50
52
  )
package/src/models/doi.ts CHANGED
@@ -1,17 +1,24 @@
1
1
  import mongoose from "mongoose"
2
2
  import type { Document } from "mongoose"
3
+ import type { DoiState } from "../types/datacite"
3
4
  const { Schema, model } = mongoose
4
5
 
5
6
  export interface DoiDocument extends Document {
6
7
  datasetId: string
7
8
  snapshotId: string
8
9
  doi: string
10
+ state: DoiState
9
11
  }
10
12
 
11
13
  const doiSchema = new Schema({
12
14
  datasetId: String,
13
15
  snapshotId: String,
14
16
  doi: String,
17
+ state: {
18
+ type: String,
19
+ enum: ["draft", "registered", "findable"],
20
+ default: "draft",
21
+ },
15
22
  })
16
23
 
17
24
/** Mongoose model for DOI records. */
const Doi = model<DoiDocument>("Doi", doiSchema)
@@ -25,14 +25,18 @@ export function queueIndexDataset(datasetId: string) {
25
25
  * Queue data retention check for a dataset
26
26
  * @param datasetId Dataset to check
27
27
  */
28
- export function queueDataRetentionCheck(datasetId: string) {
28
+ export async function queueDataRetentionCheck(
29
+ datasetId: string,
30
+ ): Promise<void> {
29
31
  try {
30
32
  const msg = new ProducibleMessage()
31
33
  msg.setQueue(OpenNeuroQueues.DATARETENTION).setBody({ datasetId })
32
- producer.produce(msg, (err) => {
33
- if (err) {
34
- Sentry.captureException(err)
35
- }
34
+ msg.setTTL(64800000) // 18 hours in ms to survive the consumer rate limits
35
+ await new Promise<void>((resolve, reject) => {
36
+ producer.produce(msg, (err) => {
37
+ if (err) reject(err)
38
+ else resolve()
39
+ })
36
40
  })
37
41
  } catch (err) {
38
42
  Sentry.captureException(err)
@@ -14,7 +14,7 @@ async function enqueueAllDatasetChecks(): Promise<void> {
14
14
  const cursor = Dataset.find({}, "id").cursor()
15
15
  for await (const dataset of cursor) {
16
16
  // Check data retention policy status and send notifications
17
- queueDataRetentionCheck(dataset.id)
17
+ await queueDataRetentionCheck(dataset.id)
18
18
  }
19
19
  }
20
20
 
@@ -55,6 +55,6 @@ export async function setupQueues(): Promise<void> {
55
55
  // Limit indexing queue to 8 runs per minute to avoid stacking indexing excessively
56
56
  await setRateLimit(OpenNeuroQueues.INDEXING, 8, 60000)
57
57
 
58
- // Rate limit data retention queue to 16 runs per minute
59
- await setRateLimit(OpenNeuroQueues.DATARETENTION, 16, 60000)
58
+ // Rate limit data retention queue to 60 runs per minute
59
+ await setRateLimit(OpenNeuroQueues.DATARETENTION, 60, 60000)
60
60
  }
package/src/routes.ts CHANGED
@@ -190,8 +190,16 @@ const routes = [
190
190
  // git redirect routes
191
191
  { method: "get", url: "/git/:datasetId", handler: datalad.gitRepo },
192
192
  { method: "post", url: "/git/:datasetId", handler: datalad.gitRepo },
193
- { method: "get", url: "/git/:datasetId/*", handler: datalad.gitRepo },
194
- { method: "post", url: "/git/:datasetId/*", handler: datalad.gitRepo },
193
+ {
194
+ method: "get",
195
+ url: "/git/:datasetId/*arguments",
196
+ handler: datalad.gitRepo,
197
+ },
198
+ {
199
+ method: "post",
200
+ url: "/git/:datasetId/*arguments",
201
+ handler: datalad.gitRepo,
202
+ },
195
203
  ]
196
204
 
197
205
  // initialize routes -------------------------------
@@ -0,0 +1,37 @@
1
+ DataCite is free software; you can redistribute it and/or modify
2
+ it under the terms of the Revised BSD License quoted below.
3
+
4
+ Copyright (C) 2015-2018 CERN.
5
+ Copyright (C) 2018 Center for Open Science.
6
+ Copyright (C) 2019-2024 Caltech.
7
+ Copyright (C) 2024 Institute of Biotechnology of the Czech Academy of Sciences.
8
+
9
+ All rights reserved.
10
+
11
+ Redistribution and use in source and binary forms, with or without
12
+ modification, are permitted provided that the following conditions are
13
+ met:
14
+
15
+ * Redistributions of source code must retain the above copyright
16
+ notice, this list of conditions and the following disclaimer.
17
+
18
+ * Redistributions in binary form must reproduce the above copyright
19
+ notice, this list of conditions and the following disclaimer in the
20
+ documentation and/or other materials provided with the distribution.
21
+
22
+ * Neither the name of the copyright holder nor the names of its
23
+ contributors may be used to endorse or promote products derived from
24
+ this software without specific prior written permission.
25
+
26
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30
+ HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
32
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
33
+ OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
34
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
35
+ TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
36
+ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
37
+ DAMAGE.
@@ -0,0 +1,3 @@
1
+ Generate `datacite-v4.5.ts` with `npx json-schema-to-typescript datacite-v4.5.json`.
2
+
3
+ `datacite-v4.5.json` sourced from https://github.com/inveniosoftware/datacite/blob/5506d1347a070952d2c2b96c213f44c5fa46d0dd/datacite/schemas/datacite-v4.5.json