@openneuro/server 4.47.6 → 5.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -7
- package/src/app.ts +1 -1
- package/src/cache/__tests__/tree.spec.ts +212 -0
- package/src/cache/tree.ts +148 -0
- package/src/datalad/__tests__/dataRetentionNotifications.spec.ts +11 -0
- package/src/datalad/__tests__/files.spec.ts +249 -0
- package/src/datalad/dataRetentionNotifications.ts +5 -0
- package/src/datalad/dataset.ts +29 -1
- package/src/datalad/files.ts +362 -39
- package/src/datalad/snapshots.ts +29 -54
- package/src/graphql/resolvers/__tests__/response-status.spec.ts +42 -0
- package/src/graphql/resolvers/__tests__/user.spec.ts +55 -1
- package/src/graphql/resolvers/build-search-query.ts +391 -0
- package/src/graphql/resolvers/cache.ts +5 -1
- package/src/graphql/resolvers/dataset-search.ts +40 -23
- package/src/graphql/resolvers/datasetEvents.ts +48 -78
- package/src/graphql/resolvers/draft.ts +5 -2
- package/src/graphql/resolvers/holdDeletion.ts +21 -0
- package/src/graphql/resolvers/index.ts +6 -0
- package/src/graphql/resolvers/mutation.ts +2 -0
- package/src/graphql/resolvers/response-status.ts +43 -0
- package/src/graphql/resolvers/snapshots.ts +9 -18
- package/src/graphql/resolvers/summary.ts +17 -0
- package/src/graphql/resolvers/user.ts +1 -1
- package/src/graphql/schema.ts +54 -14
- package/src/handlers/datalad.ts +4 -0
- package/src/handlers/doi.ts +32 -36
- package/src/libs/doi/__tests__/doi.spec.ts +50 -12
- package/src/libs/doi/__tests__/validate.spec.ts +110 -0
- package/src/libs/doi/index.ts +108 -71
- package/src/libs/doi/metadata.ts +101 -0
- package/src/libs/doi/validate.ts +59 -0
- package/src/libs/presign.ts +137 -0
- package/src/models/dataset.ts +2 -0
- package/src/models/doi.ts +7 -0
- package/src/queues/producer-methods.ts +9 -5
- package/src/queues/queue-schedule.ts +1 -1
- package/src/queues/queues.ts +2 -2
- package/src/routes.ts +10 -2
- package/src/types/datacite/LICENSE +37 -0
- package/src/types/datacite/README.md +3 -0
- package/src/types/datacite/datacite-v4.5.json +643 -0
- package/src/types/datacite/datacite-v4.5.ts +281 -0
- package/src/types/datacite.ts +53 -63
- package/src/utils/datacite-mapper.ts +7 -3
- package/src/utils/datacite-utils.ts +12 -15
- package/src/libs/doi/__tests__/__snapshots__/doi.spec.ts.snap +0 -17
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import type { Redis } from "ioredis"
|
|
2
|
+
import { createHMAC, createSHA1 } from "hash-wasm"
|
|
3
|
+
|
|
4
|
+
// The cache entry (5 days) deliberately expires before the signature itself
// (7 days), so any URL served from cache retains at least ~2 days of validity.
const PRESIGN_TTL = 5 * 24 * 60 * 60 // 5 days in seconds
const PRESIGN_EXPIRATION = 7 * 24 * 60 * 60 // 7 days for the presigned URL itself
|
|
6
|
+
|
|
7
|
+
const defaultBucket = process.env.AWS_S3_PUBLIC_BUCKET
|
|
8
|
+
const accessKeyId = process.env.AWS_ACCESS_KEY_ID
|
|
9
|
+
const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY
|
|
10
|
+
|
|
11
|
+
/** Resolve bucket name, falling back to the default configured bucket */
|
|
12
|
+
function resolveBucket(bucket: string): string {
|
|
13
|
+
return bucket || defaultBucket
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
function presignKey(bucket: string, s3Key: string, versionId: string): string {
|
|
17
|
+
return `ps:${bucket}:${s3Key}:${versionId}`
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Pre-initialized HMAC-SHA1 signer keyed with the AWS secret.
|
|
22
|
+
*/
|
|
23
|
+
let hmacPromise: ReturnType<typeof createHMAC> | null = null
|
|
24
|
+
|
|
25
|
+
function getHMAC(): ReturnType<typeof createHMAC> {
|
|
26
|
+
if (!hmacPromise) {
|
|
27
|
+
if (!secretAccessKey) {
|
|
28
|
+
throw new Error("AWS_SECRET_ACCESS_KEY is required for presigned URLs")
|
|
29
|
+
}
|
|
30
|
+
hmacPromise = createHMAC(
|
|
31
|
+
createSHA1(),
|
|
32
|
+
new TextEncoder().encode(secretAccessKey),
|
|
33
|
+
)
|
|
34
|
+
}
|
|
35
|
+
return hmacPromise
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**
 * Generate a V2 query-string presigned URL for an S3 GetObject request.
 * Uses HMAC-SHA1 via hash-wasm (WASM)
 * Replaced V4 signatures for performance reasons
 *
 * @param hmac - resolved HMAC-SHA1 signer from getHMAC(), already keyed
 * @param bucket - already-resolved bucket name (no default fallback here)
 * @param s3Key - object key; NOTE(review): not URI-encoded in either the
 *   signed resource or the returned URL — keys containing characters that
 *   require percent-encoding may produce invalid URLs/signatures. TODO confirm
 *   callers only pass URL-safe keys.
 * @param versionId - S3 object version pinned via the versionId query param
 * @param expires - absolute Unix timestamp (seconds) when the signature expires
 */
function presignV2(
  hmac: Awaited<ReturnType<typeof createHMAC>>,
  bucket: string,
  s3Key: string,
  versionId: string,
  expires: number,
): string {
  // StringToSign = HTTP-Verb + "\n" + "\n" + "\n" + Expires + "\n" + CanonicalizedResource
  // The two empty lines are the (unused) Content-MD5 and Content-Type fields.
  const resource = `/${bucket}/${s3Key}?versionId=${versionId}`
  const stringToSign = `GET\n\n\n${expires}\n${resource}`
  // init() resets the shared signer; init/update/digest are synchronous, so
  // no other call can interleave between them.
  hmac.init()
  hmac.update(stringToSign)
  const signature = Buffer.from(hmac.digest("binary")).toString("base64")
  const encodedSig = encodeURIComponent(signature)
  // NOTE(review): non-null assertion — AWS_ACCESS_KEY_ID is not validated the
  // way the secret is in getHMAC(); an unset key id becomes "undefined" here.
  const encodedKey = encodeURIComponent(accessKeyId!)
  return `https://s3.amazonaws.com/${bucket}/${s3Key}?versionId=${versionId}&AWSAccessKeyId=${encodedKey}&Expires=${expires}&Signature=${encodedSig}`
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Get or generate a presigned URL, caching it in Redis.
|
|
63
|
+
* @param bucket - S3 bucket name, or empty string for the default bucket
|
|
64
|
+
*/
|
|
65
|
+
export async function getPresignedUrl(
|
|
66
|
+
redis: Redis,
|
|
67
|
+
bucket: string,
|
|
68
|
+
s3Key: string,
|
|
69
|
+
versionId: string,
|
|
70
|
+
): Promise<string> {
|
|
71
|
+
const resolvedBucket = resolveBucket(bucket)
|
|
72
|
+
const key = presignKey(resolvedBucket, s3Key, versionId)
|
|
73
|
+
const cached = await redis.get(key)
|
|
74
|
+
if (cached) {
|
|
75
|
+
return cached
|
|
76
|
+
}
|
|
77
|
+
const hmac = await getHMAC()
|
|
78
|
+
const expires = Math.floor(Date.now() / 1000) + PRESIGN_EXPIRATION
|
|
79
|
+
const url = presignV2(hmac, resolvedBucket, s3Key, versionId, expires)
|
|
80
|
+
await redis.setex(key, PRESIGN_TTL, url)
|
|
81
|
+
return url
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Bulk-resolve presigned URLs for many files in two pipelined Redis calls.
|
|
86
|
+
* Returns an array of resolved URLs matching the input order.
|
|
87
|
+
*/
|
|
88
|
+
export async function getPresignedUrlsBulk(
|
|
89
|
+
redis: Redis,
|
|
90
|
+
items: { bucket: string; s3Key: string; versionId: string }[],
|
|
91
|
+
): Promise<string[]> {
|
|
92
|
+
if (items.length === 0) return []
|
|
93
|
+
|
|
94
|
+
const resolved = items.map((item) => ({
|
|
95
|
+
...item,
|
|
96
|
+
bucket: resolveBucket(item.bucket),
|
|
97
|
+
}))
|
|
98
|
+
const keys = resolved.map((r) => presignKey(r.bucket, r.s3Key, r.versionId))
|
|
99
|
+
const cached = await redis.mget(...keys)
|
|
100
|
+
|
|
101
|
+
// Fill hits from cache, sign misses and queue them for write-back
|
|
102
|
+
const hmac = await getHMAC()
|
|
103
|
+
const expires = Math.floor(Date.now() / 1000) + PRESIGN_EXPIRATION
|
|
104
|
+
const writePipeline = redis.pipeline()
|
|
105
|
+
let misses = 0
|
|
106
|
+
|
|
107
|
+
const results = cached.map((val, i) => {
|
|
108
|
+
if (val) return val
|
|
109
|
+
misses++
|
|
110
|
+
const url = presignV2(
|
|
111
|
+
hmac,
|
|
112
|
+
resolved[i].bucket,
|
|
113
|
+
resolved[i].s3Key,
|
|
114
|
+
resolved[i].versionId,
|
|
115
|
+
expires,
|
|
116
|
+
)
|
|
117
|
+
writePipeline.setex(keys[i], PRESIGN_TTL, url)
|
|
118
|
+
return url
|
|
119
|
+
})
|
|
120
|
+
|
|
121
|
+
if (misses > 0) await writePipeline.exec()
|
|
122
|
+
return results
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Build a public (non-presigned) S3 URL from key and versionId.
|
|
127
|
+
* @param bucket - S3 bucket name, or empty string for the default bucket
|
|
128
|
+
*/
|
|
129
|
+
export function publicS3Url(
|
|
130
|
+
bucket: string,
|
|
131
|
+
s3Key: string,
|
|
132
|
+
versionId: string,
|
|
133
|
+
): string {
|
|
134
|
+
return `https://s3.amazonaws.com/${
|
|
135
|
+
resolveBucket(bucket)
|
|
136
|
+
}/${s3Key}?versionId=${versionId}`
|
|
137
|
+
}
|
package/src/models/dataset.ts
CHANGED
|
@@ -29,6 +29,7 @@ export interface DatasetDocument extends Document {
|
|
|
29
29
|
views: number
|
|
30
30
|
related: [DatasetRelationDocument]
|
|
31
31
|
schemaValidator: boolean
|
|
32
|
+
holdDeletion: boolean
|
|
32
33
|
_conditions: object
|
|
33
34
|
}
|
|
34
35
|
|
|
@@ -45,6 +46,7 @@ const datasetSchema = new Schema<DatasetDocument>(
|
|
|
45
46
|
views: Number,
|
|
46
47
|
related: [RelationSchema],
|
|
47
48
|
schemaValidator: { type: Boolean, default: false },
|
|
49
|
+
holdDeletion: { type: Boolean, default: false },
|
|
48
50
|
},
|
|
49
51
|
{ toJSON: { virtuals: true }, toObject: { virtuals: true } },
|
|
50
52
|
)
|
package/src/models/doi.ts
CHANGED
|
@@ -1,17 +1,24 @@
|
|
|
1
1
|
import mongoose from "mongoose"
|
|
2
2
|
import type { Document } from "mongoose"
|
|
3
|
+
import type { DoiState } from "../types/datacite"
|
|
3
4
|
const { Schema, model } = mongoose
|
|
4
5
|
|
|
5
6
|
export interface DoiDocument extends Document {
|
|
6
7
|
datasetId: string
|
|
7
8
|
snapshotId: string
|
|
8
9
|
doi: string
|
|
10
|
+
state: DoiState
|
|
9
11
|
}
|
|
10
12
|
|
|
11
13
|
const doiSchema = new Schema({
|
|
12
14
|
datasetId: String,
|
|
13
15
|
snapshotId: String,
|
|
14
16
|
doi: String,
|
|
17
|
+
state: {
|
|
18
|
+
type: String,
|
|
19
|
+
enum: ["draft", "registered", "findable"],
|
|
20
|
+
default: "draft",
|
|
21
|
+
},
|
|
15
22
|
})
|
|
16
23
|
|
|
17
24
|
const Doi = model<DoiDocument>("Doi", doiSchema)
|
|
@@ -25,14 +25,18 @@ export function queueIndexDataset(datasetId: string) {
|
|
|
25
25
|
* Queue data retention check for a dataset
|
|
26
26
|
* @param datasetId Dataset to check
|
|
27
27
|
*/
|
|
28
|
-
export function queueDataRetentionCheck(
|
|
28
|
+
export async function queueDataRetentionCheck(
|
|
29
|
+
datasetId: string,
|
|
30
|
+
): Promise<void> {
|
|
29
31
|
try {
|
|
30
32
|
const msg = new ProducibleMessage()
|
|
31
33
|
msg.setQueue(OpenNeuroQueues.DATARETENTION).setBody({ datasetId })
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
msg.setTTL(64800000) // 18 hours in ms to survive the consumer rate limits
|
|
35
|
+
await new Promise<void>((resolve, reject) => {
|
|
36
|
+
producer.produce(msg, (err) => {
|
|
37
|
+
if (err) reject(err)
|
|
38
|
+
else resolve()
|
|
39
|
+
})
|
|
36
40
|
})
|
|
37
41
|
} catch (err) {
|
|
38
42
|
Sentry.captureException(err)
|
|
@@ -14,7 +14,7 @@ async function enqueueAllDatasetChecks(): Promise<void> {
|
|
|
14
14
|
const cursor = Dataset.find({}, "id").cursor()
|
|
15
15
|
for await (const dataset of cursor) {
|
|
16
16
|
// Check data retention policy status and send notifications
|
|
17
|
-
queueDataRetentionCheck(dataset.id)
|
|
17
|
+
await queueDataRetentionCheck(dataset.id)
|
|
18
18
|
}
|
|
19
19
|
}
|
|
20
20
|
|
package/src/queues/queues.ts
CHANGED
|
@@ -55,6 +55,6 @@ export async function setupQueues(): Promise<void> {
|
|
|
55
55
|
// Limit indexing queue to 8 runs per minute to avoid stacking indexing excessively
|
|
56
56
|
await setRateLimit(OpenNeuroQueues.INDEXING, 8, 60000)
|
|
57
57
|
|
|
58
|
-
// Rate limit data retention queue to
|
|
59
|
-
await setRateLimit(OpenNeuroQueues.DATARETENTION,
|
|
58
|
+
// Rate limit data retention queue to 60 runs per minute
|
|
59
|
+
await setRateLimit(OpenNeuroQueues.DATARETENTION, 60, 60000)
|
|
60
60
|
}
|
package/src/routes.ts
CHANGED
|
@@ -190,8 +190,16 @@ const routes = [
|
|
|
190
190
|
// git redirect routes
|
|
191
191
|
{ method: "get", url: "/git/:datasetId", handler: datalad.gitRepo },
|
|
192
192
|
{ method: "post", url: "/git/:datasetId", handler: datalad.gitRepo },
|
|
193
|
-
{
|
|
194
|
-
|
|
193
|
+
{
|
|
194
|
+
method: "get",
|
|
195
|
+
url: "/git/:datasetId/*arguments",
|
|
196
|
+
handler: datalad.gitRepo,
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
method: "post",
|
|
200
|
+
url: "/git/:datasetId/*arguments",
|
|
201
|
+
handler: datalad.gitRepo,
|
|
202
|
+
},
|
|
195
203
|
]
|
|
196
204
|
|
|
197
205
|
// initialize routes -------------------------------
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
DataCite is free software; you can redistribute it and/or modify
|
|
2
|
+
it under the terms of the Revised BSD License quoted below.
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2015-2018 CERN.
|
|
5
|
+
Copyright (C) 2018 Center for Open Science.
|
|
6
|
+
Copyright (C) 2019-2024 Caltech.
|
|
7
|
+
Copyright (C) 2024 Institute of Biotechnology of the Czech Academy of Sciences.
|
|
8
|
+
|
|
9
|
+
All rights reserved.
|
|
10
|
+
|
|
11
|
+
Redistribution and use in source and binary forms, with or without
|
|
12
|
+
modification, are permitted provided that the following conditions are
|
|
13
|
+
met:
|
|
14
|
+
|
|
15
|
+
* Redistributions of source code must retain the above copyright
|
|
16
|
+
notice, this list of conditions and the following disclaimer.
|
|
17
|
+
|
|
18
|
+
* Redistributions in binary form must reproduce the above copyright
|
|
19
|
+
notice, this list of conditions and the following disclaimer in the
|
|
20
|
+
documentation and/or other materials provided with the distribution.
|
|
21
|
+
|
|
22
|
+
* Neither the name of the copyright holder nor the names of its
|
|
23
|
+
contributors may be used to endorse or promote products derived from
|
|
24
|
+
this software without specific prior written permission.
|
|
25
|
+
|
|
26
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
27
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
28
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
29
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
30
|
+
HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
31
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
32
|
+
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
|
33
|
+
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
34
|
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
|
35
|
+
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
36
|
+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
|
37
|
+
DAMAGE.
|