@openneuro/server 4.47.7 → 5.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -7
- package/src/app.ts +1 -1
- package/src/cache/__tests__/tree.spec.ts +212 -0
- package/src/cache/tree.ts +148 -0
- package/src/datalad/__tests__/dataRetentionNotifications.spec.ts +11 -0
- package/src/datalad/__tests__/files.spec.ts +249 -0
- package/src/datalad/dataRetentionNotifications.ts +5 -0
- package/src/datalad/dataset.ts +29 -1
- package/src/datalad/files.ts +362 -39
- package/src/datalad/snapshots.ts +29 -54
- package/src/graphql/resolvers/__tests__/response-status.spec.ts +42 -0
- package/src/graphql/resolvers/build-search-query.ts +391 -0
- package/src/graphql/resolvers/cache.ts +5 -1
- package/src/graphql/resolvers/dataset-search.ts +40 -23
- package/src/graphql/resolvers/datasetEvents.ts +48 -78
- package/src/graphql/resolvers/draft.ts +5 -2
- package/src/graphql/resolvers/holdDeletion.ts +21 -0
- package/src/graphql/resolvers/index.ts +6 -0
- package/src/graphql/resolvers/mutation.ts +2 -0
- package/src/graphql/resolvers/response-status.ts +43 -0
- package/src/graphql/resolvers/snapshots.ts +9 -18
- package/src/graphql/resolvers/summary.ts +17 -0
- package/src/graphql/schema.ts +54 -14
- package/src/handlers/datalad.ts +4 -0
- package/src/handlers/doi.ts +32 -36
- package/src/libs/doi/__tests__/doi.spec.ts +50 -12
- package/src/libs/doi/__tests__/validate.spec.ts +110 -0
- package/src/libs/doi/index.ts +108 -71
- package/src/libs/doi/metadata.ts +101 -0
- package/src/libs/doi/validate.ts +59 -0
- package/src/libs/presign.ts +137 -0
- package/src/models/dataset.ts +2 -0
- package/src/models/doi.ts +7 -0
- package/src/queues/producer-methods.ts +9 -5
- package/src/queues/queue-schedule.ts +1 -1
- package/src/queues/queues.ts +2 -2
- package/src/routes.ts +10 -2
- package/src/types/datacite/LICENSE +37 -0
- package/src/types/datacite/README.md +3 -0
- package/src/types/datacite/datacite-v4.5.json +643 -0
- package/src/types/datacite/datacite-v4.5.ts +281 -0
- package/src/types/datacite.ts +53 -63
- package/src/utils/datacite-mapper.ts +7 -3
- package/src/utils/datacite-utils.ts +12 -15
- package/src/libs/doi/__tests__/__snapshots__/doi.spec.ts.snap +0 -17
package/src/datalad/files.ts
CHANGED
@@ -1,13 +1,28 @@
 import { redis } from "../libs/redis"
-import CacheItem, { CacheType } from "../cache/item"
 import { getDatasetWorker } from "../libs/datalad-service"
+import {
+  getPresignedUrl,
+  getPresignedUrlsBulk,
+  publicS3Url,
+} from "../libs/presign"
+import Dataset from "../models/dataset"
+import {
+  addDatasetTrees,
+  getCommitTrees,
+  getTree,
+  getTreesBulk,
+  setCommitTrees,
+  setTree,
+  type TreeEntry,
+} from "../cache/tree"
+import { join } from "node:path"
 
 /**
  * Convert to URL compatible path
  * @param {String} path
  */
 export const encodeFilePath = (path: string): string => {
-  return path.replace(
+  return path.replace(/\//g, ":")
 }
 
 /**
@@ -15,7 +30,7 @@ export const encodeFilePath = (path: string): string => {
  * @param {String} path
  */
 export const decodeFilePath = (path: string): string => {
-  return path.replace(
+  return path.replace(/:/g, "/")
 }
 
 /**
@@ -24,8 +39,8 @@ export const decodeFilePath = (path: string): string => {
  * @param {String} filename
  */
 export const getFileName = (path: string, filename: string): string => {
-
-  return
+  if (!filename) return encodeFilePath(path)
+  return encodeFilePath(path ? `${path}/${filename}` : filename)
 }
 
 /**
@@ -65,7 +80,7 @@ export const filesUrl = (datasetId: string): string =>
   `http://${getDatasetWorker(datasetId)}/datasets/${datasetId}/files`
 
 /** Minimal variant of DatasetFile type from GraphQL API */
-type DatasetFile = {
+export type DatasetFile = {
   id: string
   filename: string
   directory: boolean
@@ -76,47 +91,355 @@ type DatasetFile = {
 /**
  * Sum all file sizes for total dataset size
  */
-export const computeTotalSize = (files: [
+export const computeTotalSize = (files: DatasetFile[]): number =>
   files.reduce((size, f) => size + f.size, 0)
 
 /**
- *
- *
- * @param {string} datasetId - Dataset accession number
- * @param {string} treeish - Git treeish hexsha
+ * Parse an S3 URL from the worker into key and versionId components.
+ * URLs: https://s3.amazonaws.com/{bucket}/{key}?versionId={ver}
  */
-export
-
+export function parseS3Url(
+  url: string,
+): { bucket: string; s3Key: string; versionId: string } | null {
+  try {
+    const parsed = new URL(url)
+    const versionId = parsed.searchParams.get("versionId") || ""
+    // Path is /{bucket}/{key...} - strip the leading slash and bucket
+    const pathParts = parsed.pathname.split("/")
+    pathParts.shift() // empty string before leading /
+    const bucket = pathParts.shift() || "" // bucket name
+    const s3Key = decodeURIComponent(pathParts.join("/"))
+    return { bucket, s3Key, versionId }
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Check if a dataset requires presigned URLs
+ *
+ * TODO - extend this for granular control for DUA datasets
+ */
+async function datasetNeedsPresign(datasetId: string): Promise<boolean> {
+  const ds = await Dataset.findOne({ id: datasetId }, { public: 1 }).lean()
+  return !ds?.public
+}
+
+/** Convert a worker response file to a compact TreeEntry */
+export function workerFileToEntry(
+  file: DatasetFile,
+  needsPresign: boolean,
+): TreeEntry {
+  if (file.directory) {
+    return {
+      n: file.filename,
+      h: file.id,
+      s: 0,
+      k: "",
+      v: "",
+      b: "",
+      p: false,
+      d: true,
+    }
+  }
+  const parsed = file.urls[0] ? parseS3Url(file.urls[0]) : null
+  // Store empty string for the default bucket to save cache space
+  const defaultBucket = process.env.AWS_S3_PUBLIC_BUCKET || ""
+  const bucket = parsed?.bucket === defaultBucket ? "" : (parsed?.bucket || "")
+  return {
+    n: file.filename,
+    h: file.id,
+    s: file.size,
+    k: parsed?.s3Key || "",
+    v: parsed?.versionId || "",
+    b: bucket,
+    p: needsPresign,
+    d: false,
+  }
+}
+
+/** Convert a TreeEntry back to a DatasetFile, resolving presigned URLs if needed */
+export async function entryToDatasetFile(
+  entry: TreeEntry,
+  datasetId: string,
+): Promise<DatasetFile> {
+  if (entry.d) {
+    return {
+      id: entry.h,
+      filename: entry.n,
+      directory: true,
+      size: 0,
+      urls: [],
+    }
+  }
+  let url: string
+  if (entry.p && entry.k && entry.v) {
+    url = await getPresignedUrl(redis, entry.b, entry.k, entry.v)
+  } else if (entry.k && entry.v) {
+    url = publicS3Url(entry.b, entry.k, entry.v)
+  } else {
+    const serverUrl = process.env.CRN_SERVER_URL
+    const filename = encodeURIComponent(entry.n)
+    url =
+      `${serverUrl}/crn/datasets/${datasetId}/objects/${entry.h}?filename=${filename}`
+  }
+  return {
+    id: entry.h,
+    filename: entry.n,
+    directory: false,
+    size: entry.s,
+    urls: [url],
+  }
+}
+
+/** Convert an array of TreeEntry to DatasetFile[], resolving URLs */
+async function entriesToDatasetFiles(
+  entries: TreeEntry[],
+  datasetId: string,
+): Promise<DatasetFile[]> {
+  return Promise.all(
+    entries.map((entry) => entryToDatasetFile(entry, datasetId)),
+  )
+}
+
+/**
+ * Fetch multiple trees from the worker in a single batch POST request.
+ * Returns a map of tree hash -> DatasetFile[].
+ */
+async function fetchTreesFromWorker(
+  datasetId: string,
+  treeHashes: string[],
+): Promise<Map<string, DatasetFile[]>> {
+  const response = await fetch(
+    `http://${getDatasetWorker(datasetId)}/datasets/${datasetId}/tree`,
+    {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ trees: treeHashes }),
+      signal: AbortSignal.timeout(30000),
+    },
+  )
+  const body = await response.json()
+  const treesData: Record<string, DatasetFile[]> | undefined = body?.trees
+  const result = new Map<string, DatasetFile[]>()
+  if (treesData) {
+    for (const [hash, files] of Object.entries(treesData)) {
+      result.set(hash, files || [])
+    }
+  }
+  return result
+}
+
+/**
+ * Cache a batch of worker results, returning entries for each tree.
+ */
+async function cacheWorkerTrees(
+  datasetId: string,
+  workerResults: Map<string, DatasetFile[]>,
+  needsPresign: boolean,
+): Promise<Map<string, TreeEntry[]>> {
+  const result = new Map<string, TreeEntry[]>()
+  const permanentHashes: string[] = []
+  for (const [hash, files] of workerResults) {
+    if (files.length > 0) {
+      const entries = files.map((f) => workerFileToEntry(f, needsPresign))
+      result.set(hash, entries)
+      const allExported = files.every(
+        (f) => f.directory || f.urls[0]?.includes("s3.amazonaws.com"),
+      )
+      if (allExported) {
+        void setTree(redis, hash, entries)
+        permanentHashes.push(hash)
+      } else {
+        void setTree(redis, hash, entries, 600)
+      }
+    }
+  }
+  if (permanentHashes.length > 0) {
+    void addDatasetTrees(redis, datasetId, permanentHashes)
+  }
+  return result
+}
+
+/**
+ * Get files for a specific revision (tree hash or commit hash).
+ * Uses content-addressed caching keyed by full git hash.
+ */
+export const getFiles = async (
+  datasetId: string,
+  treeish: string,
+): Promise<DatasetFile[]> => {
+  // Try cache first
+  const cached = await getTree(redis, treeish)
+  if (cached) {
+    return entriesToDatasetFiles(cached, datasetId)
+  }
+  const needsPresign = await datasetNeedsPresign(datasetId)
+  // Cache miss: fetch from worker via batch endpoint
+  const workerResults = await fetchTreesFromWorker(datasetId, [treeish])
+  const newEntriesMap = await cacheWorkerTrees(
     datasetId,
-
-
-
-
-
-
-
-
-
-
-
-
-
+    workerResults,
+    needsPresign,
+  )
+  const entries = newEntriesMap.get(treeish)
+  if (entries && entries.length > 0) {
+    return entriesToDatasetFiles(entries, datasetId)
+  }
+  return []
+}
+
+/**
+ * Recursively get all files for a commit/tree, with commit-level caching.
+ * Returns flattened file listing with full paths.
+ */
+export async function getFilesRecursive(
+  datasetId: string,
+  tree: string,
+  path = "",
+): Promise<DatasetFile[]> {
+  const needsPresign = await datasetNeedsPresign(datasetId)
+  // Check for cached commit-to-trees mapping
+  const cachedTreeHashes = await getCommitTrees(redis, tree)
+  if (cachedTreeHashes) {
+    // Bulk-fetch all trees in one pipeline
+    const treesMap = await getTreesBulk(redis, cachedTreeHashes)
+    if (treesMap.size < cachedTreeHashes.length) {
+      // Batch-fetch all missing trees from the worker in one request
+      const missingHashes = cachedTreeHashes.filter((h) => !treesMap.has(h))
+      const workerResults = await fetchTreesFromWorker(datasetId, missingHashes)
+      const newEntriesMap = await cacheWorkerTrees(
+        datasetId,
+        workerResults,
+        needsPresign,
+      )
+      for (const [hash, entries] of newEntriesMap) {
+        treesMap.set(hash, entries)
+      }
+    }
+    return reconstructFromTrees(treesMap, tree, path, datasetId)
+  }
+
+  // Breadth-first walk: batch all uncached trees per level into one request
+  const treesMap = new Map<string, TreeEntry[]>()
+  const collectedHashes = new Set<string>()
+  let pendingHashes = [tree]
+
+  while (pendingHashes.length > 0) {
+    // Check cache for all pending hashes
+    const cached = await getTreesBulk(redis, pendingHashes)
+    const uncached = pendingHashes.filter((h) => !cached.has(h))
+
+    // Fetch all uncached trees in one worker request
+    if (uncached.length > 0) {
+      const workerResults = await fetchTreesFromWorker(datasetId, uncached)
+      const newEntriesMap = await cacheWorkerTrees(
+        datasetId,
+        workerResults,
+        needsPresign,
      )
-const
-
-
-
-
-
-
-
+      for (const [hash, entries] of newEntriesMap) {
+        cached.set(hash, entries)
+      }
+    }
+
+    // Merge into treesMap and collect next level of directory hashes
+    const nextLevel: string[] = []
+    for (const hash of pendingHashes) {
+      collectedHashes.add(hash)
+      const entries = cached.get(hash)
+      if (entries) {
+        treesMap.set(hash, entries)
+        for (const entry of entries) {
+          if (entry.d && !collectedHashes.has(entry.h)) {
+            nextLevel.push(entry.h)
          }
        }
-
+      }
+    }
+    pendingHashes = nextLevel
+  }
+
+  // Cache the commit-to-trees mapping for next time
+  if (collectedHashes.size > 0) {
+    const hashArray = [...collectedHashes]
+    void setCommitTrees(redis, tree, hashArray)
+    void addDatasetTrees(redis, datasetId, hashArray)
+  }
+
+  return reconstructFromTrees(treesMap, tree, path, datasetId)
+}
+
+/**
+ * Reconstruct a full file listing from a map of cached trees.
+ * Walks the tree structure using directory entries' child hashes.
+ */
+async function reconstructFromTrees(
+  treesMap: Map<string, TreeEntry[]>,
+  rootTree: string,
+  path: string,
+  datasetId: string,
+): Promise<DatasetFile[]> {
+  const stack: { hash: string; path: string }[] = [{ hash: rootTree, path }]
+  const fileEntries: { entry: TreeEntry; absPath: string }[] = []
+
+  // Phase 1: walk tree structure (sync), collect file entries
+  while (stack.length > 0) {
+    const { hash, path: currentPath } = stack.pop()!
+    const entries = treesMap.get(hash)
+    if (!entries) continue
+    for (const entry of entries) {
+      const absPath = currentPath ? join(currentPath, entry.n) : entry.n
+      if (entry.d) {
+        stack.push({ hash: entry.h, path: absPath })
      } else {
-
-        return []
+        fileEntries.push({ entry, absPath })
      }
-    }
-
+    }
+  }
+
+  // Phase 2: build results, collecting presign-needed indices
+  const presignIndices: number[] = []
+  const serverUrl = process.env.CRN_SERVER_URL
+
+  const results: DatasetFile[] = fileEntries.map(({ entry, absPath }, i) => {
+    const file: DatasetFile = {
+      id: entry.h,
+      filename: absPath,
+      directory: false,
+      size: entry.s,
+      urls: [],
+    }
+    if (entry.p && entry.k && entry.v) {
+      // To be presigned
+      presignIndices.push(i)
+    } else if (entry.k && entry.v) {
+      // Known public S3 URL
+      file.urls = [publicS3Url(entry.b, entry.k, entry.v)]
+    } else {
+      // Fallback URL using object API
+      const filename = encodeURIComponent(entry.n)
+      file.urls = [
+        `${serverUrl}/crn/datasets/${datasetId}/objects/${entry.h}?filename=${filename}`,
+      ]
+    }
+    return file
+  })
+
+  // Bulk-resolve presigned URLs in minimal Redis requests
+  if (presignIndices.length > 0) {
+    const urls = await getPresignedUrlsBulk(
+      redis,
+      presignIndices.map((i) => ({
+        bucket: fileEntries[i].entry.b,
+        s3Key: fileEntries[i].entry.k,
+        versionId: fileEntries[i].entry.v,
+      })),
+    )
+    for (let j = 0; j < presignIndices.length; j++) {
+      results[presignIndices[j]].urls = [urls[j]]
+    }
+  }
+
+  return results
 }
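Note on the new helpers above: worker file records are compressed into the TreeEntry shape (n/h/s/k/v/b/p/d) before being cached in Redis, and parseS3Url extracts the bucket, key, and versionId that the presign layer needs. A minimal sketch of how the two exported helpers compose, using made-up sample values (the import path and data below are illustrative, not part of the diff):

// Example (not part of the diff): exercising parseS3Url and workerFileToEntry
import { parseS3Url, workerFileToEntry, type DatasetFile } from "./files"

const file: DatasetFile = {
  id: "a1b2c3d4", // git object hash (sample value)
  filename: "dataset_description.json",
  directory: false,
  size: 1024,
  urls: [
    "https://s3.amazonaws.com/example-bucket/ds000001/dataset_description.json?versionId=abc123",
  ],
}

// Split the worker URL into its S3 components
console.log(parseS3Url(file.urls[0]))
// -> { bucket: "example-bucket", s3Key: "ds000001/dataset_description.json", versionId: "abc123" }

// Compress into the cached TreeEntry; the flag marks whether a presigned URL is needed later
// (bucket is stored as "" only when it matches AWS_S3_PUBLIC_BUCKET)
console.log(workerFileToEntry(file, false))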
package/src/datalad/snapshots.ts
CHANGED
@@ -10,8 +10,9 @@ import {
   snapshotCreationComparison,
   updateDatasetName,
 } from "../graphql/resolvers/dataset"
-import {
-import
+import { createDraftDoi } from "../libs/doi/index"
+import { assembleMetadata } from "../libs/doi/metadata"
+import Doi from "../models/doi"
 import { getFiles } from "./files"
 import { generateDataladCookie } from "../libs/authentication/jwt"
 import notifications from "../libs/notifications"
@@ -20,7 +21,6 @@ import Snapshot from "../models/snapshot"
 import type { SnapshotDocument } from "../models/snapshot"
 import { updateDatasetRevision } from "./draft"
 import { getDatasetWorker } from "../libs/datalad-service"
-import { join } from "path"
 import { createEvent, updateEvent } from "../libs/events"
 import { queueIndexDataset } from "../queues/producer-methods"
 
@@ -51,25 +51,32 @@ const createIfNotExistsDoi = async (
   tag,
   descriptionFieldUpdates,
 ) => {
-  if (config.doi.username
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  if (!config.doi.username || !config.doi.password) return
+
+  // Skip if DOI already exists for this snapshot
+  const existing = await Doi.findOne({ datasetId, snapshotId: tag })
+  if (existing) {
+    descriptionFieldUpdates["DatasetDOI"] = `doi:${existing.doi}`
+    return
+  }
+
+  try {
+    const attributes = await assembleMetadata(datasetId, tag, "HEAD")
+    const doi = await createDraftDoi(attributes)
+
+    // Persist to MongoDB
+    await Doi.updateOne(
+      { datasetId, snapshotId: tag },
+      { $set: { doi, state: "draft" } },
+      { upsert: true },
+    )
+
+    descriptionFieldUpdates["DatasetDOI"] = `doi:${doi}`
+  } catch (err) {
+    Sentry.captureException(err)
+    // eslint-disable-next-line no-console
+    console.error(err)
+    throw new Error(`DOI minting failed: ${err.message}`)
  }
 }
 
@@ -292,35 +299,3 @@ export const getPublicSnapshots = () => {
   ]).exec()
   })
 }
-
-/**
- * For snapshots, precache all trees for downloads
- */
-export const downloadFiles = (datasetId, tag) => {
-  const downloadCache = new CacheItem(redis, CacheType.snapshotDownload, [
-    datasetId,
-    tag,
-  ], 432000)
-  // Return an existing cache object if we have one
-  return downloadCache.get(async () => {
-    // If not, fetch all trees sequentially and cache the result (hopefully some or all trees are cached)
-    const files = await getFilesRecursive(datasetId, tag, "")
-    files.sort()
-    return files
-  })
-}
-
-export async function getFilesRecursive(datasetId, tree, path = "") {
-  const files = []
-  // Fetch files
-  const fileTree = await getFiles(datasetId, tree)
-  for (const file of fileTree) {
-    const absPath = join(path, file.filename)
-    if (file.directory) {
-      files.push(...(await getFilesRecursive(datasetId, file.id, absPath)))
-    } else {
-      files.push({ ...file, filename: absPath })
-    }
-  }
-  return files
-}
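The snapshot flow now records each draft DOI in MongoDB before writing DatasetDOI into the dataset description. The new package/src/models/doi.ts (+7 -0) is not included in this excerpt; a plausible minimal shape, inferred only from the Doi.findOne/updateOne calls above and assuming the mongoose models used elsewhere in this package, might look like:

// Hypothetical sketch only - the real schema lives in package/src/models/doi.ts (not shown here)
import mongoose from "mongoose"

const doiSchema = new mongoose.Schema({
  datasetId: { type: String, required: true },   // dataset accession number
  snapshotId: { type: String, required: true },  // snapshot tag
  doi: String,                                   // DOI returned by createDraftDoi
  state: { type: String, default: "draft" },     // draft until the snapshot is published
})

export default mongoose.model("Doi", doiSchema)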
package/src/graphql/resolvers/__tests__/response-status.spec.ts
ADDED
@@ -0,0 +1,42 @@
+import { describe, expect, it } from "vitest"
+import { toDbStatus, toGraphqlStatus } from "../response-status"
+
+describe("toGraphqlStatus", () => {
+  it("maps lowercase DB values to uppercase SDL enum values", () => {
+    expect(toGraphqlStatus("pending")).toBe("PENDING")
+    expect(toGraphqlStatus("accepted")).toBe("ACCEPTED")
+    expect(toGraphqlStatus("denied")).toBe("DENIED")
+  })
+
+  it("passes null through unchanged", () => {
+    expect(toGraphqlStatus(null)).toBeNull()
+  })
+
+  it("passes undefined through as null", () => {
+    expect(toGraphqlStatus(undefined)).toBeNull()
+  })
+
+  it("throws on an unrecognized DB value", () => {
+    expect(() => toGraphqlStatus("bogus" as never)).toThrow(
+      /unrecognized/i,
+    )
+  })
+})
+
+describe("toDbStatus", () => {
+  it("maps uppercase SDL enum values to lowercase DB values", () => {
+    expect(toDbStatus("PENDING")).toBe("pending")
+    expect(toDbStatus("ACCEPTED")).toBe("accepted")
+    expect(toDbStatus("DENIED")).toBe("denied")
+  })
+
+  it("throws on an unrecognized SDL value", () => {
+    expect(() => toDbStatus("bogus" as never)).toThrow(
+      /unrecognized/i,
+    )
+  })
+
+  it("throws on null input", () => {
+    expect(() => toDbStatus(null as never)).toThrow(/unrecognized/i)
+  })
+})
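The spec above pins down the contract for the new response-status helpers; the implementation itself (package/src/graphql/resolvers/response-status.ts, +43 -0) is not part of this excerpt. A sketch consistent with the tested behavior, with hypothetical type names DbStatus and GraphqlStatus, might look like:

// Hypothetical sketch matching the behavior exercised by the spec above
type DbStatus = "pending" | "accepted" | "denied"
type GraphqlStatus = "PENDING" | "ACCEPTED" | "DENIED"

export function toGraphqlStatus(
  status: DbStatus | null | undefined,
): GraphqlStatus | null {
  // null/undefined pass through as null
  if (status === null || status === undefined) return null
  const mapped = status.toUpperCase()
  if (mapped === "PENDING" || mapped === "ACCEPTED" || mapped === "DENIED") {
    return mapped
  }
  throw new Error(`Unrecognized status value: ${status}`)
}

export function toDbStatus(status: GraphqlStatus): DbStatus {
  // null (or any non-enum value) falls through to the throw below
  const mapped = String(status).toLowerCase()
  if (mapped === "pending" || mapped === "accepted" || mapped === "denied") {
    return mapped
  }
  throw new Error(`Unrecognized status value: ${status}`)
}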