@openneuro/server 4.47.7 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/package.json +10 -7
  2. package/src/app.ts +1 -1
  3. package/src/cache/__tests__/tree.spec.ts +212 -0
  4. package/src/cache/tree.ts +148 -0
  5. package/src/datalad/__tests__/dataRetentionNotifications.spec.ts +11 -0
  6. package/src/datalad/__tests__/files.spec.ts +249 -0
  7. package/src/datalad/dataRetentionNotifications.ts +5 -0
  8. package/src/datalad/dataset.ts +29 -1
  9. package/src/datalad/files.ts +362 -39
  10. package/src/datalad/snapshots.ts +29 -54
  11. package/src/graphql/resolvers/__tests__/response-status.spec.ts +42 -0
  12. package/src/graphql/resolvers/build-search-query.ts +391 -0
  13. package/src/graphql/resolvers/cache.ts +5 -1
  14. package/src/graphql/resolvers/dataset-search.ts +40 -23
  15. package/src/graphql/resolvers/datasetEvents.ts +48 -78
  16. package/src/graphql/resolvers/draft.ts +5 -2
  17. package/src/graphql/resolvers/holdDeletion.ts +21 -0
  18. package/src/graphql/resolvers/index.ts +6 -0
  19. package/src/graphql/resolvers/mutation.ts +2 -0
  20. package/src/graphql/resolvers/response-status.ts +43 -0
  21. package/src/graphql/resolvers/snapshots.ts +9 -18
  22. package/src/graphql/resolvers/summary.ts +17 -0
  23. package/src/graphql/schema.ts +54 -14
  24. package/src/handlers/datalad.ts +4 -0
  25. package/src/handlers/doi.ts +32 -36
  26. package/src/libs/doi/__tests__/doi.spec.ts +50 -12
  27. package/src/libs/doi/__tests__/validate.spec.ts +110 -0
  28. package/src/libs/doi/index.ts +108 -71
  29. package/src/libs/doi/metadata.ts +101 -0
  30. package/src/libs/doi/validate.ts +59 -0
  31. package/src/libs/presign.ts +137 -0
  32. package/src/models/dataset.ts +2 -0
  33. package/src/models/doi.ts +7 -0
  34. package/src/queues/producer-methods.ts +9 -5
  35. package/src/queues/queue-schedule.ts +1 -1
  36. package/src/queues/queues.ts +2 -2
  37. package/src/routes.ts +10 -2
  38. package/src/types/datacite/LICENSE +37 -0
  39. package/src/types/datacite/README.md +3 -0
  40. package/src/types/datacite/datacite-v4.5.json +643 -0
  41. package/src/types/datacite/datacite-v4.5.ts +281 -0
  42. package/src/types/datacite.ts +53 -63
  43. package/src/utils/datacite-mapper.ts +7 -3
  44. package/src/utils/datacite-utils.ts +12 -15
  45. package/src/libs/doi/__tests__/__snapshots__/doi.spec.ts.snap +0 -17
@@ -0,0 +1,391 @@
1
+ /**
2
+ * Translate DatasetSearchInput into ElasticSearch Query DSL
3
+ *
4
+ * Port of query building logic from the React side
5
+ */
6
+
7
/**
 * Structured search criteria consumed by the query builder in this module.
 * Every field is optional; a field that is unset adds nothing to the query.
 */
interface DatasetSearchInput {
  /** Free-text terms searched across id, readme, name, authors and contributors. */
  keywords?: Array<string>
  /** Modality name, or one of the secondary modality labels. */
  modality?: string
  /** Two-element [min, max] age bounds; either side may be null. */
  ageRange?: Array<number | null>
  /** Two-element [min, max] bounds on the number of subjects; either side may be null. */
  subjectCountRange?: Array<number | null>
  /** Matched against `metadata.dxStatus`. */
  diagnosis?: string
  /** Terms searched in `latestSnapshot.summary.tasks`. */
  tasks?: Array<string>
  /** Names matched against `latestSnapshot.contributors.name`. */
  authors?: Array<string>
  /** "All", "Male" or "Female". */
  sex?: string
  /** "All Time", "Last 30 days", "Last 180 days"; other values mean 365 days. */
  dateRange?: string
  /** A species name, or "Other" for anything outside the known species. */
  species?: string
  /** Matched against `metadata.studyLongitudinal`. */
  studyStructure?: string
  /** Matched against `metadata.studyDomain`. */
  studyDomains?: Array<string>
  /** Matched against `latestSnapshot.description.DatasetType`. */
  bidsDatasetType?: string
  /** When true, restricts results to datasets flagged `brainInitiative`. */
  brainInitiative?: boolean
  /** PET only: terms searched in `latestSnapshot.summary.pet.BodyPart`. */
  bodyParts?: Array<string>
  /** PET only: terms searched in `latestSnapshot.summary.pet.ScannerManufacturer`. */
  scannerManufacturers?: Array<string>
  /** PET only: terms searched in `latestSnapshot.summary.pet.ScannerManufacturersModelName`. */
  scannerManufacturersModelNames?: Array<string>
  /** PET only: terms searched in `latestSnapshot.summary.pet.TracerName`. */
  tracerNames?: Array<string>
  /** PET only: terms searched in `latestSnapshot.summary.pet.TracerRadionuclide`. */
  tracerRadionuclides?: Array<string>
  /** Restricts results to datasets with a permission entry for this user id. */
  userId?: string
  /** When true, restricts results to public datasets. */
  publicOnly?: boolean
}
30
+
31
/**
 * Species names that have their own search option. A search for species
 * "Other" excludes datasets matching any of these.
 */
const KNOWN_SPECIES = [
  "Human",
  "Rat",
  "Mouse",
]
32
+
33
/**
 * Secondary modality labels accepted in the `modality` search field, mapped
 * to the indexed secondary modality value and the primary modality each one
 * belongs to.
 */
const SECONDARY_MODALITIES: Record<string, { secondary: string; primary: string }> = {
  // MRI sub-modalities
  Diffusion: { secondary: "mri_diffusion", primary: "mri" },
  Structural: { secondary: "mri_structural", primary: "mri" },
  Functional: { secondary: "mri_functional", primary: "mri" },
  Perfusion: { secondary: "mri_perfusion", primary: "mri" },
  // PET sub-modalities
  Static: { secondary: "pet_static", primary: "pet" },
  Dynamic: { secondary: "pet_dynamic", primary: "pet" },
}
44
+
45
/**
 * Minimal shape of an ElasticSearch bool query: each occurrence type holds
 * an optional list of clause objects.
 */
interface BoolQuery {
  bool: Partial<Record<"must" | "filter" | "must_not" | "should", object[]>>
}
53
+
54
/**
 * Append a clause to one occurrence list of a bool query, creating the list
 * when it does not exist yet.
 *
 * @param query Bool query to extend; its `bool` object is updated in place.
 * @param type Occurrence list that receives the clause.
 * @param clause Query DSL clause to append.
 */
const addClause = (
  query: BoolQuery,
  type: "must" | "filter" | "must_not" | "should",
  clause: object,
) => {
  const existing = query.bool[type]
  // A fresh array is assigned every time; a previously stored list is never
  // mutated.
  query.bool[type] = existing ? [...existing, clause] : [clause]
}
65
+
66
/**
 * Build a `simple_query_string` clause.
 *
 * @param queryString Query text in simple query string syntax.
 * @param fields Fields to search; left undefined when omitted.
 * @param fuzzy When true (the default) a single "~" is appended to the end
 *   of the whole query text.
 * @returns The clause object.
 */
const simpleQueryString = (
  queryString: string,
  fields?: string[],
  fuzzy = true,
) => {
  const fuzzySuffix = fuzzy ? "~" : ""
  return {
    simple_query_string: {
      query: `${queryString}${fuzzySuffix}`,
      fields,
    },
  }
}
76
+
77
/**
 * Build a `match` clause for a single field.
 *
 * @param field Field to match against.
 * @param queryString Text to match.
 * @param fuzziness Optional fuzziness setting, passed through unchanged.
 * @param operator Optional boolean operator, passed through unchanged.
 * @returns The clause object; omitted options are present as undefined.
 */
const matchQuery = (
  field: string,
  queryString: string,
  fuzziness?: string,
  operator?: string,
) => {
  const options = { query: queryString, fuzziness, operator }
  return { match: { [field]: options } }
}
91
+
92
/**
 * Build a bool clause that offers one `match` alternative per query string
 * on the same field, with `minimum_should_match` set to 1.
 *
 * @param field Field every alternative is matched against.
 * @param queryStrings Alternative texts.
 * @returns The bool clause object.
 */
const multiMatchQuery = (field: string, queryStrings: string[]) => {
  const alternatives = queryStrings.map((candidate) =>
    matchQuery(field, candidate)
  )
  return {
    bool: {
      should: alternatives,
      minimum_should_match: 1,
    },
  }
}
98
+
99
/**
 * Build a `range` clause for a single field.
 *
 * @param field Field the range applies to.
 * @param gte Lower bound, passed through unchanged (may be null or undefined).
 * @param lte Upper bound, passed through unchanged (may be null or undefined).
 * @param relation Range relation, "INTERSECTS" by default.
 * @returns The clause object.
 */
const rangeQuery = (
  field: string,
  gte?: number | string | null,
  lte?: number | string | null,
  relation: string = "INTERSECTS",
) => {
  const bounds = { gte, lte, relation }
  return { range: { [field]: bounds } }
}
113
+
114
/**
 * Build a painless `script` clause that matches documents whose `field`
 * holds between `gte` and `lte` values, both inclusive. Documents with no
 * value in `field` never match.
 *
 * @param field Field whose number of values is tested.
 * @param gte Minimum number of values.
 * @param lte Maximum number of values.
 * @returns The clause object.
 */
const rangeListLengthQuery = (field: string, gte: number, lte: number) => {
  // Script text assembled line by line; it starts with a line break.
  const source = [
    "",
    "if (doc[params.field].size() != 0) {",
    "return ( doc[params.field].size() >= params.gte && doc[params.field].size() <= params.lte )",
    "} else return false",
  ].join("\n")
  return {
    script: {
      script: {
        lang: "painless",
        source,
        params: { field, gte, lte },
      },
    },
  }
}
126
+
127
/**
 * Join terms with " + " (the AND operator of simple query string syntax).
 *
 * @param list Terms to combine.
 * @returns The combined query text; an empty list gives an empty string.
 */
const sqsJoinWithAND = (list: string[]) => {
  const terms = list.map((term) => `${term}`)
  return terms.join(" + ")
}
129
/**
 * Join terms with " | ".
 *
 * @param list Terms to combine.
 * @returns The combined text; an empty list gives an empty string.
 */
const joinWithOR = (list: string[]) => {
  const terms = list.map((term) => `${term}`)
  return terms.join(" | ")
}
130
+
131
/**
 * Tell whether a range input should produce a filter: it must be an array
 * of exactly two entries with at least one entry that is not null.
 *
 * @param range Candidate [min, max] pair.
 * @returns True when the range carries at least one bound.
 */
const isActiveRange = (range: (number | null)[] | undefined): boolean => {
  if (!Array.isArray(range) || range.length !== 2) {
    return false
  }
  const [lower, upper] = range
  return lower !== null || upper !== null
}
135
+
136
/**
 * Build an ElasticSearch bool query from structured search input.
 *
 * Every populated field of `input` adds one clause to a single top-level
 * bool query: text searches go into `must`, exact criteria into `filter`,
 * the "Other" species exclusion into `must_not` and the "Human" species
 * clauses into `should`.
 *
 * @param input Structured search criteria; unset or empty fields add nothing.
 * @returns `query`, the ES Query DSL object, and `isEmpty`, which is true
 *   when no clause was added at all.
 */
export const buildElasticQuery = (
  input: DatasetSearchInput,
): { query: BoolQuery; isEmpty: boolean } => {
  const query: BoolQuery = { bool: {} }

  // Require every term of `terms` in `field`; no-op for a missing/empty list.
  const mustMatchAll = (terms: string[] | undefined, field: string) => {
    if (terms?.length) {
      addClause(
        query,
        "must",
        simpleQueryString(sqsJoinWithAND(terms), [field]),
      )
    }
  }

  if (input.keywords?.length) {
    addClause(
      query,
      "must",
      simpleQueryString(sqsJoinWithAND(input.keywords), [
        "id^20",
        "latestSnapshot.readme",
        "latestSnapshot.description.Name^6",
        "latestSnapshot.description.Authors^3",
        "latestSnapshot.contributors.name^2",
      ]),
    )
  }

  if (input.modality) {
    // Own-property lookup: a modality such as "constructor" must not resolve
    // to an inherited Object member and be treated as a secondary modality.
    const secondaryModality =
      Object.prototype.hasOwnProperty.call(SECONDARY_MODALITIES, input.modality)
        ? SECONDARY_MODALITIES[input.modality]
        : undefined
    addClause(
      query,
      "filter",
      secondaryModality
        ? matchQuery(
          "latestSnapshot.summary.secondaryModalities",
          secondaryModality.secondary,
        )
        : matchQuery("latestSnapshot.summary.modalities", input.modality),
    )
  }

  const { ageRange, subjectCountRange } = input

  if (ageRange && isActiveRange(ageRange)) {
    addClause(
      query,
      "filter",
      rangeQuery(
        "latestSnapshot.summary.subjectMetadata.age",
        ageRange[0],
        ageRange[1],
      ),
    )
  }

  if (subjectCountRange && isActiveRange(subjectCountRange)) {
    addClause(
      query,
      "filter",
      rangeListLengthQuery(
        "latestSnapshot.summary.subjects",
        // `??` instead of `||`: only a missing bound gets the default, an
        // explicit 0 is kept as given.
        subjectCountRange[0] ?? 0,
        subjectCountRange[1] ?? 1000000,
      ),
    )
  }

  if (input.diagnosis) {
    addClause(
      query,
      "filter",
      matchQuery("metadata.dxStatus", input.diagnosis),
    )
  }

  if (input.bidsDatasetType) {
    addClause(
      query,
      "filter",
      matchQuery(
        "latestSnapshot.description.DatasetType",
        input.bidsDatasetType,
      ),
    )
  }

  if (input.brainInitiative) {
    addClause(
      query,
      "filter",
      matchQuery("brainInitiative", String(input.brainInitiative)),
    )
  }

  mustMatchAll(input.tasks, "latestSnapshot.summary.tasks")

  if (input.authors?.length) {
    const authorQuery = matchQuery(
      "latestSnapshot.contributors.name",
      joinWithOR(input.authors),
      "2",
    )
    addClause(query, "must", {
      bool: {
        should: [authorQuery],
      },
    })
  }

  if (input.sex && input.sex !== "All") {
    let sexTerms: string[] = []
    if (input.sex === "Male") {
      sexTerms = ["male", "m", "M", "MALE", "Male"]
    } else if (input.sex === "Female") {
      sexTerms = ["female", "f", "F", "FEMALE", "Female"]
    }
    // An unrecognised value has no terms; skip it rather than emit a bool
    // clause with an empty `should` list (which would also make the query
    // count as non-empty).
    if (sexTerms.length) {
      addClause(
        query,
        "filter",
        multiMatchQuery(
          "latestSnapshot.summary.subjectMetadata.sex",
          sexTerms,
        ),
      )
    }
  }

  if (input.dateRange && input.dateRange !== "All Time") {
    // Any value other than the two named windows falls back to one year.
    let days: number
    if (input.dateRange === "Last 30 days") {
      days = 30
    } else if (input.dateRange === "Last 180 days") {
      days = 180
    } else {
      days = 365
    }
    addClause(
      query,
      "filter",
      rangeQuery("created", `now-${days}d/d`, "now/d"),
    )
  }

  if (input.species) {
    if (input.species === "Other") {
      // "Other" means: anything that is not one of the known species.
      addClause(
        query,
        "must_not",
        matchQuery("metadata.species", KNOWN_SPECIES.join(" "), "AUTO", "OR"),
      )
    } else if (input.species === "Human") {
      // NOTE(review): these are `should` clauses, so whether they restrict
      // the results depends on minimum_should_match and on which other
      // clauses are present - confirm this is the intended behaviour.
      addClause(
        query,
        "should",
        matchQuery("metadata.species", "Human", "AUTO"),
      )
      addClause(query, "should", { term: { _content: "" } })
    } else {
      addClause(
        query,
        "filter",
        matchQuery("metadata.species", input.species, "AUTO"),
      )
    }
  }

  if (input.studyStructure) {
    addClause(
      query,
      "filter",
      matchQuery("metadata.studyLongitudinal", input.studyStructure, "AUTO"),
    )
  }

  if (input.studyDomains?.length) {
    addClause(
      query,
      "must",
      matchQuery("metadata.studyDomain", joinWithOR(input.studyDomains)),
    )
  }

  // PET-specific fields (only apply when modality is pet or unset). An empty
  // string counts as unset here, exactly as it does for the modality filter
  // above.
  if (input.modality === "pet" || !input.modality) {
    mustMatchAll(input.bodyParts, "latestSnapshot.summary.pet.BodyPart")
    mustMatchAll(
      input.scannerManufacturers,
      "latestSnapshot.summary.pet.ScannerManufacturer",
    )
    mustMatchAll(
      input.scannerManufacturersModelNames,
      "latestSnapshot.summary.pet.ScannerManufacturersModelName",
    )
    mustMatchAll(input.tracerNames, "latestSnapshot.summary.pet.TracerName")
    mustMatchAll(
      input.tracerRadionuclides,
      "latestSnapshot.summary.pet.TracerRadionuclide",
    )
  }

  if (input.userId) {
    addClause(query, "filter", {
      terms: {
        "permissions.userPermissions.user.id": [input.userId],
      },
    })
  }

  if (input.publicOnly) {
    addClause(query, "filter", {
      term: { public: { value: true } },
    })
  }

  // No occurrence list was created, so nothing constrains the search.
  const isEmpty = Object.keys(query.bool).length === 0

  return { query, isEmpty }
}
@@ -1,4 +1,5 @@
1
1
  import { redis } from "../../libs/redis.js"
2
+ import { clearDatasetTrees } from "../../cache/tree"
2
3
 
3
4
  /**
4
5
  * Clear all cache entries for a given datasetId
@@ -11,8 +12,11 @@ export async function cacheClear(
11
12
  // Check for admin and validate datasetId argument
12
13
  if (userInfo?.admin && datasetId.length == 8 && datasetId.startsWith("ds")) {
13
14
  try {
15
+ // Clear tree cache entries via the dataset-to-trees index
16
+ await clearDatasetTrees(redis, datasetId)
17
+
18
+ // Also clear non-tree cache keys (descriptions, snapshots, etc.)
14
19
  const stream = redis.scanStream({
15
- // Scan for any keys that include the datasetId
16
20
  match: `*${datasetId}*`,
17
21
  })
18
22
  const pipeline = redis.pipeline()
@@ -5,6 +5,7 @@ import Star from "../../models/stars"
5
5
  import Subscription from "../../models/subscription"
6
6
  import Permission from "../../models/permission"
7
7
  import { hashObject } from "../../libs/authentication/crypto"
8
+ import { buildElasticQuery } from "./build-search-query"
8
9
 
9
10
  const elasticIndex = "datasets"
10
11
 
@@ -195,43 +196,56 @@ const parseQuery = async (query, datasetType, datasetStatus, userId) => {
195
196
  }
196
197
 
197
198
  /**
198
- * Search result cursor resolver
199
- * TODO this is a Relay pagination type and could use the interface
200
- * @param {any} obj
201
- * @param {object} args
202
- * @param {object} args.query Stringified Query (DSL) argument for ElasticSearch
203
- * @param {boolean} args.allDatasets Admin option for returning all datasets (overrides datasetType and datasetStatus, but keeps other search parameters) (default = false)
204
- * @param {string} args.datasetType Stringified Query (DSL) argument for ElasticSearch
205
- * @param {string} args.datasetStatus Stringified Query (DSL) argument for ElasticSearch
206
- * @param {object} args.sortBy Stringified Query (DSL) argument for ElasticSearch
207
- * @param {string} args.after Cursor for paging forward
208
- * @param {number} args.first Limit of entries to find
199
+ * Resolve the sort parameter from SearchSortOption enum to ES sort objects.
200
+ */
201
/**
 * Translate a sort option name into the ES sort object placed ahead of the
 * default sort keys.
 *
 * @param sortBy Sort option name; "relevance", undefined and unknown names
 *   all take the fallback path.
 * @param isEmpty Whether the search has no filters at all.
 * @returns A single-key sort object, or null when no extra sort applies.
 */
const resolveSort = (sortBy: string | undefined, isEmpty: boolean) => {
  switch (sortBy) {
    case "newest":
      return { created: "desc" }
    case "oldest":
      return { created: "asc" }
    case "activity":
      return { "analytics.downloads": "desc" }
    case "name_asc":
      return { "metadata.datasetName": "asc" }
    case "name_desc":
      return { "metadata.datasetName": "desc" }
    case "last_updated":
      return { "metadata.latestSnapshotCreatedAt": "desc" }
    default:
      // "relevance" or undefined: if no filters are set, sort by newest
      return isEmpty ? { created: "desc" } : null
  }
}
214
+
215
+ /**
216
+ * Search result cursor resolver using typed DatasetSearchInput
209
217
  */
210
218
  export const advancedDatasetSearchConnection = async (
211
219
  obj,
212
220
  {
213
- query,
221
+ query: searchInput,
214
222
  allDatasets = false,
215
223
  datasetType,
216
224
  datasetStatus,
217
- sortBy,
218
225
  after,
219
226
  first = 25,
220
227
  },
221
228
  { user, userInfo },
222
229
  ) => {
230
+ // Build the ES query from structured input
231
+ const { query: esQuery, isEmpty } = buildElasticQuery(searchInput)
232
+ const sortByOption = searchInput.sortBy
233
+
223
234
  // Create an identity for this search (used to cache connections)
224
235
  const searchId = hashObject({
225
- query,
236
+ searchInput,
226
237
  datasetType,
227
238
  datasetStatus,
228
- sortBy,
239
+ sortByOption,
229
240
  user,
230
241
  })
231
- const sort = [{ _score: "desc" }, { id: "desc" }]
232
- if (sortBy) {
233
- sort.unshift(sortBy)
242
+
243
+ const sort: Record<string, string>[] = [{ _score: "desc" }, { id: "desc" }]
244
+ const extraSort = resolveSort(sortByOption, isEmpty)
245
+ if (extraSort) {
246
+ sort.unshift(extraSort)
234
247
  }
248
+
235
249
  // Parse out the decode token and add it to our query if successful
236
250
  let search_after
237
251
  if (after) {
@@ -241,13 +255,17 @@ export const advancedDatasetSearchConnection = async (
241
255
  // Don't include search_after if parsing fails
242
256
  }
243
257
  }
258
+
259
+ // Apply dataset type/status/permission filters
260
+ const query = allDatasets
261
+ ? esQuery
262
+ : await parseQuery(esQuery, datasetType, datasetStatus, user)
263
+
244
264
  const requestBody = {
245
265
  index: elasticIndex,
246
266
  size: first,
247
267
  sort,
248
- query: allDatasets
249
- ? query
250
- : await parseQuery(query, datasetType, datasetStatus, user),
268
+ query,
251
269
  search_after,
252
270
  }
253
271
  // Run the query
@@ -268,11 +286,10 @@ export const advancedDatasetSearch = {
268
286
  type: "DatasetConnection",
269
287
  resolve: advancedDatasetSearchConnection,
270
288
  args: {
271
- query: { type: "JSON!" },
289
+ query: { type: "DatasetSearchInput!" },
272
290
  allDatasets: { type: "Boolean" },
273
291
  datasetType: { type: "String" },
274
292
  datasetStatus: { type: "String" },
275
- sortBy: { type: "JSON" },
276
293
  after: { type: "String" },
277
294
  first: { type: "Int" },
278
295
  },