@openneuro/server 4.17.1 → 4.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Dockerfile CHANGED
@@ -4,7 +4,7 @@ FROM openneuro/node AS build
 WORKDIR /srv/packages/openneuro-server
 RUN yarn build
 
-FROM node:18.12.0-alpine
+FROM node:18.15.0-alpine
 
 WORKDIR /srv
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@openneuro/server",
-  "version": "4.17.1",
+  "version": "4.18.0",
   "description": "Core service for the OpenNeuro platform.",
   "license": "MIT",
   "main": "src/server.js",
@@ -17,7 +17,7 @@
   "dependencies": {
     "@apollo/client": "3.7.2",
     "@elastic/elasticsearch": "7.15.0",
-    "@openneuro/search": "^4.17.1",
+    "@openneuro/search": "^4.18.0",
     "@passport-next/passport-google-oauth2": "^1.0.0",
     "@sentry/node": "^4.5.3",
     "apollo-server": "2.25.4",
@@ -92,5 +92,5 @@
   "publishConfig": {
     "access": "public"
   },
-  "gitHead": "c93f1fd308cddfbaef9990be47e77497a277b35e"
+  "gitHead": "0135730a3d7ff196b43abb69a6c89e6699c6a19e"
 }
package/src/cache/item.ts CHANGED
@@ -52,26 +52,34 @@ class CacheItem {
     const deserialized: T = JSON.parse(decompressed.toString())
     return deserialized
   }
-  public async get<T>(miss: () => Promise<T>): Promise<T> {
+  public async get<T>(
+    miss: (setDoNotCache?: (doNotCache) => void) => Promise<T>,
+  ): Promise<T> {
     try {
       const data = await this.redis.getBuffer(this.key)
       if (data) {
         return this.deserialize(data)
       } else {
+        let skipCaching = false
+        const doNotCache = (doNotCache: boolean): void => {
+          skipCaching = doNotCache
+        }
         // Call the cache miss function if we didn't get anything
-        const data = await miss()
+        const data = await miss(doNotCache)
         const serialized = await this.serialize(data)
-        // Allow for the simple case of aging out keys
-        if (this.expiration > 0) {
-          void this.redis.setex(this.key, this.expiration, serialized)
-        } else {
-          void this.redis.set(this.key, serialized)
+        // Allow the cache miss function to return a value that isn't cached
+        if (!skipCaching) {
+          // Allow for the simple case of aging out keys
+          if (this.expiration > 0) {
+            void this.redis.setex(this.key, this.expiration, serialized)
+          } else {
+            void this.redis.set(this.key, serialized)
+          }
         }
        return data
       }
     } catch {
       // Keep going as though we had a cache miss if there is a problem but don't cache it
-      // TODO: Sentry reporting doesn't work here but should be fixed
      return miss()
     }
   }
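The new optional callback lets a cache-miss function opt out of storing its result in Redis. A minimal sketch of a caller (inside an async context), assuming an existing Redis connection; fetchStatus is a hypothetical helper used only for illustration:

// Hypothetical caller of the new CacheItem.get() signature
const cache = new CacheItem(redis, CacheType.snapshot, ['ds000001', '1.0.0'])
const value = await cache.get(async doNotCache => {
  const result = await fetchStatus('ds000001')
  if (result.pending) {
    // Ask CacheItem to skip writing this incomplete result to Redis
    doNotCache(true)
  }
  return result
})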
@@ -11,4 +11,5 @@ export enum CacheType {
   snapshot = 'snapshot',
   snapshotIndex = 'snapshotIndex',
   participantCount = 'participantCount',
+  snapshotDownload = 'download',
 }
@@ -2,9 +2,8 @@ import {
   encodeFilePath,
   decodeFilePath,
   fileUrl,
-  filterFiles,
   computeTotalSize,
-} from '../files.js'
+} from '../files'
 
 vi.mock('ioredis')
 vi.mock('../../config.js')
@@ -59,53 +58,6 @@ describe('datalad files', () => {
       )
     })
   })
-  describe('filterFiles()', () => {
-    it('disables the filter when set to null', () => {
-      expect(filterFiles(null)(mockFiles)).toBe(mockFiles)
-    })
-    it("returns only root level files with '' filter", () => {
-      const mockDirs = [
-        {
-          filename: 'sub-01',
-          id: 'directory:sub-01',
-          urls: [],
-          size: 2,
-          directory: true,
-        },
-        {
-          filename: 'sub-02',
-          id: 'directory:sub-02',
-          urls: [],
-          size: 2,
-          directory: true,
-        },
-        {
-          filename: 'sub-03',
-          id: 'directory:sub-03',
-          urls: [],
-          size: 2,
-          directory: true,
-        },
-        {
-          filename: 'derivatives',
-          id: 'directory:derivatives',
-          urls: [],
-          size: 1,
-          directory: true,
-        },
-      ]
-      expect(filterFiles('')(mockFiles)).toEqual([
-        ...mockRootFiles,
-        ...mockDirs,
-      ])
-    })
-    it('returns only matching prefixed files with a directory name filter', () => {
-      expect(filterFiles('sub-01')(mockFiles)).toEqual(mockSub01)
-    })
-    it('works correctly for deeply nested files', () => {
-      expect(filterFiles('sub-01/func')(mockFiles)).toEqual([mockSub01[1]])
-    })
-  })
   describe('computeTotalSize()', () => {
     it('computes the size correctly', () => {
       const mockFileSizes = [
@@ -5,7 +5,7 @@ import config from '../config'
 import request from 'superagent'
 import { redis } from '../libs/redis.js'
 import { commitFiles } from './dataset.js'
-import { fileUrl } from './files.js'
+import { fileUrl } from './files'
 import { generateDataladCookie } from '../libs/authentication/jwt'
 import { getDatasetWorker } from '../libs/datalad-service'
 import CacheItem, { CacheType } from '../cache/item'
@@ -0,0 +1,115 @@
+import request from 'superagent'
+import { redis } from '../libs/redis'
+import CacheItem, { CacheType } from '../cache/item'
+import { getDatasetWorker } from '../libs/datalad-service'
+
+/**
+ * Convert to URL compatible path
+ * @param {String} path
+ */
+export const encodeFilePath = (path: string): string => {
+  return path.replace(new RegExp('/', 'g'), ':')
+}
+
+/**
+ * Convert to from URL compatible path fo filepath
+ * @param {String} path
+ */
+export const decodeFilePath = (path: string): string => {
+  return path.replace(new RegExp(':', 'g'), '/')
+}
+
+/**
+ * If path is provided, this is a subdirectory, otherwise a root level file.
+ * @param {String} path
+ * @param {String} filename
+ */
+export const getFileName = (path: string, filename: string): string => {
+  const filePath = path ? [path, filename].join('/') : filename
+  return filename ? encodeFilePath(filePath) : encodeFilePath(path)
+}
+
+/**
+ * Generate file URL for DataLad service
+ * @param {string} datasetId
+ * @param {string} path - Relative path for the file
+ * @param {string} filename
+ * @param {string} [revision] - Git hash of commit or tree owning this file
+ */
+export const fileUrl = (
+  datasetId: string,
+  path: string,
+  filename: string,
+  revision?: string,
+): string => {
+  const fileName = getFileName(path, filename)
+  if (revision) {
+    return `http://${getDatasetWorker(
+      datasetId,
+    )}/datasets/${datasetId}/snapshots/${revision}/files/${fileName}`
+  } else {
+    return `http://${getDatasetWorker(
+      datasetId,
+    )}/datasets/${datasetId}/files/${fileName}`
+  }
+}
+
+/**
+ * Generate path URL (such a directory or virtual path) for DataLad service
+ * @param {String} datasetId
+ */
+export const filesUrl = (datasetId: string): string =>
+  `http://${getDatasetWorker(datasetId)}/datasets/${datasetId}/files`
+
+/** Minimal variant of DatasetFile type from GraphQL API */
+type DatasetFile = {
+  id: string
+  filename: string
+  directory: boolean
+  size: number
+  urls: string[]
+}
+
+/**
+ * Sum all file sizes for total dataset size
+ */
+export const computeTotalSize = (files: [DatasetFile]): number =>
+  files.reduce((size, f) => size + f.size, 0)
+
+/**
+ * Get files for a specific revision
+ * Similar to getDraftFiles but different cache key and fixed revisions
+ * @param {string} datasetId - Dataset accession number
+ * @param {string} treeish - Git treeish hexsha
+ */
+export const getFiles = (datasetId, treeish): Promise<[DatasetFile]> => {
+  const cache = new CacheItem(redis, CacheType.commitFiles, [
+    datasetId,
+    treeish.substring(0, 7),
+  ])
+  return cache.get(
+    doNotCache =>
+      request
+        .get(
+          `${getDatasetWorker(
+            datasetId,
+          )}/datasets/${datasetId}/tree/${treeish}`,
+        )
+        .set('Accept', 'application/json')
+        .then(response => {
+          if (response.status === 200) {
+            const {
+              body: { files },
+            } = response
+            for (const f of files) {
+              // Skip caching this tree if it doesn't contain S3 URLs - likely still exporting
+              if (!f.directory && !f.urls[0].includes('s3.amazonaws.com')) {
+                doNotCache(true)
+                break
+              }
+            }
+            return files as [DatasetFile]
+          }
+        }) as Promise<[DatasetFile]>,
+  )
+}
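For reference, the ':' encoding these helpers apply works as follows; the paths are illustrative BIDS-style examples and the comments show the return values:

// Nested paths are flattened with ':' separators for use in worker URLs
encodeFilePath('sub-01/anat/sub-01_T1w.nii.gz') // 'sub-01:anat:sub-01_T1w.nii.gz'
decodeFilePath('sub-01:anat:sub-01_T1w.nii.gz') // 'sub-01/anat/sub-01_T1w.nii.gz'
getFileName('sub-01/anat', 'sub-01_T1w.nii.gz') // 'sub-01:anat:sub-01_T1w.nii.gz'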
@@ -20,6 +20,7 @@ import Dataset from '../models/dataset'
 import Snapshot from '../models/snapshot'
 import { updateDatasetRevision } from './draft.js'
 import { getDatasetWorker } from '../libs/datalad-service'
+import { join } from 'path'
 
 const lockSnapshot = (datasetId, tag) => {
   return redlock.lock(
@@ -264,3 +265,35 @@ export const getPublicSnapshots = () => {
     ]).exec()
   })
 }
+
+/**
+ * For snapshots, precache all trees for downloads
+ */
+export const downloadFiles = (datasetId, tag) => {
+  const downloadCache = new CacheItem(redis, CacheType.snapshotDownload, [
+    datasetId,
+    tag,
+  ])
+  // Return an existing cache object if we have one
+  return downloadCache.get(async () => {
+    // If not, fetch all trees sequentially and cache the result (hopefully some or all trees are cached)
+    const files = await getFilesRecursive(datasetId, tag, '')
+    files.sort()
+    return files
+  })
+}
+
+export async function getFilesRecursive(datasetId, tree, path = '') {
+  const files = []
+  // Fetch files
+  const fileTree = await getFiles(datasetId, tree)
+  for (const file of fileTree) {
+    const absPath = join(path, file.filename)
+    if (file.directory) {
+      files.push(...(await getFilesRecursive(datasetId, file.id, absPath)))
+    } else {
+      files.push({ ...file, filename: absPath })
+    }
+  }
+  return files
+}
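In this release downloadFiles is wired up through the snapshot resolver further down; a minimal direct call would look like this (the accession number and tag are illustrative):

// Walks every tree in the snapshot and returns a flat, sorted file list
const files = await downloadFiles('ds000001', '1.0.0')
// Each entry carries a path-joined filename, e.g. 'sub-01/anat/sub-01_T1w.nii.gz'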
@@ -1,21 +1,25 @@
 import { redis } from '../../libs/redis.js'
+import CacheItem from '../../cache/item'
+import { CacheType } from '../../cache/types'
 
+/**
+ * Clear the snapshotDownload cache after exports
+ */
 export async function cacheClear(
   obj: Record<string, unknown>,
-  { datasetId }: { datasetId: string },
+  { datasetId, tag }: { datasetId: string; tag: string },
   { userInfo }: { userInfo: { admin: boolean } },
 ): Promise<boolean> {
   // Check for admin and validate datasetId argument
   if (userInfo?.admin && datasetId.length == 8 && datasetId.startsWith('ds')) {
-    const keys = await redis.keys(`*${datasetId}*`)
-    if (keys.length) {
-      const transaction = redis.pipeline()
-      keys.forEach(key => {
-        transaction.unlink(key)
-      })
-      transaction.exec()
+    const downloadCache = new CacheItem(redis, CacheType.snapshotDownload, [
+      datasetId,
+      tag,
+    ])
+    try {
+      await downloadCache.drop()
       return true
-    } else {
+    } catch (err) {
       return false
     }
   } else {
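A sketch of the corresponding admin call after this change, written with the gql tag from @apollo/client (already a dependency of this package); the variable values are illustrative:

import { gql } from '@apollo/client'

// cacheClear now targets one snapshot's download cache instead of every key for the dataset
const CACHE_CLEAR = gql`
  mutation cacheClear($datasetId: ID!, $tag: String!) {
    cacheClear(datasetId: $datasetId, tag: $tag)
  }
`
// Example variables: { datasetId: 'ds000001', tag: '1.0.0' }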
@@ -19,7 +19,7 @@ import Deletion from '../../models/deletion'
 import { reviewers } from './reviewer'
 import { getDatasetWorker } from '../../libs/datalad-service.js'
 import { getDraftHead } from '../../datalad/dataset.js'
-import { getFileName } from '../../datalad/files.js'
+import { getFileName } from '../../datalad/files'
 import { onBrainlife } from './brainlife'
 import { derivatives } from './derivatives'
 import semver from 'semver'
@@ -5,7 +5,7 @@ import { description } from './description.js'
 import { readme } from './readme.js'
 import { getDraftRevision, updateDatasetRevision } from '../../datalad/draft.js'
 import { checkDatasetWrite } from '../permissions.js'
-import { getFiles } from '../../datalad/files.js'
+import { getFiles } from '../../datalad/files'
 import { filterRemovedAnnexObjects } from '../utils/file.js'
 
 // A draft must have a dataset parent
@@ -4,7 +4,6 @@
 import { dataset, datasets } from './dataset.js'
 import { snapshot, participantCount } from './snapshots.js'
 import { user, users } from './user.js'
-import { datasetChanges } from './dataset-change.js'
 import { flaggedFiles } from './flaggedFiles'
 
 const Query = {
@@ -14,7 +13,6 @@ const Query = {
   users,
   snapshot,
   participantCount,
-  datasetChanges,
   flaggedFiles,
 }
 
@@ -6,7 +6,7 @@ import { readme } from './readme.js'
 import { description } from './description.js'
 import { summary } from './summary.js'
 import { snapshotIssues } from './issues.js'
-import { getFiles } from '../../datalad/files.js'
+import { getFiles } from '../../datalad/files'
 import Summary from '../../models/summary'
 import DatasetModel from '../../models/dataset'
 import { filterRemovedAnnexObjects } from '../utils/file.js'
@@ -15,6 +15,7 @@ import { redis } from '../../libs/redis'
 import CacheItem, { CacheType } from '../../cache/item'
 import { normalizeDOI } from '../../libs/doi/normalize'
 import { getDraftHead } from '../../datalad/dataset'
+import { downloadFiles } from '../../datalad/snapshots'
 
 export const snapshots = obj => {
   return datalad.getSnapshots(obj.id)
@@ -40,6 +41,7 @@ export const snapshot = (obj, { datasetId, tag }, context) => {
       deprecated: () => deprecated({ datasetId, tag }),
       related: () => related(datasetId),
       onBrainlife: () => onBrainlife(snapshot),
+      downloadFiles: () => downloadFiles(datasetId, tag),
     }))
   },
 )
@@ -70,19 +70,12 @@ export const filesUpdated = {
   },
 }
 
-export const datasetChanged = {
-  type: 'DatasetChange',
-  subscribe: () => pubsub.asyncIterator('datasetChanged'),
-  args: {},
-}
-
 const Subscription = {
   datasetDeleted,
   snapshotsUpdated,
   permissionsUpdated,
   draftUpdated,
   filesUpdated,
-  datasetChanged,
 }
 
 export default Subscription
@@ -90,11 +90,6 @@ export const typeDefs = `
   participantCount(modality: String): Int @cacheControl(maxAge: 3600, scope: PUBLIC)
   # Request one snapshot
   snapshot(datasetId: ID!, tag: String!): Snapshot
-  # Get recent dataset changes (newest first)
-  datasetChanges(
-    "Limit results, default 100, max 1000"
-    limit: Int = 100
-  ): [DatasetChange]
   # Get annexed files that have been flagged or removed.
   flaggedFiles(
     "Get files that have been flagged, default true."
@@ -165,8 +160,8 @@ export const typeDefs = `
   prepareUpload(datasetId: ID!, uploadId: ID!): UploadMetadata
   # Add files from a completed upload to the dataset draft
   finishUpload(uploadId: ID!): Boolean
-  # Drop caches for a given dataset - requires site admin access
-  cacheClear(datasetId: ID!): Boolean
+  # Drop download cache for a snapshot - requires site admin access
+  cacheClear(datasetId: ID!, tag: String!): Boolean
   # Rerun the latest validator on a given commit
   revalidate(datasetId: ID!, ref: String!): Boolean
   # Request a temporary token for git access
@@ -478,6 +473,8 @@ export const typeDefs = `
   onBrainlife: Boolean @cacheControl(maxAge: 10080, scope: PUBLIC)
   # Total size in bytes of this snapshot
   size: BigInt
+  # Single list of files to download this snapshot (only available on snapshots)
+  downloadFiles: [DatasetFile]
 }
 
 # RelatedObject nature of relationship
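A sketch of a query reading the new field; the DatasetFile selections mirror the fields used elsewhere in this diff:

import { gql } from '@apollo/client'

// Fetch the flattened download listing for one snapshot
const SNAPSHOT_DOWNLOAD_FILES = gql`
  query snapshotDownloadFiles($datasetId: ID!, $tag: String!) {
    snapshot(datasetId: $datasetId, tag: $tag) {
      downloadFiles {
        id
        filename
        size
        urls
      }
    }
  }
`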
@@ -700,15 +697,6 @@ export const typeDefs = `
   payload: [DatasetFile]
 }
 
-# Recent changes to datasets
-type DatasetChange {
-  datasetId: String!
-  created: Boolean
-  modified: Boolean
-  deleted: Boolean
-  timestamp: DateTime
-}
-
 # Analytics for a dataset
 type Analytic @cacheControl(maxAge: 300, scope: PUBLIC) {
   datasetId: ID!
@@ -1,5 +1,7 @@
 import request from 'superagent'
+import { Readable } from 'node:stream'
 import mime from 'mime-types'
+import { getFiles } from '../datalad/files'
 import { getDatasetWorker } from '../libs/datalad-service'
 
 /**
@@ -14,14 +16,48 @@ import { getDatasetWorker } from '../libs/datalad-service'
 /**
  * Get a file from a dataset
  */
-export const getFile = (req, res) => {
+export const getFile = async (req, res) => {
   const { datasetId, snapshotId, filename } = req.params
   const worker = getDatasetWorker(datasetId)
-  res.set('Content-Type', mime.lookup(filename) || 'application/octet-stream')
-  const uri = snapshotId
-    ? `${worker}/datasets/${datasetId}/snapshots/${snapshotId}/files/${filename}`
-    : `${worker}/datasets/${datasetId}/files/${filename}`
-  return request.get(uri).pipe(res)
+  // Find the right tree
+  const pathComponents = filename.split(':')
+  let tree = snapshotId || 'HEAD'
+  let file
+  for (const level of pathComponents) {
+    const files = await getFiles(datasetId, tree)
+    if (level == pathComponents.slice(-1)) {
+      file = files.find(f => !f.directory && f.filename === level)
+    } else {
+      tree = files.find(f => f.directory && f.filename === level).id
+    }
+  }
+  // Get the file URL and redirect if external or serve if local
+  if (file && file.urls[0].startsWith('https://s3.amazonaws.com/')) {
+    res.redirect(file.urls[0])
+  } else {
+    // Serve the file directly
+    res.set('Content-Type', mime.lookup(filename) || 'application/octet-stream')
+    const uri = snapshotId
+      ? `http://${worker}/datasets/${datasetId}/snapshots/${snapshotId}/files/${filename}`
+      : `http://${worker}/datasets/${datasetId}/files/${filename}`
+    return (
+      fetch(uri)
+        .then(r => {
+          // Set the content length (allow clients to catch HTTP issues better)
+          res.setHeader(
+            'Content-Length',
+            Number(r.headers.get('content-length')),
+          )
+          return r.body
+        })
+        // @ts-expect-error
+        .then(stream => Readable.fromWeb(stream).pipe(res))
+        .catch(err => {
+          console.error(err)
+          res.status(500).send('Internal error transferring requested file')
+        })
+    )
+  }
 }
 
 /**
@@ -21,16 +21,16 @@ export const authCallback = (req, res, next) =>
   const existingAuth = parsedJwtFromRequest(req)
   if (existingAuth) {
     // Save ORCID to primary account
-    User.findOne({ id: existingAuth.sub }, (err, userModel) => {
-      if (err) {
-        return next(err)
-      } else {
+    User.findOne({ id: existingAuth.sub })
+      .then(userModel => {
        userModel.orcid = user.providerId
        return userModel.save().then(() => {
          res.redirect('/')
        })
-      }
-    })
+      })
+      .catch(err => {
+        return next(err)
+      })
   } else {
     // Complete login with ORCID as primary account
     req.logIn(user, { session: false }, err => {
@@ -1,6 +1,5 @@
 import mongoose, { Document } from 'mongoose'
 const { Schema, model } = mongoose
-import DatasetChange from './datasetChange'
 
 // External relations annotating the whole dataset
 export interface DatasetRelationDocument extends Document {
@@ -74,22 +73,6 @@ datasetSchema.virtual('subscriptions', {
   justOne: true,
 })
 
-datasetSchema.post('updateOne', function () {
-  const datasetId = this.getQuery()?.['id']
-  return new DatasetChange({
-    datasetId,
-    modified: true,
-  }).save()
-})
-
-datasetSchema.post('deleteOne', function () {
-  const datasetId = this.getQuery()?.['id']
-  return new DatasetChange({
-    datasetId,
-    deleted: true,
-  }).save()
-})
-
 const Dataset = model<DatasetDocument>('Dataset', datasetSchema)
 
 export default Dataset
@@ -1,132 +0,0 @@
-import request from 'superagent'
-import { redis } from '../libs/redis'
-import CacheItem, { CacheType } from '../cache/item'
-import { getDatasetWorker } from '../libs/datalad-service'
-
-/**
- * Convert to URL compatible path
- * @param {String} path
- */
-export const encodeFilePath = path => {
-  return path.replace(new RegExp('/', 'g'), ':')
-}
-
-/**
- * Convert to from URL compatible path fo filepath
- * @param {String} path
- */
-export const decodeFilePath = path => {
-  return path.replace(new RegExp(':', 'g'), '/')
-}
-
-/**
- * If path is provided, this is a subdirectory, otherwise a root level file.
- * @param {String} path
- * @param {String} filename
- */
-export const getFileName = (path, filename) => {
-  const filePath = path ? [path, filename].join('/') : filename
-  return filename ? encodeFilePath(filePath) : encodeFilePath(path)
-}
-
-/**
- * Generate file URL for DataLad service
- * @param {String} datasetId
- * @param {String} path - Relative path for the file
- * @param {String} filename
- * @param {String} [revision] - Git hash of commit or tree owning this file
- */
-export const fileUrl = (datasetId, path, filename, revision) => {
-  const fileName = getFileName(path, filename)
-  if (revision) {
-    return `http://${getDatasetWorker(
-      datasetId,
-    )}/datasets/${datasetId}/snapshots/${revision}/files/${fileName}`
-  } else {
-    return `http://${getDatasetWorker(
-      datasetId,
-    )}/datasets/${datasetId}/files/${fileName}`
-  }
-}
-
-/**
- * Generate path URL (such a directory or virtual path) for DataLad service
- * @param {String} datasetId
- */
-export const filesUrl = datasetId =>
-  `http://${getDatasetWorker(datasetId)}/datasets/${datasetId}/files`
-
-/**
- * Sum all file sizes for total dataset size
- */
-export const computeTotalSize = files =>
-  files.reduce((size, f) => size + f.size, 0)
-
-/**
- * Get files for a specific revision
- * Similar to getDraftFiles but different cache key and fixed revisions
- * @param {string} datasetId - Dataset accession number
- * @param {string} treeish - Git treeish hexsha
- */
-export const getFiles = (datasetId, treeish) => {
-  const cache = new CacheItem(redis, CacheType.commitFiles, [
-    datasetId,
-    treeish.substring(0, 7),
-  ])
-  return cache.get(() =>
-    request
-      .get(
-        `${getDatasetWorker(datasetId)}/datasets/${datasetId}/tree/${treeish}`,
-      )
-      .set('Accept', 'application/json')
-      .then(response => {
-        if (response.status === 200) {
-          const {
-            body: { files },
-          } = response
-          return files
-        }
-      }),
-  )
-}
-
-/**
- * Given a list of files (from getFiles), return a subset matching the prefix
- * @param {string} prefix The prefix to filter on
- * @returns {(files: Object[]) => Object[]}
- */
-export const filterFiles =
-  (prefix = '') =>
-  files => {
-    // Disable on null
-    if (prefix === null) {
-      return files
-    }
-    // Track potential directories and include those as "files"
-    const directoryFacades = {}
-    // Return only root level files if prefix is set
-    const matchingFiles = files.filter(f => {
-      if (prefix === '') {
-        if (f.filename.includes('/')) {
-          const dirName = f.filename.split('/').slice(0, 1)[0]
-          if (directoryFacades[dirName] !== undefined) {
-            directoryFacades[dirName].size += 1
-          } else {
-            directoryFacades[dirName] = {
-              id: `directory:${dirName}`,
-              urls: [],
-              filename: dirName,
-              size: 1,
-              directory: true,
-            }
-          }
-          return false
-        } else {
-          return true
-        }
-      } else {
-        return f.filename.startsWith(prefix)
-      }
-    })
-    return [...matchingFiles, ...Object.values(directoryFacades)]
-  }
@@ -1,5 +0,0 @@
-import DatasetChange from '../../models/datasetChange'
-
-export const datasetChanges = (_, { limit = 100 }) => {
-  return DatasetChange.find().sort({ $natural: -1 }).limit(limit)
-}
@@ -1,39 +0,0 @@
-import mongoose, { Document } from 'mongoose'
-const { Schema, model } = mongoose
-import pubsub from '../graphql/pubsub'
-
-export interface DatasetChangeDocument extends Document {
-  datasetId: string
-  created: boolean
-  modified: boolean
-  deleted: boolean
-  timestamp: Date
-}
-
-const datasetChangeSchema = new Schema(
-  {
-    datasetId: { type: String, required: true },
-    created: { type: Boolean, default: false },
-    modified: { type: Boolean, default: false },
-    deleted: { type: Boolean, default: false },
-    timestamp: { type: Date, default: Date.now },
-  },
-  {
-    // limits the collection size to 1000 documents
-    // works like a circular buffer
-    capped: 1000,
-  },
-)
-
-datasetChangeSchema.post('save', doc => {
-  pubsub.publish('datasetChanged', {
-    datasetChanged: doc,
-  })
-})
-
-const DatasetChange = model<DatasetChangeDocument>(
-  'DatasetChange',
-  datasetChangeSchema,
-)
-
-export default DatasetChange