@openneuro/server 4.17.2 → 4.18.1
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/Dockerfile +1 -1
- package/package.json +3 -3
- package/src/cache/item.ts +16 -8
- package/src/cache/types.ts +1 -0
- package/src/datalad/__tests__/files.spec.js +1 -49
- package/src/datalad/description.js +1 -1
- package/src/datalad/files.ts +115 -0
- package/src/datalad/snapshots.js +33 -0
- package/src/graphql/resolvers/cache.ts +13 -9
- package/src/graphql/resolvers/dataset.js +1 -1
- package/src/graphql/resolvers/draft.js +1 -1
- package/src/graphql/resolvers/query.js +0 -2
- package/src/graphql/resolvers/snapshots.js +3 -1
- package/src/graphql/resolvers/subscriptions.js +0 -7
- package/src/graphql/schema.js +4 -16
- package/src/handlers/datalad.js +42 -6
- package/src/models/dataset.ts +0 -17
- package/src/datalad/files.js +0 -132
- package/src/graphql/resolvers/dataset-change.js +0 -5
- package/src/models/datasetChange.ts +0 -39
package/Dockerfile
CHANGED
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@openneuro/server",
-  "version": "4.17.2",
+  "version": "4.18.1",
   "description": "Core service for the OpenNeuro platform.",
   "license": "MIT",
   "main": "src/server.js",
@@ -17,7 +17,7 @@
   "dependencies": {
     "@apollo/client": "3.7.2",
     "@elastic/elasticsearch": "7.15.0",
-    "@openneuro/search": "^4.17.2",
+    "@openneuro/search": "^4.18.1",
    "@passport-next/passport-google-oauth2": "^1.0.0",
    "@sentry/node": "^4.5.3",
    "apollo-server": "2.25.4",
@@ -92,5 +92,5 @@
   "publishConfig": {
     "access": "public"
   },
-  "gitHead": "
+  "gitHead": "fbd8afe9033abf11e39e17968b3e9f60261a9c01"
 }
package/src/cache/item.ts
CHANGED
@@ -52,26 +52,34 @@ class CacheItem {
     const deserialized: T = JSON.parse(decompressed.toString())
     return deserialized
   }
-  public async get<T>(
+  public async get<T>(
+    miss: (setDoNotCache?: (doNotCache) => void) => Promise<T>,
+  ): Promise<T> {
     try {
       const data = await this.redis.getBuffer(this.key)
       if (data) {
         return this.deserialize(data)
       } else {
+        let skipCaching = false
+        const doNotCache = (doNotCache: boolean): void => {
+          skipCaching = doNotCache
+        }
         // Call the cache miss function if we didn't get anything
-        const data = await miss()
+        const data = await miss(doNotCache)
         const serialized = await this.serialize(data)
-        // Allow for the simple case of aging out keys
-        if (this.expiration > 0) {
-          void this.redis.setex(this.key, this.expiration, serialized)
-        } else {
-          void this.redis.set(this.key, serialized)
-        }
+        // Allow the cache miss function to return a value that isn't cached
+        if (!skipCaching) {
+          // Allow for the simple case of aging out keys
+          if (this.expiration > 0) {
+            void this.redis.setex(this.key, this.expiration, serialized)
+          } else {
+            void this.redis.set(this.key, serialized)
+          }
         }
         return data
       }
     } catch {
       // Keep going as though we had a cache miss if there is a problem but don't cache it
-      // TODO: Sentry reporting doesn't work here but should be fixed
       return miss()
     }
   }
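The reworked get<T> signature threads an opt-out callback into the cache miss function. A minimal caller sketch, assuming a CacheItem named cache constructed elsewhere; fetchExpensiveValue is a hypothetical miss function:

const value = await cache.get(async setDoNotCache => {
  const result = await fetchExpensiveValue() // hypothetical fetch
  if (!result.complete) {
    // Ask CacheItem to return this value without writing it to Redis
    setDoNotCache(true)
  }
  return result
})

getFiles in package/src/datalad/files.ts below uses exactly this hook to avoid caching trees that are still exporting to S3.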
package/src/cache/types.ts
CHANGED
package/src/datalad/__tests__/files.spec.js
CHANGED

@@ -2,9 +2,8 @@ import {
   encodeFilePath,
   decodeFilePath,
   fileUrl,
-  filterFiles,
   computeTotalSize,
-} from '../files
+} from '../files'

 vi.mock('ioredis')
 vi.mock('../../config.js')
@@ -59,53 +58,6 @@ describe('datalad files', () => {
       )
     })
   })
-  describe('filterFiles()', () => {
-    it('disables the filter when set to null', () => {
-      expect(filterFiles(null)(mockFiles)).toBe(mockFiles)
-    })
-    it("returns only root level files with '' filter", () => {
-      const mockDirs = [
-        {
-          filename: 'sub-01',
-          id: 'directory:sub-01',
-          urls: [],
-          size: 2,
-          directory: true,
-        },
-        {
-          filename: 'sub-02',
-          id: 'directory:sub-02',
-          urls: [],
-          size: 2,
-          directory: true,
-        },
-        {
-          filename: 'sub-03',
-          id: 'directory:sub-03',
-          urls: [],
-          size: 2,
-          directory: true,
-        },
-        {
-          filename: 'derivatives',
-          id: 'directory:derivatives',
-          urls: [],
-          size: 1,
-          directory: true,
-        },
-      ]
-      expect(filterFiles('')(mockFiles)).toEqual([
-        ...mockRootFiles,
-        ...mockDirs,
-      ])
-    })
-    it('returns only matching prefixed files with a directory name filter', () => {
-      expect(filterFiles('sub-01')(mockFiles)).toEqual(mockSub01)
-    })
-    it('works correctly for deeply nested files', () => {
-      expect(filterFiles('sub-01/func')(mockFiles)).toEqual([mockSub01[1]])
-    })
-  })
   describe('computeTotalSize()', () => {
     it('computes the size correctly', () => {
       const mockFileSizes = [
package/src/datalad/description.js
CHANGED

@@ -5,7 +5,7 @@ import config from '../config'
 import request from 'superagent'
 import { redis } from '../libs/redis.js'
 import { commitFiles } from './dataset.js'
-import { fileUrl } from './files
+import { fileUrl } from './files'
 import { generateDataladCookie } from '../libs/authentication/jwt'
 import { getDatasetWorker } from '../libs/datalad-service'
 import CacheItem, { CacheType } from '../cache/item'
package/src/datalad/files.ts
ADDED

@@ -0,0 +1,115 @@
+import request from 'superagent'
+import { redis } from '../libs/redis'
+import CacheItem, { CacheType } from '../cache/item'
+import { getDatasetWorker } from '../libs/datalad-service'
+
+/**
+ * Convert to URL compatible path
+ * @param {String} path
+ */
+export const encodeFilePath = (path: string): string => {
+  return path.replace(new RegExp('/', 'g'), ':')
+}
+
+/**
+ * Convert to from URL compatible path fo filepath
+ * @param {String} path
+ */
+export const decodeFilePath = (path: string): string => {
+  return path.replace(new RegExp(':', 'g'), '/')
+}
+
+/**
+ * If path is provided, this is a subdirectory, otherwise a root level file.
+ * @param {String} path
+ * @param {String} filename
+ */
+export const getFileName = (path: string, filename: string): string => {
+  const filePath = path ? [path, filename].join('/') : filename
+  return filename ? encodeFilePath(filePath) : encodeFilePath(path)
+}
+
+/**
+ * Generate file URL for DataLad service
+ * @param {string} datasetId
+ * @param {string} path - Relative path for the file
+ * @param {string} filename
+ * @param {string} [revision] - Git hash of commit or tree owning this file
+ */
+export const fileUrl = (
+  datasetId: string,
+  path: string,
+  filename: string,
+  revision?: string,
+): string => {
+  const fileName = getFileName(path, filename)
+  if (revision) {
+    return `http://${getDatasetWorker(
+      datasetId,
+    )}/datasets/${datasetId}/snapshots/${revision}/files/${fileName}`
+  } else {
+    return `http://${getDatasetWorker(
+      datasetId,
+    )}/datasets/${datasetId}/files/${fileName}`
+  }
+}
+
+/**
+ * Generate path URL (such a directory or virtual path) for DataLad service
+ * @param {String} datasetId
+ */
+export const filesUrl = (datasetId: string): string =>
+  `http://${getDatasetWorker(datasetId)}/datasets/${datasetId}/files`
+
+/** Minimal variant of DatasetFile type from GraphQL API */
+type DatasetFile = {
+  id: string
+  filename: string
+  directory: boolean
+  size: number
+  urls: string[]
+}
+
+/**
+ * Sum all file sizes for total dataset size
+ */
+export const computeTotalSize = (files: [DatasetFile]): number =>
+  files.reduce((size, f) => size + f.size, 0)
+
+/**
+ * Get files for a specific revision
+ * Similar to getDraftFiles but different cache key and fixed revisions
+ * @param {string} datasetId - Dataset accession number
+ * @param {string} treeish - Git treeish hexsha
+ */
+export const getFiles = (datasetId, treeish): Promise<[DatasetFile]> => {
+  const cache = new CacheItem(redis, CacheType.commitFiles, [
+    datasetId,
+    treeish.substring(0, 7),
+  ])
+  return cache.get(
+    doNotCache =>
+      request
+        .get(
+          `${getDatasetWorker(
+            datasetId,
+          )}/datasets/${datasetId}/tree/${treeish}`,
+        )
+        .set('Accept', 'application/json')
+        .then(response => {
+          if (response.status === 200) {
+            const {
+              body: { files },
+            } = response
+            for (const f of files) {
+              // Skip caching this tree if it doesn't contain S3 URLs - likely still exporting
+              if (!f.directory && !f.urls[0].includes('s3.amazonaws.com')) {
+                doNotCache(true)
+                break
+              }
+            }
+            return files as [DatasetFile]
+          }
+        }) as Promise<[DatasetFile]>,
+  )
+}
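For orientation, a sketch of how these helpers compose; the dataset id, path, and revision below are hypothetical:

encodeFilePath('sub-01/anat/T1w.nii.gz') // 'sub-01:anat:T1w.nii.gz'
decodeFilePath('sub-01:anat:T1w.nii.gz') // 'sub-01/anat/T1w.nii.gz'
fileUrl('ds000001', 'sub-01/anat', 'T1w.nii.gz', 'abc1234')
// 'http://<worker host>/datasets/ds000001/snapshots/abc1234/files/sub-01:anat:T1w.nii.gz'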
package/src/datalad/snapshots.js
CHANGED
@@ -20,6 +20,7 @@ import Dataset from '../models/dataset'
 import Snapshot from '../models/snapshot'
 import { updateDatasetRevision } from './draft.js'
 import { getDatasetWorker } from '../libs/datalad-service'
+import { join } from 'path'

 const lockSnapshot = (datasetId, tag) => {
   return redlock.lock(
@@ -264,3 +265,35 @@ export const getPublicSnapshots = () => {
   ]).exec()
   })
 }
+
+/**
+ * For snapshots, precache all trees for downloads
+ */
+export const downloadFiles = (datasetId, tag) => {
+  const downloadCache = new CacheItem(redis, CacheType.snapshotDownload, [
+    datasetId,
+    tag,
+  ])
+  // Return an existing cache object if we have one
+  return downloadCache.get(async () => {
+    // If not, fetch all trees sequentially and cache the result (hopefully some or all trees are cached)
+    const files = await getFilesRecursive(datasetId, tag, '')
+    files.sort()
+    return files
+  })
+}
+
+export async function getFilesRecursive(datasetId, tree, path = '') {
+  const files = []
+  // Fetch files
+  const fileTree = await getFiles(datasetId, tree)
+  for (const file of fileTree) {
+    const absPath = join(path, file.filename)
+    if (file.directory) {
+      files.push(...(await getFilesRecursive(datasetId, file.id, absPath)))
+    } else {
+      files.push({ ...file, filename: absPath })
+    }
+  }
+  return files
+}
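getFilesRecursive descends each directory entry by its tree id and rewrites filename to the full relative path, so downloadFiles resolves to one flat list per snapshot. A sketch of the expected result shape; the ids, sizes, and paths are hypothetical:

const files = await downloadFiles('ds000001', '1.0.0')
// [
//   { id: '...', filename: 'dataset_description.json', directory: false, size: 1234, urls: ['...'] },
//   { id: '...', filename: 'sub-01/anat/T1w.nii.gz', directory: false, size: 5678, urls: ['...'] },
// ]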
package/src/graphql/resolvers/cache.ts
CHANGED

@@ -1,21 +1,25 @@
 import { redis } from '../../libs/redis.js'
+import CacheItem from '../../cache/item'
+import { CacheType } from '../../cache/types'

+/**
+ * Clear the snapshotDownload cache after exports
+ */
 export async function cacheClear(
   obj: Record<string, unknown>,
-  { datasetId }: { datasetId: string },
+  { datasetId, tag }: { datasetId: string; tag: string },
   { userInfo }: { userInfo: { admin: boolean } },
 ): Promise<boolean> {
   // Check for admin and validate datasetId argument
   if (userInfo?.admin && datasetId.length == 8 && datasetId.startsWith('ds')) {
-    const
-
-
-
-
-
-    transaction.exec()
+    const downloadCache = new CacheItem(redis, CacheType.snapshotDownload, [
+      datasetId,
+      tag,
+    ])
+    try {
+      await downloadCache.drop()
       return true
-  }
+    } catch (err) {
       return false
     }
   } else {
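cacheClear now drops a single snapshot's download cache entry rather than the previous transaction-based clear. A hedged example of the updated call, matching the new schema signature; the variables are placeholders and the call requires a site admin token:

const CACHE_CLEAR = `
  mutation CacheClear($datasetId: ID!, $tag: String!) {
    cacheClear(datasetId: $datasetId, tag: $tag)
  }
`
// e.g. variables: { datasetId: 'ds000001', tag: '1.0.0' }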
package/src/graphql/resolvers/dataset.js
CHANGED

@@ -19,7 +19,7 @@ import Deletion from '../../models/deletion'
 import { reviewers } from './reviewer'
 import { getDatasetWorker } from '../../libs/datalad-service.js'
 import { getDraftHead } from '../../datalad/dataset.js'
-import { getFileName } from '../../datalad/files
+import { getFileName } from '../../datalad/files'
 import { onBrainlife } from './brainlife'
 import { derivatives } from './derivatives'
 import semver from 'semver'
package/src/graphql/resolvers/draft.js
CHANGED

@@ -5,7 +5,7 @@ import { description } from './description.js'
 import { readme } from './readme.js'
 import { getDraftRevision, updateDatasetRevision } from '../../datalad/draft.js'
 import { checkDatasetWrite } from '../permissions.js'
-import { getFiles } from '../../datalad/files
+import { getFiles } from '../../datalad/files'
 import { filterRemovedAnnexObjects } from '../utils/file.js'

 // A draft must have a dataset parent
package/src/graphql/resolvers/query.js
CHANGED

@@ -4,7 +4,6 @@
 import { dataset, datasets } from './dataset.js'
 import { snapshot, participantCount } from './snapshots.js'
 import { user, users } from './user.js'
-import { datasetChanges } from './dataset-change.js'
 import { flaggedFiles } from './flaggedFiles'

 const Query = {
@@ -14,7 +13,6 @@ const Query = {
   users,
   snapshot,
   participantCount,
-  datasetChanges,
   flaggedFiles,
 }

package/src/graphql/resolvers/snapshots.js
CHANGED

@@ -6,7 +6,7 @@ import { readme } from './readme.js'
 import { description } from './description.js'
 import { summary } from './summary.js'
 import { snapshotIssues } from './issues.js'
-import { getFiles } from '../../datalad/files
+import { getFiles } from '../../datalad/files'
 import Summary from '../../models/summary'
 import DatasetModel from '../../models/dataset'
 import { filterRemovedAnnexObjects } from '../utils/file.js'
@@ -15,6 +15,7 @@ import { redis } from '../../libs/redis'
 import CacheItem, { CacheType } from '../../cache/item'
 import { normalizeDOI } from '../../libs/doi/normalize'
 import { getDraftHead } from '../../datalad/dataset'
+import { downloadFiles } from '../../datalad/snapshots'

 export const snapshots = obj => {
   return datalad.getSnapshots(obj.id)
@@ -40,6 +41,7 @@ export const snapshot = (obj, { datasetId, tag }, context) => {
       deprecated: () => deprecated({ datasetId, tag }),
       related: () => related(datasetId),
       onBrainlife: () => onBrainlife(snapshot),
+      downloadFiles: () => downloadFiles(datasetId, tag),
     }))
   },
 )
package/src/graphql/resolvers/subscriptions.js
CHANGED

@@ -70,19 +70,12 @@ export const filesUpdated = {
   },
 }

-export const datasetChanged = {
-  type: 'DatasetChange',
-  subscribe: () => pubsub.asyncIterator('datasetChanged'),
-  args: {},
-}
-
 const Subscription = {
   datasetDeleted,
   snapshotsUpdated,
   permissionsUpdated,
   draftUpdated,
   filesUpdated,
-  datasetChanged,
 }

 export default Subscription
package/src/graphql/schema.js
CHANGED
@@ -90,11 +90,6 @@ export const typeDefs = `
   participantCount(modality: String): Int @cacheControl(maxAge: 3600, scope: PUBLIC)
   # Request one snapshot
   snapshot(datasetId: ID!, tag: String!): Snapshot
-  # Get recent dataset changes (newest first)
-  datasetChanges(
-    "Limit results, default 100, max 1000"
-    limit: Int = 100
-  ): [DatasetChange]
   # Get annexed files that have been flagged or removed.
   flaggedFiles(
     "Get files that have been flagged, default true."
@@ -165,8 +160,8 @@ export const typeDefs = `
   prepareUpload(datasetId: ID!, uploadId: ID!): UploadMetadata
   # Add files from a completed upload to the dataset draft
   finishUpload(uploadId: ID!): Boolean
-  # Drop
-  cacheClear(datasetId: ID!): Boolean
+  # Drop download cache for a snapshot - requires site admin access
+  cacheClear(datasetId: ID!, tag: String!): Boolean
   # Rerun the latest validator on a given commit
   revalidate(datasetId: ID!, ref: String!): Boolean
   # Request a temporary token for git access
@@ -478,6 +473,8 @@ export const typeDefs = `
   onBrainlife: Boolean @cacheControl(maxAge: 10080, scope: PUBLIC)
   # Total size in bytes of this snapshot
   size: BigInt
+  # Single list of files to download this snapshot (only available on snapshots)
+  downloadFiles: [DatasetFile]
 }

 # RelatedObject nature of relationship
@@ -700,15 +697,6 @@ export const typeDefs = `
   payload: [DatasetFile]
 }

-# Recent changes to datasets
-type DatasetChange {
-  datasetId: String!
-  created: Boolean
-  modified: Boolean
-  deleted: Boolean
-  timestamp: DateTime
-}
-
 # Analytics for a dataset
 type Analytic @cacheControl(maxAge: 300, scope: PUBLIC) {
   datasetId: ID!
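The new Snapshot.downloadFiles field exposes the flattened list produced by downloadFiles. A hedged example query against the schema above; the field selection assumes DatasetFile exposes filename, size, and urls as seen elsewhere in this diff:

const SNAPSHOT_DOWNLOAD = `
  query SnapshotDownload($datasetId: ID!, $tag: String!) {
    snapshot(datasetId: $datasetId, tag: $tag) {
      downloadFiles {
        filename
        size
        urls
      }
    }
  }
`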
package/src/handlers/datalad.js
CHANGED
@@ -1,5 +1,7 @@
 import request from 'superagent'
+import { Readable } from 'node:stream'
 import mime from 'mime-types'
+import { getFiles } from '../datalad/files'
 import { getDatasetWorker } from '../libs/datalad-service'

 /**
@@ -14,14 +16,48 @@ import { getDatasetWorker } from '../libs/datalad-service'
 /**
  * Get a file from a dataset
  */
-export const getFile = (req, res) => {
+export const getFile = async (req, res) => {
   const { datasetId, snapshotId, filename } = req.params
   const worker = getDatasetWorker(datasetId)
-
-  const
-
-
-
+  // Find the right tree
+  const pathComponents = filename.split(':')
+  let tree = snapshotId || 'HEAD'
+  let file
+  for (const level of pathComponents) {
+    const files = await getFiles(datasetId, tree)
+    if (level == pathComponents.slice(-1)) {
+      file = files.find(f => !f.directory && f.filename === level)
+    } else {
+      tree = files.find(f => f.directory && f.filename === level).id
+    }
+  }
+  // Get the file URL and redirect if external or serve if local
+  if (file && file.urls[0].startsWith('https://s3.amazonaws.com/')) {
+    res.redirect(file.urls[0])
+  } else {
+    // Serve the file directly
+    res.set('Content-Type', mime.lookup(filename) || 'application/octet-stream')
+    const uri = snapshotId
+      ? `http://${worker}/datasets/${datasetId}/snapshots/${snapshotId}/files/${filename}`
+      : `http://${worker}/datasets/${datasetId}/files/${filename}`
+    return (
+      fetch(uri)
+        .then(r => {
+          // Set the content length (allow clients to catch HTTP issues better)
+          res.setHeader(
+            'Content-Length',
+            Number(r.headers.get('content-length')),
+          )
+          return r.body
+        })
+        // @ts-expect-error
+        .then(stream => Readable.fromWeb(stream).pipe(res))
+        .catch(err => {
+          console.error(err)
+          res.status(500).send('Internal error transferring requested file')
+        })
+    )
+  }
 }

 /**
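The rewritten getFile walks one cached tree per ':'-separated path component before deciding whether to redirect or stream. A hypothetical trace for one request; the route shape and ids are placeholders:

// GET .../datasets/ds000001/snapshots/1.0.0/files/sub-01:anat:T1w.nii.gz
// pathComponents = ['sub-01', 'anat', 'T1w.nii.gz']
// step 1: getFiles('ds000001', '1.0.0')    -> tree id of directory 'sub-01'
// step 2: getFiles('ds000001', <sub-01>)   -> tree id of directory 'anat'
// step 3: getFiles('ds000001', <anat>)     -> file entry 'T1w.nii.gz'
// then: redirect to its S3 URL, or proxy the worker URL via fetch + Readable.fromWeb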
package/src/models/dataset.ts
CHANGED
@@ -1,6 +1,5 @@
 import mongoose, { Document } from 'mongoose'
 const { Schema, model } = mongoose
-import DatasetChange from './datasetChange'

 // External relations annotating the whole dataset
 export interface DatasetRelationDocument extends Document {
@@ -74,22 +73,6 @@ datasetSchema.virtual('subscriptions', {
   justOne: true,
 })

-datasetSchema.post('updateOne', function () {
-  const datasetId = this.getQuery()?.['id']
-  return new DatasetChange({
-    datasetId,
-    modified: true,
-  }).save()
-})
-
-datasetSchema.post('deleteOne', function () {
-  const datasetId = this.getQuery()?.['id']
-  return new DatasetChange({
-    datasetId,
-    deleted: true,
-  }).save()
-})
-
 const Dataset = model<DatasetDocument>('Dataset', datasetSchema)

 export default Dataset
package/src/datalad/files.js
DELETED
@@ -1,132 +0,0 @@
-import request from 'superagent'
-import { redis } from '../libs/redis'
-import CacheItem, { CacheType } from '../cache/item'
-import { getDatasetWorker } from '../libs/datalad-service'
-
-/**
- * Convert to URL compatible path
- * @param {String} path
- */
-export const encodeFilePath = path => {
-  return path.replace(new RegExp('/', 'g'), ':')
-}
-
-/**
- * Convert to from URL compatible path fo filepath
- * @param {String} path
- */
-export const decodeFilePath = path => {
-  return path.replace(new RegExp(':', 'g'), '/')
-}
-
-/**
- * If path is provided, this is a subdirectory, otherwise a root level file.
- * @param {String} path
- * @param {String} filename
- */
-export const getFileName = (path, filename) => {
-  const filePath = path ? [path, filename].join('/') : filename
-  return filename ? encodeFilePath(filePath) : encodeFilePath(path)
-}
-
-/**
- * Generate file URL for DataLad service
- * @param {String} datasetId
- * @param {String} path - Relative path for the file
- * @param {String} filename
- * @param {String} [revision] - Git hash of commit or tree owning this file
- */
-export const fileUrl = (datasetId, path, filename, revision) => {
-  const fileName = getFileName(path, filename)
-  if (revision) {
-    return `http://${getDatasetWorker(
-      datasetId,
-    )}/datasets/${datasetId}/snapshots/${revision}/files/${fileName}`
-  } else {
-    return `http://${getDatasetWorker(
-      datasetId,
-    )}/datasets/${datasetId}/files/${fileName}`
-  }
-}
-
-/**
- * Generate path URL (such a directory or virtual path) for DataLad service
- * @param {String} datasetId
- */
-export const filesUrl = datasetId =>
-  `http://${getDatasetWorker(datasetId)}/datasets/${datasetId}/files`
-
-/**
- * Sum all file sizes for total dataset size
- */
-export const computeTotalSize = files =>
-  files.reduce((size, f) => size + f.size, 0)
-
-/**
- * Get files for a specific revision
- * Similar to getDraftFiles but different cache key and fixed revisions
- * @param {string} datasetId - Dataset accession number
- * @param {string} treeish - Git treeish hexsha
- */
-export const getFiles = (datasetId, treeish) => {
-  const cache = new CacheItem(redis, CacheType.commitFiles, [
-    datasetId,
-    treeish.substring(0, 7),
-  ])
-  return cache.get(() =>
-    request
-      .get(
-        `${getDatasetWorker(datasetId)}/datasets/${datasetId}/tree/${treeish}`,
-      )
-      .set('Accept', 'application/json')
-      .then(response => {
-        if (response.status === 200) {
-          const {
-            body: { files },
-          } = response
-          return files
-        }
-      }),
-  )
-}
-
-/**
- * Given a list of files (from getFiles), return a subset matching the prefix
- * @param {string} prefix The prefix to filter on
- * @returns {(files: Object[]) => Object[]}
- */
-export const filterFiles =
-  (prefix = '') =>
-  files => {
-    // Disable on null
-    if (prefix === null) {
-      return files
-    }
-    // Track potential directories and include those as "files"
-    const directoryFacades = {}
-    // Return only root level files if prefix is set
-    const matchingFiles = files.filter(f => {
-      if (prefix === '') {
-        if (f.filename.includes('/')) {
-          const dirName = f.filename.split('/').slice(0, 1)[0]
-          if (directoryFacades[dirName] !== undefined) {
-            directoryFacades[dirName].size += 1
-          } else {
-            directoryFacades[dirName] = {
-              id: `directory:${dirName}`,
-              urls: [],
-              filename: dirName,
-              size: 1,
-              directory: true,
-            }
-          }
-          return false
-        } else {
-          return true
-        }
-      } else {
-        return f.filename.startsWith(prefix)
-      }
-    })
-    return [...matchingFiles, ...Object.values(directoryFacades)]
-  }
package/src/models/datasetChange.ts
DELETED

@@ -1,39 +0,0 @@
-import mongoose, { Document } from 'mongoose'
-const { Schema, model } = mongoose
-import pubsub from '../graphql/pubsub'
-
-export interface DatasetChangeDocument extends Document {
-  datasetId: string
-  created: boolean
-  modified: boolean
-  deleted: boolean
-  timestamp: Date
-}
-
-const datasetChangeSchema = new Schema(
-  {
-    datasetId: { type: String, required: true },
-    created: { type: Boolean, default: false },
-    modified: { type: Boolean, default: false },
-    deleted: { type: Boolean, default: false },
-    timestamp: { type: Date, default: Date.now },
-  },
-  {
-    // limits the collection size to 1000 documents
-    // works like a circular buffer
-    capped: 1000,
-  },
-)
-
-datasetChangeSchema.post('save', doc => {
-  pubsub.publish('datasetChanged', {
-    datasetChanged: doc,
-  })
-})
-
-const DatasetChange = model<DatasetChangeDocument>(
-  'DatasetChange',
-  datasetChangeSchema,
-)
-
-export default DatasetChange