@sanity/export 3.39.0 → 3.41.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/constants.js +8 -0
- package/src/export.js +21 -5
- package/src/filterSystemDocuments.js +5 -0
- package/src/getDocumentCursorStream.js +85 -0
- package/src/validateOptions.js +12 -0
package/package.json
CHANGED
package/src/constants.js
CHANGED
|
@@ -33,3 +33,11 @@ exports.DOCUMENT_STREAM_DEBUG_INTERVAL = 10000
|
|
|
33
33
|
* @internal
|
|
34
34
|
*/
|
|
35
35
|
exports.REQUEST_READ_TIMEOUT = 3 * 60 * 1000 // 3 minutes
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
What mode to use when exporting documents.
|
|
39
|
+
stream: Export all documents in the dataset in one request, this will be consistent but might be slow on large datasets.
|
|
40
|
+
cursor: Export documents using a cursor, this might lead to inconsistent results if a mutation is performed while exporting.
|
|
41
|
+
*/
|
|
42
|
+
exports.MODE_STREAM = 'stream'
|
|
43
|
+
exports.MODE_CURSOR = 'cursor'
|
package/src/export.js
CHANGED
|
@@ -11,13 +11,14 @@ const filterDocumentTypes = require('./filterDocumentTypes')
|
|
|
11
11
|
const filterDrafts = require('./filterDrafts')
|
|
12
12
|
const filterSystemDocuments = require('./filterSystemDocuments')
|
|
13
13
|
const getDocumentsStream = require('./getDocumentsStream')
|
|
14
|
+
const getDocumentCursorStream = require('./getDocumentCursorStream')
|
|
14
15
|
const logFirstChunk = require('./logFirstChunk')
|
|
15
16
|
const rejectOnApiError = require('./rejectOnApiError')
|
|
16
17
|
const stringifyStream = require('./stringifyStream')
|
|
17
18
|
const tryParseJson = require('./tryParseJson')
|
|
18
19
|
const rimraf = require('./util/rimraf')
|
|
19
20
|
const validateOptions = require('./validateOptions')
|
|
20
|
-
const {DOCUMENT_STREAM_DEBUG_INTERVAL} = require('./constants')
|
|
21
|
+
const {DOCUMENT_STREAM_DEBUG_INTERVAL, MODE_CURSOR, MODE_STREAM} = require('./constants')
|
|
21
22
|
|
|
22
23
|
const noop = () => null
|
|
23
24
|
|
|
@@ -90,7 +91,7 @@ async function exportDataset(opts) {
|
|
|
90
91
|
debug('Archive finished')
|
|
91
92
|
})
|
|
92
93
|
|
|
93
|
-
debug('Getting dataset export stream')
|
|
94
|
+
debug('Getting dataset export stream, mode: "%s"', options.mode)
|
|
94
95
|
onProgress({step: 'Exporting documents...'})
|
|
95
96
|
|
|
96
97
|
let documentCount = 0
|
|
@@ -118,9 +119,13 @@ async function exportDataset(opts) {
|
|
|
118
119
|
cb(null, doc)
|
|
119
120
|
}
|
|
120
121
|
|
|
121
|
-
const inputStream = await getDocumentsStream(options)
|
|
122
|
-
|
|
123
|
-
|
|
122
|
+
const inputStream = await getDocumentInputStream(options)
|
|
123
|
+
if (inputStream.statusCode) {
|
|
124
|
+
debug('Got HTTP %d', inputStream.statusCode)
|
|
125
|
+
}
|
|
126
|
+
if (inputStream.headers) {
|
|
127
|
+
debug('Response headers: %o', inputStream.headers)
|
|
128
|
+
}
|
|
124
129
|
|
|
125
130
|
let debugTimer = null
|
|
126
131
|
function scheduleDebugTimer() {
|
|
@@ -250,6 +255,17 @@ async function exportDataset(opts) {
|
|
|
250
255
|
return result
|
|
251
256
|
}
|
|
252
257
|
|
|
258
|
+
/**
 * Pick the document source that matches the requested export mode.
 *
 * @param {object} options - Export options; `options.mode` selects the source
 *   and the whole object is handed on to the selected stream factory.
 * @returns {*} Whatever the selected factory returns (`getDocumentsStream` for
 *   MODE_STREAM, `getDocumentCursorStream` for MODE_CURSOR).
 * @throws {Error} When `options.mode` is neither MODE_STREAM nor MODE_CURSOR.
 */
function getDocumentInputStream(options) {
  switch (options.mode) {
    case MODE_STREAM:
      return getDocumentsStream(options)
    case MODE_CURSOR:
      return getDocumentCursorStream(options)
    default:
      throw new Error(`Invalid mode: ${options.mode}`)
  }
}
|
|
268
|
+
|
|
253
269
|
function isWritableStream(val) {
|
|
254
270
|
return (
|
|
255
271
|
val !== null &&
|
package/src/filterSystemDocuments.js
CHANGED
|
@@ -2,6 +2,7 @@ const miss = require('mississippi')
|
|
|
2
2
|
const debug = require('./debug')
|
|
3
3
|
|
|
4
4
|
// A document counts as a system document when its `_id` begins with "_.".
// A falsy `doc` (or a falsy `_id`) is handed back as-is rather than coerced to `false`.
const isSystemDocument = (doc) => {
  if (!doc) {
    return doc
  }
  const id = doc._id
  if (!id) {
    return id
  }
  return id.indexOf('_.') === 0
}
|
|
5
|
+
// A cursor entry is an object without an `_id` whose `nextCursor` is defined.
// A falsy `doc` is handed back as-is rather than coerced to `false`.
const isCursor = (doc) => {
  if (!doc) {
    return doc
  }
  return doc._id ? false : doc.nextCursor !== undefined
}
|
|
5
6
|
|
|
6
7
|
module.exports = () =>
|
|
7
8
|
miss.through.obj((doc, enc, callback) => {
|
|
@@ -9,6 +10,10 @@ module.exports = () =>
|
|
|
9
10
|
debug('%s is a system document, skipping', doc && doc._id)
|
|
10
11
|
return callback()
|
|
11
12
|
}
|
|
13
|
+
if (isCursor(doc)) {
|
|
14
|
+
debug('%o is a cursor, skipping', doc)
|
|
15
|
+
return callback()
|
|
16
|
+
}
|
|
12
17
|
|
|
13
18
|
return callback(null, doc)
|
|
14
19
|
})
|
package/src/getDocumentCursorStream.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
const {Transform} = require('node:stream')
|
|
2
|
+
|
|
3
|
+
const pkg = require('../package.json')
|
|
4
|
+
const debug = require('./debug')
|
|
5
|
+
const requestStream = require('./requestStream')
|
|
6
|
+
|
|
7
|
+
// same regex as split2 is using by default: https://github.com/mcollina/split2/blob/53432f54bd5bf422bd55d91d38f898b6c9496fc1/index.js#L86
|
|
8
|
+
const splitRegex = /\r?\n/
|
|
9
|
+
|
|
10
|
+
module.exports = async (options) => {
|
|
11
|
+
let streamsInflight = 0
|
|
12
|
+
function decrementInflight(stream) {
|
|
13
|
+
streamsInflight--
|
|
14
|
+
if (streamsInflight === 0) {
|
|
15
|
+
stream.end()
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const stream = new Transform({
|
|
20
|
+
async transform(chunk, encoding, callback) {
|
|
21
|
+
if (encoding !== 'buffer' && encoding !== 'string') {
|
|
22
|
+
callback(null, chunk)
|
|
23
|
+
return
|
|
24
|
+
}
|
|
25
|
+
this.push(chunk, encoding)
|
|
26
|
+
|
|
27
|
+
let parsedChunk = null
|
|
28
|
+
for (const chunkStr of chunk.toString().split(splitRegex)) {
|
|
29
|
+
if (chunkStr.trim() === '') {
|
|
30
|
+
continue
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
try {
|
|
34
|
+
parsedChunk = JSON.parse(chunkStr)
|
|
35
|
+
} catch (err) {
|
|
36
|
+
// Ignore JSON parse errors
|
|
37
|
+
// this can happen if the chunk is not a JSON object. We just pass it through and let the caller handle it.
|
|
38
|
+
debug('Failed to parse JSON chunk, ignoring', err, chunkStr)
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
if (
|
|
42
|
+
parsedChunk !== null &&
|
|
43
|
+
typeof parsedChunk === 'object' &&
|
|
44
|
+
'nextCursor' in parsedChunk &&
|
|
45
|
+
typeof parsedChunk.nextCursor === 'string' &&
|
|
46
|
+
!('_id' in parsedChunk)
|
|
47
|
+
) {
|
|
48
|
+
debug('Got next cursor "%s", fetching next stream', parsedChunk.nextCursor)
|
|
49
|
+
streamsInflight++
|
|
50
|
+
|
|
51
|
+
const reqStream = await startStream(options, parsedChunk.nextCursor)
|
|
52
|
+
reqStream.on('end', () => decrementInflight(this))
|
|
53
|
+
reqStream.pipe(this, {end: false})
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
callback()
|
|
58
|
+
},
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
streamsInflight++
|
|
62
|
+
const reqStream = await startStream(options, '')
|
|
63
|
+
reqStream.on('end', () => decrementInflight(stream))
|
|
64
|
+
reqStream.pipe(stream, {end: false})
|
|
65
|
+
return stream
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
 * Request one page of the dataset export, starting at the given cursor.
 *
 * @param {object} options - Export options; `client`, `dataset` and
 *   `maxRetries` are read from it.
 * @param {string} nextCursor - Cursor to continue from; the first request
 *   passes an empty string.
 * @returns {Promise<*>} The response that `requestStream` resolves with.
 */
function startStream(options, nextCursor) {
  const {client, dataset, maxRetries} = options

  const exportPath = `/data/export/${dataset}?nextCursor=${encodeURIComponent(nextCursor)}`
  const url = client.getUrl(exportPath)

  const {token} = client.config()
  const headers = {'User-Agent': `${pkg.name}@${pkg.version}`}
  if (token) {
    // Only send credentials when the client is configured with a token
    headers.Authorization = `Bearer ${token}`
  }

  debug('Starting stream with cursor "%s"', nextCursor)

  return requestStream({url, headers, maxRetries}).then((response) => {
    debug('Got stream with HTTP %d', response.statusCode)
    return response
  })
}
|
package/src/validateOptions.js
CHANGED
|
@@ -3,6 +3,8 @@ const {
|
|
|
3
3
|
DOCUMENT_STREAM_MAX_RETRIES,
|
|
4
4
|
ASSET_DOWNLOAD_MAX_RETRIES,
|
|
5
5
|
REQUEST_READ_TIMEOUT,
|
|
6
|
+
MODE_STREAM,
|
|
7
|
+
MODE_CURSOR,
|
|
6
8
|
} = require('./constants')
|
|
7
9
|
|
|
8
10
|
const clientMethods = ['getUrl', 'config']
|
|
@@ -13,6 +15,7 @@ const exportDefaults = {
|
|
|
13
15
|
drafts: true,
|
|
14
16
|
assets: true,
|
|
15
17
|
raw: false,
|
|
18
|
+
mode: MODE_STREAM,
|
|
16
19
|
maxRetries: DOCUMENT_STREAM_MAX_RETRIES,
|
|
17
20
|
maxAssetRetries: ASSET_DOWNLOAD_MAX_RETRIES,
|
|
18
21
|
readTimeout: REQUEST_READ_TIMEOUT,
|
|
@@ -25,6 +28,15 @@ function validateOptions(opts) {
|
|
|
25
28
|
throw new Error(`options.dataset must be a valid dataset name`)
|
|
26
29
|
}
|
|
27
30
|
|
|
31
|
+
if (
|
|
32
|
+
typeof options.mode !== 'string' ||
|
|
33
|
+
(options.mode !== MODE_STREAM && options.mode !== MODE_CURSOR)
|
|
34
|
+
) {
|
|
35
|
+
throw new Error(
|
|
36
|
+
`options.mode must be either "${MODE_STREAM}" or "${MODE_CURSOR}", got "${options.mode}"`,
|
|
37
|
+
)
|
|
38
|
+
}
|
|
39
|
+
|
|
28
40
|
if (options.onProgress && typeof options.onProgress !== 'function') {
|
|
29
41
|
throw new Error(`options.onProgress must be a function`)
|
|
30
42
|
}
|