@rpcbase/server 0.366.0 → 0.368.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/files/constants.ts +2 -0
- package/src/files/finalize_file_upload.ts +20 -8
- package/src/files/helpers/get_grid_fs_bucket.ts +4 -2
- package/src/files/tasks/finalize_file_upload/apply_img_preview.ts +25 -0
- package/src/files/tasks/finalize_file_upload/download_file.ts +113 -0
- package/src/files/tasks/finalize_file_upload/index.ts +32 -0
- package/src/files/upload_chunk.ts +3 -2
- package/src/helpers/sim_test_inject.ts +1 -1
- package/src/files/tasks/finalize_file_upload.ts +0 -5
package/package.json
CHANGED
-  "version": "0.366.0",
+  "version": "0.368.0",

package/src/files/finalize_file_upload.ts
CHANGED
@@ -3,17 +3,29 @@ import Promise from "bluebird"
 import queue from "../../queue"
 
 
+export type ProcessFilePayload = {
+  hash: string;
+  ocr?: boolean;
+  vectors?: boolean;
+  img_preview?: boolean;
+}
+
+const DEFAULTS = {
+  ocr: false,
+  vectors: false,
+  img_preview: false,
+}
+
 // after we've uploaded a file, we process it with ocr, llm vectorization, png rendering, etc
-export const finalize_file_upload = async(files: Array<
+export const finalize_file_upload = async(files: Array<ProcessFilePayload>) => {
 
   await Promise.map(files,
-
-
-
-
-
+    async(file: ProcessFilePayload) => {
+      await queue.add("finalize_file_upload", {...DEFAULTS, ...file}, {
+        jobId: `finalize_file_upload-${file.hash}`,
+        removeOnComplete: true,
+        removeOnFail: true,
+      })
   })
-}
-)
 
 }
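Note on the new enqueue logic: the deterministic jobId (`finalize_file_upload-${file.hash}`) means re-enqueuing the same hash while a job for it is still pending is a no-op, so duplicate finalize requests collapse into one job. The `jobId`/`removeOnComplete`/`removeOnFail` options match a BullMQ-style API; as a hedged sketch (none of this worker wiring appears in the diff, and the queue name "tasks" is an assumption standing in for whatever ../../queue configures), the consuming side could look like:

import {Worker} from "bullmq"

import {finalize_file_upload as run_task} from "./tasks/finalize_file_upload"
import type {ProcessFilePayload} from "./finalize_file_upload"

// "tasks" is a placeholder queue name; the real one lives in ../../queue
const worker = new Worker<ProcessFilePayload>("tasks", async(job) => {
  if (job.name === "finalize_file_upload") {
    // DEFAULTS were merged at enqueue time, so ocr/vectors/img_preview are all set
    await run_task(job.data)
  }
})

worker.on("failed", (job, err) => {
  // jobs are removeOnFail, so log before the record disappears
  console.error("finalize_file_upload failed:", job?.id, err)
})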
package/src/files/helpers/get_grid_fs_bucket.ts
CHANGED
@@ -4,8 +4,10 @@ import mongoose from "../../../mongoose"
 
 const CHUNK_SIZE = 1024 * 1024
 
-
-
+export const get_grid_fs_bucket = (
+  bucket_name: string,
+  chunk_size: number = CHUNK_SIZE,
+) => {
   assert(chunk_size === CHUNK_SIZE, "chunk_size must match default CHUNK_SIZE")
 
   const {db} = mongoose.connection
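The helper now takes the bucket name explicitly instead of a hardcoded one (upload_chunk.ts and download_file.ts both pass UPLOAD_BUCKET_NAME, below). Only the signature and the first two body lines appear in this hunk; a hedged sketch of the full helper, with the GridFSBucket construction assumed rather than shown in the diff:

import assert from "assert"

import mongoose from "../../../mongoose"

const CHUNK_SIZE = 1024 * 1024

export const get_grid_fs_bucket = (
  bucket_name: string,
  chunk_size: number = CHUNK_SIZE,
) => {
  assert(chunk_size === CHUNK_SIZE, "chunk_size must match default CHUNK_SIZE")

  const {db} = mongoose.connection

  // assumed: the mongo driver's GridFSBucket, namespaced by bucket_name
  return new mongoose.mongo.GridFSBucket(db, {
    bucketName: bucket_name,
    chunkSizeBytes: chunk_size,
  })
}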
package/src/files/tasks/finalize_file_upload/apply_img_preview.ts
ADDED
@@ -0,0 +1,25 @@
+import {exec} from "child_process"
+import {promisify} from "util"
+
+
+const execAsync = promisify(exec)
+
+interface Payload {
+  pdfPath: string;
+  imgOutputPath: string;
+}
+
+export const apply_img_preview = async(payload: Payload): Promise<void> => {
+  console.log("PL", payload, execAsync)
+  // try {
+  //   const {stdout, stderr} = await execAsync(
+  //     `pdftoppm -png ${payload.pdfPath} ${payload.imgOutputPath}`,
+  //   )
+  //   if (stderr) {
+  //     console.error(`Error: ${stderr}`)
+  //   }
+  //   console.log(`Image preview created: ${stdout}`)
+  // } catch (error) {
+  //   console.error(`Execution error: ${error}`)
+  // }
+}
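apply_img_preview ships as a stub: the pdftoppm call is commented out and the function only logs its payload. When the call is enabled, interpolating paths into a shell string is fragile (spaces and shell metacharacters break it); a hedged alternative sketch, not in the package, using execFile so arguments are passed without a shell:

import {execFile} from "child_process"
import {promisify} from "util"

const execFileAsync = promisify(execFile)

// hypothetical variant: same pdftoppm invocation as the commented-out code,
// but arguments go in as an array instead of an interpolated shell string
export const apply_img_preview_execfile = async(
  pdfPath: string,
  imgOutputPath: string,
): Promise<void> => {
  // pdftoppm writes <imgOutputPath>-<page>.png for each page
  const {stderr} = await execFileAsync("pdftoppm", ["-png", pdfPath, imgOutputPath])
  if (stderr) console.error(`pdftoppm: ${stderr}`)
}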
package/src/files/tasks/finalize_file_upload/download_file.ts
ADDED
@@ -0,0 +1,113 @@
+import fs from "fs/promises"
+import {createReadStream, createWriteStream} from "fs"
+import path from "path"
+import {createBrotliDecompress} from "zlib"
+import Promise from "bluebird"
+
+import mongoose from "../../../../mongoose"
+import {UPLOAD_BUCKET_NAME} from "../../constants"
+import {get_grid_fs_bucket} from "../../helpers/get_grid_fs_bucket"
+
+
+const DL_CHUNK_CONCURRENCY = 8
+
+const get_metadata = async(hash: string) => {
+  const files_coll = mongoose.connection.db.collection(
+    `${UPLOAD_BUCKET_NAME}.files`,
+  )
+
+  const first_chunk_name = `${hash}.0`
+
+  const file_metadata = await files_coll.findOne({
+    filename: first_chunk_name,
+  })
+
+  return file_metadata.metadata
+}
+
+const download_chunk =
+  (bucket: mongoose.mongo.GridFSBucket, tmp_wd: string, metadata: any) =>
+    (chunk_num: number) =>
+      new Promise((resolve, reject) => {
+        const filename = `${metadata.hash}.${chunk_num}`
+
+        const dl_stream = bucket.openDownloadStreamByName(filename)
+
+        const file_path = path.join(tmp_wd, `./${filename}`)
+
+        const write_stream = createWriteStream(file_path)
+
+        if (metadata.is_compressed) {
+          const decompress_stream = createBrotliDecompress()
+          dl_stream.pipe(decompress_stream).pipe(write_stream)
+        } else {
+          dl_stream.pipe(write_stream)
+        }
+
+        dl_stream.on("error", (error) => {
+          console.error("Error downloading file:", error)
+          reject(error)
+        })
+
+        write_stream.on("error", (error) => {
+          console.error("Error writing file:", error)
+          reject(error)
+        })
+
+        write_stream.on("finish", () => {
+          console.log("File downloaded and written successfully", file_path)
+          resolve()
+        })
+      })
+
+
+const rebuild_file_from_chunks = async(
+  tmp_wd: string,
+  metadata: any,
+  chunks: Array<number>,
+) => {
+  const full_file_path = path.join(tmp_wd, `./${metadata.hash}`)
+  const output_stream = createWriteStream(full_file_path)
+
+  const append_chunk = (chunk_num: number) => new Promise<void>((resolve, reject) => {
+    const chunk_path = path.join(tmp_wd, `./${metadata.hash}.${chunk_num}`)
+
+    const chunk_read_stream = createReadStream(chunk_path)
+
+    chunk_read_stream.pipe(output_stream, {end: false})
+
+    chunk_read_stream.on("end", async() => {
+      await fs.rm(chunk_path)
+      resolve()
+    })
+    chunk_read_stream.on("error", reject)
+  })
+
+  for (const chunk_num of chunks) {
+    await append_chunk(chunk_num)
+  }
+
+  output_stream.end()
+
+  return full_file_path
+}
+
+// downloads all chunks, reconstruct if necessary
+export const download_file = async(tmp_wd: string, hash: string): Promise<string> => {
+  const bucket = get_grid_fs_bucket(UPLOAD_BUCKET_NAME)
+
+  const metadata = await get_metadata(hash)
+
+  const chunks = Array.from({length: metadata.total_chunks}, (_, i) => i)
+
+  // download all file chunks
+  await Promise.map(
+    chunks,
+    download_chunk(bucket, tmp_wd, metadata),
+    {concurrency: DL_CHUNK_CONCURRENCY},
+  )
+
+  const full_path = await rebuild_file_from_chunks(tmp_wd, metadata, chunks)
+
+  return full_path
+}
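download_file pulls chunks with bounded concurrency (8 at a time) but reassembles strictly in order: rebuild_file_from_chunks awaits each append, pipes with {end: false} so the shared output stream stays open between chunks, and deletes each chunk file once appended. Per-chunk Brotli decompression mirrors the compressed upload path. A hypothetical usage sketch — the tmp-directory layout here is made up, not the task's own:

import os from "os"
import path from "path"
import {mkdirp} from "mkdirp"

import {download_file} from "./tasks/finalize_file_upload/download_file"

// hypothetical caller: rebuild an uploaded file on local disk by hash
const fetch_upload = async(hash: string): Promise<string> => {
  const tmp_wd = path.join(os.tmpdir(), `./uploads/${hash}`) // assumed dir
  await mkdirp(tmp_wd)
  return download_file(tmp_wd, hash) // resolves to `${tmp_wd}/${hash}`
}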
package/src/files/tasks/finalize_file_upload/index.ts
ADDED
@@ -0,0 +1,32 @@
+import path from "path"
+import os from "os"
+import {mkdirp} from "mkdirp"
+
+import {download_file} from "./download_file"
+
+import {TMP_UPLOADS_DIR} from "../../constants"
+
+
+const {RB_TENANT_ID} = process.env
+
+export const finalize_file_upload = async(payload) => {
+  console.log("TASK:FINALIZE FILE UPLOAD")
+
+  const {hash, img_preview} = payload
+
+  const tmp_wd = path.join(
+    os.tmpdir(),
+    `./${TMP_UPLOADS_DIR}/${RB_TENANT_ID}/${hash}`,
+  )
+
+  await mkdirp(tmp_wd)
+  console.log("created dir", tmp_wd)
+
+  const full_path = await download_file(tmp_wd, hash)
+
+  console.log("got full path", full_path)
+
+  if (img_preview) {
+    console.log("RUN img preview")
+  }
+}
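The img_preview branch currently only logs; apply_img_preview lives in the same directory but is not called yet. A hedged sketch of the eventual wiring — the output prefix is an assumption, since the diff does not specify one:

import {apply_img_preview} from "./apply_img_preview"

// hypothetical follow-up: run the preview step on the reassembled file
const run_img_preview = async(full_path: string): Promise<void> => {
  await apply_img_preview({
    pdfPath: full_path,
    imgOutputPath: `${full_path}_preview`, // assumed prefix; pdftoppm adds -<page>.png
  })
}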
package/src/files/upload_chunk.ts
CHANGED
@@ -3,11 +3,12 @@ import fs from "fs"
 import {formidable, File} from "formidable"
 
 import {get_grid_fs_bucket} from "./helpers/get_grid_fs_bucket"
-import {
+import {sim_test_inject} from "../helpers/sim_test_inject"
+import {UPLOAD_BUCKET_NAME} from "./constants"
 
 
 const upload_file_to_bucket = async(file: File, metadata): Promise<void> => {
-  const bucket = get_grid_fs_bucket(
+  const bucket = get_grid_fs_bucket(UPLOAD_BUCKET_NAME, metadata.chunk_size)
 
   const chunk_filename = `${metadata.hash}.${metadata.chunk_index}`
 