lfss 0.5.1__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lfss-0.5.1 → lfss-0.6.0}/PKG-INFO +1 -1
- {lfss-0.5.1 → lfss-0.6.0}/frontend/api.js +1 -1
- {lfss-0.5.1 → lfss-0.6.0}/frontend/popup.js +13 -1
- {lfss-0.5.1 → lfss-0.6.0}/frontend/scripts.js +38 -6
- {lfss-0.5.1 → lfss-0.6.0}/frontend/styles.css +1 -1
- {lfss-0.5.1 → lfss-0.6.0}/lfss/cli/balance.py +37 -24
- {lfss-0.5.1 → lfss-0.6.0}/lfss/client/api.py +28 -9
- {lfss-0.5.1 → lfss-0.6.0}/lfss/sql/init.sql +7 -6
- {lfss-0.5.1 → lfss-0.6.0}/lfss/src/database.py +97 -90
- lfss-0.6.0/lfss/src/datatype.py +55 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/src/server.py +65 -30
- {lfss-0.5.1 → lfss-0.6.0}/pyproject.toml +1 -1
- {lfss-0.5.1 → lfss-0.6.0}/Readme.md +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/docs/Known_issues.md +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/docs/Permission.md +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/frontend/index.html +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/frontend/popup.css +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/frontend/utils.js +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/cli/cli.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/cli/panel.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/cli/serve.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/cli/user.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/client/__init__.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/sql/pragma.sql +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/src/__init__.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/src/config.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/src/error.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/src/log.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/src/stat.py +0 -0
- {lfss-0.5.1 → lfss-0.6.0}/lfss/src/utils.py +0 -0
@@ -191,7 +191,7 @@ export default class Connector {
|
|
191
191
|
* @param {string} path - file path(url)
|
192
192
|
* @param {string} newPath - new file path(url)
|
193
193
|
*/
|
194
|
-
async
|
194
|
+
async move(path, newPath){
|
195
195
|
if (path.startsWith('/')){ path = path.slice(1); }
|
196
196
|
if (newPath.startsWith('/')){ newPath = newPath.slice(1); }
|
197
197
|
const dst = new URL(this.config.endpoint + '/_api/meta');
|
@@ -40,7 +40,7 @@ export function createFloatingWindow(innerHTML = '', {
|
|
40
40
|
return [floatingWindow, closeWindow];
|
41
41
|
}
|
42
42
|
|
43
|
-
/* select can be "last-filename" */
|
43
|
+
/* select can be "last-filename" or "last-pathname" */
|
44
44
|
export function showFloatingWindowLineInput(onSubmit = (v) => {}, {
|
45
45
|
text = "",
|
46
46
|
placeholder = "Enter text",
|
@@ -72,6 +72,7 @@ export function showFloatingWindowLineInput(onSubmit = (v) => {}, {
|
|
72
72
|
};
|
73
73
|
|
74
74
|
if (select === "last-filename") {
|
75
|
+
// select the last filename, e.g. "file" in "/path/to/file.txt"
|
75
76
|
const inputVal = input.value;
|
76
77
|
let lastSlash = inputVal.lastIndexOf("/");
|
77
78
|
if (lastSlash === -1) {
|
@@ -84,6 +85,17 @@ export function showFloatingWindowLineInput(onSubmit = (v) => {}, {
|
|
84
85
|
}
|
85
86
|
input.setSelectionRange(lastSlash + 1, lastSlash + lastDot + 1);
|
86
87
|
}
|
88
|
+
else if (select === "last-pathname") {
|
89
|
+
// select the last pathname, e.g. "to" in "/path/to/<filename>"
|
90
|
+
const lastSlash = input.value.lastIndexOf("/");
|
91
|
+
const secondLastSlash = input.value.lastIndexOf("/", input.value.lastIndexOf("/") - 1);
|
92
|
+
if (secondLastSlash !== -1) {
|
93
|
+
input.setSelectionRange(secondLastSlash + 1, lastSlash);
|
94
|
+
}
|
95
|
+
else {
|
96
|
+
input.setSelectionRange(0, lastSlash);
|
97
|
+
}
|
98
|
+
}
|
87
99
|
|
88
100
|
return [floatingWindow, closeWindow];
|
89
101
|
}
|
@@ -5,6 +5,9 @@ import { formatSize, decodePathURI, ensurePathURI, copyToClipboard, getRandomStr
|
|
5
5
|
|
6
6
|
const conn = new Connector();
|
7
7
|
let userRecord = null;
|
8
|
+
const ensureSlashEnd = (path) => {
|
9
|
+
return path.endsWith('/') ? path : path + '/';
|
10
|
+
}
|
8
11
|
|
9
12
|
const endpointInput = document.querySelector('input#endpoint');
|
10
13
|
const tokenInput = document.querySelector('input#token');
|
@@ -128,11 +131,13 @@ uploadButton.addEventListener('click', () => {
|
|
128
131
|
throw new Error('File name cannot end with /');
|
129
132
|
}
|
130
133
|
path = path + fileName;
|
134
|
+
showPopup('Uploading...', {level: 'info', timeout: 3000});
|
131
135
|
conn.put(path, file)
|
132
136
|
.then(() => {
|
133
137
|
refreshFileList();
|
134
138
|
uploadFileNameInput.value = '';
|
135
139
|
onFileNameInpuChange();
|
140
|
+
showPopup('Upload success.', {level: 'success', timeout: 3000});
|
136
141
|
},
|
137
142
|
(err) => {
|
138
143
|
showPopup('Failed to upload file: ' + err, {level: 'error', timeout: 5000});
|
@@ -191,9 +196,14 @@ Are you sure you want to proceed?
|
|
191
196
|
const path = dstPath + file.name;
|
192
197
|
promises.push(uploadFile(file, path));
|
193
198
|
}
|
199
|
+
showPopup('Uploading multiple files...', {level: 'info', timeout: 3000});
|
194
200
|
Promise.all(promises).then(
|
195
201
|
() => {
|
202
|
+
showPopup('Upload success.', {level: 'success', timeout: 3000});
|
196
203
|
refreshFileList();
|
204
|
+
},
|
205
|
+
(err) => {
|
206
|
+
showPopup('Failed to upload some files: ' + err, {level: 'error', timeout: 5000});
|
197
207
|
}
|
198
208
|
);
|
199
209
|
}
|
@@ -260,15 +270,16 @@ function refreshFileList(){
|
|
260
270
|
tr.appendChild(accessTd);
|
261
271
|
}
|
262
272
|
{
|
273
|
+
const dirurl = ensureSlashEnd(dir.url);
|
263
274
|
const actTd = document.createElement('td');
|
264
275
|
const actContainer = document.createElement('div');
|
265
276
|
actContainer.classList.add('action-container');
|
266
277
|
|
267
278
|
const showMetaButton = document.createElement('a');
|
268
|
-
showMetaButton.textContent = '
|
279
|
+
showMetaButton.textContent = 'Reveal';
|
269
280
|
showMetaButton.style.cursor = 'pointer';
|
270
281
|
showMetaButton.addEventListener('click', () => {
|
271
|
-
const dirUrlEncap =
|
282
|
+
const dirUrlEncap = dirurl;
|
272
283
|
conn.getMetadata(dirUrlEncap).then(
|
273
284
|
(meta) => {
|
274
285
|
sizeTd.textContent = formatSize(meta.size);
|
@@ -280,6 +291,30 @@ function refreshFileList(){
|
|
280
291
|
});
|
281
292
|
actContainer.appendChild(showMetaButton);
|
282
293
|
|
294
|
+
const moveButton = document.createElement('a');
|
295
|
+
moveButton.textContent = 'Move';
|
296
|
+
moveButton.style.cursor = 'pointer';
|
297
|
+
moveButton.addEventListener('click', () => {
|
298
|
+
showFloatingWindowLineInput((dstPath) => {
|
299
|
+
dstPath = encodePathURI(dstPath);
|
300
|
+
console.log("Moving", dirurl, "to", dstPath);
|
301
|
+
conn.move(dirurl, dstPath)
|
302
|
+
.then(() => {
|
303
|
+
refreshFileList();
|
304
|
+
},
|
305
|
+
(err) => {
|
306
|
+
showPopup('Failed to move path: ' + err, {level: 'error'});
|
307
|
+
}
|
308
|
+
);
|
309
|
+
}, {
|
310
|
+
text: 'Enter the destination path: ',
|
311
|
+
placeholder: 'Destination path',
|
312
|
+
value: decodePathURI(dirurl),
|
313
|
+
select: "last-pathname"
|
314
|
+
});
|
315
|
+
});
|
316
|
+
actContainer.appendChild(moveButton);
|
317
|
+
|
283
318
|
const downloadButton = document.createElement('a');
|
284
319
|
downloadButton.textContent = 'Download';
|
285
320
|
downloadButton.href = conn.config.endpoint + '/_api/bundle?' +
|
@@ -402,10 +437,7 @@ function refreshFileList(){
|
|
402
437
|
moveButton.addEventListener('click', () => {
|
403
438
|
showFloatingWindowLineInput((dstPath) => {
|
404
439
|
dstPath = encodePathURI(dstPath);
|
405
|
-
|
406
|
-
dstPath = dstPath.slice(0, -1);
|
407
|
-
}
|
408
|
-
conn.moveFile(file.url, dstPath)
|
440
|
+
conn.move(file.url, dstPath)
|
409
441
|
.then(() => {
|
410
442
|
refreshFileList();
|
411
443
|
},
|
@@ -68,44 +68,57 @@ async def move_to_internal(f_id: str, flag: str = ''):
|
|
68
68
|
raise e
|
69
69
|
|
70
70
|
|
71
|
-
async def _main():
|
71
|
+
async def _main(batch_size: int = 10000):
|
72
72
|
|
73
73
|
tasks = []
|
74
74
|
start_time = time.time()
|
75
|
-
async with aiosqlite.connect(db_file) as conn:
|
76
|
-
exceeded_rows = await (await conn.execute(
|
77
|
-
"SELECT file_id FROM fmeta WHERE file_size > ? AND external = 0",
|
78
|
-
(LARGE_FILE_BYTES,)
|
79
|
-
)).fetchall()
|
80
|
-
|
81
|
-
for i in range(0, len(exceeded_rows)):
|
82
|
-
row = exceeded_rows[i]
|
83
|
-
f_id = row[0]
|
84
|
-
tasks.append(move_to_external(f_id, flag=f"[e-{i+1}/{len(exceeded_rows)}] "))
|
85
75
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
76
|
+
e_cout = 0
|
77
|
+
batch_count = 0
|
78
|
+
while True:
|
79
|
+
async with aiosqlite.connect(db_file) as conn:
|
80
|
+
exceeded_rows = list(await (await conn.execute(
|
81
|
+
"SELECT file_id FROM fmeta WHERE file_size > ? AND external = 0 LIMIT ? OFFSET ?",
|
82
|
+
(LARGE_FILE_BYTES, batch_size, batch_size * batch_count)
|
83
|
+
)).fetchall())
|
84
|
+
if not exceeded_rows:
|
85
|
+
break
|
86
|
+
e_cout += len(exceeded_rows)
|
87
|
+
for i in range(0, len(exceeded_rows)):
|
88
|
+
row = exceeded_rows[i]
|
89
|
+
f_id = row[0]
|
90
|
+
tasks.append(move_to_external(f_id, flag=f"[b{batch_count+1}-e{i+1}/{len(exceeded_rows)}] "))
|
91
|
+
await asyncio.gather(*tasks)
|
92
|
+
|
93
|
+
i_count = 0
|
94
|
+
batch_count = 0
|
95
|
+
while True:
|
96
|
+
async with aiosqlite.connect(db_file) as conn:
|
97
|
+
under_rows = list(await (await conn.execute(
|
98
|
+
"SELECT file_id, file_size, external FROM fmeta WHERE file_size <= ? AND external = 1 LIMIT ? OFFSET ?",
|
99
|
+
(LARGE_FILE_BYTES, batch_size, batch_size * batch_count)
|
100
|
+
)).fetchall())
|
101
|
+
if not under_rows:
|
102
|
+
break
|
103
|
+
i_count += len(under_rows)
|
104
|
+
for i in range(0, len(under_rows)):
|
105
|
+
row = under_rows[i]
|
106
|
+
f_id = row[0]
|
107
|
+
tasks.append(move_to_internal(f_id, flag=f"[b{batch_count+1}-i{i+1}/{len(under_rows)}] "))
|
108
|
+
await asyncio.gather(*tasks)
|
91
109
|
|
92
|
-
for i in range(0, len(under_rows)):
|
93
|
-
row = under_rows[i]
|
94
|
-
f_id = row[0]
|
95
|
-
tasks.append(move_to_internal(f_id, flag=f"[i-{i+1}/{len(under_rows)}] "))
|
96
|
-
|
97
|
-
await asyncio.gather(*tasks)
|
98
110
|
end_time = time.time()
|
99
111
|
print(f"Balancing complete, took {end_time - start_time:.2f} seconds. "
|
100
|
-
f"{
|
112
|
+
f"{e_cout} files moved to external storage, {i_count} files moved to internal storage.")
|
101
113
|
|
102
114
|
def main():
|
103
115
|
global sem
|
104
116
|
parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
|
105
117
|
parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
|
118
|
+
parser.add_argument("-b", "--batch-size", type=int, default=10000, help="Batch size for processing files")
|
106
119
|
args = parser.parse_args()
|
107
120
|
sem = Semaphore(args.jobs)
|
108
|
-
asyncio.run(_main())
|
121
|
+
asyncio.run(_main(args.batch_size))
|
109
122
|
|
110
123
|
if __name__ == '__main__':
|
111
124
|
main()
|
@@ -2,7 +2,7 @@ from typing import Optional, Literal
|
|
2
2
|
import os
|
3
3
|
import requests
|
4
4
|
import urllib.parse
|
5
|
-
from lfss.src.
|
5
|
+
from lfss.src.datatype import (
|
6
6
|
FileReadPermission, FileRecord, DirectoryRecord, UserRecord, PathContents
|
7
7
|
)
|
8
8
|
|
@@ -36,8 +36,6 @@ class Connector:
|
|
36
36
|
|
37
37
|
def put(self, path: str, file_data: bytes, permission: int | FileReadPermission = 0, conflict: Literal['overwrite', 'abort', 'skip'] = 'abort'):
|
38
38
|
"""Uploads a file to the specified path."""
|
39
|
-
if path.startswith('/'):
|
40
|
-
path = path[1:]
|
41
39
|
response = self._fetch('PUT', path, search_params={
|
42
40
|
'permission': int(permission),
|
43
41
|
'conflict': conflict
|
@@ -47,20 +45,41 @@ class Connector:
|
|
47
45
|
)
|
48
46
|
return response.json()
|
49
47
|
|
50
|
-
def
|
51
|
-
"""
|
48
|
+
def put_json(self, path: str, data: dict, permission: int | FileReadPermission = 0, conflict: Literal['overwrite', 'abort', 'skip'] = 'abort'):
|
49
|
+
"""Uploads a JSON file to the specified path."""
|
50
|
+
assert path.endswith('.json'), "Path must end with .json"
|
51
|
+
response = self._fetch('PUT', path, search_params={
|
52
|
+
'permission': int(permission),
|
53
|
+
'conflict': conflict
|
54
|
+
})(
|
55
|
+
json=data,
|
56
|
+
headers={'Content-Type': 'application/json'}
|
57
|
+
)
|
58
|
+
return response.json()
|
59
|
+
|
60
|
+
def _get(self, path: str) -> Optional[requests.Response]:
|
52
61
|
try:
|
53
62
|
response = self._fetch('GET', path)()
|
54
63
|
except requests.exceptions.HTTPError as e:
|
55
64
|
if e.response.status_code == 404:
|
56
65
|
return None
|
57
66
|
raise e
|
67
|
+
return response
|
68
|
+
|
69
|
+
def get(self, path: str) -> Optional[bytes]:
|
70
|
+
"""Downloads a file from the specified path."""
|
71
|
+
response = self._get(path)
|
72
|
+
if response is None: return None
|
58
73
|
return response.content
|
74
|
+
|
75
|
+
def get_json(self, path: str) -> Optional[dict]:
|
76
|
+
response = self._get(path)
|
77
|
+
if response is None: return None
|
78
|
+
assert response.headers['Content-Type'] == 'application/json'
|
79
|
+
return response.json()
|
59
80
|
|
60
81
|
def delete(self, path: str):
|
61
82
|
"""Deletes the file at the specified path."""
|
62
|
-
if path.startswith('/'):
|
63
|
-
path = path[1:]
|
64
83
|
self._fetch('DELETE', path)()
|
65
84
|
|
66
85
|
def get_metadata(self, path: str) -> Optional[FileRecord | DirectoryRecord]:
|
@@ -87,8 +106,8 @@ class Connector:
|
|
87
106
|
headers={'Content-Type': 'application/www-form-urlencoded'}
|
88
107
|
)
|
89
108
|
|
90
|
-
def
|
91
|
-
"""
|
109
|
+
def move(self, path: str, new_path: str):
|
110
|
+
"""Move file or directory to a new path."""
|
92
111
|
self._fetch('POST', '_api/meta', {'path': path, 'new_path': new_path})(
|
93
112
|
headers = {'Content-Type': 'application/www-form-urlencoded'}
|
94
113
|
)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
CREATE TABLE IF NOT EXISTS user (
|
2
2
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
3
|
-
username VARCHAR(
|
4
|
-
credential VARCHAR(
|
3
|
+
username VARCHAR(256) UNIQUE NOT NULL,
|
4
|
+
credential VARCHAR(256) NOT NULL,
|
5
5
|
is_admin BOOLEAN DEFAULT FALSE,
|
6
6
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
7
7
|
last_active TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
@@ -10,19 +10,20 @@ CREATE TABLE IF NOT EXISTS user (
|
|
10
10
|
);
|
11
11
|
|
12
12
|
CREATE TABLE IF NOT EXISTS fmeta (
|
13
|
-
url VARCHAR(
|
13
|
+
url VARCHAR(1024) PRIMARY KEY,
|
14
14
|
owner_id INTEGER NOT NULL,
|
15
|
-
file_id
|
15
|
+
file_id CHAR(32) NOT NULL,
|
16
16
|
file_size INTEGER,
|
17
17
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
18
18
|
access_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
19
19
|
permission INTEGER DEFAULT 0,
|
20
|
-
external BOOLEAN DEFAULT FALSE,
|
20
|
+
external BOOLEAN DEFAULT FALSE,
|
21
|
+
mime_type VARCHAR(256) DEFAULT 'application/octet-stream',
|
21
22
|
FOREIGN KEY(owner_id) REFERENCES user(id)
|
22
23
|
);
|
23
24
|
|
24
25
|
CREATE TABLE IF NOT EXISTS fdata (
|
25
|
-
file_id
|
26
|
+
file_id CHAR(32) PRIMARY KEY,
|
26
27
|
data BLOB
|
27
28
|
);
|
28
29
|
|
@@ -4,16 +4,16 @@ from abc import ABC, abstractmethod
|
|
4
4
|
|
5
5
|
import urllib.parse
|
6
6
|
from pathlib import Path
|
7
|
-
import
|
7
|
+
import hashlib, uuid
|
8
8
|
from contextlib import asynccontextmanager
|
9
9
|
from functools import wraps
|
10
|
-
from enum import IntEnum
|
11
10
|
import zipfile, io, asyncio
|
12
11
|
|
13
12
|
import aiosqlite, aiofiles
|
14
13
|
import aiofiles.os
|
15
14
|
from asyncio import Lock
|
16
15
|
|
16
|
+
from .datatype import UserRecord, FileReadPermission, FileRecord, DirectoryRecord, PathContents
|
17
17
|
from .config import DATA_HOME, LARGE_BLOB_DIR
|
18
18
|
from .log import get_logger
|
19
19
|
from .utils import decode_uri_compnents
|
@@ -64,26 +64,6 @@ class DBConnBase(ABC):
|
|
64
64
|
async def commit(self):
|
65
65
|
await self.conn.commit()
|
66
66
|
|
67
|
-
class FileReadPermission(IntEnum):
|
68
|
-
UNSET = 0 # not set
|
69
|
-
PUBLIC = 1 # accessible by anyone
|
70
|
-
PROTECTED = 2 # accessible by any user
|
71
|
-
PRIVATE = 3 # accessible by owner only (including admin)
|
72
|
-
|
73
|
-
@dataclasses.dataclass
|
74
|
-
class UserRecord:
|
75
|
-
id: int
|
76
|
-
username: str
|
77
|
-
credential: str
|
78
|
-
is_admin: bool
|
79
|
-
create_time: str
|
80
|
-
last_active: str
|
81
|
-
max_storage: int
|
82
|
-
permission: 'FileReadPermission'
|
83
|
-
|
84
|
-
def __str__(self):
|
85
|
-
return f"User {self.username} (id={self.id}, admin={self.is_admin}, created at {self.create_time}, last active at {self.last_active}, storage={self.max_storage}, permission={self.permission})"
|
86
|
-
|
87
67
|
DECOY_USER = UserRecord(0, 'decoy', 'decoy', False, '2021-01-01 00:00:00', '2021-01-01 00:00:00', 0, FileReadPermission.PRIVATE)
|
88
68
|
class UserConn(DBConnBase):
|
89
69
|
|
@@ -174,37 +154,6 @@ class UserConn(DBConnBase):
|
|
174
154
|
await self.conn.execute("DELETE FROM user WHERE username = ?", (username, ))
|
175
155
|
self.logger.info(f"Delete user {username}")
|
176
156
|
|
177
|
-
@dataclasses.dataclass
|
178
|
-
class FileRecord:
|
179
|
-
url: str
|
180
|
-
owner_id: int
|
181
|
-
file_id: str # defines mapping from fmata to fdata
|
182
|
-
file_size: int
|
183
|
-
create_time: str
|
184
|
-
access_time: str
|
185
|
-
permission: FileReadPermission
|
186
|
-
external: bool
|
187
|
-
|
188
|
-
def __str__(self):
|
189
|
-
return f"File {self.url} (owner={self.owner_id}, created at {self.create_time}, accessed at {self.access_time}, " + \
|
190
|
-
f"file_id={self.file_id}, permission={self.permission}, size={self.file_size}, external={self.external})"
|
191
|
-
|
192
|
-
@dataclasses.dataclass
|
193
|
-
class DirectoryRecord:
|
194
|
-
url: str
|
195
|
-
size: int
|
196
|
-
create_time: str = ""
|
197
|
-
update_time: str = ""
|
198
|
-
access_time: str = ""
|
199
|
-
|
200
|
-
def __str__(self):
|
201
|
-
return f"Directory {self.url} (size={self.size})"
|
202
|
-
|
203
|
-
@dataclasses.dataclass
|
204
|
-
class PathContents:
|
205
|
-
dirs: list[DirectoryRecord]
|
206
|
-
files: list[FileRecord]
|
207
|
-
|
208
157
|
class FileConn(DBConnBase):
|
209
158
|
|
210
159
|
@staticmethod
|
@@ -235,6 +184,38 @@ class FileConn(DBConnBase):
|
|
235
184
|
ALTER TABLE fmeta ADD COLUMN external BOOLEAN DEFAULT FALSE
|
236
185
|
''')
|
237
186
|
|
187
|
+
# backward compatibility, since 0.6.0
|
188
|
+
async with self.conn.execute("SELECT * FROM fmeta") as cursor:
|
189
|
+
res = await cursor.fetchone()
|
190
|
+
if res and len(res) < 9:
|
191
|
+
self.logger.info("Updating fmeta table")
|
192
|
+
await self.conn.execute('''
|
193
|
+
ALTER TABLE fmeta ADD COLUMN mime_type TEXT DEFAULT 'application/octet-stream'
|
194
|
+
''')
|
195
|
+
# check all mime types
|
196
|
+
import mimetypes, mimesniff
|
197
|
+
async with self.conn.execute("SELECT url, file_id, external FROM fmeta") as cursor:
|
198
|
+
res = await cursor.fetchall()
|
199
|
+
async with self.conn.execute("SELECT count(*) FROM fmeta") as cursor:
|
200
|
+
count = await cursor.fetchone()
|
201
|
+
assert count is not None
|
202
|
+
for counter, r in enumerate(res, start=1):
|
203
|
+
print(f"Checking mimetype for {counter}/{count[0]}")
|
204
|
+
url, f_id, external = r
|
205
|
+
fname = url.split('/')[-1]
|
206
|
+
mime_type, _ = mimetypes.guess_type(fname)
|
207
|
+
if mime_type is None:
|
208
|
+
# try to sniff the file
|
209
|
+
if not external:
|
210
|
+
async with self.conn.execute("SELECT data FROM fdata WHERE file_id = ?", (f_id, )) as cursor:
|
211
|
+
blob = await cursor.fetchone()
|
212
|
+
assert blob is not None
|
213
|
+
blob = blob[0]
|
214
|
+
mime_type = mimesniff.what(blob)
|
215
|
+
else:
|
216
|
+
mime_type = mimesniff.what(LARGE_BLOB_DIR / f_id)
|
217
|
+
await self.conn.execute("UPDATE fmeta SET mime_type = ? WHERE url = ?", (mime_type, url))
|
218
|
+
|
238
219
|
return self
|
239
220
|
|
240
221
|
async def get_file_record(self, url: str) -> Optional[FileRecord]:
|
@@ -331,7 +312,7 @@ class FileConn(DBConnBase):
|
|
331
312
|
dirs = await asyncio.gather(*[get_dir(url + d) for d in dirs_str])
|
332
313
|
return PathContents(dirs, files)
|
333
314
|
|
334
|
-
async def get_path_record(self, url: str) ->
|
315
|
+
async def get_path_record(self, url: str) -> DirectoryRecord:
|
335
316
|
assert url.endswith('/'), "Path must end with /"
|
336
317
|
async with self.conn.execute("""
|
337
318
|
SELECT MIN(create_time) as create_time,
|
@@ -372,43 +353,42 @@ class FileConn(DBConnBase):
|
|
372
353
|
assert res is not None
|
373
354
|
return res[0] or 0
|
374
355
|
|
356
|
+
@atomic
|
357
|
+
async def update_file_record(
|
358
|
+
self, url, owner_id: Optional[int] = None, permission: Optional[FileReadPermission] = None
|
359
|
+
):
|
360
|
+
old = await self.get_file_record(url)
|
361
|
+
assert old is not None, f"File {url} not found"
|
362
|
+
if owner_id is None:
|
363
|
+
owner_id = old.owner_id
|
364
|
+
if permission is None:
|
365
|
+
permission = old.permission
|
366
|
+
await self.conn.execute(
|
367
|
+
"UPDATE fmeta SET owner_id = ?, permission = ? WHERE url = ?",
|
368
|
+
(owner_id, int(permission), url)
|
369
|
+
)
|
370
|
+
self.logger.info(f"Updated file {url}")
|
371
|
+
|
375
372
|
@atomic
|
376
373
|
async def set_file_record(
|
377
374
|
self, url: str,
|
378
|
-
owner_id:
|
379
|
-
file_id:
|
380
|
-
file_size:
|
381
|
-
permission:
|
382
|
-
external:
|
375
|
+
owner_id: int,
|
376
|
+
file_id:str,
|
377
|
+
file_size: int,
|
378
|
+
permission: FileReadPermission,
|
379
|
+
external: bool,
|
380
|
+
mime_type: str
|
383
381
|
):
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
if permission is None: permission = old.permission
|
395
|
-
await self.conn.execute(
|
396
|
-
"""
|
397
|
-
UPDATE fmeta SET owner_id = ?, permission = ?,
|
398
|
-
access_time = CURRENT_TIMESTAMP WHERE url = ?
|
399
|
-
""", (owner_id, int(permission), url))
|
400
|
-
self.logger.info(f"File {url} updated")
|
401
|
-
else:
|
402
|
-
self.logger.debug(f"Creating fmeta {url}: permission={permission}, owner_id={owner_id}, file_id={file_id}, file_size={file_size}, external={external}")
|
403
|
-
if permission is None:
|
404
|
-
permission = FileReadPermission.UNSET
|
405
|
-
assert owner_id is not None and file_id is not None and file_size is not None and external is not None
|
406
|
-
await self.conn.execute(
|
407
|
-
"INSERT INTO fmeta (url, owner_id, file_id, file_size, permission, external) VALUES (?, ?, ?, ?, ?, ?)",
|
408
|
-
(url, owner_id, file_id, file_size, int(permission), external)
|
409
|
-
)
|
410
|
-
await self._user_size_inc(owner_id, file_size)
|
411
|
-
self.logger.info(f"File {url} created")
|
382
|
+
self.logger.debug(f"Creating fmeta {url}: permission={permission}, owner_id={owner_id}, file_id={file_id}, file_size={file_size}, external={external}, mime_type={mime_type}")
|
383
|
+
if permission is None:
|
384
|
+
permission = FileReadPermission.UNSET
|
385
|
+
assert owner_id is not None and file_id is not None and file_size is not None and external is not None
|
386
|
+
await self.conn.execute(
|
387
|
+
"INSERT INTO fmeta (url, owner_id, file_id, file_size, permission, external, mime_type) VALUES (?, ?, ?, ?, ?, ?, ?)",
|
388
|
+
(url, owner_id, file_id, file_size, int(permission), external, mime_type)
|
389
|
+
)
|
390
|
+
await self._user_size_inc(owner_id, file_size)
|
391
|
+
self.logger.info(f"File {url} created")
|
412
392
|
|
413
393
|
@atomic
|
414
394
|
async def move_file(self, old_url: str, new_url: str):
|
@@ -421,6 +401,25 @@ class FileConn(DBConnBase):
|
|
421
401
|
async with self.conn.execute("UPDATE fmeta SET url = ?, create_time = CURRENT_TIMESTAMP WHERE url = ?", (new_url, old_url)):
|
422
402
|
self.logger.info(f"Moved file {old_url} to {new_url}")
|
423
403
|
|
404
|
+
@atomic
|
405
|
+
async def move_path(self, old_url: str, new_url: str, conflict_handler: Literal['skip', 'overwrite'] = 'overwrite', user_id: Optional[int] = None):
|
406
|
+
assert old_url.endswith('/'), "Old path must end with /"
|
407
|
+
assert new_url.endswith('/'), "New path must end with /"
|
408
|
+
if user_id is None:
|
409
|
+
async with self.conn.execute("SELECT * FROM fmeta WHERE url LIKE ?", (old_url + '%', )) as cursor:
|
410
|
+
res = await cursor.fetchall()
|
411
|
+
else:
|
412
|
+
async with self.conn.execute("SELECT * FROM fmeta WHERE url LIKE ? AND owner_id = ?", (old_url + '%', user_id)) as cursor:
|
413
|
+
res = await cursor.fetchall()
|
414
|
+
for r in res:
|
415
|
+
new_r = new_url + r[0][len(old_url):]
|
416
|
+
if conflict_handler == 'overwrite':
|
417
|
+
await self.conn.execute("DELETE FROM fmeta WHERE url = ?", (new_r, ))
|
418
|
+
elif conflict_handler == 'skip':
|
419
|
+
if (await self.conn.execute("SELECT url FROM fmeta WHERE url = ?", (new_r, ))) is not None:
|
420
|
+
continue
|
421
|
+
await self.conn.execute("UPDATE fmeta SET url = ?, create_time = CURRENT_TIMESTAMP WHERE url = ?", (new_r, r[0]))
|
422
|
+
|
424
423
|
async def log_access(self, url: str):
|
425
424
|
await self.conn.execute("UPDATE fmeta SET access_time = CURRENT_TIMESTAMP WHERE url = ?", (url, ))
|
426
425
|
|
@@ -568,7 +567,8 @@ class Database:
|
|
568
567
|
async def save_file(
|
569
568
|
self, u: int | str, url: str,
|
570
569
|
blob: bytes | AsyncIterable[bytes],
|
571
|
-
permission: FileReadPermission = FileReadPermission.UNSET
|
570
|
+
permission: FileReadPermission = FileReadPermission.UNSET,
|
571
|
+
mime_type: str = 'application/octet-stream'
|
572
572
|
):
|
573
573
|
"""
|
574
574
|
if file_size is not provided, the blob must be bytes
|
@@ -600,7 +600,7 @@ class Database:
|
|
600
600
|
await self.file.set_file_blob(f_id, blob)
|
601
601
|
await self.file.set_file_record(
|
602
602
|
url, owner_id=user.id, file_id=f_id, file_size=file_size,
|
603
|
-
permission=permission, external=False)
|
603
|
+
permission=permission, external=False, mime_type=mime_type)
|
604
604
|
await self.user.set_active(user.username)
|
605
605
|
else:
|
606
606
|
assert isinstance(blob, AsyncIterable)
|
@@ -612,7 +612,7 @@ class Database:
|
|
612
612
|
raise StorageExceededError(f"Unable to save file, user {user.username} has storage limit of {user.max_storage}, used {user_size_used}, requested {file_size}")
|
613
613
|
await self.file.set_file_record(
|
614
614
|
url, owner_id=user.id, file_id=f_id, file_size=file_size,
|
615
|
-
permission=permission, external=True)
|
615
|
+
permission=permission, external=True, mime_type=mime_type)
|
616
616
|
await self.user.set_active(user.username)
|
617
617
|
|
618
618
|
async def read_file_stream(self, url: str) -> AsyncIterable[bytes]:
|
@@ -664,6 +664,13 @@ class Database:
|
|
664
664
|
|
665
665
|
async with transaction(self):
|
666
666
|
await self.file.move_file(old_url, new_url)
|
667
|
+
|
668
|
+
async def move_path(self, old_url: str, new_url: str, user_id: Optional[int] = None):
|
669
|
+
validate_url(old_url, is_file=False)
|
670
|
+
validate_url(new_url, is_file=False)
|
671
|
+
|
672
|
+
async with transaction(self):
|
673
|
+
await self.file.move_path(old_url, new_url, 'overwrite', user_id)
|
667
674
|
|
668
675
|
async def __batch_delete_file_blobs(self, file_records: list[FileRecord], batch_size: int = 512):
|
669
676
|
# https://github.com/langchain-ai/langchain/issues/10321
|
@@ -0,0 +1,55 @@
|
|
1
|
+
from enum import IntEnum
|
2
|
+
import dataclasses
|
3
|
+
|
4
|
+
class FileReadPermission(IntEnum):
|
5
|
+
UNSET = 0 # not set
|
6
|
+
PUBLIC = 1 # accessible by anyone
|
7
|
+
PROTECTED = 2 # accessible by any user
|
8
|
+
PRIVATE = 3 # accessible by owner only (including admin)
|
9
|
+
|
10
|
+
@dataclasses.dataclass
|
11
|
+
class UserRecord:
|
12
|
+
id: int
|
13
|
+
username: str
|
14
|
+
credential: str
|
15
|
+
is_admin: bool
|
16
|
+
create_time: str
|
17
|
+
last_active: str
|
18
|
+
max_storage: int
|
19
|
+
permission: 'FileReadPermission'
|
20
|
+
|
21
|
+
def __str__(self):
|
22
|
+
return f"User {self.username} (id={self.id}, admin={self.is_admin}, created at {self.create_time}, last active at {self.last_active}, storage={self.max_storage}, permission={self.permission})"
|
23
|
+
|
24
|
+
@dataclasses.dataclass
|
25
|
+
class FileRecord:
|
26
|
+
url: str
|
27
|
+
owner_id: int
|
28
|
+
file_id: str # defines mapping from fmata to fdata
|
29
|
+
file_size: int
|
30
|
+
create_time: str
|
31
|
+
access_time: str
|
32
|
+
permission: FileReadPermission
|
33
|
+
external: bool
|
34
|
+
mime_type: str
|
35
|
+
|
36
|
+
def __str__(self):
|
37
|
+
return f"File {self.url} [{self.mime_type}] (owner={self.owner_id}, created at {self.create_time}, accessed at {self.access_time}, " + \
|
38
|
+
f"file_id={self.file_id}, permission={self.permission}, size={self.file_size}, external={self.external})"
|
39
|
+
|
40
|
+
@dataclasses.dataclass
|
41
|
+
class DirectoryRecord:
|
42
|
+
url: str
|
43
|
+
size: int
|
44
|
+
create_time: str = ""
|
45
|
+
update_time: str = ""
|
46
|
+
access_time: str = ""
|
47
|
+
|
48
|
+
def __str__(self):
|
49
|
+
return f"Directory {self.url} (size={self.size})"
|
50
|
+
|
51
|
+
@dataclasses.dataclass
|
52
|
+
class PathContents:
|
53
|
+
dirs: list[DirectoryRecord]
|
54
|
+
files: list[FileRecord]
|
55
|
+
|
@@ -15,7 +15,7 @@ from contextlib import asynccontextmanager
|
|
15
15
|
from .error import *
|
16
16
|
from .log import get_logger
|
17
17
|
from .stat import RequestDB
|
18
|
-
from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES,
|
18
|
+
from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES
|
19
19
|
from .utils import ensure_uri_compnents, format_last_modified, now_stamp
|
20
20
|
from .database import Database, UserRecord, DECOY_USER, FileRecord, check_user_permission, FileReadPermission
|
21
21
|
|
@@ -142,12 +142,10 @@ async def get_file(path: str, download = False, user: UserRecord = Depends(get_c
|
|
142
142
|
|
143
143
|
fname = path.split("/")[-1]
|
144
144
|
async def send(media_type: Optional[str] = None, disposition = "attachment"):
|
145
|
+
if media_type is None:
|
146
|
+
media_type = file_record.mime_type
|
145
147
|
if not file_record.external:
|
146
148
|
fblob = await conn.read_file(path)
|
147
|
-
if media_type is None:
|
148
|
-
media_type, _ = mimetypes.guess_type(fname)
|
149
|
-
if media_type is None:
|
150
|
-
media_type = mimesniff.what(fblob)
|
151
149
|
return Response(
|
152
150
|
content=fblob, media_type=media_type, headers={
|
153
151
|
"Content-Disposition": f"{disposition}; filename={fname}",
|
@@ -155,12 +153,7 @@ async def get_file(path: str, download = False, user: UserRecord = Depends(get_c
|
|
155
153
|
"Last-Modified": format_last_modified(file_record.create_time)
|
156
154
|
}
|
157
155
|
)
|
158
|
-
|
159
156
|
else:
|
160
|
-
if media_type is None:
|
161
|
-
media_type, _ = mimetypes.guess_type(fname)
|
162
|
-
if media_type is None:
|
163
|
-
media_type = mimesniff.what(str((LARGE_BLOB_DIR / file_record.file_id).absolute()))
|
164
157
|
return StreamingResponse(
|
165
158
|
await conn.read_file_stream(path), media_type=media_type, headers={
|
166
159
|
"Content-Disposition": f"{disposition}; filename={fname}",
|
@@ -228,14 +221,24 @@ async def put_file(
|
|
228
221
|
blobs = await request.body()
|
229
222
|
else:
|
230
223
|
blobs = await request.body()
|
224
|
+
|
225
|
+
# check file type
|
226
|
+
assert not path.endswith("/"), "Path must be a file"
|
227
|
+
fname = path.split("/")[-1]
|
228
|
+
mime_t, _ = mimetypes.guess_type(fname)
|
229
|
+
if mime_t is None:
|
230
|
+
mime_t = mimesniff.what(blobs)
|
231
|
+
if mime_t is None:
|
232
|
+
mime_t = "application/octet-stream"
|
233
|
+
|
231
234
|
if len(blobs) > LARGE_FILE_BYTES:
|
232
235
|
async def blob_reader():
|
233
236
|
chunk_size = 16 * 1024 * 1024 # 16MB
|
234
237
|
for b in range(0, len(blobs), chunk_size):
|
235
238
|
yield blobs[b:b+chunk_size]
|
236
|
-
await conn.save_file(user.id, path, blob_reader(), permission = FileReadPermission(permission))
|
239
|
+
await conn.save_file(user.id, path, blob_reader(), permission = FileReadPermission(permission), mime_type = mime_t)
|
237
240
|
else:
|
238
|
-
await conn.save_file(user.id, path, blobs, permission = FileReadPermission(permission))
|
241
|
+
await conn.save_file(user.id, path, blobs, permission = FileReadPermission(permission), mime_type=mime_t)
|
239
242
|
|
240
243
|
# https://developer.mozilla.org/zh-CN/docs/Web/HTTP/Methods/PUT
|
241
244
|
if exists_flag:
|
@@ -263,6 +266,7 @@ async def delete_file(path: str, user: UserRecord = Depends(get_current_user)):
|
|
263
266
|
else:
|
264
267
|
res = await conn.delete_file(path)
|
265
268
|
|
269
|
+
await conn.user.set_active(user.username)
|
266
270
|
if res:
|
267
271
|
return Response(status_code=200, content="Deleted")
|
268
272
|
else:
|
@@ -335,26 +339,57 @@ async def update_file_meta(
|
|
335
339
|
if user.id == 0:
|
336
340
|
raise HTTPException(status_code=401, detail="Permission denied")
|
337
341
|
path = ensure_uri_compnents(path)
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
if not (
|
344
|
-
|
345
|
-
|
342
|
+
if path.startswith("/"):
|
343
|
+
path = path[1:]
|
344
|
+
await conn.user.set_active(user.username)
|
345
|
+
|
346
|
+
# file
|
347
|
+
if not path.endswith("/"):
|
348
|
+
file_record = await conn.file.get_file_record(path)
|
349
|
+
if not file_record:
|
350
|
+
logger.debug(f"Reject update meta request from {user.username} to {path}")
|
351
|
+
raise HTTPException(status_code=404, detail="File not found")
|
352
|
+
|
353
|
+
if not (user.is_admin or user.id == file_record.owner_id):
|
354
|
+
logger.debug(f"Reject update meta request from {user.username} to {path}")
|
355
|
+
raise HTTPException(status_code=403, detail="Permission denied")
|
356
|
+
|
357
|
+
if perm is not None:
|
358
|
+
logger.info(f"Update permission of {path} to {perm}")
|
359
|
+
await conn.file.update_file_record(
|
360
|
+
url = file_record.url,
|
361
|
+
permission = FileReadPermission(perm)
|
362
|
+
)
|
346
363
|
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
permission = FileReadPermission(perm)
|
352
|
-
)
|
364
|
+
if new_path is not None:
|
365
|
+
new_path = ensure_uri_compnents(new_path)
|
366
|
+
logger.info(f"Update path of {path} to {new_path}")
|
367
|
+
await conn.move_file(path, new_path)
|
353
368
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
369
|
+
# directory
|
370
|
+
else:
|
371
|
+
assert perm is None, "Permission is not supported for directory"
|
372
|
+
if new_path is not None:
|
373
|
+
new_path = ensure_uri_compnents(new_path)
|
374
|
+
logger.info(f"Update path of {path} to {new_path}")
|
375
|
+
assert new_path.endswith("/"), "New path must end with /"
|
376
|
+
if new_path.startswith("/"):
|
377
|
+
new_path = new_path[1:]
|
378
|
+
|
379
|
+
# check if new path is under the user's directory
|
380
|
+
first_component = new_path.split("/")[0]
|
381
|
+
if not (first_component == user.username or user.is_admin):
|
382
|
+
raise HTTPException(status_code=403, detail="Permission denied, path must start with username")
|
383
|
+
elif user.is_admin:
|
384
|
+
_is_user = await conn.user.get_user(first_component)
|
385
|
+
if not _is_user:
|
386
|
+
raise HTTPException(status_code=404, detail="User not found, path must start with username")
|
387
|
+
|
388
|
+
# check if old path is under the user's directory (non-admin)
|
389
|
+
if not path.startswith(f"{user.username}/") and not user.is_admin:
|
390
|
+
raise HTTPException(status_code=403, detail="Permission denied, path must start with username")
|
391
|
+
# currently only move own file, with overwrite
|
392
|
+
await conn.move_path(path, new_path, user_id = user.id)
|
358
393
|
|
359
394
|
return Response(status_code=200, content="OK")
|
360
395
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|