lfss 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfss/api/__init__.py +6 -3
- lfss/cli/cli.py +1 -1
- lfss/cli/vacuum.py +62 -19
- lfss/eng/config.py +1 -1
- lfss/eng/database.py +117 -41
- lfss/eng/thumb.py +16 -23
- lfss/sql/init.sql +9 -4
- lfss/svc/app.py +1 -1
- {lfss-0.10.0.dist-info → lfss-0.11.0.dist-info}/METADATA +3 -3
- {lfss-0.10.0.dist-info → lfss-0.11.0.dist-info}/RECORD +13 -13
- /docs/{Changelog.md → changelog.md} +0 -0
- {lfss-0.10.0.dist-info → lfss-0.11.0.dist-info}/WHEEL +0 -0
- {lfss-0.10.0.dist-info → lfss-0.11.0.dist-info}/entry_points.txt +0 -0
lfss/api/__init__.py
CHANGED
@@ -170,14 +170,15 @@ def download_directory(
|
|
170
170
|
_counter = 0
|
171
171
|
_counter_lock = Lock()
|
172
172
|
failed_items: list[tuple[str, str]] = []
|
173
|
+
file_count = 0
|
173
174
|
def get_file(c, src_url):
|
174
|
-
nonlocal _counter, failed_items
|
175
|
+
nonlocal _counter, failed_items, file_count, verbose
|
175
176
|
with _counter_lock:
|
176
177
|
_counter += 1
|
177
178
|
this_count = _counter
|
178
179
|
dst_path = f"{directory}{os.path.relpath(decode_uri_compnents(src_url), decode_uri_compnents(src_path))}"
|
179
180
|
if verbose:
|
180
|
-
print(f"[{this_count}] Downloading {src_url} to {dst_path}")
|
181
|
+
print(f"[{this_count}/{file_count}] Downloading {src_url} to {dst_path}")
|
181
182
|
|
182
183
|
if not (res:=download_file(
|
183
184
|
c, src_url, dst_path,
|
@@ -185,11 +186,13 @@ def download_directory(
|
|
185
186
|
))[0]:
|
186
187
|
failed_items.append((src_url, res[1]))
|
187
188
|
|
188
|
-
batch_size =
|
189
|
+
batch_size = 10_000
|
189
190
|
file_list: list[FileRecord] = []
|
190
191
|
with connector.session(n_concurrent) as c:
|
191
192
|
file_count = c.count_files(src_path, flat=True)
|
192
193
|
for offset in range(0, file_count, batch_size):
|
194
|
+
if verbose:
|
195
|
+
print(f"Retrieving file list... ({offset}/{file_count})", end='\r')
|
193
196
|
file_list.extend(c.list_files(
|
194
197
|
src_path, offset=offset, limit=batch_size, flat=True
|
195
198
|
))
|
lfss/cli/cli.py
CHANGED
@@ -12,7 +12,7 @@ def parse_permission(s: str) -> FileReadPermission:
|
|
12
12
|
raise ValueError(f"Invalid permission {s}")
|
13
13
|
|
14
14
|
def parse_arguments():
|
15
|
-
parser = argparse.ArgumentParser(description="
|
15
|
+
parser = argparse.ArgumentParser(description="Client-side command line interface, set LFSS_ENDPOINT and LFSS_TOKEN environment variables for authentication.")
|
16
16
|
|
17
17
|
sp = parser.add_subparsers(dest="command", required=True)
|
18
18
|
|
lfss/cli/vacuum.py
CHANGED
@@ -2,10 +2,11 @@
|
|
2
2
|
Vacuum the database and external storage to ensure that the storage is consistent and minimal.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from lfss.eng.config import LARGE_BLOB_DIR
|
6
|
-
import argparse, time
|
5
|
+
from lfss.eng.config import LARGE_BLOB_DIR, THUMB_DB
|
6
|
+
import argparse, time, itertools
|
7
7
|
from functools import wraps
|
8
8
|
from asyncio import Semaphore
|
9
|
+
import aiosqlite
|
9
10
|
import aiofiles, asyncio
|
10
11
|
import aiofiles.os
|
11
12
|
from contextlib import contextmanager
|
@@ -32,7 +33,7 @@ def barriered(func):
|
|
32
33
|
return wrapper
|
33
34
|
|
34
35
|
@global_entrance()
|
35
|
-
async def vacuum_main(index: bool = False, blobs: bool = False):
|
36
|
+
async def vacuum_main(index: bool = False, blobs: bool = False, thumbs: bool = False, vacuum_all: bool = False):
|
36
37
|
|
37
38
|
# check if any file in the Large Blob directory is not in the database
|
38
39
|
# the reverse operation is not necessary, because by design, the database should be the source of truth...
|
@@ -49,23 +50,63 @@ async def vacuum_main(index: bool = False, blobs: bool = False):
|
|
49
50
|
|
50
51
|
# create a temporary index to speed up the process...
|
51
52
|
with indicator("Clearing un-referenced files in external storage"):
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
53
|
+
try:
|
54
|
+
async with transaction() as c:
|
55
|
+
await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
|
56
|
+
for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
|
57
|
+
f_id = f.name
|
58
|
+
await ensure_external_consistency(f_id)
|
59
|
+
if (i+1) % 1_000 == 0:
|
60
|
+
print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
|
61
|
+
finally:
|
62
|
+
async with transaction() as c:
|
63
|
+
await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
|
61
64
|
|
62
|
-
|
63
|
-
|
64
|
-
with
|
65
|
+
if index or vacuum_all:
|
66
|
+
with indicator("VACUUM-index"):
|
67
|
+
async with transaction() as c:
|
68
|
+
await c.execute("DELETE FROM dupcount WHERE count = 0")
|
69
|
+
async with unique_cursor(is_write=True) as c:
|
65
70
|
await c.execute("VACUUM main")
|
66
|
-
|
67
|
-
|
71
|
+
if blobs or vacuum_all:
|
72
|
+
with indicator("VACUUM-blobs"):
|
73
|
+
async with unique_cursor(is_write=True) as c:
|
68
74
|
await c.execute("VACUUM blobs")
|
75
|
+
|
76
|
+
if thumbs or vacuum_all:
|
77
|
+
try:
|
78
|
+
async with transaction() as c:
|
79
|
+
await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
|
80
|
+
with indicator("VACUUM-thumbs"):
|
81
|
+
if not THUMB_DB.exists():
|
82
|
+
raise FileNotFoundError("Thumbnail database not found.")
|
83
|
+
async with unique_cursor() as db_c:
|
84
|
+
async with aiosqlite.connect(THUMB_DB) as t_conn:
|
85
|
+
batch_size = 10_000
|
86
|
+
for batch_count in itertools.count(start=0):
|
87
|
+
exceeded_rows = list(await (await t_conn.execute(
|
88
|
+
"SELECT file_id FROM thumbs LIMIT ? OFFSET ?",
|
89
|
+
(batch_size, batch_size * batch_count)
|
90
|
+
)).fetchall())
|
91
|
+
if not exceeded_rows:
|
92
|
+
break
|
93
|
+
batch_ids = [row[0] for row in exceeded_rows]
|
94
|
+
for f_id in batch_ids:
|
95
|
+
cursor = await db_c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
|
96
|
+
if not await cursor.fetchone():
|
97
|
+
print(f"Thumbnail {f_id} not found in database, removing from thumb cache.")
|
98
|
+
await t_conn.execute("DELETE FROM thumbs WHERE file_id = ?", (f_id,))
|
99
|
+
print(f"Checked {batch_count+1} batches of {batch_size} thumbnails.")
|
100
|
+
|
101
|
+
await t_conn.commit()
|
102
|
+
await t_conn.execute("VACUUM")
|
103
|
+
except FileNotFoundError as e:
|
104
|
+
if "Thumbnail database not found." in str(e):
|
105
|
+
print("Thumbnail database not found, skipping.")
|
106
|
+
|
107
|
+
finally:
|
108
|
+
async with transaction() as c:
|
109
|
+
await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
|
69
110
|
|
70
111
|
async def vacuum_requests():
|
71
112
|
with indicator("VACUUM-requests"):
|
@@ -76,15 +117,17 @@ async def vacuum_requests():
|
|
76
117
|
def main():
|
77
118
|
global sem
|
78
119
|
parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
|
120
|
+
parser.add_argument("--all", action="store_true", help="Vacuum all")
|
79
121
|
parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
|
80
122
|
parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
|
81
123
|
parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
|
124
|
+
parser.add_argument("-t", "--thumb", action="store_true", help="Vacuum thumbnails")
|
82
125
|
parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs to only keep at most recent 1M rows in 7 days")
|
83
126
|
args = parser.parse_args()
|
84
127
|
sem = Semaphore(args.jobs)
|
85
|
-
asyncio.run(vacuum_main(index=args.metadata, blobs=args.data))
|
128
|
+
asyncio.run(vacuum_main(index=args.metadata, blobs=args.data, thumbs=args.thumb, vacuum_all=args.all))
|
86
129
|
|
87
|
-
if args.requests:
|
130
|
+
if args.requests or args.all:
|
88
131
|
asyncio.run(vacuum_requests())
|
89
132
|
|
90
133
|
if __name__ == '__main__':
|
lfss/eng/config.py
CHANGED
@@ -22,5 +22,5 @@ MAX_MEM_FILE_BYTES = 128 * 1024 * 1024 # 128MB
|
|
22
22
|
CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming (on large files)
|
23
23
|
DEBUG_MODE = os.environ.get('LFSS_DEBUG', '0') == '1'
|
24
24
|
|
25
|
-
THUMB_DB = DATA_HOME / 'thumbs.db'
|
25
|
+
THUMB_DB = DATA_HOME / 'thumbs.v0-11.db'
|
26
26
|
THUMB_SIZE = (48, 48)
|
lfss/eng/database.py
CHANGED
@@ -3,6 +3,7 @@ from typing import Optional, Literal, overload
|
|
3
3
|
from collections.abc import AsyncIterable
|
4
4
|
from contextlib import asynccontextmanager
|
5
5
|
from abc import ABC
|
6
|
+
import re
|
6
7
|
|
7
8
|
import uuid, datetime
|
8
9
|
import urllib.parse
|
@@ -20,7 +21,7 @@ from .datatype import (
|
|
20
21
|
)
|
21
22
|
from .config import LARGE_BLOB_DIR, CHUNK_SIZE, LARGE_FILE_BYTES, MAX_MEM_FILE_BYTES
|
22
23
|
from .log import get_logger
|
23
|
-
from .utils import decode_uri_compnents, hash_credential, concurrent_wrap, debounce_async,
|
24
|
+
from .utils import decode_uri_compnents, hash_credential, concurrent_wrap, debounce_async, static_vars
|
24
25
|
from .error import *
|
25
26
|
|
26
27
|
class DBObjectBase(ABC):
|
@@ -84,8 +85,9 @@ class UserConn(DBObjectBase):
|
|
84
85
|
max_storage: int = 1073741824, permission: FileReadPermission = FileReadPermission.UNSET
|
85
86
|
) -> int:
|
86
87
|
def validate_username(username: str):
|
88
|
+
assert not set(username) & {'/', ':'}, "Invalid username"
|
87
89
|
assert not username.startswith('_'), "Error: reserved username"
|
88
|
-
assert not (
|
90
|
+
assert not (len(username) > 255), "Username too long"
|
89
91
|
assert urllib.parse.quote(username) == username, "Invalid username, must be URL safe"
|
90
92
|
validate_username(username)
|
91
93
|
self.logger.debug(f"Creating user {username}")
|
@@ -249,7 +251,7 @@ class FileConn(DBObjectBase):
|
|
249
251
|
|
250
252
|
async def list_path_dirs(
|
251
253
|
self, url: str,
|
252
|
-
offset: int = 0, limit: int =
|
254
|
+
offset: int = 0, limit: int = 10_000,
|
253
255
|
order_by: DirSortKey = '', order_desc: bool = False,
|
254
256
|
skim: bool = True
|
255
257
|
) -> list[DirectoryRecord]:
|
@@ -293,7 +295,7 @@ class FileConn(DBObjectBase):
|
|
293
295
|
|
294
296
|
async def list_path_files(
|
295
297
|
self, url: str,
|
296
|
-
offset: int = 0, limit: int =
|
298
|
+
offset: int = 0, limit: int = 10_000,
|
297
299
|
order_by: FileSortKey = '', order_desc: bool = False,
|
298
300
|
flat: bool = False,
|
299
301
|
) -> list[FileRecord]:
|
@@ -324,7 +326,7 @@ class FileConn(DBObjectBase):
|
|
324
326
|
- It cannot flatten directories
|
325
327
|
- It cannot list directories with details
|
326
328
|
"""
|
327
|
-
MAX_ITEMS =
|
329
|
+
MAX_ITEMS = 10_000
|
328
330
|
dir_count = await self.count_path_dirs(url)
|
329
331
|
file_count = await self.count_path_files(url, flat=False)
|
330
332
|
if dir_count + file_count > MAX_ITEMS:
|
@@ -417,16 +419,12 @@ class FileConn(DBObjectBase):
|
|
417
419
|
new_exists = await self.get_file_record(new_url)
|
418
420
|
if new_exists is not None:
|
419
421
|
raise FileExistsError(f"File {new_url} already exists")
|
420
|
-
new_fid = str(uuid.uuid4())
|
421
422
|
user_id = old.owner_id if user_id is None else user_id
|
422
423
|
await self.cur.execute(
|
423
424
|
"INSERT INTO fmeta (url, owner_id, file_id, file_size, permission, external, mime_type) VALUES (?, ?, ?, ?, ?, ?, ?)",
|
424
|
-
(new_url, user_id,
|
425
|
+
(new_url, user_id, old.file_id, old.file_size, old.permission, old.external, old.mime_type)
|
425
426
|
)
|
426
|
-
|
427
|
-
await self.set_file_blob(new_fid, await self.get_file_blob(old.file_id))
|
428
|
-
else:
|
429
|
-
await copy_file(LARGE_BLOB_DIR / old.file_id, LARGE_BLOB_DIR / new_fid)
|
427
|
+
await self.cur.execute("INSERT OR REPLACE INTO dupcount (file_id, count) VALUES (?, COALESCE((SELECT count FROM dupcount WHERE file_id = ?), 0) + 1)", (old.file_id, old.file_id))
|
430
428
|
await self._user_size_inc(user_id, old.file_size)
|
431
429
|
self.logger.info(f"Copied file {old_url} to {new_url}")
|
432
430
|
|
@@ -444,16 +442,12 @@ class FileConn(DBObjectBase):
|
|
444
442
|
new_r = new_url + old_record.url[len(old_url):]
|
445
443
|
if await (await self.cur.execute("SELECT url FROM fmeta WHERE url = ?", (new_r, ))).fetchone() is not None:
|
446
444
|
raise FileExistsError(f"File {new_r} already exists")
|
447
|
-
new_fid = str(uuid.uuid4())
|
448
445
|
user_id = old_record.owner_id if user_id is None else user_id
|
449
446
|
await self.cur.execute(
|
450
447
|
"INSERT INTO fmeta (url, owner_id, file_id, file_size, permission, external, mime_type) VALUES (?, ?, ?, ?, ?, ?, ?)",
|
451
|
-
(new_r, user_id,
|
448
|
+
(new_r, user_id, old_record.file_id, old_record.file_size, old_record.permission, old_record.external, old_record.mime_type)
|
452
449
|
)
|
453
|
-
|
454
|
-
await self.set_file_blob(new_fid, await self.get_file_blob(old_record.file_id))
|
455
|
-
else:
|
456
|
-
await copy_file(LARGE_BLOB_DIR / old_record.file_id, LARGE_BLOB_DIR / new_fid)
|
450
|
+
await self.cur.execute("INSERT OR REPLACE INTO dupcount (file_id, count) VALUES (?, COALESCE((SELECT count FROM dupcount WHERE file_id = ?), 0) + 1)", (old_record.file_id, old_record.file_id))
|
457
451
|
await self._user_size_inc(user_id, old_record.file_size)
|
458
452
|
self.logger.info(f"Copied path {old_url} to {new_url}")
|
459
453
|
|
@@ -497,6 +491,7 @@ class FileConn(DBObjectBase):
|
|
497
491
|
return file_record
|
498
492
|
|
499
493
|
async def delete_user_file_records(self, owner_id: int) -> list[FileRecord]:
|
494
|
+
""" Delete all records with owner_id """
|
500
495
|
cursor = await self.cur.execute("SELECT * FROM fmeta WHERE owner_id = ?", (owner_id, ))
|
501
496
|
res = await cursor.fetchall()
|
502
497
|
await self.cur.execute("DELETE FROM usize WHERE user_id = ?", (owner_id, ))
|
@@ -528,7 +523,7 @@ class FileConn(DBObjectBase):
|
|
528
523
|
return [self.parse_record(r) for r in all_f_rec]
|
529
524
|
|
530
525
|
async def set_file_blob(self, file_id: str, blob: bytes):
|
531
|
-
await self.cur.execute("INSERT
|
526
|
+
await self.cur.execute("INSERT INTO blobs.fdata (file_id, data) VALUES (?, ?)", (file_id, blob))
|
532
527
|
|
533
528
|
@staticmethod
|
534
529
|
async def set_file_blob_external(file_id: str, stream: AsyncIterable[bytes])->int:
|
@@ -580,16 +575,78 @@ class FileConn(DBObjectBase):
|
|
580
575
|
if not chunk: break
|
581
576
|
yield chunk
|
582
577
|
|
583
|
-
|
584
|
-
|
578
|
+
async def unlink_file_blob_external(self, file_id: str):
|
579
|
+
# first check if the file has duplication
|
580
|
+
cursor = await self.cur.execute("SELECT count FROM dupcount WHERE file_id = ?", (file_id, ))
|
581
|
+
res = await cursor.fetchone()
|
582
|
+
if res is not None and res[0] > 0:
|
583
|
+
await self.cur.execute("UPDATE dupcount SET count = count - 1 WHERE file_id = ?", (file_id, ))
|
584
|
+
return
|
585
|
+
|
586
|
+
# finally delete the file and the duplication count
|
585
587
|
if (LARGE_BLOB_DIR / file_id).exists():
|
586
588
|
await aiofiles.os.remove(LARGE_BLOB_DIR / file_id)
|
589
|
+
await self.cur.execute("DELETE FROM dupcount WHERE file_id = ?", (file_id, ))
|
587
590
|
|
588
|
-
async def
|
591
|
+
async def unlink_file_blob(self, file_id: str):
|
592
|
+
# first check if the file has duplication
|
593
|
+
cursor = await self.cur.execute("SELECT count FROM dupcount WHERE file_id = ?", (file_id, ))
|
594
|
+
res = await cursor.fetchone()
|
595
|
+
if res is not None and res[0] > 0:
|
596
|
+
await self.cur.execute("UPDATE dupcount SET count = count - 1 WHERE file_id = ?", (file_id, ))
|
597
|
+
return
|
598
|
+
|
599
|
+
# finally delete the file and the duplication count
|
589
600
|
await self.cur.execute("DELETE FROM blobs.fdata WHERE file_id = ?", (file_id, ))
|
601
|
+
await self.cur.execute("DELETE FROM dupcount WHERE file_id = ?", (file_id, ))
|
602
|
+
|
603
|
+
async def _group_del(self, file_ids_all: list[str]):
|
604
|
+
"""
|
605
|
+
The file_ids_all may contain duplication,
|
606
|
+
yield tuples of unique (to_del_ids, to_dec_ids) for each iteration,
|
607
|
+
every iteration should unlink one copy of the files, repeat until all re-occurrence in the input list are removed.
|
608
|
+
"""
|
609
|
+
async def check_dup(file_ids: set[str]):
|
610
|
+
cursor = await self.cur.execute("SELECT file_id FROM dupcount WHERE file_id IN ({}) AND count > 0".format(','.join(['?'] * len(file_ids))), tuple(file_ids))
|
611
|
+
res = await cursor.fetchall()
|
612
|
+
to_dec_ids = [r[0] for r in res]
|
613
|
+
to_del_ids = list(file_ids - set(to_dec_ids))
|
614
|
+
return to_del_ids, to_dec_ids
|
615
|
+
# gather duplication from all file_ids
|
616
|
+
fid_occurrence = {}
|
617
|
+
for file_id in file_ids_all:
|
618
|
+
fid_occurrence[file_id] = fid_occurrence.get(file_id, 0) + 1
|
619
|
+
while fid_occurrence:
|
620
|
+
to_del_ids, to_dec_ids = await check_dup(set(fid_occurrence.keys()))
|
621
|
+
for file_id in to_del_ids:
|
622
|
+
del fid_occurrence[file_id]
|
623
|
+
for file_id in to_dec_ids:
|
624
|
+
fid_occurrence[file_id] -= 1
|
625
|
+
if fid_occurrence[file_id] == 0:
|
626
|
+
del fid_occurrence[file_id]
|
627
|
+
yield (to_del_ids, to_dec_ids)
|
590
628
|
|
591
|
-
async def
|
592
|
-
|
629
|
+
async def unlink_file_blobs(self, file_ids: list[str]):
|
630
|
+
async for (to_del_ids, to_dec_ids) in self._group_del(file_ids):
|
631
|
+
# delete the only copy
|
632
|
+
await self.cur.execute("DELETE FROM blobs.fdata WHERE file_id IN ({})".format(','.join(['?'] * len(to_del_ids))), to_del_ids)
|
633
|
+
await self.cur.execute("DELETE FROM dupcount WHERE file_id IN ({})".format(','.join(['?'] * len(to_del_ids))), to_del_ids)
|
634
|
+
# decrease duplication count
|
635
|
+
await self.cur.execute("UPDATE dupcount SET count = count - 1 WHERE file_id IN ({})".format(','.join(['?'] * len(to_dec_ids))), to_dec_ids)
|
636
|
+
|
637
|
+
async def unlink_file_blobs_external(self, file_ids: list[str]):
|
638
|
+
async def del_file(file_id: str):
|
639
|
+
if (LARGE_BLOB_DIR / file_id).exists():
|
640
|
+
await aiofiles.os.remove(LARGE_BLOB_DIR / file_id)
|
641
|
+
async for (to_del_ids, to_dec_ids) in self._group_del(file_ids):
|
642
|
+
# delete the only copy
|
643
|
+
await asyncio.gather(*(
|
644
|
+
[del_file(file_id) for file_id in to_del_ids] +
|
645
|
+
[self.cur.execute("DELETE FROM dupcount WHERE file_id = ?", (file_id, )) for file_id in to_del_ids]
|
646
|
+
))
|
647
|
+
# decrease duplication count
|
648
|
+
await self.cur.execute("UPDATE dupcount SET count = count - 1 WHERE file_id IN ({})".format(','.join(['?'] * len(to_dec_ids))), to_dec_ids)
|
649
|
+
|
593
650
|
|
594
651
|
_log_active_queue = []
|
595
652
|
_log_active_lock = asyncio.Lock()
|
@@ -621,20 +678,35 @@ async def delayed_log_access(url: str):
|
|
621
678
|
_log_access_queue.append(url)
|
622
679
|
await _log_all_access()
|
623
680
|
|
681
|
+
@static_vars(
|
682
|
+
prohibited_regex = re.compile(
|
683
|
+
r"^[/_.]", # start with / or _ or .
|
684
|
+
),
|
685
|
+
prohibited_part_regex = re.compile(
|
686
|
+
"|".join([
|
687
|
+
r"^\s*\.+\s*$", # dot path
|
688
|
+
"[{}]".format("".join(re.escape(c) for c in ('/', "\\", "'", '"', "*", "%"))), # prohibited characters
|
689
|
+
])
|
690
|
+
),
|
691
|
+
)
|
624
692
|
def validate_url(url: str, is_file = True):
|
625
|
-
|
626
|
-
|
627
|
-
|
693
|
+
""" Check if a path is valid. The input path is considered url safe """
|
694
|
+
if len(url) > 1024:
|
695
|
+
raise InvalidPathError(f"URL too long: {url}")
|
628
696
|
|
629
|
-
|
697
|
+
is_valid = validate_url.prohibited_regex.search(url) is None
|
698
|
+
if not is_valid: # early return, no need to check further
|
630
699
|
raise InvalidPathError(f"Invalid URL: {url}")
|
631
|
-
|
632
|
-
if is_file:
|
633
|
-
ret = ret and not url.endswith('/')
|
634
|
-
else:
|
635
|
-
ret = ret and url.endswith('/')
|
636
700
|
|
637
|
-
|
701
|
+
for part in url.split('/'):
|
702
|
+
if validate_url.prohibited_part_regex.search(urllib.parse.unquote(part)):
|
703
|
+
is_valid = False
|
704
|
+
break
|
705
|
+
|
706
|
+
if is_file: is_valid = is_valid and not url.endswith('/')
|
707
|
+
else: is_valid = is_valid and url.endswith('/')
|
708
|
+
|
709
|
+
if not is_valid:
|
638
710
|
raise InvalidPathError(f"Invalid URL: {url}")
|
639
711
|
|
640
712
|
async def get_user(cur: aiosqlite.Cursor, user: int | str) -> Optional[UserRecord]:
|
@@ -771,9 +843,9 @@ class Database:
|
|
771
843
|
raise PermissionDeniedError(f"Permission denied: {op_user.username} cannot delete file {url}")
|
772
844
|
f_id = r.file_id
|
773
845
|
if r.external:
|
774
|
-
await fconn.
|
846
|
+
await fconn.unlink_file_blob_external(f_id)
|
775
847
|
else:
|
776
|
-
await fconn.
|
848
|
+
await fconn.unlink_file_blob(f_id)
|
777
849
|
return r
|
778
850
|
|
779
851
|
async def move_file(self, old_url: str, new_url: str, op_user: Optional[UserRecord] = None):
|
@@ -872,11 +944,12 @@ class Database:
|
|
872
944
|
|
873
945
|
async def del_internal():
|
874
946
|
for i in range(0, len(internal_ids), batch_size):
|
875
|
-
await fconn.
|
947
|
+
await fconn.unlink_file_blobs([r for r in internal_ids[i:i+batch_size]])
|
876
948
|
async def del_external():
|
877
|
-
for i in range(0, len(external_ids)):
|
878
|
-
await fconn.
|
879
|
-
await
|
949
|
+
for i in range(0, len(external_ids), batch_size):
|
950
|
+
await fconn.unlink_file_blobs_external([r for r in external_ids[i:i+batch_size]])
|
951
|
+
await del_internal()
|
952
|
+
await del_external()
|
880
953
|
|
881
954
|
async def delete_path(self, url: str, op_user: Optional[UserRecord] = None) -> Optional[list[FileRecord]]:
|
882
955
|
validate_url(url, is_file=False)
|
@@ -997,7 +1070,7 @@ async def check_file_read_permission(user: UserRecord, file: FileRecord, cursor:
|
|
997
1070
|
This does not consider alias level permission,
|
998
1071
|
use check_path_permission for alias level permission check first:
|
999
1072
|
```
|
1000
|
-
if await check_path_permission(
|
1073
|
+
if await check_path_permission(file.url, user) < AccessLevel.READ:
|
1001
1074
|
read_allowed, reason = check_file_read_permission(user, file)
|
1002
1075
|
```
|
1003
1076
|
The implementation assumes the user is not admin and is not the owner of the file/path
|
@@ -1041,6 +1114,9 @@ async def check_path_permission(path: str, user: UserRecord, cursor: Optional[ai
|
|
1041
1114
|
if user.id == 0:
|
1042
1115
|
return AccessLevel.GUEST
|
1043
1116
|
|
1117
|
+
if user.is_admin:
|
1118
|
+
return AccessLevel.ALL
|
1119
|
+
|
1044
1120
|
@asynccontextmanager
|
1045
1121
|
async def this_cur():
|
1046
1122
|
if cursor is None:
|
@@ -1054,7 +1130,7 @@ async def check_path_permission(path: str, user: UserRecord, cursor: Optional[ai
|
|
1054
1130
|
path_owner = await _get_path_owner(cur, path)
|
1055
1131
|
|
1056
1132
|
# check if user is admin or the owner of the path
|
1057
|
-
if user.
|
1133
|
+
if user.id == path_owner.id:
|
1058
1134
|
return AccessLevel.ALL
|
1059
1135
|
|
1060
1136
|
# if the path is a file, check if the user is the owner
|
lfss/eng/thumb.py
CHANGED
@@ -11,47 +11,42 @@ from contextlib import asynccontextmanager
|
|
11
11
|
async def _maybe_init_thumb(c: aiosqlite.Cursor):
|
12
12
|
await c.execute('''
|
13
13
|
CREATE TABLE IF NOT EXISTS thumbs (
|
14
|
-
|
15
|
-
ctime TEXT,
|
14
|
+
file_id CHAR(32) PRIMARY KEY,
|
16
15
|
thumb BLOB
|
17
16
|
)
|
18
17
|
''')
|
19
|
-
await c.execute('CREATE INDEX IF NOT EXISTS thumbs_path_idx ON thumbs (
|
18
|
+
await c.execute('CREATE INDEX IF NOT EXISTS thumbs_path_idx ON thumbs (file_id)')
|
20
19
|
|
21
|
-
async def _get_cache_thumb(c: aiosqlite.Cursor,
|
20
|
+
async def _get_cache_thumb(c: aiosqlite.Cursor, file_id: str) -> Optional[bytes]:
|
22
21
|
res = await c.execute('''
|
23
|
-
SELECT
|
24
|
-
''', (
|
22
|
+
SELECT thumb FROM thumbs WHERE file_id = ?
|
23
|
+
''', (file_id, ))
|
25
24
|
row = await res.fetchone()
|
26
25
|
if row is None:
|
27
26
|
return None
|
28
|
-
|
29
|
-
if row[0] != ctime:
|
30
|
-
await _delete_cache_thumb(c, path)
|
31
|
-
return None
|
32
|
-
blob: bytes = row[1]
|
27
|
+
blob: bytes = row[0]
|
33
28
|
return blob
|
34
29
|
|
35
|
-
async def _save_cache_thumb(c: aiosqlite.Cursor,
|
30
|
+
async def _save_cache_thumb(c: aiosqlite.Cursor, file_id: str, raw_bytes: bytes) -> bytes:
|
36
31
|
try:
|
37
32
|
raw_img = Image.open(BytesIO(raw_bytes))
|
38
33
|
except Exception:
|
39
|
-
raise InvalidDataError('Invalid image data for thumbnail: ' +
|
34
|
+
raise InvalidDataError('Invalid image data for thumbnail: ' + file_id)
|
40
35
|
raw_img.thumbnail(THUMB_SIZE)
|
41
36
|
img = raw_img.convert('RGB')
|
42
37
|
bio = BytesIO()
|
43
38
|
img.save(bio, 'JPEG')
|
44
39
|
blob = bio.getvalue()
|
45
40
|
await c.execute('''
|
46
|
-
INSERT OR REPLACE INTO thumbs (
|
47
|
-
''', (
|
41
|
+
INSERT OR REPLACE INTO thumbs (file_id, thumb) VALUES (?, ?)
|
42
|
+
''', (file_id, blob))
|
48
43
|
await c.execute('COMMIT') # commit immediately
|
49
44
|
return blob
|
50
45
|
|
51
|
-
async def _delete_cache_thumb(c: aiosqlite.Cursor,
|
46
|
+
async def _delete_cache_thumb(c: aiosqlite.Cursor, file_id: str):
|
52
47
|
await c.execute('''
|
53
|
-
DELETE FROM thumbs WHERE
|
54
|
-
''', (
|
48
|
+
DELETE FROM thumbs WHERE file_id = ?
|
49
|
+
''', (file_id, ))
|
55
50
|
await c.execute('COMMIT')
|
56
51
|
|
57
52
|
@asynccontextmanager
|
@@ -75,15 +70,13 @@ async def get_thumb(path: str) -> Optional[tuple[bytes, str]]:
|
|
75
70
|
r = await fconn.get_file_record(path)
|
76
71
|
|
77
72
|
if r is None:
|
78
|
-
async with cache_cursor() as cur:
|
79
|
-
await _delete_cache_thumb(cur, path)
|
80
73
|
raise FileNotFoundError(f'File not found: {path}')
|
81
74
|
if not r.mime_type.startswith('image/'):
|
82
75
|
return None
|
83
76
|
|
77
|
+
file_id = r.file_id
|
84
78
|
async with cache_cursor() as cur:
|
85
|
-
|
86
|
-
thumb_blob = await _get_cache_thumb(cur, path, c_time)
|
79
|
+
thumb_blob = await _get_cache_thumb(cur, file_id)
|
87
80
|
if thumb_blob is not None:
|
88
81
|
return thumb_blob, "image/jpeg"
|
89
82
|
|
@@ -98,5 +91,5 @@ async def get_thumb(path: str) -> Optional[tuple[bytes, str]]:
|
|
98
91
|
data = await fconn.get_file_blob(r.file_id)
|
99
92
|
assert data is not None
|
100
93
|
|
101
|
-
thumb_blob = await _save_cache_thumb(cur,
|
94
|
+
thumb_blob = await _save_cache_thumb(cur, file_id, data)
|
102
95
|
return thumb_blob, "image/jpeg"
|
lfss/sql/init.sql
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
CREATE TABLE IF NOT EXISTS user (
|
1
|
+
CREATE TABLE IF NOT EXISTS main.user (
|
2
2
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
3
3
|
username VARCHAR(256) UNIQUE NOT NULL,
|
4
4
|
credential VARCHAR(256) NOT NULL,
|
@@ -9,7 +9,7 @@ CREATE TABLE IF NOT EXISTS user (
|
|
9
9
|
permission INTEGER DEFAULT 0
|
10
10
|
);
|
11
11
|
|
12
|
-
CREATE TABLE IF NOT EXISTS fmeta (
|
12
|
+
CREATE TABLE IF NOT EXISTS main.fmeta (
|
13
13
|
url VARCHAR(1024) PRIMARY KEY,
|
14
14
|
owner_id INTEGER NOT NULL,
|
15
15
|
file_id CHAR(32) NOT NULL,
|
@@ -22,12 +22,17 @@ CREATE TABLE IF NOT EXISTS fmeta (
|
|
22
22
|
FOREIGN KEY(owner_id) REFERENCES user(id)
|
23
23
|
);
|
24
24
|
|
25
|
-
CREATE TABLE IF NOT EXISTS
|
25
|
+
CREATE TABLE IF NOT EXISTS main.dupcount (
|
26
|
+
file_id CHAR(32) PRIMARY KEY,
|
27
|
+
count INTEGER DEFAULT 0
|
28
|
+
);
|
29
|
+
|
30
|
+
CREATE TABLE IF NOT EXISTS main.usize (
|
26
31
|
user_id INTEGER PRIMARY KEY,
|
27
32
|
size INTEGER DEFAULT 0
|
28
33
|
);
|
29
34
|
|
30
|
-
CREATE TABLE IF NOT EXISTS upeer (
|
35
|
+
CREATE TABLE IF NOT EXISTS main.upeer (
|
31
36
|
src_user_id INTEGER NOT NULL,
|
32
37
|
dst_user_id INTEGER NOT NULL,
|
33
38
|
access_level INTEGER DEFAULT 0,
|
lfss/svc/app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lfss
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.11.0
|
4
4
|
Summary: Lightweight file storage service
|
5
5
|
Home-page: https://github.com/MenxLi/lfss
|
6
|
-
Author:
|
7
|
-
Author-email:
|
6
|
+
Author: Li, Mengxun
|
7
|
+
Author-email: mengxunli@whu.edu.cn
|
8
8
|
Requires-Python: >=3.10
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
@@ -1,9 +1,9 @@
|
|
1
1
|
Readme.md,sha256=B-foESzFWoSI5MEd89AWUzKcVRrTwipM28TK8GN0o8c,1920
|
2
|
-
docs/Changelog.md,sha256=QYej_hmGnv9t8wjFHXBvmrBOvY7aACZ82oa5SVkIyzM,882
|
3
2
|
docs/Enviroment_variables.md,sha256=xaL8qBwT8B2Qe11FaOU3xWrRCh1mJ1VyTFCeFbkd0rs,570
|
4
3
|
docs/Known_issues.md,sha256=ZqETcWP8lzTOel9b2mxEgCnADFF8IxOrEtiVO1NoMAk,251
|
5
4
|
docs/Permission.md,sha256=thUJx7YRoU63Pb-eqo5l5450DrZN3QYZ36GCn8r66no,3152
|
6
5
|
docs/Webdav.md,sha256=-Ja-BTWSY1BEMAyZycvEMNnkNTPZ49gSPzmf3Lbib70,1547
|
6
|
+
docs/changelog.md,sha256=QYej_hmGnv9t8wjFHXBvmrBOvY7aACZ82oa5SVkIyzM,882
|
7
7
|
frontend/api.js,sha256=GlQsNoZFEcy7QUUsLbXv7aP-KxRnIxM37FQHTaakGiQ,19387
|
8
8
|
frontend/index.html,sha256=-k0bJ5FRqdl_H-O441D_H9E-iejgRCaL_z5UeYaS2qc,3384
|
9
9
|
frontend/info.css,sha256=Ny0N3GywQ3a9q1_Qph_QFEKB4fEnTe_2DJ1Y5OsLLmQ,595
|
@@ -18,34 +18,34 @@ frontend/styles.css,sha256=xcNLqI3KBsY5TLnku8UIP0Jfr7QLajr1_KNlZj9eheM,4935
|
|
18
18
|
frontend/thumb.css,sha256=rNsx766amYS2DajSQNabhpQ92gdTpNoQKmV69OKvtpI,295
|
19
19
|
frontend/thumb.js,sha256=46ViD2TlTTWy0fx6wjoAs_5CQ4ajYB90vVzM7UO2IHw,6182
|
20
20
|
frontend/utils.js,sha256=IYUZl77ugiXKcLxSNOWC4NSS0CdD5yRgUsDb665j0xM,2556
|
21
|
-
lfss/api/__init__.py,sha256=
|
21
|
+
lfss/api/__init__.py,sha256=vg9xx7RwfA9ypeqIteGkjDbjMq_kZy2Uti74-XlE7vM,6822
|
22
22
|
lfss/api/connector.py,sha256=Duh57M3dOeG_M5UidZ4hMHK7ot1JsUC6RdXgIn6KTC8,12913
|
23
23
|
lfss/cli/__init__.py,sha256=lPwPmqpa7EXQ4zlU7E7LOe6X2kw_xATGdwoHphUEirA,827
|
24
24
|
lfss/cli/balance.py,sha256=fUbKKAUyaDn74f7mmxMfBL4Q4voyBLHu6Lg_g8GfMOQ,4121
|
25
|
-
lfss/cli/cli.py,sha256=
|
25
|
+
lfss/cli/cli.py,sha256=ZgX3M-0gdArDmOi-zo8RLnRy-4GSwJDRGV1scnE4IJs,8090
|
26
26
|
lfss/cli/panel.py,sha256=Xq3I_n-ctveym-Gh9LaUpzHiLlvt3a_nuDiwUS-MGrg,1597
|
27
27
|
lfss/cli/serve.py,sha256=vTo6_BiD7Dn3VLvHsC5RKRBC3lMu45JVr_0SqpgHdj0,1086
|
28
28
|
lfss/cli/user.py,sha256=1mTroQbaKxHjFCPHT67xwd08v-zxH0RZ_OnVc-4MzL0,5364
|
29
|
-
lfss/cli/vacuum.py,sha256=
|
29
|
+
lfss/cli/vacuum.py,sha256=SciDsIdy7cfRqrXcCKBAFb9FOLyXriZBZnXlCuy6F5I,6232
|
30
30
|
lfss/eng/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
31
|
lfss/eng/bounded_pool.py,sha256=BI1dU-MBf82TMwJBYbjhEty7w1jIUKc5Bn9SnZ_-hoY,1288
|
32
|
-
lfss/eng/config.py,sha256=
|
32
|
+
lfss/eng/config.py,sha256=FcTtPL7bOpg54nVL_gX-VTIjfN1cafy423ezoWGvouY,874
|
33
33
|
lfss/eng/connection_pool.py,sha256=1aq7nSgd7hB9YNV4PjD1RDRyl_moDw3ubBtSLyfgGBs,6320
|
34
|
-
lfss/eng/database.py,sha256=
|
34
|
+
lfss/eng/database.py,sha256=bzby4R2CbWuRsNQoWtnN-3fBLMjwLoSL-iirp6IsA_4,53247
|
35
35
|
lfss/eng/datatype.py,sha256=27UB7-l9SICy5lAvKjdzpTL_GohZjzstQcr9PtAq7nM,2709
|
36
36
|
lfss/eng/error.py,sha256=JGf5NV-f4rL6tNIDSAx5-l9MG8dEj7F2w_MuOjj1d1o,732
|
37
37
|
lfss/eng/log.py,sha256=u6WRZZsE7iOx6_CV2NHh1ugea26p408FI4WstZh896A,5139
|
38
|
-
lfss/eng/thumb.py,sha256=
|
38
|
+
lfss/eng/thumb.py,sha256=AFyWEkkpuCKGWOB9bLlaDwPKzQ9JtCSSmHMhX2Gu3CI,3096
|
39
39
|
lfss/eng/utils.py,sha256=WYoXFFi5308UWtFC8VP792gpzrVbHZZHhP3PaFjxIEY,6770
|
40
|
-
lfss/sql/init.sql,sha256=
|
40
|
+
lfss/sql/init.sql,sha256=FBmVzkNjYUnWjEELRFzf7xb50GngmzmeDVffT1Uk8u8,1625
|
41
41
|
lfss/sql/pragma.sql,sha256=uENx7xXjARmro-A3XAK8OM8v5AxDMdCCRj47f86UuXg,206
|
42
|
-
lfss/svc/app.py,sha256=
|
42
|
+
lfss/svc/app.py,sha256=r1KUO3sPaaJWbkJF0bcVTD7arPKLs2jFlq52Ixicomo,220
|
43
43
|
lfss/svc/app_base.py,sha256=bTQbz945xalyB3UZLlqVBvL6JKGNQ8Fm2KpIvvucPZQ,6850
|
44
44
|
lfss/svc/app_dav.py,sha256=D0KSgjtTktPjIhyIKG5eRmBdh5X8HYFYH151E6gzlbc,18245
|
45
45
|
lfss/svc/app_native.py,sha256=JbPge-F9irl26tXKAzfA5DfyjCh0Dgttflztqqrvt0A,8890
|
46
46
|
lfss/svc/common_impl.py,sha256=5ZRM24zVZpAeipgDtZUVBMFtArkydlAkn17ic_XL7v8,13733
|
47
47
|
lfss/svc/request_log.py,sha256=v8yXEIzPjaksu76Oh5vgdbUEUrw8Kt4etLAXBWSGie8,3207
|
48
|
-
lfss-0.
|
49
|
-
lfss-0.
|
50
|
-
lfss-0.
|
51
|
-
lfss-0.
|
48
|
+
lfss-0.11.0.dist-info/METADATA,sha256=Exr7PdhSmrOqhURUXUiEyP_q8cUSTQ8ZWTgX7tcmp7s,2712
|
49
|
+
lfss-0.11.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
50
|
+
lfss-0.11.0.dist-info/entry_points.txt,sha256=VJ8svMz7RLtMCgNk99CElx7zo7M-N-z7BWDVw2HA92E,205
|
51
|
+
lfss-0.11.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|