lfss 0.7.9__tar.gz → 0.7.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lfss-0.7.9 → lfss-0.7.10}/PKG-INFO +1 -1
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/balance.py +8 -58
- lfss-0.7.10/lfss/cli/vacuum.py +93 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/config.py +1 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/database.py +7 -3
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/log.py +2 -2
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/server.py +4 -4
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/stat.py +25 -1
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/utils.py +1 -0
- {lfss-0.7.9 → lfss-0.7.10}/pyproject.toml +2 -1
- {lfss-0.7.9 → lfss-0.7.10}/Readme.md +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/docs/Known_issues.md +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/docs/Permission.md +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/api.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/index.html +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/info.css +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/info.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/popup.css +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/popup.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/scripts.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/styles.css +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/utils.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/cli.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/panel.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/serve.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/user.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/client/__init__.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/client/api.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/sql/init.sql +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/sql/pragma.sql +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/__init__.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/connection_pool.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/datatype.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/error.py +0 -0
{lfss-0.7.9 → lfss-0.7.10}/lfss/cli/balance.py
@@ -8,26 +8,16 @@ from functools import wraps
 from asyncio import Semaphore
 import aiofiles, asyncio
 import aiofiles.os
-from contextlib import contextmanager
 from lfss.src.database import transaction, unique_cursor
 from lfss.src.connection_pool import global_entrance
 
-
-def indicator(name: str):
-    print(f"\033[1;33mRunning {name}... \033[0m")
-    s = time.time()
-    yield
-    print(f"{name} took {time.time() - s:.2f} seconds.")
-
-sem = Semaphore(1)
-
-def _get_sem():
-    return sem
+sem: Semaphore
 
 def barriered(func):
     @wraps(func)
     async def wrapper(*args, **kwargs):
-
+        global sem
+        async with sem:
             return await func(*args, **kwargs)
     return wrapper
 
@@ -62,8 +52,9 @@ async def move_to_internal(f_id: str, flag: str = ''):
 
 @global_entrance()
 async def _main(batch_size: int = 10000):
-
+    start_time = time.time()
 
+    tasks = []
     e_cout = 0
     for batch_count in itertools.count(start=0):
         async with unique_cursor() as conn:
@@ -80,6 +71,7 @@ async def _main(batch_size: int = 10000):
             tasks.append(move_to_external(f_id, flag=f"[b{batch_count+1}-e{i+1}/{len(exceeded_rows)}] "))
     await asyncio.gather(*tasks)
 
+    tasks = []
     i_count = 0
     for batch_count in itertools.count(start=0):
         async with unique_cursor() as conn:
@@ -96,58 +88,16 @@ async def _main(batch_size: int = 10000):
             tasks.append(move_to_internal(f_id, flag=f"[b{batch_count+1}-i{i+1}/{len(under_rows)}] "))
     await asyncio.gather(*tasks)
 
-    print(f"
-
-@global_entrance()
-async def vacuum(index: bool = False, blobs: bool = False):
-
-    # check if any file in the Large Blob directory is not in the database
-    # the reverse operation is not necessary, because by design, the database should be the source of truth...
-    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
-    async def ensure_external_consistency(f_id: str):
-        @barriered
-        async def fn():
-            async with unique_cursor() as c:
-                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
-                if not await cursor.fetchone():
-                    print(f"File {f_id} not found in database, removing from external storage.")
-                    await aiofiles.os.remove(f)
-        await asyncio.create_task(fn())
-
-    # create a temporary index to speed up the process...
-    with indicator("Clearing un-referenced files in external storage"):
-        async with transaction() as c:
-            await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
-        for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
-            f_id = f.name
-            await ensure_external_consistency(f_id)
-            if (i+1) % 1_000 == 0:
-                print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
-        async with transaction() as c:
-            await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
-
-    async with unique_cursor(is_write=True) as c:
+    print(f"Time elapsed: {time.time() - start_time:.2f}s. {e_cout} files moved to external storage, {i_count} files moved to internal storage.")
 
-        if index:
-            with indicator("VACUUM-index"):
-                await c.execute("VACUUM main")
-        if blobs:
-            with indicator("VACUUM-blobs"):
-                await c.execute("VACUUM blobs")
-
 def main():
     global sem
     parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
     parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
     parser.add_argument("-b", "--batch-size", type=int, default=10000, help="Batch size for processing files")
-    parser.add_argument("--vacuum", action="store_true", help="Run VACUUM only on index.db after balancing")
-    parser.add_argument("--vacuum-all", action="store_true", help="Run VACUUM on both index.db and blobs.db after balancing")
     args = parser.parse_args()
     sem = Semaphore(args.jobs)
-
-    asyncio.run(_main(args.batch_size))
-    if args.vacuum or args.vacuum_all:
-        asyncio.run(vacuum(index=args.vacuum or args.vacuum_all, blobs=args.vacuum_all))
+    asyncio.run(_main(args.batch_size))
 
 if __name__ == '__main__':
     main()
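Note on the change above: balance.py drops its local indicator() helper and the sem = Semaphore(1) / _get_sem() pair, declares a module-level sem: Semaphore that main() assigns from --jobs, and no longer runs VACUUM itself (that now lives in the new lfss/cli/vacuum.py below). A minimal standalone sketch of the semaphore-gating pattern, with illustrative worker names and sleeps that are not part of lfss:

# Standalone sketch of the concurrency gate used by balance.py/vacuum.py:
# a module-level Semaphore, assigned once in main(), caps how many wrapped
# coroutines run at the same time. Worker names and the sleep are illustrative.
import asyncio
from asyncio import Semaphore
from functools import wraps

sem: Semaphore  # assigned in main(), like the module-level declaration above

def barriered(func):
    @wraps(func)
    async def wrapper(*args, **kwargs):
        global sem
        async with sem:            # at most `jobs` wrapped calls run concurrently
            return await func(*args, **kwargs)
    return wrapper

@barriered
async def work(i: int):
    await asyncio.sleep(0.1)
    print(f"done {i}")

async def _main():
    await asyncio.gather(*(work(i) for i in range(8)))

def main(jobs: int = 2):
    global sem
    sem = Semaphore(jobs)          # mirrors: sem = Semaphore(args.jobs)
    asyncio.run(_main())

main()

With jobs=2, at most two work() calls hold the semaphore at once, which is how the balancer caps concurrent blob moves.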
lfss-0.7.10/lfss/cli/vacuum.py
@@ -0,0 +1,93 @@
+"""
+Vacuum the database and external storage to ensure that the storage is consistent and minimal.
+"""
+
+from lfss.src.config import LARGE_BLOB_DIR, DATA_HOME
+import argparse, time, os
+from functools import wraps
+from asyncio import Semaphore
+import aiofiles, asyncio
+import aiofiles.os
+from contextlib import contextmanager
+from lfss.src.database import transaction, unique_cursor
+from lfss.src.stat import RequestDB
+from lfss.src.connection_pool import global_entrance
+
+sem: Semaphore
+
+@contextmanager
+def indicator(name: str):
+    print(f"\033[1;33mRunning {name}... \033[0m")
+    s = time.time()
+    yield
+    print(f"{name} took {time.time() - s:.2f} seconds.")
+
+def barriered(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        global sem
+        async with sem:
+            return await func(*args, **kwargs)
+    return wrapper
+
+@global_entrance()
+async def vacuum_main(index: bool = False, blobs: bool = False):
+
+    # check if any file in the Large Blob directory is not in the database
+    # the reverse operation is not necessary, because by design, the database should be the source of truth...
+    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
+    async def ensure_external_consistency(f_id: str):
+        @barriered
+        async def fn():
+            async with unique_cursor() as c:
+                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                if not await cursor.fetchone():
+                    print(f"File {f_id} not found in database, removing from external storage.")
+                    await aiofiles.os.remove(f)
+        await asyncio.create_task(fn())
+
+    # create a temporary index to speed up the process...
+    with indicator("Clearing un-referenced files in external storage"):
+        async with transaction() as c:
+            await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+        for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+            f_id = f.name
+            await ensure_external_consistency(f_id)
+            if (i+1) % 1_000 == 0:
+                print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+        async with transaction() as c:
+            await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+
+    async with unique_cursor(is_write=True) as c:
+        if index:
+            with indicator("VACUUM-index"):
+                await c.execute("VACUUM main")
+        if blobs:
+            with indicator("VACUUM-blobs"):
+                await c.execute("VACUUM blobs")
+
+async def vacuum_requests():
+    with indicator("VACUUM-requests"):
+        req_db = await RequestDB().init()
+        try:
+            await req_db.shrink()
+            await req_db.conn.execute("VACUUM")
+        finally:
+            await req_db.close()
+
+def main():
+    global sem
+    parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
+    parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
+    parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
+    parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
+    parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs")
+    args = parser.parse_args()
+    sem = Semaphore(args.jobs)
+    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data))
+
+    if args.requests:
+        asyncio.run(vacuum_requests())
+
+if __name__ == '__main__':
+    main()
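The new module is exposed as the lfss-vacuum console script (see the pyproject.toml change further down), e.g. lfss-vacuum -m -d -r to vacuum metadata, blobs, and request logs. The VACUUM main / VACUUM blobs statements suggest the blob store is attached to the metadata connection under the schema name blobs; a hedged aiosqlite sketch of that idea, with assumed file names and a direct connection instead of lfss's connection pool:

# Illustrative sketch of vacuuming a primary and an attached SQLite database,
# mirroring the "VACUUM main" / "VACUUM blobs" statements above. The file
# names and the direct aiosqlite connection are assumptions; lfss routes this
# through unique_cursor() and its connection pool instead.
import asyncio
import aiosqlite

async def vacuum_both(index_path: str = "index.db", blobs_path: str = "blobs.db"):
    async with aiosqlite.connect(index_path) as conn:
        await conn.execute("ATTACH DATABASE ? AS blobs", (blobs_path,))
        await conn.execute("VACUUM main")    # rebuilds the primary database file
        await conn.execute("VACUUM blobs")   # rebuilds the attached database file

asyncio.run(vacuum_both())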
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/database.py
@@ -11,7 +11,7 @@ import aiofiles.os
 
 from .connection_pool import execute_sql, unique_cursor, transaction
 from .datatype import UserRecord, FileReadPermission, FileRecord, DirectoryRecord, PathContents
-from .config import LARGE_BLOB_DIR
+from .config import LARGE_BLOB_DIR, CHUNK_SIZE
 from .log import get_logger
 from .utils import decode_uri_compnents, hash_credential, concurrent_wrap
 from .error import *
@@ -333,7 +333,7 @@ class FileConn(DBObjectBase):
         await self.cur.execute("DELETE FROM usize WHERE user_id = ?", (owner_id, ))
         res = await self.cur.execute("DELETE FROM fmeta WHERE owner_id = ? RETURNING *", (owner_id, ))
         ret = [self.parse_record(r) for r in await res.fetchall()]
-        self.logger.info(f"Deleted {len(ret)} file
+        self.logger.info(f"Deleted {len(ret)} file records for user {owner_id}") # type: ignore
         return ret
 
     async def delete_path_records(self, path: str, under_user_id: Optional[int] = None) -> list[FileRecord]:
@@ -384,7 +384,9 @@ class FileConn(DBObjectBase):
     async def get_file_blob_external(self, file_id: str) -> AsyncIterable[bytes]:
         assert (LARGE_BLOB_DIR / file_id).exists(), f"File {file_id} not found"
         async with aiofiles.open(LARGE_BLOB_DIR / file_id, 'rb') as f:
-
+            while True:
+                chunk = await f.read(CHUNK_SIZE)
+                if not chunk: break
                 yield chunk
 
     async def delete_file_blob_external(self, file_id: str):
@@ -639,7 +641,9 @@ class Database:
 
         fconn = FileConn(cur)
         records = await fconn.delete_user_file_records(user.id)
+        self.logger.debug("Deleting files...")
         await self.__batch_delete_file_blobs(fconn, records)
+        self.logger.info(f"Deleted {len(records)} file(s) for user {user.username}")
 
         # make sure the user's directory is deleted,
         # may contain admin's files, but delete them all
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/log.py
@@ -94,11 +94,11 @@ def get_logger(
         file_handler = logging.FileHandler(log_file)
     elif file_handler_type == 'daily':
         file_handler = handlers.TimedRotatingFileHandler(
-            log_file, when='midnight', interval=1, backupCount=
+            log_file, when='midnight', interval=1, backupCount=30
        )
     elif file_handler_type == 'rotate':
         file_handler = handlers.RotatingFileHandler(
-            log_file, maxBytes=
+            log_file, maxBytes=1024*1024, backupCount=5
        )
 
     file_handler.setFormatter(formatter_plain)
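For reference, the handler arguments set above correspond to this standard-library configuration (a standalone sketch; the file names and logger name are placeholders, not taken from lfss):

# 'daily': rotate at midnight, keep the last 30 daily files.
# 'rotate': rotate when the file reaches 1 MiB, keep 5 backups.
import logging
from logging import handlers

logger = logging.getLogger("rotation-demo")
daily_handler = handlers.TimedRotatingFileHandler(
    "demo-daily.log", when="midnight", interval=1, backupCount=30)
size_handler = handlers.RotatingFileHandler(
    "demo-rotate.log", maxBytes=1024 * 1024, backupCount=5)
logger.addHandler(daily_handler)
logger.addHandler(size_handler)
logger.warning("demo message")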
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/server.py
@@ -15,7 +15,7 @@ from contextlib import asynccontextmanager
 from .error import *
 from .log import get_logger
 from .stat import RequestDB
-from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES
+from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES, CHUNK_SIZE
 from .utils import ensure_uri_compnents, format_last_modified, now_stamp
 from .connection_pool import global_connection_init, global_connection_close, unique_cursor
 from .database import Database, UserRecord, DECOY_USER, FileRecord, check_user_permission, FileReadPermission, UserConn, FileConn, PathContents
@@ -31,6 +31,7 @@ async def lifespan(app: FastAPI):
     try:
         await global_connection_init(n_read = 2)
         await asyncio.gather(db.init(), req_conn.init())
+        await req_conn.shrink()
         yield
         await req_conn.commit()
     finally:
@@ -253,9 +254,8 @@ async def put_file(
 
     if len(blobs) > LARGE_FILE_BYTES:
         async def blob_reader():
-
-
-            yield blobs[b:b+chunk_size]
+            for b in range(0, len(blobs), CHUNK_SIZE):
+                yield blobs[b:b+CHUNK_SIZE]
         await db.save_file(user.id, path, blob_reader(), permission = FileReadPermission(permission), mime_type = mime_t)
     else:
         await db.save_file(user.id, path, blobs, permission = FileReadPermission(permission), mime_type=mime_t)
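put_file now re-chunks large request bodies with the shared CHUNK_SIZE constant and hands them to db.save_file as an async generator, replacing the previous chunk_size-based loop. A self-contained sketch of the same pattern, where the 4 KiB CHUNK_SIZE value and the consume() helper are illustrative stand-ins rather than lfss code:

# Minimal sketch of re-chunking a bytes payload into an async generator.
# CHUNK_SIZE here is an illustrative stand-in for the constant from lfss.src.config.
import asyncio
from typing import AsyncIterable

CHUNK_SIZE = 4 * 1024

async def consume(stream: AsyncIterable[bytes]) -> int:
    # Stand-in for a consumer such as Database.save_file; just counts bytes.
    total = 0
    async for chunk in stream:
        total += len(chunk)
    return total

async def demo():
    blobs = b"x" * 10_000
    async def blob_reader():
        for b in range(0, len(blobs), CHUNK_SIZE):
            yield blobs[b:b + CHUNK_SIZE]
    print(await consume(blob_reader()))  # 10000

asyncio.run(demo())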
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/stat.py
@@ -1,7 +1,7 @@
 from typing import Optional, Any
 import aiosqlite
 from .config import DATA_HOME
-from .utils import debounce_async
+from .utils import debounce_async, concurrent_wrap
 
 class RequestDB:
     conn: aiosqlite.Connection
@@ -25,6 +25,7 @@ class RequestDB:
                 status INTEGER
             )
         ''')
+        return self
 
     async def close(self):
         await self.conn.close()
@@ -63,4 +64,27 @@ class RequestDB:
         ''', (time, method, path, headers, query, client, duration, request_size, response_size, status)) as cursor:
             assert cursor.lastrowid is not None
             return cursor.lastrowid
+
+    @concurrent_wrap()
+    async def shrink(self, max_rows: int = 1_000_000, time_before: float = 0):
+        async with aiosqlite.connect(self.db) as conn:
+
+            # remove all but the last max_rows
+            res = await (await conn.execute('SELECT COUNT(*) FROM requests')).fetchone()
+            assert res is not None
+            row_len = res[0]
+            if row_len > max_rows:
+                await conn.execute('''
+                    DELETE FROM requests WHERE id NOT IN (
+                        SELECT id FROM requests ORDER BY time DESC LIMIT ?
+                    )
+                ''', (max_rows,))
+
+            # remove old requests that is older than time_before
+            if time_before > 0:
+                await conn.execute('''
+                    DELETE FROM requests WHERE time < ?
+                ''', (time_before,))
+
+            await conn.commit()
 
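RequestDB.shrink() keeps only the newest max_rows request-log rows and optionally deletes rows older than time_before (a Unix timestamp in seconds, matching the now_stamp() docstring added in utils.py); the server calls it at startup, and lfss-vacuum -r calls it before vacuuming. A hedged usage sketch in which the 500,000-row cap and 30-day cutoff are arbitrary example values:

# Illustrative only: prune the request log to 500k rows and drop entries older than ~30 days.
import asyncio
from lfss.src.stat import RequestDB
from lfss.src.utils import now_stamp

async def prune_request_log():
    req_db = await RequestDB().init()   # init() now returns self (see the change above)
    try:
        await req_db.shrink(max_rows=500_000, time_before=now_stamp() - 30 * 24 * 3600)
    finally:
        await req_db.close()

asyncio.run(prune_request_log())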
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/utils.py
@@ -57,6 +57,7 @@ def format_last_modified(last_modified_gmt: str):
     return dt.strftime('%a, %d %b %Y %H:%M:%S GMT')
 
 def now_stamp() -> float:
+    """ Get the current timestamp, in seconds """
     return datetime.datetime.now().timestamp()
 
 def stamp_to_str(stamp: float) -> str:
{lfss-0.7.9 → lfss-0.7.10}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lfss"
-version = "0.7.9"
+version = "0.7.10"
 description = "Lightweight file storage service"
 authors = ["li, mengxun <limengxun45@outlook.com>"]
 readme = "Readme.md"
@@ -24,6 +24,7 @@ lfss-serve = "lfss.cli.serve:main"
 lfss-user = "lfss.cli.user:main"
 lfss-panel = "lfss.cli.panel:main"
 lfss-cli = "lfss.cli.cli:main"
+lfss-vacuum = "lfss.cli.vacuum:main"
 lfss-balance = "lfss.cli.balance:main"
 
 [build-system]