lfss 0.7.9__tar.gz → 0.7.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lfss-0.7.9 → lfss-0.7.10}/PKG-INFO +1 -1
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/balance.py +8 -58
- lfss-0.7.10/lfss/cli/vacuum.py +93 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/config.py +1 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/database.py +7 -3
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/log.py +2 -2
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/server.py +4 -4
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/stat.py +25 -1
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/utils.py +1 -0
- {lfss-0.7.9 → lfss-0.7.10}/pyproject.toml +2 -1
- {lfss-0.7.9 → lfss-0.7.10}/Readme.md +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/docs/Known_issues.md +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/docs/Permission.md +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/api.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/index.html +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/info.css +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/info.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/popup.css +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/popup.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/scripts.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/styles.css +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/frontend/utils.js +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/cli.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/panel.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/serve.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/cli/user.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/client/__init__.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/client/api.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/sql/init.sql +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/sql/pragma.sql +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/__init__.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/connection_pool.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/datatype.py +0 -0
- {lfss-0.7.9 → lfss-0.7.10}/lfss/src/error.py +0 -0
{lfss-0.7.9 → lfss-0.7.10}/lfss/cli/balance.py
@@ -8,26 +8,16 @@ from functools import wraps
 from asyncio import Semaphore
 import aiofiles, asyncio
 import aiofiles.os
-from contextlib import contextmanager
 from lfss.src.database import transaction, unique_cursor
 from lfss.src.connection_pool import global_entrance
 
-
-def indicator(name: str):
-    print(f"\033[1;33mRunning {name}... \033[0m")
-    s = time.time()
-    yield
-    print(f"{name} took {time.time() - s:.2f} seconds.")
-
-sem = Semaphore(1)
-
-def _get_sem():
-    return sem
+sem: Semaphore
 
 def barriered(func):
     @wraps(func)
     async def wrapper(*args, **kwargs):
-
+        global sem
+        async with sem:
             return await func(*args, **kwargs)
     return wrapper
 
@@ -62,8 +52,9 @@ async def move_to_internal(f_id: str, flag: str = ''):
 
 @global_entrance()
 async def _main(batch_size: int = 10000):
-
+    start_time = time.time()
 
+    tasks = []
     e_cout = 0
     for batch_count in itertools.count(start=0):
         async with unique_cursor() as conn:
@@ -80,6 +71,7 @@ async def _main(batch_size: int = 10000):
             tasks.append(move_to_external(f_id, flag=f"[b{batch_count+1}-e{i+1}/{len(exceeded_rows)}] "))
     await asyncio.gather(*tasks)
 
+    tasks = []
     i_count = 0
     for batch_count in itertools.count(start=0):
         async with unique_cursor() as conn:
@@ -96,58 +88,16 @@ async def _main(batch_size: int = 10000):
             tasks.append(move_to_internal(f_id, flag=f"[b{batch_count+1}-i{i+1}/{len(under_rows)}] "))
     await asyncio.gather(*tasks)
 
-    print(f"
-
-@global_entrance()
-async def vacuum(index: bool = False, blobs: bool = False):
-
-    # check if any file in the Large Blob directory is not in the database
-    # the reverse operation is not necessary, because by design, the database should be the source of truth...
-    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
-    async def ensure_external_consistency(f_id: str):
-        @barriered
-        async def fn():
-            async with unique_cursor() as c:
-                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
-                if not await cursor.fetchone():
-                    print(f"File {f_id} not found in database, removing from external storage.")
-                    await aiofiles.os.remove(f)
-        await asyncio.create_task(fn())
-
-    # create a temporary index to speed up the process...
-    with indicator("Clearing un-referenced files in external storage"):
-        async with transaction() as c:
-            await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
-        for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
-            f_id = f.name
-            await ensure_external_consistency(f_id)
-            if (i+1) % 1_000 == 0:
-                print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
-        async with transaction() as c:
-            await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
-
-    async with unique_cursor(is_write=True) as c:
+    print(f"Time elapsed: {time.time() - start_time:.2f}s. {e_cout} files moved to external storage, {i_count} files moved to internal storage.")
 
-        if index:
-            with indicator("VACUUM-index"):
-                await c.execute("VACUUM main")
-        if blobs:
-            with indicator("VACUUM-blobs"):
-                await c.execute("VACUUM blobs")
-
 def main():
     global sem
     parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
     parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
     parser.add_argument("-b", "--batch-size", type=int, default=10000, help="Batch size for processing files")
-    parser.add_argument("--vacuum", action="store_true", help="Run VACUUM only on index.db after balancing")
-    parser.add_argument("--vacuum-all", action="store_true", help="Run VACUUM on both index.db and blobs.db after balancing")
     args = parser.parse_args()
     sem = Semaphore(args.jobs)
-
-    asyncio.run(_main(args.batch_size))
-    if args.vacuum or args.vacuum_all:
-        asyncio.run(vacuum(index=args.vacuum or args.vacuum_all, blobs=args.vacuum_all))
+    asyncio.run(_main(args.batch_size))
 
 if __name__ == '__main__':
     main()
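Note on the change above: balance.py drops its local indicator() helper and the sem = Semaphore(1) / _get_sem() pair, declares a module-level sem: Semaphore that main() assigns from --jobs, and no longer runs VACUUM itself (that now lives in the new lfss/cli/vacuum.py below). A minimal standalone sketch of the semaphore-gating pattern, with illustrative worker names and sleeps that are not part of lfss:

# Standalone sketch of the concurrency gate used by balance.py/vacuum.py:
# a module-level Semaphore, assigned once in main(), caps how many wrapped
# coroutines run at the same time. Worker names and the sleep are illustrative.
import asyncio
from asyncio import Semaphore
from functools import wraps

sem: Semaphore  # assigned in main(), like the module-level declaration above

def barriered(func):
    @wraps(func)
    async def wrapper(*args, **kwargs):
        global sem
        async with sem:            # at most `jobs` wrapped calls run concurrently
            return await func(*args, **kwargs)
    return wrapper

@barriered
async def work(i: int):
    await asyncio.sleep(0.1)
    print(f"done {i}")

async def _main():
    await asyncio.gather(*(work(i) for i in range(8)))

def main(jobs: int = 2):
    global sem
    sem = Semaphore(jobs)          # mirrors: sem = Semaphore(args.jobs)
    asyncio.run(_main())

main()

With jobs=2, at most two work() calls hold the semaphore at once, which is how the balancer caps concurrent blob moves.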
lfss-0.7.10/lfss/cli/vacuum.py
@@ -0,0 +1,93 @@
+"""
+Vacuum the database and external storage to ensure that the storage is consistent and minimal.
+"""
+
+from lfss.src.config import LARGE_BLOB_DIR, DATA_HOME
+import argparse, time, os
+from functools import wraps
+from asyncio import Semaphore
+import aiofiles, asyncio
+import aiofiles.os
+from contextlib import contextmanager
+from lfss.src.database import transaction, unique_cursor
+from lfss.src.stat import RequestDB
+from lfss.src.connection_pool import global_entrance
+
+sem: Semaphore
+
+@contextmanager
+def indicator(name: str):
+    print(f"\033[1;33mRunning {name}... \033[0m")
+    s = time.time()
+    yield
+    print(f"{name} took {time.time() - s:.2f} seconds.")
+
+def barriered(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        global sem
+        async with sem:
+            return await func(*args, **kwargs)
+    return wrapper
+
+@global_entrance()
+async def vacuum_main(index: bool = False, blobs: bool = False):
+
+    # check if any file in the Large Blob directory is not in the database
+    # the reverse operation is not necessary, because by design, the database should be the source of truth...
+    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
+    async def ensure_external_consistency(f_id: str):
+        @barriered
+        async def fn():
+            async with unique_cursor() as c:
+                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                if not await cursor.fetchone():
+                    print(f"File {f_id} not found in database, removing from external storage.")
+                    await aiofiles.os.remove(f)
+        await asyncio.create_task(fn())
+
+    # create a temporary index to speed up the process...
+    with indicator("Clearing un-referenced files in external storage"):
+        async with transaction() as c:
+            await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+        for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+            f_id = f.name
+            await ensure_external_consistency(f_id)
+            if (i+1) % 1_000 == 0:
+                print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+        async with transaction() as c:
+            await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+
+    async with unique_cursor(is_write=True) as c:
+        if index:
+            with indicator("VACUUM-index"):
+                await c.execute("VACUUM main")
+        if blobs:
+            with indicator("VACUUM-blobs"):
+                await c.execute("VACUUM blobs")
+
+async def vacuum_requests():
+    with indicator("VACUUM-requests"):
+        req_db = await RequestDB().init()
+        try:
+            await req_db.shrink()
+            await req_db.conn.execute("VACUUM")
+        finally:
+            await req_db.close()
+
+def main():
+    global sem
+    parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
+    parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
+    parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
+    parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
+    parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs")
+    args = parser.parse_args()
+    sem = Semaphore(args.jobs)
+    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data))
+
+    if args.requests:
+        asyncio.run(vacuum_requests())
+
+if __name__ == '__main__':
+    main()
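The new module is exposed as the lfss-vacuum console script (see the pyproject.toml change further down), e.g. lfss-vacuum -m -d -r to vacuum metadata, blobs, and request logs. The VACUUM main / VACUUM blobs statements suggest the blob store is attached to the metadata connection under the schema name blobs; a hedged aiosqlite sketch of that idea, with assumed file names and a direct connection instead of lfss's connection pool:

# Illustrative sketch of vacuuming a primary and an attached SQLite database,
# mirroring the "VACUUM main" / "VACUUM blobs" statements above. The file
# names and the direct aiosqlite connection are assumptions; lfss routes this
# through unique_cursor() and its connection pool instead.
import asyncio
import aiosqlite

async def vacuum_both(index_path: str = "index.db", blobs_path: str = "blobs.db"):
    async with aiosqlite.connect(index_path) as conn:
        await conn.execute("ATTACH DATABASE ? AS blobs", (blobs_path,))
        await conn.execute("VACUUM main")    # rebuilds the primary database file
        await conn.execute("VACUUM blobs")   # rebuilds the attached database file

asyncio.run(vacuum_both())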
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/database.py
@@ -11,7 +11,7 @@ import aiofiles.os
 
 from .connection_pool import execute_sql, unique_cursor, transaction
 from .datatype import UserRecord, FileReadPermission, FileRecord, DirectoryRecord, PathContents
-from .config import LARGE_BLOB_DIR
+from .config import LARGE_BLOB_DIR, CHUNK_SIZE
 from .log import get_logger
 from .utils import decode_uri_compnents, hash_credential, concurrent_wrap
 from .error import *
@@ -333,7 +333,7 @@ class FileConn(DBObjectBase):
         await self.cur.execute("DELETE FROM usize WHERE user_id = ?", (owner_id, ))
         res = await self.cur.execute("DELETE FROM fmeta WHERE owner_id = ? RETURNING *", (owner_id, ))
         ret = [self.parse_record(r) for r in await res.fetchall()]
-        self.logger.info(f"Deleted {len(ret)} file
+        self.logger.info(f"Deleted {len(ret)} file records for user {owner_id}") # type: ignore
         return ret
 
     async def delete_path_records(self, path: str, under_user_id: Optional[int] = None) -> list[FileRecord]:
@@ -384,7 +384,9 @@ class FileConn(DBObjectBase):
     async def get_file_blob_external(self, file_id: str) -> AsyncIterable[bytes]:
         assert (LARGE_BLOB_DIR / file_id).exists(), f"File {file_id} not found"
         async with aiofiles.open(LARGE_BLOB_DIR / file_id, 'rb') as f:
-
+            while True:
+                chunk = await f.read(CHUNK_SIZE)
+                if not chunk: break
                 yield chunk
 
     async def delete_file_blob_external(self, file_id: str):
@@ -639,7 +641,9 @@ class Database:
 
         fconn = FileConn(cur)
         records = await fconn.delete_user_file_records(user.id)
+        self.logger.debug("Deleting files...")
         await self.__batch_delete_file_blobs(fconn, records)
+        self.logger.info(f"Deleted {len(records)} file(s) for user {user.username}")
 
         # make sure the user's directory is deleted,
         # may contain admin's files, but delete them all
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/log.py
@@ -94,11 +94,11 @@ def get_logger(
         file_handler = logging.FileHandler(log_file)
     elif file_handler_type == 'daily':
         file_handler = handlers.TimedRotatingFileHandler(
-            log_file, when='midnight', interval=1, backupCount=
+            log_file, when='midnight', interval=1, backupCount=30
        )
     elif file_handler_type == 'rotate':
         file_handler = handlers.RotatingFileHandler(
-            log_file, maxBytes=
+            log_file, maxBytes=1024*1024, backupCount=5
        )
 
     file_handler.setFormatter(formatter_plain)
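For reference, the handler arguments set above correspond to this standard-library configuration (a standalone sketch; the file names and logger name are placeholders, not taken from lfss):

# 'daily': rotate at midnight, keep the last 30 daily files.
# 'rotate': rotate when the file reaches 1 MiB, keep 5 backups.
import logging
from logging import handlers

logger = logging.getLogger("rotation-demo")
daily_handler = handlers.TimedRotatingFileHandler(
    "demo-daily.log", when="midnight", interval=1, backupCount=30)
size_handler = handlers.RotatingFileHandler(
    "demo-rotate.log", maxBytes=1024 * 1024, backupCount=5)
logger.addHandler(daily_handler)
logger.addHandler(size_handler)
logger.warning("demo message")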
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/server.py
@@ -15,7 +15,7 @@ from contextlib import asynccontextmanager
 from .error import *
 from .log import get_logger
 from .stat import RequestDB
-from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES
+from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES, CHUNK_SIZE
 from .utils import ensure_uri_compnents, format_last_modified, now_stamp
 from .connection_pool import global_connection_init, global_connection_close, unique_cursor
 from .database import Database, UserRecord, DECOY_USER, FileRecord, check_user_permission, FileReadPermission, UserConn, FileConn, PathContents
@@ -31,6 +31,7 @@ async def lifespan(app: FastAPI):
     try:
         await global_connection_init(n_read = 2)
         await asyncio.gather(db.init(), req_conn.init())
+        await req_conn.shrink()
         yield
         await req_conn.commit()
     finally:
@@ -253,9 +254,8 @@ async def put_file(
 
     if len(blobs) > LARGE_FILE_BYTES:
         async def blob_reader():
-
-
-            yield blobs[b:b+chunk_size]
+            for b in range(0, len(blobs), CHUNK_SIZE):
+                yield blobs[b:b+CHUNK_SIZE]
         await db.save_file(user.id, path, blob_reader(), permission = FileReadPermission(permission), mime_type = mime_t)
     else:
         await db.save_file(user.id, path, blobs, permission = FileReadPermission(permission), mime_type=mime_t)
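put_file now re-chunks large request bodies with the shared CHUNK_SIZE constant and hands them to db.save_file as an async generator, replacing the previous chunk_size-based loop. A self-contained sketch of the same pattern, where the 4 KiB CHUNK_SIZE value and the consume() helper are illustrative stand-ins rather than lfss code:

# Minimal sketch of re-chunking a bytes payload into an async generator.
# CHUNK_SIZE here is an illustrative stand-in for the constant from lfss.src.config.
import asyncio
from typing import AsyncIterable

CHUNK_SIZE = 4 * 1024

async def consume(stream: AsyncIterable[bytes]) -> int:
    # Stand-in for a consumer such as Database.save_file; just counts bytes.
    total = 0
    async for chunk in stream:
        total += len(chunk)
    return total

async def demo():
    blobs = b"x" * 10_000
    async def blob_reader():
        for b in range(0, len(blobs), CHUNK_SIZE):
            yield blobs[b:b + CHUNK_SIZE]
    print(await consume(blob_reader()))  # 10000

asyncio.run(demo())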
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/stat.py
@@ -1,7 +1,7 @@
 from typing import Optional, Any
 import aiosqlite
 from .config import DATA_HOME
-from .utils import debounce_async
+from .utils import debounce_async, concurrent_wrap
 
 class RequestDB:
     conn: aiosqlite.Connection
@@ -25,6 +25,7 @@ class RequestDB:
                 status INTEGER
             )
         ''')
+        return self
 
     async def close(self):
         await self.conn.close()
@@ -63,4 +64,27 @@ class RequestDB:
         ''', (time, method, path, headers, query, client, duration, request_size, response_size, status)) as cursor:
             assert cursor.lastrowid is not None
             return cursor.lastrowid
+
+    @concurrent_wrap()
+    async def shrink(self, max_rows: int = 1_000_000, time_before: float = 0):
+        async with aiosqlite.connect(self.db) as conn:
+
+            # remove all but the last max_rows
+            res = await (await conn.execute('SELECT COUNT(*) FROM requests')).fetchone()
+            assert res is not None
+            row_len = res[0]
+            if row_len > max_rows:
+                await conn.execute('''
+                    DELETE FROM requests WHERE id NOT IN (
+                        SELECT id FROM requests ORDER BY time DESC LIMIT ?
+                    )
+                ''', (max_rows,))
+
+            # remove old requests that is older than time_before
+            if time_before > 0:
+                await conn.execute('''
+                    DELETE FROM requests WHERE time < ?
+                ''', (time_before,))
+
+            await conn.commit()
 
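RequestDB.shrink() keeps only the newest max_rows request-log rows and optionally deletes rows older than time_before (a Unix timestamp in seconds, matching the now_stamp() docstring added in utils.py); the server calls it at startup, and lfss-vacuum -r calls it before vacuuming. A hedged usage sketch in which the 500,000-row cap and 30-day cutoff are arbitrary example values:

# Illustrative only: prune the request log to 500k rows and drop entries older than ~30 days.
import asyncio
from lfss.src.stat import RequestDB
from lfss.src.utils import now_stamp

async def prune_request_log():
    req_db = await RequestDB().init()   # init() now returns self (see the change above)
    try:
        await req_db.shrink(max_rows=500_000, time_before=now_stamp() - 30 * 24 * 3600)
    finally:
        await req_db.close()

asyncio.run(prune_request_log())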
{lfss-0.7.9 → lfss-0.7.10}/lfss/src/utils.py
@@ -57,6 +57,7 @@ def format_last_modified(last_modified_gmt: str):
     return dt.strftime('%a, %d %b %Y %H:%M:%S GMT')
 
 def now_stamp() -> float:
+    """ Get the current timestamp, in seconds """
     return datetime.datetime.now().timestamp()
 
 def stamp_to_str(stamp: float) -> str:
{lfss-0.7.9 → lfss-0.7.10}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lfss"
-version = "0.7.9"
+version = "0.7.10"
 description = "Lightweight file storage service"
 authors = ["li, mengxun <limengxun45@outlook.com>"]
 readme = "Readme.md"
@@ -24,6 +24,7 @@ lfss-serve = "lfss.cli.serve:main"
 lfss-user = "lfss.cli.user:main"
 lfss-panel = "lfss.cli.panel:main"
 lfss-cli = "lfss.cli.cli:main"
+lfss-vacuum = "lfss.cli.vacuum:main"
 lfss-balance = "lfss.cli.balance:main"
 
 [build-system]