lfss 0.9.2__py3-none-any.whl → 0.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Readme.md +4 -4
- docs/Enviroment_variables.md +4 -2
- docs/Permission.md +4 -4
- docs/Webdav.md +3 -3
- docs/changelog.md +58 -0
- frontend/api.js +66 -4
- frontend/login.js +0 -1
- frontend/popup.js +18 -3
- frontend/scripts.js +46 -39
- frontend/utils.js +98 -1
- lfss/api/__init__.py +7 -4
- lfss/api/connector.py +47 -11
- lfss/cli/cli.py +9 -9
- lfss/cli/log.py +77 -0
- lfss/cli/vacuum.py +69 -19
- lfss/eng/config.py +7 -5
- lfss/eng/connection_pool.py +12 -8
- lfss/eng/database.py +350 -140
- lfss/eng/error.py +6 -2
- lfss/eng/log.py +91 -21
- lfss/eng/thumb.py +20 -23
- lfss/eng/utils.py +50 -29
- lfss/sql/init.sql +9 -4
- lfss/svc/app.py +1 -1
- lfss/svc/app_base.py +8 -3
- lfss/svc/app_dav.py +74 -61
- lfss/svc/app_native.py +95 -59
- lfss/svc/common_impl.py +72 -37
- {lfss-0.9.2.dist-info → lfss-0.11.4.dist-info}/METADATA +10 -8
- lfss-0.11.4.dist-info/RECORD +52 -0
- {lfss-0.9.2.dist-info → lfss-0.11.4.dist-info}/entry_points.txt +1 -0
- lfss-0.9.2.dist-info/RECORD +0 -50
- {lfss-0.9.2.dist-info → lfss-0.11.4.dist-info}/WHEEL +0 -0
lfss/api/connector.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
-from typing import Optional, Literal
-import
+from typing import Optional, Literal
+from collections.abc import Iterator
+import os, json
 import requests
 import requests.adapters
 import urllib.parse
@@ -14,12 +15,13 @@ from lfss.eng.utils import ensure_uri_compnents
 
 _default_endpoint = os.environ.get('LFSS_ENDPOINT', 'http://localhost:8000')
 _default_token = os.environ.get('LFSS_TOKEN', '')
+num_t = float | int
 
 class Connector:
     class Session:
         def __init__(
             self, connector: Connector, pool_size: int = 10,
-            retry: int = 1, backoff_factor:
+            retry: int = 1, backoff_factor: num_t = 0.5, status_forcelist: list[int] = [503]
         ):
             self.connector = connector
             self.pool_size = pool_size
@@ -46,13 +48,21 @@ class Connector:
         def __exit__(self, exc_type, exc_value, traceback):
             self.close()
 
-    def __init__(self, endpoint=_default_endpoint, token=_default_token):
+    def __init__(self, endpoint=_default_endpoint, token=_default_token, timeout: Optional[num_t | tuple[num_t, num_t]]=None, verify: Optional[bool | str] = None):
+        """
+        - endpoint: the URL of the LFSS server. Default to $LFSS_ENDPOINT or http://localhost:8000.
+        - token: the access token. Default to $LFSS_TOKEN.
+        - timeout: the timeout for each request, can be either a single value or a tuple of two values (connect, read), refer to requests.Session.request.
+        - verify: either a boolean or a string, to control SSL verification. Default to True, refer to requests.Session.request.
+        """
         assert token, "No token provided. Please set LFSS_TOKEN environment variable."
         self.config = {
             "endpoint": endpoint,
             "token": token
         }
         self._session: Optional[requests.Session] = None
+        self.timeout = timeout
+        self.verify = verify
 
     def session( self, pool_size: int = 10, **kwargs):
         """ avoid creating a new session for each request. """
@@ -66,18 +76,22 @@ class Connector:
            path = path[1:]
        path = ensure_uri_compnents(path)
        def f(**kwargs):
-
+            search_params_t = [
+                (k, str(v).lower() if isinstance(v, bool) else v)
+                for k, v in search_params.items()
+            ] # tuple form
+            url = f"{self.config['endpoint']}/{path}" + "?" + urllib.parse.urlencode(search_params_t, doseq=True)
            headers: dict = kwargs.pop('headers', {})
            headers.update({
                'Authorization': f"Bearer {self.config['token']}",
            })
            headers.update(extra_headers)
            if self._session is not None:
-                response = self._session.request(method, url, headers=headers, **kwargs)
+                response = self._session.request(method, url, headers=headers, timeout=self.timeout, verify=self.verify, **kwargs)
                response.raise_for_status()
            else:
                with requests.Session() as s:
-                    response = s.request(method, url, headers=headers, **kwargs)
+                    response = s.request(method, url, headers=headers, timeout=self.timeout, verify=self.verify, **kwargs)
                    response.raise_for_status()
            return response
        return f
@@ -88,7 +102,7 @@ class Connector:
 
        # Skip ahead by checking if the file already exists
        if conflict == 'skip-ahead':
-            exists = self.
+            exists = self.get_meta(path)
            if exists is None:
                conflict = 'skip'
            else:
@@ -112,7 +126,7 @@ class Connector:
 
        # Skip ahead by checking if the file already exists
        if conflict == 'skip-ahead':
-            exists = self.
+            exists = self.get_meta(path)
            if exists is None:
                conflict = 'skip'
            else:
@@ -144,7 +158,7 @@ class Connector:
 
        # Skip ahead by checking if the file already exists
        if conflict == 'skip-ahead':
-            exists = self.
+            exists = self.get_meta(path)
            if exists is None:
                conflict = 'skip'
            else:
@@ -197,11 +211,22 @@ class Connector:
        assert response.headers['Content-Type'] == 'application/json'
        return response.json()
 
+    def get_multiple_text(self, *paths: str, skip_content = False) -> dict[str, Optional[str]]:
+        """
+        Gets text contents of multiple files at once. Non-existing files will return None.
+        - skip_content: if True, the file contents will not be fetched, always be empty string ''.
+        """
+        response = self._fetch_factory(
+            'GET', '_api/get-multiple',
+            {'path': paths, "skip_content": skip_content}
+        )()
+        return response.json()
+
    def delete(self, path: str):
        """Deletes the file at the specified path."""
        self._fetch_factory('DELETE', path)()
 
-    def
+    def get_meta(self, path: str) -> Optional[FileRecord | DirectoryRecord]:
        """Gets the metadata for the file at the specified path."""
        try:
            response = self._fetch_factory('GET', '_api/meta', {'path': path})()
@@ -213,6 +238,9 @@ class Connector:
            if e.response.status_code == 404:
                return None
            raise e
+    # shorthand methods for type constraints
+    def get_fmeta(self, path: str) -> Optional[FileRecord]: assert (f:=self.get_meta(path)) is None or isinstance(f, FileRecord); return f
+    def get_dmeta(self, path: str) -> Optional[DirectoryRecord]: assert (d:=self.get_meta(path)) is None or isinstance(d, DirectoryRecord); return d
 
    def list_path(self, path: str) -> PathContents:
        """
@@ -276,6 +304,14 @@ class Connector:
        self._fetch_factory('POST', '_api/copy', {'src': src, 'dst': dst})(
            headers = {'Content-Type': 'application/www-form-urlencoded'}
        )
+
+    def bundle(self, path: str) -> Iterator[bytes]:
+        """Bundle a path into a zip file."""
+        response = self._fetch_factory('GET', '_api/bundle', {'path': path})(
+            headers = {'Content-Type': 'application/www-form-urlencoded'},
+            stream = True
+        )
+        return response.iter_content(chunk_size=1024)
 
    def whoami(self) -> UserRecord:
        """Gets information about the current user."""
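Taken together, the client additions above (constructor `timeout`/`verify`, the typed `get_fmeta`/`get_dmeta` shorthands, `get_multiple_text`, and `bundle`) can be exercised roughly as follows. This is a minimal sketch based only on the signatures visible in this diff; the remote paths and the output filename are placeholders.

```python
from lfss.api import Connector

# timeout may be a single number or a (connect, read) tuple, as in requests
conn = Connector(timeout=(3, 30), verify=True)

# typed metadata shorthands; both return None when the target does not exist
fmeta = conn.get_fmeta("user/notes/todo.txt")
dmeta = conn.get_dmeta("user/notes/")

# batch text fetch via the new _api/get-multiple endpoint
texts = conn.get_multiple_text("user/notes/a.txt", "user/notes/b.txt")

# stream a zip bundle of a path to disk via _api/bundle
with open("notes-backup.zip", "wb") as f:
    for chunk in conn.bundle("user/notes/"):
        f.write(chunk)
```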
lfss/cli/cli.py
CHANGED
@@ -1,5 +1,5 @@
 from pathlib import Path
-import argparse, typing
+import argparse, typing, sys
 from lfss.api import Connector, upload_directory, upload_file, download_file, download_directory
 from lfss.eng.datatype import FileReadPermission, FileSortKey, DirSortKey
 from lfss.eng.utils import decode_uri_compnents
@@ -12,7 +12,7 @@ def parse_permission(s: str) -> FileReadPermission:
    raise ValueError(f"Invalid permission {s}")
 
 def parse_arguments():
-    parser = argparse.ArgumentParser(description="
+    parser = argparse.ArgumentParser(description="Client-side command line interface, set LFSS_ENDPOINT and LFSS_TOKEN environment variables for authentication.")
 
    sp = parser.add_subparsers(dest="command", required=True)
 
@@ -78,9 +78,9 @@ def main():
                permission=args.permission
            )
            if failed_upload:
-                print("\033[91mFailed to upload:\033[0m")
+                print("\033[91mFailed to upload:\033[0m", file=sys.stderr)
                for path in failed_upload:
-                    print(f" {path}")
+                    print(f" {path}", file=sys.stderr)
        else:
            success, msg = upload_file(
                connector,
@@ -93,7 +93,7 @@ def main():
                permission=args.permission
            )
            if not success:
-                print("\033[91mFailed to upload: \033[0m", msg)
+                print("\033[91mFailed to upload: \033[0m", msg, file=sys.stderr)
 
    elif args.command == "download":
        is_dir = args.src.endswith("/")
@@ -107,9 +107,9 @@ def main():
                overwrite=args.overwrite
            )
            if failed_download:
-                print("\033[91mFailed to download:\033[0m")
+                print("\033[91mFailed to download:\033[0m", file=sys.stderr)
                for path in failed_download:
-                    print(f" {path}")
+                    print(f" {path}", file=sys.stderr)
        else:
            success, msg = download_file(
                connector,
@@ -121,12 +121,12 @@ def main():
                overwrite=args.overwrite
            )
            if not success:
-                print("\033[91mFailed to download: \033[0m", msg)
+                print("\033[91mFailed to download: \033[0m", msg, file=sys.stderr)
 
    elif args.command == "query":
        for path in args.path:
            with catch_request_error():
-                res = connector.
+                res = connector.get_meta(path)
                if res is None:
                    print(f"\033[31mNot found\033[0m ({path})")
                else:
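The behavioral change here is that failure messages now go to stderr, so scripted callers can separate them from normal output. A rough Python equivalent of the upload error path; the argument order after the connector is an assumption for illustration, not taken from this diff.

```python
import sys
from lfss.api import Connector, upload_file

connector = Connector()  # reads LFSS_ENDPOINT / LFSS_TOKEN from the environment
# hypothetical call: the source and destination arguments are illustrative
success, msg = upload_file(connector, "local.txt", "user/remote.txt")
if not success:
    print("Failed to upload:", msg, file=sys.stderr)
    sys.exit(1)
```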
lfss/cli/log.py
ADDED
@@ -0,0 +1,77 @@
+from typing import Optional
+import argparse
+import rich.console
+import logging
+import sqlite3
+from lfss.eng.log import eval_logline
+
+console = rich.console.Console()
+def levelstr2int(levelstr: str) -> int:
+    import sys
+    if sys.version_info < (3, 11):
+        return logging.getLevelName(levelstr.upper())
+    else:
+        return logging.getLevelNamesMapping()[levelstr.upper()]
+
+def view(
+    db_file: str,
+    level: Optional[str] = None,
+    offset: int = 0,
+    limit: int = 1000
+    ):
+    conn = sqlite3.connect(db_file)
+    cursor = conn.cursor()
+    if level is None:
+        cursor.execute("SELECT * FROM log ORDER BY created DESC LIMIT ? OFFSET ?", (limit, offset))
+    else:
+        level_int = levelstr2int(level)
+        cursor.execute("SELECT * FROM log WHERE level >= ? ORDER BY created DESC LIMIT ? OFFSET ?", (level_int, limit, offset))
+    levelname_color = {
+        'DEBUG': 'blue',
+        'INFO': 'green',
+        'WARNING': 'yellow',
+        'ERROR': 'red',
+        'CRITICAL': 'bold red',
+        'FATAL': 'bold red'
+    }
+    for row in cursor.fetchall():
+        log = eval_logline(row)
+        console.print(f"{log.created} [{levelname_color[log.levelname]}][{log.levelname}] [default]{log.message}")
+    conn.close()
+
+def trim(db_file: str, keep: int = 1000, level: Optional[str] = None):
+    conn = sqlite3.connect(db_file)
+    cursor = conn.cursor()
+    if level is None:
+        cursor.execute("DELETE FROM log WHERE id NOT IN (SELECT id FROM log ORDER BY created DESC LIMIT ?)", (keep,))
+    else:
+        cursor.execute("DELETE FROM log WHERE levelname = ? and id NOT IN (SELECT id FROM log WHERE levelname = ? ORDER BY created DESC LIMIT ?)", (level.upper(), level.upper(), keep))
+    conn.commit()
+    conn.execute("VACUUM")
+    conn.close()
+
+def main():
+    parser = argparse.ArgumentParser(description="Log operations utility")
+    subparsers = parser.add_subparsers(title='subcommands', description='valid subcommands', help='additional help')
+
+    parser_show = subparsers.add_parser('view', help='Show logs')
+    parser_show.add_argument('db_file', type=str, help='Database file path')
+    parser_show.add_argument('-l', '--level', type=str, required=False, help='Log level')
+    parser_show.add_argument('--offset', type=int, default=0, help='Starting offset')
+    parser_show.add_argument('--limit', type=int, default=1000, help='Maximum number of entries to display')
+    parser_show.set_defaults(func=view)
+
+    parser_trim = subparsers.add_parser('trim', help='Trim logs')
+    parser_trim.add_argument('db_file', type=str, help='Database file path')
+    parser_trim.add_argument('-l', '--level', type=str, required=False, help='Log level')
+    parser_trim.add_argument('--keep', type=int, default=1000, help='Number of entries to keep')
+    parser_trim.set_defaults(func=trim)
+
+    args = parser.parse_args()
+    if hasattr(args, 'func'):
+        kwargs = vars(args)
+        func = kwargs.pop('func')
+        func(**kwargs)
+
+if __name__ == '__main__':
+    main()
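The new module is also importable as a small library, which is how vacuum.py uses `trim` further down. A minimal sketch of calling it directly; the database path is a placeholder.

```python
from lfss.cli.log import view, trim

# print the newest 100 entries at WARNING or above
view("data/logs/server.log.db", level="warning", limit=100)

# keep only the newest 1,000 rows, then VACUUM the log database
trim("data/logs/server.log.db", keep=1_000)
```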
lfss/cli/vacuum.py
CHANGED
@@ -2,10 +2,11 @@
 Vacuum the database and external storage to ensure that the storage is consistent and minimal.
 """
 
-from lfss.eng.config import LARGE_BLOB_DIR
-import argparse, time
+from lfss.eng.config import LARGE_BLOB_DIR, THUMB_DB, LOG_DIR
+import argparse, time, itertools
 from functools import wraps
 from asyncio import Semaphore
+import aiosqlite
 import aiofiles, asyncio
 import aiofiles.os
 from contextlib import contextmanager
@@ -13,6 +14,7 @@ from lfss.eng.database import transaction, unique_cursor
 from lfss.svc.request_log import RequestDB
 from lfss.eng.utils import now_stamp
 from lfss.eng.connection_pool import global_entrance
+from lfss.cli.log import trim
 
 sem: Semaphore
 
@@ -32,7 +34,7 @@ def barriered(func):
    return wrapper
 
 @global_entrance()
-async def vacuum_main(index: bool = False, blobs: bool = False):
+async def vacuum_main(index: bool = False, blobs: bool = False, thumbs: bool = False, logs: bool = False, vacuum_all: bool = False):
 
    # check if any file in the Large Blob directory is not in the database
    # the reverse operation is not necessary, because by design, the database should be the source of truth...
@@ -49,23 +51,68 @@ async def vacuum_main(index: bool = False, blobs: bool = False):
 
    # create a temporary index to speed up the process...
    with indicator("Clearing un-referenced files in external storage"):
-
-
-
-
-
-
-
-
-
+        try:
+            async with transaction() as c:
+                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+            for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+                f_id = f.name
+                await ensure_external_consistency(f_id)
+                if (i+1) % 1_000 == 0:
+                    print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+        finally:
+            async with transaction() as c:
+                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
 
-
-
-    with
+    if index or vacuum_all:
+        with indicator("VACUUM-index"):
+            async with transaction() as c:
+                await c.execute("DELETE FROM dupcount WHERE count = 0")
+            async with unique_cursor(is_write=True) as c:
                await c.execute("VACUUM main")
-
-
+    if blobs or vacuum_all:
+        with indicator("VACUUM-blobs"):
+            async with unique_cursor(is_write=True) as c:
                await c.execute("VACUUM blobs")
+
+    if logs or vacuum_all:
+        with indicator("VACUUM-logs"):
+            for log_file in LOG_DIR.glob("*.log.db"):
+                trim(str(log_file), keep=10_000)
+
+    if thumbs or vacuum_all:
+        try:
+            async with transaction() as c:
+                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+            with indicator("VACUUM-thumbs"):
+                if not THUMB_DB.exists():
+                    raise FileNotFoundError("Thumbnail database not found.")
+                async with unique_cursor() as db_c:
+                    async with aiosqlite.connect(THUMB_DB) as t_conn:
+                        batch_size = 10_000
+                        for batch_count in itertools.count(start=0):
+                            exceeded_rows = list(await (await t_conn.execute(
+                                "SELECT file_id FROM thumbs LIMIT ? OFFSET ?",
+                                (batch_size, batch_size * batch_count)
+                            )).fetchall())
+                            if not exceeded_rows:
+                                break
+                            batch_ids = [row[0] for row in exceeded_rows]
+                            for f_id in batch_ids:
+                                cursor = await db_c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                                if not await cursor.fetchone():
+                                    print(f"Thumbnail {f_id} not found in database, removing from thumb cache.")
+                                    await t_conn.execute("DELETE FROM thumbs WHERE file_id = ?", (f_id,))
+                            print(f"Checked {batch_count+1} batches of {batch_size} thumbnails.")
+
+                        await t_conn.commit()
+                        await t_conn.execute("VACUUM")
+        except FileNotFoundError as e:
+            if "Thumbnail database not found." in str(e):
+                print("Thumbnail database not found, skipping.")
+
+        finally:
+            async with transaction() as c:
+                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
 
 async def vacuum_requests():
    with indicator("VACUUM-requests"):
@@ -76,15 +123,18 @@ async def vacuum_requests():
 def main():
    global sem
    parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
+    parser.add_argument("--all", action="store_true", help="Vacuum all")
    parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
    parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
    parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
+    parser.add_argument("-t", "--thumb", action="store_true", help="Vacuum thumbnails")
    parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs to only keep at most recent 1M rows in 7 days")
+    parser.add_argument("-l", "--logs", action="store_true", help="Trim log to keep at most recent 10k rows for each category")
    args = parser.parse_args()
    sem = Semaphore(args.jobs)
-    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data))
+    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data, thumbs=args.thumb, logs = args.logs, vacuum_all=args.all))
 
-    if args.requests:
+    if args.requests or args.all:
        asyncio.run(vacuum_requests())
 
 if __name__ == '__main__':
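The `--all` flag now covers index, blobs, thumbnails, logs, and request logs in one pass. A minimal sketch of driving the same code from Python instead of the CLI; the job count is arbitrary, and the module-level semaphore is set here the same way main() does before calling in.

```python
import asyncio
from asyncio import Semaphore
import lfss.cli.vacuum as vacuum

vacuum.sem = Semaphore(2)  # main() normally sets this from --jobs
asyncio.run(vacuum.vacuum_main(vacuum_all=True))
asyncio.run(vacuum.vacuum_requests())
```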
lfss/eng/config.py
CHANGED
@@ -11,17 +11,19 @@ if not DATA_HOME.exists():
 DATA_HOME = DATA_HOME.resolve().absolute()
 LARGE_BLOB_DIR = DATA_HOME / 'large_blobs'
 LARGE_BLOB_DIR.mkdir(exist_ok=True)
+LOG_DIR = DATA_HOME / 'logs'
+
+DISABLE_LOGGING = os.environ.get('DISABLE_LOGGING', '0') == '1'
 
 # https://sqlite.org/fasterthanfs.html
 __env_large_file = os.environ.get('LFSS_LARGE_FILE', None)
 if __env_large_file is not None:
    LARGE_FILE_BYTES = parse_storage_size(__env_large_file)
 else:
-    LARGE_FILE_BYTES =
-MAX_MEM_FILE_BYTES = 128 * 1024 * 1024
-MAX_BUNDLE_BYTES = 512 * 1024 * 1024 # 512MB
+    LARGE_FILE_BYTES = 1 * 1024 * 1024 # 1MB
+MAX_MEM_FILE_BYTES = 128 * 1024 * 1024 # 128MB
 CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming (on large files)
 DEBUG_MODE = os.environ.get('LFSS_DEBUG', '0') == '1'
 
-THUMB_DB = DATA_HOME / 'thumbs.db'
-THUMB_SIZE = (
+THUMB_DB = DATA_HOME / 'thumbs.v0-11.db'
+THUMB_SIZE = (64, 64)
lfss/eng/connection_pool.py
CHANGED
@@ -8,7 +8,7 @@ from functools import wraps
 from typing import Callable, Awaitable
 
 from .log import get_logger
-from .error import DatabaseLockedError
+from .error import DatabaseLockedError, DatabaseTransactionError
 from .config import DATA_HOME
 
 async def execute_sql(conn: aiosqlite.Connection | aiosqlite.Cursor, name: str):
@@ -29,7 +29,7 @@ async def get_connection(read_only: bool = False) -> aiosqlite.Connection:
 
    conn = await aiosqlite.connect(
        get_db_uri(DATA_HOME / 'index.db', read_only=read_only),
-        timeout =
+        timeout = 10, uri = True
    )
    async with conn.cursor() as c:
        await c.execute(
@@ -147,6 +147,14 @@ def global_entrance(n_read: int = 1):
        return wrapper
    return decorator
 
+def handle_sqlite_error(e: Exception):
+    if 'database is locked' in str(e):
+        raise DatabaseLockedError from e
+    if 'cannot start a transaction within a transaction' in str(e):
+        get_logger('database', global_instance=True).error(f"Unexpected error: {e}")
+        raise DatabaseTransactionError from e
+    raise e
+
 @asynccontextmanager
 async def unique_cursor(is_write: bool = False):
    if not is_write:
@@ -155,9 +163,7 @@ async def unique_cursor(is_write: bool = False):
        try:
            yield await connection_obj.conn.cursor()
        except Exception as e:
-
-            raise DatabaseLockedError from e
-            raise e
+            handle_sqlite_error(e)
        finally:
            await g_pool.release(connection_obj)
    else:
@@ -166,9 +172,7 @@ async def unique_cursor(is_write: bool = False):
        try:
            yield await connection_obj.conn.cursor()
        except Exception as e:
-
-            raise DatabaseLockedError from e
-            raise e
+            handle_sqlite_error(e)
        finally:
            await g_pool.release(connection_obj)
 
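A minimal sketch of how a caller sees the reworked error mapping, assuming the pool is initialized through global_entrance the same way vacuum.py does; the query against fmeta is a placeholder.

```python
import asyncio
from lfss.eng.connection_pool import global_entrance, unique_cursor
from lfss.eng.error import DatabaseLockedError, DatabaseTransactionError

@global_entrance()
async def count_files() -> int:
    try:
        async with unique_cursor() as cur:  # read-only cursor from the pool
            await cur.execute("SELECT COUNT(*) FROM fmeta")
            row = await cur.fetchone()
            return row[0]
    except (DatabaseLockedError, DatabaseTransactionError):
        # handle_sqlite_error() maps raw sqlite messages to these exception types
        raise

print(asyncio.run(count_files()))
```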