lfss 0.10.0__tar.gz → 0.11.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {lfss-0.10.0 → lfss-0.11.1}/PKG-INFO +3 -3
  2. {lfss-0.10.0 → lfss-0.11.1}/frontend/scripts.js +43 -39
  3. lfss-0.11.1/frontend/utils.js +194 -0
  4. {lfss-0.10.0 → lfss-0.11.1}/lfss/api/__init__.py +7 -4
  5. {lfss-0.10.0 → lfss-0.11.1}/lfss/api/connector.py +7 -4
  6. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/cli.py +2 -2
  7. lfss-0.11.1/lfss/cli/vacuum.py +134 -0
  8. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/config.py +1 -1
  9. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/database.py +122 -46
  10. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/thumb.py +16 -23
  11. {lfss-0.10.0 → lfss-0.11.1}/lfss/sql/init.sql +9 -4
  12. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app.py +1 -1
  13. {lfss-0.10.0 → lfss-0.11.1}/pyproject.toml +2 -2
  14. lfss-0.10.0/frontend/utils.js +0 -96
  15. lfss-0.10.0/lfss/cli/vacuum.py +0 -91
  16. {lfss-0.10.0 → lfss-0.11.1}/Readme.md +0 -0
  17. {lfss-0.10.0 → lfss-0.11.1}/docs/Enviroment_variables.md +0 -0
  18. {lfss-0.10.0 → lfss-0.11.1}/docs/Known_issues.md +0 -0
  19. {lfss-0.10.0 → lfss-0.11.1}/docs/Permission.md +0 -0
  20. {lfss-0.10.0 → lfss-0.11.1}/docs/Webdav.md +0 -0
  21. /lfss-0.10.0/docs/Changelog.md → /lfss-0.11.1/docs/changelog.md +0 -0
  22. {lfss-0.10.0 → lfss-0.11.1}/frontend/api.js +0 -0
  23. {lfss-0.10.0 → lfss-0.11.1}/frontend/index.html +0 -0
  24. {lfss-0.10.0 → lfss-0.11.1}/frontend/info.css +0 -0
  25. {lfss-0.10.0 → lfss-0.11.1}/frontend/info.js +0 -0
  26. {lfss-0.10.0 → lfss-0.11.1}/frontend/login.css +0 -0
  27. {lfss-0.10.0 → lfss-0.11.1}/frontend/login.js +0 -0
  28. {lfss-0.10.0 → lfss-0.11.1}/frontend/popup.css +0 -0
  29. {lfss-0.10.0 → lfss-0.11.1}/frontend/popup.js +0 -0
  30. {lfss-0.10.0 → lfss-0.11.1}/frontend/state.js +0 -0
  31. {lfss-0.10.0 → lfss-0.11.1}/frontend/styles.css +0 -0
  32. {lfss-0.10.0 → lfss-0.11.1}/frontend/thumb.css +0 -0
  33. {lfss-0.10.0 → lfss-0.11.1}/frontend/thumb.js +0 -0
  34. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/__init__.py +0 -0
  35. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/balance.py +0 -0
  36. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/panel.py +0 -0
  37. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/serve.py +0 -0
  38. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/user.py +0 -0
  39. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/__init__.py +0 -0
  40. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/bounded_pool.py +0 -0
  41. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/connection_pool.py +0 -0
  42. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/datatype.py +0 -0
  43. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/error.py +0 -0
  44. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/log.py +0 -0
  45. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/utils.py +0 -0
  46. {lfss-0.10.0 → lfss-0.11.1}/lfss/sql/pragma.sql +0 -0
  47. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_base.py +0 -0
  48. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_dav.py +0 -0
  49. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_native.py +0 -0
  50. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/common_impl.py +0 -0
  51. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/request_log.py +0 -0
{lfss-0.10.0 → lfss-0.11.1}/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 2.1
 Name: lfss
-Version: 0.10.0
+Version: 0.11.1
 Summary: Lightweight file storage service
 Home-page: https://github.com/MenxLi/lfss
-Author: li_mengxun
-Author-email: limengxun45@outlookc.com
+Author: Li, Mengxun
+Author-email: mengxunli@whu.edu.cn
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
{lfss-0.10.0 → lfss-0.11.1}/frontend/scripts.js
@@ -5,6 +5,7 @@ import { showInfoPanel, showDirInfoPanel } from './info.js';
 import { makeThumbHtml } from './thumb.js';
 import { store } from './state.js';
 import { maybeShowLoginPanel } from './login.js';
+import { forEachFile } from './utils.js';
 
 /** @type {import('./api.js').UserRecord}*/
 let userRecord = null;
@@ -158,55 +159,58 @@ uploadFileNameInput.addEventListener('input', debounce(onFileNameInpuChange, 500
         e.preventDefault();
         e.stopPropagation();
     });
-    window.addEventListener('drop', (e) => {
+    window.addEventListener('drop', async (e) => {
         e.preventDefault();
         e.stopPropagation();
-        const files = e.dataTransfer.files;
-        if (files.length == 1){
-            uploadFileSelector.files = files;
-            uploadFileNameInput.value = files[0].name;
+        const items = e.dataTransfer.items;
+        if (items.length == 1 && items[0].kind === 'file' && items[0].webkitGetAsEntry().isFile){
+            uploadFileSelector.files = e.dataTransfer.files;
+            uploadFileNameInput.value = e.dataTransfer.files[0].name;
             uploadFileNameInput.focus();
+            return;
         }
-        else if (files.length > 1){
-            let dstPath = store.dirpath + uploadFileNameInput.value;
-            if (!dstPath.endsWith('/')){ dstPath += '/'; }
-            if (!confirm(`
+
+        /** @type {[string, File][]} */
+        const uploadInputVal = uploadFileNameInput.value? uploadFileNameInput.value : '';
+        let dstPath = store.dirpath + uploadInputVal;
+        if (!dstPath.endsWith('/')){ dstPath += '/'; }
+
+        if (!confirm(`\
 You are trying to upload multiple files at once.
 This will directly upload the files to the [${dstPath}] directory without renaming.
 Note that same name files will be overwritten.
-Are you sure you want to proceed?
-`)){ return; }
-
-        let counter = 0;
-        async function uploadFileFn(...args){
-            const [file, path] = args;
-            try{
-                await uploadFile(conn, path, file, {conflict: 'overwrite'});
-            }
-            catch (err){
-                showPopup('Failed to upload file [' + file.name + ']: ' + err, {level: 'error', timeout: 5000});
-            }
-            counter += 1;
-            console.log("Uploading file: ", counter, "/", files.length);
+Are you sure you want to proceed?\
+`)){ return; }
+
+        let counter = 0;
+        async function uploadFileFn(path, file){
+            const this_count = counter;
+            try{
+                await uploadFile(conn, path, file, {conflict: 'overwrite'});
             }
-
-        let promises = [];
-        for (let i = 0; i < files.length; i++){
-            const file = files[i];
-            const path = dstPath + file.name;
-            promises.push(uploadFileFn(file, path));
+            catch (err){
+                showPopup('Failed to upload file [' + file.name + ']: ' + err, {level: 'error', timeout: 5000});
             }
-        showPopup('Uploading multiple files...', {level: 'info', timeout: 3000});
-        Promise.all(promises).then(
-            () => {
-                showPopup('Upload success.', {level: 'success', timeout: 3000});
-                refreshFileList();
-            },
-            (err) => {
-                showPopup('Failed to upload some files: ' + err, {level: 'error', timeout: 5000});
-            }
-        );
+            console.log(`[${this_count}/${counter}] Uploaded file: ${path}`);
         }
+
+        const promises = await forEachFile(e, async (relPath, filePromise) => {
+            counter += 1;
+            const file = await filePromise;
+            await uploadFileFn(dstPath + relPath, file);
+        });
+
+        showPopup('Uploading multiple files...', {level: 'info', timeout: 3000});
+        Promise.all(promises).then(
+            () => {
+                showPopup('Upload success.', {level: 'success', timeout: 3000});
+                refreshFileList();
+            },
+            (err) => {
+                showPopup('Failed to upload some files: ' + err, {level: 'error', timeout: 5000});
+            }
+        );
+
     });
 }
 
lfss-0.11.1/frontend/utils.js
@@ -0,0 +1,194 @@
+
+export function formatSize(size){
+    if (size < 0){
+        return '';
+    }
+    const sizeInKb = size / 1024;
+    const sizeInMb = sizeInKb / 1024;
+    const sizeInGb = sizeInMb / 1024;
+    if (sizeInGb > 1){
+        return sizeInGb.toFixed(2) + ' GB';
+    }
+    else if (sizeInMb > 1){
+        return sizeInMb.toFixed(2) + ' MB';
+    }
+    else if (sizeInKb > 1){
+        return sizeInKb.toFixed(2) + ' KB';
+    }
+    else {
+        return size + ' B';
+    }
+}
+
+export function copyToClipboard(text){
+    function secureCopy(text){
+        navigator.clipboard.writeText(text);
+    }
+    function unsecureCopy(text){
+        const el = document.createElement('textarea');
+        el.value = text;
+        document.body.appendChild(el);
+        el.select();
+        document.execCommand('copy');
+        document.body.removeChild(el);
+    }
+    if (navigator.clipboard){
+        secureCopy(text);
+    }
+    else {
+        unsecureCopy(text);
+    }
+}
+
+export function encodePathURI(path){
+    return path.split('/').map(encodeURIComponent).join('/');
+}
+
+export function decodePathURI(path){
+    return path.split('/').map(decodeURIComponent).join('/');
+}
+
+export function ensurePathURI(path){
+    return encodePathURI(decodePathURI(path));
+}
+
+export function getRandomString(n, additionalCharset='0123456789_-(=)[]{}'){
+    let result = '';
+    let charset = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
+    const firstChar = charset[Math.floor(Math.random() * charset.length)];
+    const lastChar = charset[Math.floor(Math.random() * charset.length)];
+    result += firstChar;
+    charset += additionalCharset;
+    for (let i = 0; i < n-2; i++){
+        result += charset[Math.floor(Math.random() * charset.length)];
+    }
+    result += lastChar;
+    return result;
+};
+
+/**
+ * @param {string} dateStr
+ * @returns {string}
+ */
+export function cvtGMT2Local(dateStr){
+    if (!dateStr || dateStr === 'N/A'){
+        return '';
+    }
+    const gmtdate = new Date(dateStr);
+    const localdate = new Date(gmtdate.getTime() + gmtdate.getTimezoneOffset() * 60000);
+    return localdate.toISOString().slice(0, 19).replace('T', ' ');
+}
+
+export function debounce(fn, wait){
+    let timeout;
+    return function(...args){
+        const context = this;
+        if (timeout) clearTimeout(timeout);
+        timeout = setTimeout(() => fn.apply(context, args), wait);
+    }
+}
+
+export function asHtmlText(text){
+    const anonElem = document.createElement('div');
+    anonElem.textContent = text;
+    const htmlText = anonElem.innerHTML;
+    return htmlText;
+}
+
+/**
+ * Iterates over all files dropped in the event,
+ * including files inside directories, and processes them
+ * using the provided callback with a concurrency limit.
+ *
+ * @param {Event} e The drop event.
+ * @param {(relPath: string, file: Promise<File>) => Promise<void>} callback A function
+ *   that receives the relative path and a promise for the File.
+ * @param {number} [maxConcurrent=16] Maximum number of concurrent callback executions.
+ * @returns {Promise<Promise<void>[]>} A promise resolving to an array of callback promises.
+ */
+export async function forEachFile(e, callback, maxConcurrent = 16) {
+    const results = []; // to collect callback promises
+
+    // Concurrency barrier variables.
+    let activeCount = 0;
+    const queue = [];
+
+    /**
+     * Runs the given async task when below the concurrency limit.
+     * If at limit, waits until a slot is free.
+     *
+     * @param {() => Promise<any>} task An async function returning a promise.
+     * @returns {Promise<any>}
+     */
+    async function runWithLimit(task) {
+        // If we reached the concurrency limit, wait for a free slot.
+        if (activeCount >= maxConcurrent) {
+            await new Promise(resolve => queue.push(resolve));
+        }
+        activeCount++;
+        try {
+            return await task();
+        } finally {
+            activeCount--;
+            // If there are waiting tasks, allow the next one to run.
+            if (queue.length) {
+                queue.shift()();
+            }
+        }
+    }
+
+    /**
+     * Recursively traverses a file system entry.
+     *
+     * @param {FileSystemEntry} entry The entry (file or directory).
+     * @param {string} path The current relative path.
+     */
+    async function traverse(entry, path) {
+        if (entry.isFile) {
+            // Wrap file retrieval in a promise.
+            const filePromise = new Promise((resolve, reject) => {
+                entry.file(resolve, reject);
+            });
+            // Use the concurrency barrier for the callback invocation.
+            results.push(runWithLimit(() => callback(path + entry.name, filePromise)));
+        } else if (entry.isDirectory) {
+            const reader = entry.createReader();
+
+            async function readAllEntries(reader) {
+                const entries = [];
+                while (true) {
+                    const chunk = await new Promise((resolve, reject) => {
+                        reader.readEntries(resolve, reject);
+                    });
+                    if (chunk.length === 0) break;
+                    entries.push(...chunk);
+                }
+                return entries;
+            }
+
+            const entries = await readAllEntries(reader);
+            await Promise.all(
+                entries.map(ent => traverse(ent, path + entry.name + '/'))
+            );
+        }
+    }
+
+    // Process using DataTransfer items if available.
+    if (e.dataTransfer && e.dataTransfer.items) {
+        await Promise.all(
+            Array.from(e.dataTransfer.items).map(async item => {
+                const entry = item.webkitGetAsEntry && item.webkitGetAsEntry();
+                if (entry) {
+                    await traverse(entry, '');
+                }
+            })
+        );
+    } else if (e.dataTransfer && e.dataTransfer.files) {
+        // Fallback for browsers that support only dataTransfer.files.
+        Array.from(e.dataTransfer.files).forEach(file => {
+            results.push(runWithLimit(() => callback(file.name, Promise.resolve(file))));
+        });
+    }
+    return results;
+}
+
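
The concurrency barrier that forEachFile hand-rolls with `activeCount` and a resolve queue is the same gating the Python side of this release expresses with `asyncio.Semaphore` (compare `barriered` in the new lfss/cli/vacuum.py below). A minimal sketch of the equivalent pattern in Python, with a placeholder task standing in for the upload callback — not lfss API:

    import asyncio

    async def run_with_limit(task_fns, max_concurrent=16):
        # Counting semaphore: at most max_concurrent tasks in flight;
        # the rest wait in line, like the resolve queue in runWithLimit.
        sem = asyncio.Semaphore(max_concurrent)
        async def gated(fn):
            async with sem:        # acquire a slot (activeCount++)
                return await fn()  # slot released on exit (activeCount--, wake next)
        return await asyncio.gather(*(gated(fn) for fn in task_fns))

    async def demo():
        async def fake_upload(i):          # placeholder for uploadFileFn
            await asyncio.sleep(0.01)
            return i
        done = await run_with_limit([lambda i=i: fake_upload(i) for i in range(100)])
        print(len(done), "uploads completed")

    asyncio.run(demo())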
{lfss-0.10.0 → lfss-0.11.1}/lfss/api/__init__.py
@@ -113,7 +113,7 @@ def download_file(
         print(f"File {file_path} already exists, skipping download.")
         return True, error_msg
     try:
-        fmeta = connector.get_metadata(src_url)
+        fmeta = connector.get_meta(src_url)
         if fmeta is None:
             error_msg = "File not found."
             return False, error_msg
@@ -170,14 +170,15 @@ def download_directory(
     _counter = 0
     _counter_lock = Lock()
     failed_items: list[tuple[str, str]] = []
+    file_count = 0
     def get_file(c, src_url):
-        nonlocal _counter, failed_items
+        nonlocal _counter, failed_items, file_count, verbose
         with _counter_lock:
             _counter += 1
             this_count = _counter
         dst_path = f"{directory}{os.path.relpath(decode_uri_compnents(src_url), decode_uri_compnents(src_path))}"
         if verbose:
-            print(f"[{this_count}] Downloading {src_url} to {dst_path}")
+            print(f"[{this_count}/{file_count}] Downloading {src_url} to {dst_path}")
 
         if not (res:=download_file(
             c, src_url, dst_path,
@@ -185,11 +186,13 @@ def download_directory(
         ))[0]:
             failed_items.append((src_url, res[1]))
 
-    batch_size = 10000
+    batch_size = 10_000
     file_list: list[FileRecord] = []
     with connector.session(n_concurrent) as c:
         file_count = c.count_files(src_path, flat=True)
         for offset in range(0, file_count, batch_size):
+            if verbose:
+                print(f"Retrieving file list... ({offset}/{file_count})", end='\r')
             file_list.extend(c.list_files(
                 src_path, offset=offset, limit=batch_size, flat=True
             ))
{lfss-0.10.0 → lfss-0.11.1}/lfss/api/connector.py
@@ -98,7 +98,7 @@ class Connector:
 
         # Skip ahead by checking if the file already exists
         if conflict == 'skip-ahead':
-            exists = self.get_metadata(path)
+            exists = self.get_meta(path)
             if exists is None:
                 conflict = 'skip'
             else:
@@ -122,7 +122,7 @@
 
         # Skip ahead by checking if the file already exists
         if conflict == 'skip-ahead':
-            exists = self.get_metadata(path)
+            exists = self.get_meta(path)
             if exists is None:
                 conflict = 'skip'
             else:
@@ -154,7 +154,7 @@
 
         # Skip ahead by checking if the file already exists
         if conflict == 'skip-ahead':
-            exists = self.get_metadata(path)
+            exists = self.get_meta(path)
             if exists is None:
                 conflict = 'skip'
             else:
@@ -211,7 +211,7 @@
         """Deletes the file at the specified path."""
         self._fetch_factory('DELETE', path)()
 
-    def get_metadata(self, path: str) -> Optional[FileRecord | DirectoryRecord]:
+    def get_meta(self, path: str) -> Optional[FileRecord | DirectoryRecord]:
         """Gets the metadata for the file at the specified path."""
         try:
             response = self._fetch_factory('GET', '_api/meta', {'path': path})()
@@ -223,6 +223,9 @@
             if e.response.status_code == 404:
                 return None
             raise e
+    # shorthand methods for type constraints
+    def get_fmeta(self, path: str) -> Optional[FileRecord]: assert (f:=self.get_meta(path)) is None or isinstance(f, FileRecord); return f
+    def get_dmeta(self, path: str) -> Optional[DirectoryRecord]: assert (d:=self.get_meta(path)) is None or isinstance(d, DirectoryRecord); return d
 
     def list_path(self, path: str) -> PathContents:
         """
{lfss-0.10.0 → lfss-0.11.1}/lfss/cli/cli.py
@@ -12,7 +12,7 @@ def parse_permission(s: str) -> FileReadPermission:
     raise ValueError(f"Invalid permission {s}")
 
 def parse_arguments():
-    parser = argparse.ArgumentParser(description="Command line interface, please set LFSS_ENDPOINT and LFSS_TOKEN environment variables.")
+    parser = argparse.ArgumentParser(description="Client-side command line interface, set LFSS_ENDPOINT and LFSS_TOKEN environment variables for authentication.")
 
     sp = parser.add_subparsers(dest="command", required=True)
 
@@ -126,7 +126,7 @@ def main():
     elif args.command == "query":
         for path in args.path:
             with catch_request_error():
-                res = connector.get_metadata(path)
+                res = connector.get_meta(path)
                 if res is None:
                     print(f"\033[31mNot found\033[0m ({path})")
                 else:
lfss-0.11.1/lfss/cli/vacuum.py
@@ -0,0 +1,134 @@
+"""
+Vacuum the database and external storage to ensure that the storage is consistent and minimal.
+"""
+
+from lfss.eng.config import LARGE_BLOB_DIR, THUMB_DB
+import argparse, time, itertools
+from functools import wraps
+from asyncio import Semaphore
+import aiosqlite
+import aiofiles, asyncio
+import aiofiles.os
+from contextlib import contextmanager
+from lfss.eng.database import transaction, unique_cursor
+from lfss.svc.request_log import RequestDB
+from lfss.eng.utils import now_stamp
+from lfss.eng.connection_pool import global_entrance
+
+sem: Semaphore
+
+@contextmanager
+def indicator(name: str):
+    print(f"\033[1;33mRunning {name}... \033[0m")
+    s = time.time()
+    yield
+    print(f"{name} took {time.time() - s:.2f} seconds.")
+
+def barriered(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        global sem
+        async with sem:
+            return await func(*args, **kwargs)
+    return wrapper
+
+@global_entrance()
+async def vacuum_main(index: bool = False, blobs: bool = False, thumbs: bool = False, vacuum_all: bool = False):
+
+    # check if any file in the Large Blob directory is not in the database
+    # the reverse operation is not necessary, because by design, the database should be the source of truth...
+    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
+    async def ensure_external_consistency(f_id: str):
+        @barriered
+        async def fn():
+            async with unique_cursor() as c:
+                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                if not await cursor.fetchone():
+                    print(f"File {f_id} not found in database, removing from external storage.")
+                    await aiofiles.os.remove(f)
+        await asyncio.create_task(fn())
+
+    # create a temporary index to speed up the process...
+    with indicator("Clearing un-referenced files in external storage"):
+        try:
+            async with transaction() as c:
+                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+            for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+                f_id = f.name
+                await ensure_external_consistency(f_id)
+                if (i+1) % 1_000 == 0:
+                    print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+        finally:
+            async with transaction() as c:
+                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+
+    if index or vacuum_all:
+        with indicator("VACUUM-index"):
+            async with transaction() as c:
+                await c.execute("DELETE FROM dupcount WHERE count = 0")
+            async with unique_cursor(is_write=True) as c:
+                await c.execute("VACUUM main")
+    if blobs or vacuum_all:
+        with indicator("VACUUM-blobs"):
+            async with unique_cursor(is_write=True) as c:
+                await c.execute("VACUUM blobs")
+
+    if thumbs or vacuum_all:
+        try:
+            async with transaction() as c:
+                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+            with indicator("VACUUM-thumbs"):
+                if not THUMB_DB.exists():
+                    raise FileNotFoundError("Thumbnail database not found.")
+                async with unique_cursor() as db_c:
+                    async with aiosqlite.connect(THUMB_DB) as t_conn:
+                        batch_size = 10_000
+                        for batch_count in itertools.count(start=0):
+                            exceeded_rows = list(await (await t_conn.execute(
+                                "SELECT file_id FROM thumbs LIMIT ? OFFSET ?",
+                                (batch_size, batch_size * batch_count)
+                            )).fetchall())
+                            if not exceeded_rows:
+                                break
+                            batch_ids = [row[0] for row in exceeded_rows]
+                            for f_id in batch_ids:
+                                cursor = await db_c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                                if not await cursor.fetchone():
+                                    print(f"Thumbnail {f_id} not found in database, removing from thumb cache.")
+                                    await t_conn.execute("DELETE FROM thumbs WHERE file_id = ?", (f_id,))
+                            print(f"Checked {batch_count+1} batches of {batch_size} thumbnails.")
+
+                        await t_conn.commit()
+                        await t_conn.execute("VACUUM")
+        except FileNotFoundError as e:
+            if "Thumbnail database not found." in str(e):
+                print("Thumbnail database not found, skipping.")
+
+        finally:
+            async with transaction() as c:
+                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+
+async def vacuum_requests():
+    with indicator("VACUUM-requests"):
+        async with RequestDB().connect() as req_db:
+            await req_db.shrink(max_rows=1_000_000, time_before=now_stamp() - 7*24*60*60)
+            await req_db.conn.execute("VACUUM")
+
+def main():
+    global sem
+    parser = argparse.ArgumentParser(description="Vacuum the database and external storage to ensure that the storage is consistent and minimal.")
+    parser.add_argument("--all", action="store_true", help="Vacuum all")
+    parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
+    parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
+    parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
+    parser.add_argument("-t", "--thumb", action="store_true", help="Vacuum thumbnails")
+    parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs, keeping at most the most recent 1M rows from the last 7 days")
+    args = parser.parse_args()
+    sem = Semaphore(args.jobs)
+    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data, thumbs=args.thumb, vacuum_all=args.all))
+
+    if args.requests or args.all:
+        asyncio.run(vacuum_requests())
+
+if __name__ == '__main__':
+    main()
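
With the `__main__` guard above, the tool can be invoked as `python -m lfss.cli.vacuum --all -j 4`; whether this release also registers a console script would be decided in the pyproject.toml change, which this view does not show. A sketch of driving it programmatically, mirroring what `main()` does (the module-level `sem` must be set first, or the barriered tasks fail with a NameError):

    import asyncio
    from asyncio import Semaphore
    import lfss.cli.vacuum as vacuum

    vacuum.sem = Semaphore(4)  # concurrency for the external-storage check, as -j sets
    asyncio.run(vacuum.vacuum_main(index=True, blobs=True, thumbs=True))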
{lfss-0.10.0 → lfss-0.11.1}/lfss/eng/config.py
@@ -22,5 +22,5 @@ MAX_MEM_FILE_BYTES = 128 * 1024 * 1024 # 128MB
 CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming (on large files)
 DEBUG_MODE = os.environ.get('LFSS_DEBUG', '0') == '1'
 
-THUMB_DB = DATA_HOME / 'thumbs.db'
+THUMB_DB = DATA_HOME / 'thumbs.v0-11.db'
 THUMB_SIZE = (48, 48)