lfss 0.7.9__tar.gz → 0.7.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {lfss-0.7.9 → lfss-0.7.11}/PKG-INFO +2 -1
  2. {lfss-0.7.9 → lfss-0.7.11}/lfss/cli/balance.py +8 -58
  3. {lfss-0.7.9 → lfss-0.7.11}/lfss/cli/serve.py +1 -1
  4. lfss-0.7.11/lfss/cli/vacuum.py +91 -0
  5. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/config.py +1 -0
  6. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/database.py +7 -3
  7. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/log.py +2 -2
  8. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/server.py +3 -4
  9. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/stat.py +32 -0
  10. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/utils.py +1 -0
  11. {lfss-0.7.9 → lfss-0.7.11}/pyproject.toml +4 -2
  12. {lfss-0.7.9 → lfss-0.7.11}/Readme.md +0 -0
  13. {lfss-0.7.9 → lfss-0.7.11}/docs/Known_issues.md +0 -0
  14. {lfss-0.7.9 → lfss-0.7.11}/docs/Permission.md +0 -0
  15. {lfss-0.7.9 → lfss-0.7.11}/frontend/api.js +0 -0
  16. {lfss-0.7.9 → lfss-0.7.11}/frontend/index.html +0 -0
  17. {lfss-0.7.9 → lfss-0.7.11}/frontend/info.css +0 -0
  18. {lfss-0.7.9 → lfss-0.7.11}/frontend/info.js +0 -0
  19. {lfss-0.7.9 → lfss-0.7.11}/frontend/popup.css +0 -0
  20. {lfss-0.7.9 → lfss-0.7.11}/frontend/popup.js +0 -0
  21. {lfss-0.7.9 → lfss-0.7.11}/frontend/scripts.js +0 -0
  22. {lfss-0.7.9 → lfss-0.7.11}/frontend/styles.css +0 -0
  23. {lfss-0.7.9 → lfss-0.7.11}/frontend/utils.js +0 -0
  24. {lfss-0.7.9 → lfss-0.7.11}/lfss/cli/cli.py +0 -0
  25. {lfss-0.7.9 → lfss-0.7.11}/lfss/cli/panel.py +0 -0
  26. {lfss-0.7.9 → lfss-0.7.11}/lfss/cli/user.py +0 -0
  27. {lfss-0.7.9 → lfss-0.7.11}/lfss/client/__init__.py +0 -0
  28. {lfss-0.7.9 → lfss-0.7.11}/lfss/client/api.py +0 -0
  29. {lfss-0.7.9 → lfss-0.7.11}/lfss/sql/init.sql +0 -0
  30. {lfss-0.7.9 → lfss-0.7.11}/lfss/sql/pragma.sql +0 -0
  31. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/__init__.py +0 -0
  32. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/connection_pool.py +0 -0
  33. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/datatype.py +0 -0
  34. {lfss-0.7.9 → lfss-0.7.11}/lfss/src/error.py +0 -0
{lfss-0.7.9 → lfss-0.7.11}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lfss
- Version: 0.7.9
+ Version: 0.7.11
  Summary: Lightweight file storage service
  Home-page: https://github.com/MenxLi/lfss
  Author: li, mengxun
@@ -15,6 +15,7 @@ Requires-Dist: aiofiles (==23.*)
  Requires-Dist: aiosqlite (==0.*)
  Requires-Dist: fastapi (==0.*)
  Requires-Dist: mimesniff (==1.*)
+ Requires-Dist: uvicorn (==0.*)
  Project-URL: Repository, https://github.com/MenxLi/lfss
  Description-Content-Type: text/markdown

{lfss-0.7.9 → lfss-0.7.11}/lfss/cli/balance.py
@@ -8,26 +8,16 @@ from functools import wraps
  from asyncio import Semaphore
  import aiofiles, asyncio
  import aiofiles.os
- from contextlib import contextmanager
  from lfss.src.database import transaction, unique_cursor
  from lfss.src.connection_pool import global_entrance

- @contextmanager
- def indicator(name: str):
-     print(f"\033[1;33mRunning {name}... \033[0m")
-     s = time.time()
-     yield
-     print(f"{name} took {time.time() - s:.2f} seconds.")
-
- sem = Semaphore(1)
-
- def _get_sem():
-     return sem
+ sem: Semaphore

  def barriered(func):
      @wraps(func)
      async def wrapper(*args, **kwargs):
-         async with _get_sem():
+         global sem
+         async with sem:
              return await func(*args, **kwargs)
      return wrapper

@@ -62,8 +52,9 @@ async def move_to_internal(f_id: str, flag: str = ''):

  @global_entrance()
  async def _main(batch_size: int = 10000):
-     tasks = []
+     start_time = time.time()

+     tasks = []
      e_cout = 0
      for batch_count in itertools.count(start=0):
          async with unique_cursor() as conn:
@@ -80,6 +71,7 @@ async def _main(batch_size: int = 10000):
              tasks.append(move_to_external(f_id, flag=f"[b{batch_count+1}-e{i+1}/{len(exceeded_rows)}] "))
      await asyncio.gather(*tasks)

+     tasks = []
      i_count = 0
      for batch_count in itertools.count(start=0):
          async with unique_cursor() as conn:
@@ -96,58 +88,16 @@ async def _main(batch_size: int = 10000):
              tasks.append(move_to_internal(f_id, flag=f"[b{batch_count+1}-i{i+1}/{len(under_rows)}] "))
      await asyncio.gather(*tasks)

-     print(f"Finished. {e_cout} files moved to external storage, {i_count} files moved to internal storage.")
-
- @global_entrance()
- async def vacuum(index: bool = False, blobs: bool = False):
-
-     # check if any file in the Large Blob directory is not in the database
-     # the reverse operation is not necessary, because by design, the database should be the source of truth...
-     # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
-     async def ensure_external_consistency(f_id: str):
-         @barriered
-         async def fn():
-             async with unique_cursor() as c:
-                 cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
-                 if not await cursor.fetchone():
-                     print(f"File {f_id} not found in database, removing from external storage.")
-                     await aiofiles.os.remove(f)
-         await asyncio.create_task(fn())
-
-     # create a temporary index to speed up the process...
-     with indicator("Clearing un-referenced files in external storage"):
-         async with transaction() as c:
-             await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
-         for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
-             f_id = f.name
-             await ensure_external_consistency(f_id)
-             if (i+1) % 1_000 == 0:
-                 print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
-         async with transaction() as c:
-             await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
-
-     async with unique_cursor(is_write=True) as c:
+     print(f"Time elapsed: {time.time() - start_time:.2f}s. {e_cout} files moved to external storage, {i_count} files moved to internal storage.")

-         if index:
-             with indicator("VACUUM-index"):
-                 await c.execute("VACUUM main")
-         if blobs:
-             with indicator("VACUUM-blobs"):
-                 await c.execute("VACUUM blobs")
-
  def main():
      global sem
      parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
      parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
      parser.add_argument("-b", "--batch-size", type=int, default=10000, help="Batch size for processing files")
-     parser.add_argument("--vacuum", action="store_true", help="Run VACUUM only on index.db after balancing")
-     parser.add_argument("--vacuum-all", action="store_true", help="Run VACUUM on both index.db and blobs.db after balancing")
      args = parser.parse_args()
      sem = Semaphore(args.jobs)
-     with indicator("Balancing"):
-         asyncio.run(_main(args.batch_size))
-     if args.vacuum or args.vacuum_all:
-         asyncio.run(vacuum(index=args.vacuum or args.vacuum_all, blobs=args.vacuum_all))
+     asyncio.run(_main(args.batch_size))

  if __name__ == '__main__':
      main()
{lfss-0.7.9 → lfss-0.7.11}/lfss/cli/serve.py
@@ -24,7 +24,7 @@ def main():
          log_config=default_logging_config
      )
      server = Server(config=config)
-     logger.info(f"Starting server at {args.host}:{args.port}, with {args.workers} workers")
+     logger.info(f"Starting server at http://{args.host}:{args.port}, with {args.workers} workers.")
      server.run()

  if __name__ == "__main__":
lfss-0.7.11/lfss/cli/vacuum.py (new file)
@@ -0,0 +1,91 @@
+ """
+ Vacuum the database and external storage to ensure that the storage is consistent and minimal.
+ """
+
+ from lfss.src.config import LARGE_BLOB_DIR
+ import argparse, time
+ from functools import wraps
+ from asyncio import Semaphore
+ import aiofiles, asyncio
+ import aiofiles.os
+ from contextlib import contextmanager
+ from lfss.src.database import transaction, unique_cursor
+ from lfss.src.stat import RequestDB
+ from lfss.src.utils import now_stamp
+ from lfss.src.connection_pool import global_entrance
+
+ sem: Semaphore
+
+ @contextmanager
+ def indicator(name: str):
+     print(f"\033[1;33mRunning {name}... \033[0m")
+     s = time.time()
+     yield
+     print(f"{name} took {time.time() - s:.2f} seconds.")
+
+ def barriered(func):
+     @wraps(func)
+     async def wrapper(*args, **kwargs):
+         global sem
+         async with sem:
+             return await func(*args, **kwargs)
+     return wrapper
+
+ @global_entrance()
+ async def vacuum_main(index: bool = False, blobs: bool = False):
+
+     # check if any file in the Large Blob directory is not in the database
+     # the reverse operation is not necessary, because by design, the database should be the source of truth...
+     # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
+     async def ensure_external_consistency(f_id: str):
+         @barriered
+         async def fn():
+             async with unique_cursor() as c:
+                 cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                 if not await cursor.fetchone():
+                     print(f"File {f_id} not found in database, removing from external storage.")
+                     await aiofiles.os.remove(f)
+         await asyncio.create_task(fn())
+
+     # create a temporary index to speed up the process...
+     with indicator("Clearing un-referenced files in external storage"):
+         async with transaction() as c:
+             await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+         for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+             f_id = f.name
+             await ensure_external_consistency(f_id)
+             if (i+1) % 1_000 == 0:
+                 print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+         async with transaction() as c:
+             await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+
+     async with unique_cursor(is_write=True) as c:
+         if index:
+             with indicator("VACUUM-index"):
+                 await c.execute("VACUUM main")
+         if blobs:
+             with indicator("VACUUM-blobs"):
+                 await c.execute("VACUUM blobs")
+
+ async def vacuum_requests():
+     with indicator("VACUUM-requests"):
+         async with RequestDB().connect() as req_db:
+             await req_db.shrink(max_rows=1_000_000, time_before=now_stamp() - 7*24*60*60)
+             await req_db.conn.execute("VACUUM")
+
+ def main():
+     global sem
+     parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
+     parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
+     parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
+     parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
+     parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs to only keep at most recent 1M rows in 7 days")
+     args = parser.parse_args()
+     sem = Semaphore(args.jobs)
+     asyncio.run(vacuum_main(index=args.metadata, blobs=args.data))
+
+     if args.requests:
+         asyncio.run(vacuum_requests())
+
+ if __name__ == '__main__':
+     main()
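The new file above is exposed as a console script (`lfss-vacuum = "lfss.cli.vacuum:main"` in the pyproject.toml hunk further down). As a rough illustration of what the CLI flags map to, here is a minimal sketch of equivalent programmatic use; it assumes the module layout shown in this diff and mirrors how `main()` itself prepares the semaphore before running:

# Sketch only: roughly what `lfss-vacuum -j 2 -m -d -r` does, per the argparse
# definitions above. Not part of the package; all names come from this diff.
import asyncio
from asyncio import Semaphore
import lfss.cli.vacuum as vacuum

vacuum.sem = Semaphore(2)                                # what "-j/--jobs 2" sets in main()
asyncio.run(vacuum.vacuum_main(index=True, blobs=True))  # "-m/--metadata", "-d/--data"
asyncio.run(vacuum.vacuum_requests())                    # "-r/--requests": prune request logs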
{lfss-0.7.9 → lfss-0.7.11}/lfss/src/config.py
@@ -20,3 +20,4 @@ else:
  LARGE_FILE_BYTES = 8 * 1024 * 1024 # 8MB
  MAX_FILE_BYTES = 512 * 1024 * 1024 # 512MB
  MAX_BUNDLE_BYTES = 512 * 1024 * 1024 # 512MB
+ CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming (on large files)
{lfss-0.7.9 → lfss-0.7.11}/lfss/src/database.py
@@ -11,7 +11,7 @@ import aiofiles.os

  from .connection_pool import execute_sql, unique_cursor, transaction
  from .datatype import UserRecord, FileReadPermission, FileRecord, DirectoryRecord, PathContents
- from .config import LARGE_BLOB_DIR
+ from .config import LARGE_BLOB_DIR, CHUNK_SIZE
  from .log import get_logger
  from .utils import decode_uri_compnents, hash_credential, concurrent_wrap
  from .error import *
@@ -333,7 +333,7 @@ class FileConn(DBObjectBase):
          await self.cur.execute("DELETE FROM usize WHERE user_id = ?", (owner_id, ))
          res = await self.cur.execute("DELETE FROM fmeta WHERE owner_id = ? RETURNING *", (owner_id, ))
          ret = [self.parse_record(r) for r in await res.fetchall()]
-         self.logger.info(f"Deleted {len(ret)} file(s) for user {owner_id}") # type: ignore
+         self.logger.info(f"Deleted {len(ret)} file records for user {owner_id}") # type: ignore
          return ret

      async def delete_path_records(self, path: str, under_user_id: Optional[int] = None) -> list[FileRecord]:
@@ -384,7 +384,9 @@ class FileConn(DBObjectBase):
      async def get_file_blob_external(self, file_id: str) -> AsyncIterable[bytes]:
          assert (LARGE_BLOB_DIR / file_id).exists(), f"File {file_id} not found"
          async with aiofiles.open(LARGE_BLOB_DIR / file_id, 'rb') as f:
-             async for chunk in f:
+             while True:
+                 chunk = await f.read(CHUNK_SIZE)
+                 if not chunk: break
                  yield chunk

      async def delete_file_blob_external(self, file_id: str):
@@ -639,7 +641,9 @@ class Database:

          fconn = FileConn(cur)
          records = await fconn.delete_user_file_records(user.id)
+         self.logger.debug("Deleting files...")
          await self.__batch_delete_file_blobs(fconn, records)
+         self.logger.info(f"Deleted {len(records)} file(s) for user {user.username}")

          # make sure the user's directory is deleted,
          # may contain admin's files, but delete them all
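The `get_file_blob_external` change above is worth a note: iterating an aiofiles handle with `async for chunk in f` yields newline-delimited "lines", so chunk sizes for binary blobs were arbitrary and could grow very large, whereas reading fixed `CHUNK_SIZE` blocks bounds memory per chunk. A standalone sketch of the same pattern (illustrative only, not the lfss API; `stream_file` is a hypothetical helper):

# Illustrative sketch of fixed-size chunked streaming with aiofiles,
# matching the pattern introduced above; CHUNK_SIZE mirrors config.py.
import aiofiles

CHUNK_SIZE = 1024 * 1024  # 1MB

async def stream_file(path: str):
    async with aiofiles.open(path, 'rb') as f:
        while True:
            chunk = await f.read(CHUNK_SIZE)  # bounded read, safe for binary data
            if not chunk:
                break
            yield chunk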
{lfss-0.7.9 → lfss-0.7.11}/lfss/src/log.py
@@ -94,11 +94,11 @@ def get_logger(
          file_handler = logging.FileHandler(log_file)
      elif file_handler_type == 'daily':
          file_handler = handlers.TimedRotatingFileHandler(
-             log_file, when='midnight', interval=1, backupCount=5
+             log_file, when='midnight', interval=1, backupCount=30
          )
      elif file_handler_type == 'rotate':
          file_handler = handlers.RotatingFileHandler(
-             log_file, maxBytes=1000000, backupCount=5
+             log_file, maxBytes=1024*1024, backupCount=5
          )

      file_handler.setFormatter(formatter_plain)
{lfss-0.7.9 → lfss-0.7.11}/lfss/src/server.py
@@ -15,7 +15,7 @@ from contextlib import asynccontextmanager
  from .error import *
  from .log import get_logger
  from .stat import RequestDB
- from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES
+ from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES, CHUNK_SIZE
  from .utils import ensure_uri_compnents, format_last_modified, now_stamp
  from .connection_pool import global_connection_init, global_connection_close, unique_cursor
  from .database import Database, UserRecord, DECOY_USER, FileRecord, check_user_permission, FileReadPermission, UserConn, FileConn, PathContents
@@ -253,9 +253,8 @@ async def put_file(

      if len(blobs) > LARGE_FILE_BYTES:
          async def blob_reader():
-             chunk_size = 16 * 1024 * 1024 # 16MB
-             for b in range(0, len(blobs), chunk_size):
-                 yield blobs[b:b+chunk_size]
+             for b in range(0, len(blobs), CHUNK_SIZE):
+                 yield blobs[b:b+CHUNK_SIZE]
          await db.save_file(user.id, path, blob_reader(), permission = FileReadPermission(permission), mime_type = mime_t)
      else:
          await db.save_file(user.id, path, blobs, permission = FileReadPermission(permission), mime_type=mime_t)
{lfss-0.7.9 → lfss-0.7.11}/lfss/src/stat.py
@@ -1,5 +1,6 @@
  from typing import Optional, Any
  import aiosqlite
+ from contextlib import asynccontextmanager
  from .config import DATA_HOME
  from .utils import debounce_async

@@ -25,6 +26,15 @@ class RequestDB:
              status INTEGER
          )
          ''')
+         return self
+
+     def connect(self):
+         @asynccontextmanager
+         async def _mgr():
+             await self.init()
+             yield self
+             await self.close()
+         return _mgr()

      async def close(self):
          await self.conn.close()
@@ -63,4 +73,26 @@ class RequestDB:
          ''', (time, method, path, headers, query, client, duration, request_size, response_size, status)) as cursor:
              assert cursor.lastrowid is not None
              return cursor.lastrowid
+
+     async def shrink(self, max_rows: int = 1_000_000, time_before: float = 0):
+         async with aiosqlite.connect(self.db) as conn:
+
+             # remove all but the last max_rows
+             res = await (await conn.execute('SELECT COUNT(*) FROM requests')).fetchone()
+             assert res is not None
+             row_len = res[0]
+             if row_len > max_rows:
+                 await conn.execute('''
+                     DELETE FROM requests WHERE id NOT IN (
+                         SELECT id FROM requests ORDER BY time DESC LIMIT ?
+                     )
+                     ''', (max_rows,))
+
+             # remove old requests that is older than time_before
+             if time_before > 0:
+                 await conn.execute('''
+                     DELETE FROM requests WHERE time < ?
+                     ''', (time_before,))
+
+             await conn.commit()
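The new `connect()` helper wraps `init()`/`close()` in an async context manager, and `shrink()` opens its own connection to prune the `requests` table. A usage sketch, mirroring how `lfss/cli/vacuum.py` above consumes this API (the retention numbers are the CLI defaults, not requirements; `prune_request_log` is a hypothetical wrapper):

# Usage sketch based on lfss/cli/vacuum.py above: keep at most the newest
# 1M request rows, drop anything older than 7 days, then VACUUM the log DB.
import asyncio
from lfss.src.stat import RequestDB
from lfss.src.utils import now_stamp

async def prune_request_log():
    async with RequestDB().connect() as req_db:
        await req_db.shrink(max_rows=1_000_000, time_before=now_stamp() - 7*24*60*60)
        await req_db.conn.execute("VACUUM")

asyncio.run(prune_request_log())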
{lfss-0.7.9 → lfss-0.7.11}/lfss/src/utils.py
@@ -57,6 +57,7 @@ def format_last_modified(last_modified_gmt: str):
      return dt.strftime('%a, %d %b %Y %H:%M:%S GMT')

  def now_stamp() -> float:
+     """ Get the current timestamp, in seconds """
      return datetime.datetime.now().timestamp()

  def stamp_to_str(stamp: float) -> str:
{lfss-0.7.9 → lfss-0.7.11}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "lfss"
- version = "0.7.9"
+ version = "0.7.11"
  description = "Lightweight file storage service"
  authors = ["li, mengxun <limengxun45@outlook.com>"]
  readme = "Readme.md"
@@ -10,10 +10,11 @@ include = ["Readme.md", "docs/*", "frontend/*", "lfss/sql/*"]

  [tool.poetry.dependencies]
  python = ">=3.9"
- fastapi = "0.*"
  aiosqlite = "0.*"
  aiofiles = "23.*"
  mimesniff = "1.*"
+ fastapi = "0.*"
+ uvicorn = "0.*"

  [tool.poetry.dev-dependencies]
  pytest = "*"
@@ -24,6 +25,7 @@ lfss-serve = "lfss.cli.serve:main"
  lfss-user = "lfss.cli.user:main"
  lfss-panel = "lfss.cli.panel:main"
  lfss-cli = "lfss.cli.cli:main"
+ lfss-vacuum = "lfss.cli.vacuum:main"
  lfss-balance = "lfss.cli.balance:main"

  [build-system]
The remaining 23 files (items 12–34 in the list above) are unchanged between versions.