lfss-0.7.9-py3-none-any.whl → lfss-0.7.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lfss/cli/balance.py CHANGED
@@ -8,26 +8,16 @@ from functools import wraps
 from asyncio import Semaphore
 import aiofiles, asyncio
 import aiofiles.os
-from contextlib import contextmanager
 from lfss.src.database import transaction, unique_cursor
 from lfss.src.connection_pool import global_entrance
 
-@contextmanager
-def indicator(name: str):
-    print(f"\033[1;33mRunning {name}... \033[0m")
-    s = time.time()
-    yield
-    print(f"{name} took {time.time() - s:.2f} seconds.")
-
-sem = Semaphore(1)
-
-def _get_sem():
-    return sem
+sem: Semaphore
 
 def barriered(func):
     @wraps(func)
     async def wrapper(*args, **kwargs):
-        async with _get_sem():
+        global sem
+        async with sem:
             return await func(*args, **kwargs)
     return wrapper
 
@@ -62,8 +52,9 @@ async def move_to_internal(f_id: str, flag: str = ''):
 
 @global_entrance()
 async def _main(batch_size: int = 10000):
-    tasks = []
+    start_time = time.time()
 
+    tasks = []
     e_cout = 0
     for batch_count in itertools.count(start=0):
         async with unique_cursor() as conn:
@@ -80,6 +71,7 @@ async def _main(batch_size: int = 10000):
             tasks.append(move_to_external(f_id, flag=f"[b{batch_count+1}-e{i+1}/{len(exceeded_rows)}] "))
         await asyncio.gather(*tasks)
 
+    tasks = []
     i_count = 0
     for batch_count in itertools.count(start=0):
         async with unique_cursor() as conn:
@@ -96,58 +88,16 @@ async def _main(batch_size: int = 10000):
             tasks.append(move_to_internal(f_id, flag=f"[b{batch_count+1}-i{i+1}/{len(under_rows)}] "))
         await asyncio.gather(*tasks)
 
-    print(f"Finished. {e_cout} files moved to external storage, {i_count} files moved to internal storage.")
-
-@global_entrance()
-async def vacuum(index: bool = False, blobs: bool = False):
-
-    # check if any file in the Large Blob directory is not in the database
-    # the reverse operation is not necessary, because by design, the database should be the source of truth...
-    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
-    async def ensure_external_consistency(f_id: str):
-        @barriered
-        async def fn():
-            async with unique_cursor() as c:
-                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
-                if not await cursor.fetchone():
-                    print(f"File {f_id} not found in database, removing from external storage.")
-                    await aiofiles.os.remove(f)
-        await asyncio.create_task(fn())
-
-    # create a temporary index to speed up the process...
-    with indicator("Clearing un-referenced files in external storage"):
-        async with transaction() as c:
-            await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
-        for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
-            f_id = f.name
-            await ensure_external_consistency(f_id)
-            if (i+1) % 1_000 == 0:
-                print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
-        async with transaction() as c:
-            await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
-
-    async with unique_cursor(is_write=True) as c:
+    print(f"Time elapsed: {time.time() - start_time:.2f}s. {e_cout} files moved to external storage, {i_count} files moved to internal storage.")
 
-        if index:
-            with indicator("VACUUM-index"):
-                await c.execute("VACUUM main")
-        if blobs:
-            with indicator("VACUUM-blobs"):
-                await c.execute("VACUUM blobs")
-
 def main():
     global sem
     parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
     parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
     parser.add_argument("-b", "--batch-size", type=int, default=10000, help="Batch size for processing files")
-    parser.add_argument("--vacuum", action="store_true", help="Run VACUUM only on index.db after balancing")
-    parser.add_argument("--vacuum-all", action="store_true", help="Run VACUUM on both index.db and blobs.db after balancing")
     args = parser.parse_args()
     sem = Semaphore(args.jobs)
-    with indicator("Balancing"):
-        asyncio.run(_main(args.batch_size))
-        if args.vacuum or args.vacuum_all:
-            asyncio.run(vacuum(index=args.vacuum or args.vacuum_all, blobs=args.vacuum_all))
+    asyncio.run(_main(args.batch_size))
 
 if __name__ == '__main__':
     main()
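The notable change above is replacing the import-time `sem = Semaphore(1)` (and its `_get_sem()` accessor) with a bare `sem: Semaphore` annotation that `main()` assigns after parsing `--jobs`, so the semaphore is sized by the CLI flag rather than hard-coded to 1. Below is a minimal, self-contained sketch of the same pattern; the `worker` and `run` names are illustrative, not part of lfss, and it assumes Python 3.10+, where a Semaphore created outside a running loop binds lazily and can be used inside `asyncio.run`:

import asyncio
from asyncio import Semaphore
from functools import wraps

sem: Semaphore  # annotated only; run() assigns it before any task starts

def barriered(func):
    @wraps(func)
    async def wrapper(*args, **kwargs):
        async with sem:  # caps concurrent invocations at the -j value
            return await func(*args, **kwargs)
    return wrapper

@barriered
async def worker(i: int) -> int:
    await asyncio.sleep(0.1)  # stand-in for a file move
    return i

def run(jobs: int = 2) -> list[int]:
    global sem
    sem = Semaphore(jobs)  # sized from the CLI flag, not hard-coded to 1
    async def _main():
        return await asyncio.gather(*(worker(i) for i in range(10)))
    return asyncio.run(_main())

if __name__ == "__main__":
    print(run(jobs=4))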
lfss/cli/vacuum.py ADDED
@@ -0,0 +1,93 @@
+"""
+Vacuum the database and external storage to ensure that the storage is consistent and minimal.
+"""
+
+from lfss.src.config import LARGE_BLOB_DIR, DATA_HOME
+import argparse, time, os
+from functools import wraps
+from asyncio import Semaphore
+import aiofiles, asyncio
+import aiofiles.os
+from contextlib import contextmanager
+from lfss.src.database import transaction, unique_cursor
+from lfss.src.stat import RequestDB
+from lfss.src.connection_pool import global_entrance
+
+sem: Semaphore
+
+@contextmanager
+def indicator(name: str):
+    print(f"\033[1;33mRunning {name}... \033[0m")
+    s = time.time()
+    yield
+    print(f"{name} took {time.time() - s:.2f} seconds.")
+
+def barriered(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        global sem
+        async with sem:
+            return await func(*args, **kwargs)
+    return wrapper
+
+@global_entrance()
+async def vacuum_main(index: bool = False, blobs: bool = False):
+
+    # check if any file in the Large Blob directory is not in the database
+    # the reverse operation is not necessary, because by design, the database should be the source of truth...
+    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
+    async def ensure_external_consistency(f_id: str):
+        @barriered
+        async def fn():
+            async with unique_cursor() as c:
+                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                if not await cursor.fetchone():
+                    print(f"File {f_id} not found in database, removing from external storage.")
+                    await aiofiles.os.remove(f)
+        await asyncio.create_task(fn())
+
+    # create a temporary index to speed up the process...
+    with indicator("Clearing un-referenced files in external storage"):
+        async with transaction() as c:
+            await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+        for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+            f_id = f.name
+            await ensure_external_consistency(f_id)
+            if (i+1) % 1_000 == 0:
+                print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+        async with transaction() as c:
+            await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+
+    async with unique_cursor(is_write=True) as c:
+        if index:
+            with indicator("VACUUM-index"):
+                await c.execute("VACUUM main")
+        if blobs:
+            with indicator("VACUUM-blobs"):
+                await c.execute("VACUUM blobs")
+
+async def vacuum_requests():
+    with indicator("VACUUM-requests"):
+        req_db = await RequestDB().init()
+        try:
+            await req_db.shrink()
+            await req_db.conn.execute("VACUUM")
+        finally:
+            await req_db.close()
+
+def main():
+    global sem
+    parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
+    parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
+    parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
+    parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
+    parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs")
+    args = parser.parse_args()
+    sem = Semaphore(args.jobs)
+    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data))
+
+    if args.requests:
+        asyncio.run(vacuum_requests())
+
+if __name__ == '__main__':
+    main()
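The core of `vacuum_main` is an orphan scan: every file name in `LARGE_BLOB_DIR` must match a `fmeta.file_id` row, unmatched files are deleted, and a temporary index is created and dropped around the scan to keep the lookups fast. A reduced, synchronous sketch of that idea using only the standard library follows; the single-column `fmeta` schema and the temp paths are illustrative, not lfss's real schema:

import sqlite3
import tempfile
from pathlib import Path

def find_orphans(blob_dir: Path, db_path: Path) -> list[Path]:
    orphans = []
    conn = sqlite3.connect(db_path)
    try:
        # mirror the temporary index the CLI creates to speed up the scan
        conn.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
        for f in blob_dir.iterdir():
            row = conn.execute(
                "SELECT 1 FROM fmeta WHERE file_id = ?", (f.name,)
            ).fetchone()
            if row is None:  # on disk but not in the database: safe to remove
                orphans.append(f)
        conn.execute("DROP INDEX IF EXISTS fmeta_file_id")
        conn.commit()
    finally:
        conn.close()
    return orphans

if __name__ == "__main__":
    tmp = Path(tempfile.mkdtemp())
    blobs = tmp / "blobs"; blobs.mkdir()
    db = tmp / "index.db"
    conn = sqlite3.connect(db)
    conn.execute("CREATE TABLE fmeta (file_id TEXT PRIMARY KEY)")
    conn.execute("INSERT INTO fmeta VALUES ('kept')")
    conn.commit(); conn.close()
    (blobs / "kept").touch()
    (blobs / "orphan").touch()
    print(find_orphans(blobs, db))  # -> [.../blobs/orphan]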
lfss/src/config.py CHANGED
@@ -20,3 +20,4 @@ else:
 LARGE_FILE_BYTES = 8 * 1024 * 1024 # 8MB
 MAX_FILE_BYTES = 512 * 1024 * 1024 # 512MB
 MAX_BUNDLE_BYTES = 512 * 1024 * 1024 # 512MB
+CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming (on large files)
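Quick arithmetic on the new constant (values copied from the diff): at the 1 MiB `CHUNK_SIZE`, a maximal 512 MiB file streams as 512 chunks, and the 8 MiB `LARGE_FILE_BYTES` threshold corresponds to 8 chunks.

LARGE_FILE_BYTES = 8 * 1024 * 1024
MAX_FILE_BYTES = 512 * 1024 * 1024
CHUNK_SIZE = 1024 * 1024

assert MAX_FILE_BYTES // CHUNK_SIZE == 512   # chunks in a maximal upload
assert LARGE_FILE_BYTES // CHUNK_SIZE == 8   # chunks at the "large file" cutoff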
lfss/src/database.py CHANGED
@@ -11,7 +11,7 @@ import aiofiles.os
 
 from .connection_pool import execute_sql, unique_cursor, transaction
 from .datatype import UserRecord, FileReadPermission, FileRecord, DirectoryRecord, PathContents
-from .config import LARGE_BLOB_DIR
+from .config import LARGE_BLOB_DIR, CHUNK_SIZE
 from .log import get_logger
 from .utils import decode_uri_compnents, hash_credential, concurrent_wrap
 from .error import *
@@ -333,7 +333,7 @@ class FileConn(DBObjectBase):
         await self.cur.execute("DELETE FROM usize WHERE user_id = ?", (owner_id, ))
         res = await self.cur.execute("DELETE FROM fmeta WHERE owner_id = ? RETURNING *", (owner_id, ))
         ret = [self.parse_record(r) for r in await res.fetchall()]
-        self.logger.info(f"Deleted {len(ret)} file(s) for user {owner_id}") # type: ignore
+        self.logger.info(f"Deleted {len(ret)} file records for user {owner_id}") # type: ignore
         return ret
 
     async def delete_path_records(self, path: str, under_user_id: Optional[int] = None) -> list[FileRecord]:
@@ -384,7 +384,9 @@ class FileConn(DBObjectBase):
     async def get_file_blob_external(self, file_id: str) -> AsyncIterable[bytes]:
         assert (LARGE_BLOB_DIR / file_id).exists(), f"File {file_id} not found"
         async with aiofiles.open(LARGE_BLOB_DIR / file_id, 'rb') as f:
-            async for chunk in f:
+            while True:
+                chunk = await f.read(CHUNK_SIZE)
+                if not chunk: break
                 yield chunk
 
     async def delete_file_blob_external(self, file_id: str):
@@ -639,7 +641,9 @@ class Database:
 
         fconn = FileConn(cur)
         records = await fconn.delete_user_file_records(user.id)
+        self.logger.debug("Deleting files...")
         await self.__batch_delete_file_blobs(fconn, records)
+        self.logger.info(f"Deleted {len(records)} file(s) for user {user.username}")
 
         # make sure the user's directory is deleted,
         # may contain admin's files, but delete them all
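The change to `get_file_blob_external` matters for correctness as well as memory: iterating an aiofiles handle with `async for chunk in f` yields lines, so a binary blob with no newlines arrives as one giant chunk and one with many newlines arrives as many tiny ones, while `read(CHUNK_SIZE)` yields bounded, newline-independent pieces. A minimal standalone sketch of the new loop, assuming aiofiles is installed (lfss already depends on it); `stream_file` is an illustrative name:

import asyncio
import aiofiles

CHUNK_SIZE = 1024 * 1024  # 1 MiB, matching the new config constant

async def stream_file(path: str):
    # fixed-size reads: each chunk is at most CHUNK_SIZE bytes,
    # regardless of where newlines fall in the binary data
    async with aiofiles.open(path, "rb") as f:
        while True:
            chunk = await f.read(CHUNK_SIZE)
            if not chunk:
                break
            yield chunk

async def main(path: str):
    total = 0
    async for chunk in stream_file(path):
        total += len(chunk)
    print(f"streamed {total} bytes in <= {CHUNK_SIZE}-byte chunks")

if __name__ == "__main__":
    asyncio.run(main(__file__))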
lfss/src/log.py CHANGED
@@ -94,11 +94,11 @@ def get_logger(
         file_handler = logging.FileHandler(log_file)
     elif file_handler_type == 'daily':
         file_handler = handlers.TimedRotatingFileHandler(
-            log_file, when='midnight', interval=1, backupCount=5
+            log_file, when='midnight', interval=1, backupCount=30
        )
     elif file_handler_type == 'rotate':
         file_handler = handlers.RotatingFileHandler(
-            log_file, maxBytes=1000000, backupCount=5
+            log_file, maxBytes=1024*1024, backupCount=5
         )
 
     file_handler.setFormatter(formatter_plain)
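Both rotation policies touched here come from the standard library: 'daily' now keeps 30 midnight-rotated backups instead of 5, and 'rotate' caps files at 1 MiB (1024*1024, replacing the decimal 1000000). A small sketch with an illustrative logger name and path:

import logging
from logging import handlers

def make_file_handler(log_file: str, kind: str) -> logging.Handler:
    if kind == "daily":
        # rotate at midnight, keep 30 days of history
        return handlers.TimedRotatingFileHandler(
            log_file, when="midnight", interval=1, backupCount=30
        )
    if kind == "rotate":
        # rotate by size: 1 MiB per file, 5 backups
        return handlers.RotatingFileHandler(
            log_file, maxBytes=1024 * 1024, backupCount=5
        )
    return logging.FileHandler(log_file)

logger = logging.getLogger("demo")
logger.addHandler(make_file_handler("demo.log", "daily"))
logger.warning("rotation configured")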
lfss/src/server.py CHANGED
@@ -15,7 +15,7 @@ from contextlib import asynccontextmanager
 from .error import *
 from .log import get_logger
 from .stat import RequestDB
-from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES
+from .config import MAX_BUNDLE_BYTES, MAX_FILE_BYTES, LARGE_FILE_BYTES, CHUNK_SIZE
 from .utils import ensure_uri_compnents, format_last_modified, now_stamp
 from .connection_pool import global_connection_init, global_connection_close, unique_cursor
 from .database import Database, UserRecord, DECOY_USER, FileRecord, check_user_permission, FileReadPermission, UserConn, FileConn, PathContents
@@ -31,6 +31,7 @@ async def lifespan(app: FastAPI):
     try:
         await global_connection_init(n_read = 2)
         await asyncio.gather(db.init(), req_conn.init())
+        await req_conn.shrink()
         yield
         await req_conn.commit()
     finally:
@@ -253,9 +254,8 @@ async def put_file(
 
     if len(blobs) > LARGE_FILE_BYTES:
         async def blob_reader():
-            chunk_size = 16 * 1024 * 1024 # 16MB
-            for b in range(0, len(blobs), chunk_size):
-                yield blobs[b:b+chunk_size]
+            for b in range(0, len(blobs), CHUNK_SIZE):
+                yield blobs[b:b+CHUNK_SIZE]
         await db.save_file(user.id, path, blob_reader(), permission = FileReadPermission(permission), mime_type = mime_t)
     else:
         await db.save_file(user.id, path, blobs, permission = FileReadPermission(permission), mime_type=mime_t)
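`blob_reader` applies the same chunking to an in-memory payload, so `save_file` sees one uniform streaming interface whether the bytes come from memory or disk; the per-call 16 MB constant is gone in favor of the shared `CHUNK_SIZE`. A self-contained sketch of the generator, with standalone constants rather than imports from lfss:

from typing import Iterator

CHUNK_SIZE = 1024 * 1024  # 1 MiB

def blob_reader(blobs: bytes) -> Iterator[bytes]:
    # slice the in-memory payload into CHUNK_SIZE pieces
    for b in range(0, len(blobs), CHUNK_SIZE):
        yield blobs[b:b + CHUNK_SIZE]

chunks = list(blob_reader(b"x" * (2 * CHUNK_SIZE + 1)))
assert [len(c) for c in chunks] == [CHUNK_SIZE, CHUNK_SIZE, 1]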
lfss/src/stat.py CHANGED
@@ -1,7 +1,7 @@
 from typing import Optional, Any
 import aiosqlite
 from .config import DATA_HOME
-from .utils import debounce_async
+from .utils import debounce_async, concurrent_wrap
 
 class RequestDB:
     conn: aiosqlite.Connection
@@ -25,6 +25,7 @@ class RequestDB:
             status INTEGER
         )
         ''')
+        return self
 
     async def close(self):
         await self.conn.close()
@@ -63,4 +64,27 @@ class RequestDB:
         ''', (time, method, path, headers, query, client, duration, request_size, response_size, status)) as cursor:
             assert cursor.lastrowid is not None
             return cursor.lastrowid
+
+    @concurrent_wrap()
+    async def shrink(self, max_rows: int = 1_000_000, time_before: float = 0):
+        async with aiosqlite.connect(self.db) as conn:
+
+            # remove all but the last max_rows
+            res = await (await conn.execute('SELECT COUNT(*) FROM requests')).fetchone()
+            assert res is not None
+            row_len = res[0]
+            if row_len > max_rows:
+                await conn.execute('''
+                DELETE FROM requests WHERE id NOT IN (
+                    SELECT id FROM requests ORDER BY time DESC LIMIT ?
+                )
+                ''', (max_rows,))
+
+            # remove old requests that is older than time_before
+            if time_before > 0:
+                await conn.execute('''
+                DELETE FROM requests WHERE time < ?
+                ''', (time_before,))
+
+            await conn.commit()
 
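`shrink()` implements a two-part retention policy: keep only the newest `max_rows` entries ordered by `time`, then optionally delete anything older than a cutoff; the server calls it once at startup via the lifespan hook above. A synchronous sketch of the same SQL with plain sqlite3; the two-column `requests` schema here is a stand-in for the real table:

import sqlite3

def shrink(db_path: str, max_rows: int = 1_000_000, time_before: float = 0):
    conn = sqlite3.connect(db_path)
    try:
        (row_len,) = conn.execute("SELECT COUNT(*) FROM requests").fetchone()
        if row_len > max_rows:
            # keep only the newest max_rows entries by timestamp
            conn.execute(
                "DELETE FROM requests WHERE id NOT IN ("
                "  SELECT id FROM requests ORDER BY time DESC LIMIT ?)",
                (max_rows,),
            )
        if time_before > 0:
            # drop everything older than the cutoff timestamp
            conn.execute("DELETE FROM requests WHERE time < ?", (time_before,))
        conn.commit()
    finally:
        conn.close()

if __name__ == "__main__":
    import os, tempfile
    path = os.path.join(tempfile.mkdtemp(), "requests.db")
    conn = sqlite3.connect(path)
    conn.execute("CREATE TABLE requests (id INTEGER PRIMARY KEY, time REAL)")
    conn.executemany("INSERT INTO requests (time) VALUES (?)",
                     [(float(t),) for t in range(10)])
    conn.commit(); conn.close()
    shrink(path, max_rows=5)
    conn = sqlite3.connect(path)
    print(conn.execute("SELECT COUNT(*) FROM requests").fetchone())  # (5,)
    conn.close()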
lfss/src/utils.py CHANGED
@@ -57,6 +57,7 @@ def format_last_modified(last_modified_gmt: str):
     return dt.strftime('%a, %d %b %Y %H:%M:%S GMT')
 
 def now_stamp() -> float:
+    """ Get the current timestamp, in seconds """
     return datetime.datetime.now().timestamp()
 
 def stamp_to_str(stamp: float) -> str:
lfss-0.7.9.dist-info/METADATA → lfss-0.7.10.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lfss
-Version: 0.7.9
+Version: 0.7.10
 Summary: Lightweight file storage service
 Home-page: https://github.com/MenxLi/lfss
 Author: li, mengxun
lfss-0.7.9.dist-info/RECORD → lfss-0.7.10.dist-info/RECORD RENAMED
@@ -10,26 +10,27 @@ frontend/popup.js,sha256=3PgaGZmxSdV1E-D_MWgcR7aHWkcsHA1BNKSOkmP66tA,5191
 frontend/scripts.js,sha256=OP99BSbnyTE1LJebGVUvV3WUnDBiZdqaC3a9SE1FF6U,20286
 frontend/styles.css,sha256=37aU9Iep_hTz3LnAAAcEhC_I7AC0A4lX6apnMuGPTlA,4214
 frontend/utils.js,sha256=Ts4nlef8pkrEgpwX-uQwAhWvwxlIzex8ijDLNCa22ps,2372
-lfss/cli/balance.py,sha256=TmK48DGU7xPMLv7kASOCS-PY8TIs6GQEsRVRK_4YtXY,6456
+lfss/cli/balance.py,sha256=R2rbO2tg9TVnnQIVeU0GJVeMS-5LDhEdk4mbOE9qGq0,4121
 lfss/cli/cli.py,sha256=LH1nx5wI1K2DZ3hvHz7oq5HcXVDoW2V6sr7q9gJ8gqo,4621
 lfss/cli/panel.py,sha256=iGdVmdWYjA_7a78ZzWEB_3ggIOBeUKTzg6F5zLaB25c,1401
 lfss/cli/serve.py,sha256=bO3GT0kuylMGN-7bZWP4e71MlugGZ_lEMkYaYld_Ntg,985
 lfss/cli/user.py,sha256=ETLtj0N-kmxv0mhmeAsO6cY7kPq7nOOP4DetxIRoQpQ,3405
+lfss/cli/vacuum.py,sha256=i7YufreIsl8J9qfHm876vCmo0wv9OnK-FwyRK0o6HNQ,3624
 lfss/client/__init__.py,sha256=8uvcKs3PYQamDd_cjfN-fX9QUohEzJqeJlOYkBlzC3M,4556
 lfss/client/api.py,sha256=kSkB4wADTu012-1wl6v90OiZrw6aTQ42GU4jtV4KO0k,5764
 lfss/sql/init.sql,sha256=C-JtQAlaOjESI8uoF1Y_9dKukEVSw5Ll-7yA3gG-XHU,1210
 lfss/sql/pragma.sql,sha256=uENx7xXjARmro-A3XAK8OM8v5AxDMdCCRj47f86UuXg,206
 lfss/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lfss/src/config.py,sha256=CIbVFWRu86dl2GVlXlCDv93W8PLwT89NtznU6TCKvtk,729
+lfss/src/config.py,sha256=2qN2nu3onP1M7U6ENC0ZbVVRE90aDomTcQ1GsBLLHF8,800
 lfss/src/connection_pool.py,sha256=r4Ho5d_Gd4S_KbT7515UJoiyfIgS6xyttqMsKqOfaIg,5190
-lfss/src/database.py,sha256=AmyQwPFjrntFyZPTJOUB5kGvFpWuex6TPtxZKR2KK8k,32244
+lfss/src/database.py,sha256=w2QPE3h1Lx0D0fUmdtu9s1XHpNp9p27zqm8AVeP2UVg,32476
 lfss/src/datatype.py,sha256=WfrLALU_7wei5-i_b0TxY8xWI5mwxLUHFepHSps49zA,1767
 lfss/src/error.py,sha256=imbhwnbhnI3HLhkbfICROe3F0gleKrOk4XnqHJDOtuI,285
-lfss/src/log.py,sha256=xOnkuH-gB_jSVGqNnDVEW05iki6SCJ2xdEhjz5eEsMo,5136
-lfss/src/server.py,sha256=rrrhFDFrglth4yCvdvvYko-4JfVJ_MTixhCr9_Hbhx0,16296
-lfss/src/stat.py,sha256=hTMtQyM_Ukmhc33Bb9FGCfBMIX02KrGHQg8nL7sC8sU,2082
-lfss/src/utils.py,sha256=S9LCJ5OkNk_zM4rZnrHg1UDjnNkDVO_ejmfsBeNJs4s,3868
-lfss-0.7.9.dist-info/METADATA,sha256=rPPqsNm5iZI8XyG8YzY1wWn0X1TbGa8tzrEykDkXf3o,1967
-lfss-0.7.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-lfss-0.7.9.dist-info/entry_points.txt,sha256=d_Ri3GXxUW-S0E6q953A8od0YMmUAnZGlJSKS46OiW8,172
-lfss-0.7.9.dist-info/RECORD,,
+lfss/src/log.py,sha256=u6WRZZsE7iOx6_CV2NHh1ugea26p408FI4WstZh896A,5139
+lfss/src/server.py,sha256=WnUc116XnGooH78IQNkI7VO4bel-Xvp_ZzCoHTcu8sM,16288
+lfss/src/stat.py,sha256=sCSsrdkA0aX9fyNfZlNrZ8PVpovKYQKwWj2xKcCIXx4,3009
+lfss/src/utils.py,sha256=TBGYvgt6xMP8UC5wTGHAr9fmdhu0_gjOtxcSeyvGyVM,3918
+lfss-0.7.10.dist-info/METADATA,sha256=iKvhqHzMfLEqap1_iNt-UIYykPUAZKuCPJhR302AWpM,1968
+lfss-0.7.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+lfss-0.7.10.dist-info/entry_points.txt,sha256=VJ8svMz7RLtMCgNk99CElx7zo7M-N-z7BWDVw2HA92E,205
+lfss-0.7.10.dist-info/RECORD,,
lfss-0.7.9.dist-info/entry_points.txt → lfss-0.7.10.dist-info/entry_points.txt RENAMED
@@ -4,4 +4,5 @@ lfss-cli=lfss.cli.cli:main
 lfss-panel=lfss.cli.panel:main
 lfss-serve=lfss.cli.serve:main
 lfss-user=lfss.cli.user:main
+lfss-vacuum=lfss.cli.vacuum:main
 
lfss-0.7.9.dist-info/WHEEL → lfss-0.7.10.dist-info/WHEEL RENAMED
File without changes