lfss 0.9.4__py3-none-any.whl → 0.11.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
lfss/cli/cli.py CHANGED
@@ -1,5 +1,5 @@
 from pathlib import Path
-import argparse, typing
+import argparse, typing, sys
 from lfss.api import Connector, upload_directory, upload_file, download_file, download_directory
 from lfss.eng.datatype import FileReadPermission, FileSortKey, DirSortKey
 from lfss.eng.utils import decode_uri_compnents
@@ -12,7 +12,7 @@ def parse_permission(s: str) -> FileReadPermission:
     raise ValueError(f"Invalid permission {s}")
 
 def parse_arguments():
-    parser = argparse.ArgumentParser(description="Command line interface, please set LFSS_ENDPOINT and LFSS_TOKEN environment variables.")
+    parser = argparse.ArgumentParser(description="Client-side command line interface, set LFSS_ENDPOINT and LFSS_TOKEN environment variables for authentication.")
 
     sp = parser.add_subparsers(dest="command", required=True)
 
@@ -78,9 +78,9 @@ def main():
                 permission=args.permission
             )
             if failed_upload:
-                print("\033[91mFailed to upload:\033[0m")
+                print("\033[91mFailed to upload:\033[0m", file=sys.stderr)
                 for path in failed_upload:
-                    print(f" {path}")
+                    print(f" {path}", file=sys.stderr)
         else:
             success, msg = upload_file(
                 connector,
@@ -93,7 +93,7 @@ def main():
                 permission=args.permission
             )
             if not success:
-                print("\033[91mFailed to upload: \033[0m", msg)
+                print("\033[91mFailed to upload: \033[0m", msg, file=sys.stderr)
 
     elif args.command == "download":
         is_dir = args.src.endswith("/")
@@ -107,9 +107,9 @@ def main():
                 overwrite=args.overwrite
            )
             if failed_download:
-                print("\033[91mFailed to download:\033[0m")
+                print("\033[91mFailed to download:\033[0m", file=sys.stderr)
                 for path in failed_download:
-                    print(f" {path}")
+                    print(f" {path}", file=sys.stderr)
         else:
             success, msg = download_file(
                 connector,
@@ -121,12 +121,12 @@ def main():
                 overwrite=args.overwrite
             )
             if not success:
-                print("\033[91mFailed to download: \033[0m", msg)
+                print("\033[91mFailed to download: \033[0m", msg, file=sys.stderr)
 
     elif args.command == "query":
         for path in args.path:
             with catch_request_error():
-                res = connector.get_metadata(path)
+                res = connector.get_meta(path)
                 if res is None:
                     print(f"\033[31mNot found\033[0m ({path})")
                 else:
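Note on the cli.py changes above: failure messages now go to stderr, and the query subcommand calls Connector.get_meta instead of get_metadata. A minimal sketch of the renamed client call, assuming the Connector still picks up LFSS_ENDPOINT and LFSS_TOKEN from the environment (the example path is hypothetical):

    from lfss.api import Connector

    connector = Connector()                        # endpoint/token assumed to come from the environment
    meta = connector.get_meta("user/docs/a.txt")   # was connector.get_metadata(...) in 0.9.x
    if meta is None:
        print("Not found")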
lfss/cli/log.py ADDED
@@ -0,0 +1,77 @@
+from typing import Optional
+import argparse
+import rich.console
+import logging
+import sqlite3
+from lfss.eng.log import eval_logline
+
+console = rich.console.Console()
+def levelstr2int(levelstr: str) -> int:
+    import sys
+    if sys.version_info < (3, 11):
+        return logging.getLevelName(levelstr.upper())
+    else:
+        return logging.getLevelNamesMapping()[levelstr.upper()]
+
+def view(
+    db_file: str,
+    level: Optional[str] = None,
+    offset: int = 0,
+    limit: int = 1000
+    ):
+    conn = sqlite3.connect(db_file)
+    cursor = conn.cursor()
+    if level is None:
+        cursor.execute("SELECT * FROM log ORDER BY created DESC LIMIT ? OFFSET ?", (limit, offset))
+    else:
+        level_int = levelstr2int(level)
+        cursor.execute("SELECT * FROM log WHERE level >= ? ORDER BY created DESC LIMIT ? OFFSET ?", (level_int, limit, offset))
+    levelname_color = {
+        'DEBUG': 'blue',
+        'INFO': 'green',
+        'WARNING': 'yellow',
+        'ERROR': 'red',
+        'CRITICAL': 'bold red',
+        'FATAL': 'bold red'
+    }
+    for row in cursor.fetchall():
+        log = eval_logline(row)
+        console.print(f"{log.created} [{levelname_color[log.levelname]}][{log.levelname}] [default]{log.message}")
+    conn.close()
+
+def trim(db_file: str, keep: int = 1000, level: Optional[str] = None):
+    conn = sqlite3.connect(db_file)
+    cursor = conn.cursor()
+    if level is None:
+        cursor.execute("DELETE FROM log WHERE id NOT IN (SELECT id FROM log ORDER BY created DESC LIMIT ?)", (keep,))
+    else:
+        cursor.execute("DELETE FROM log WHERE levelname = ? and id NOT IN (SELECT id FROM log WHERE levelname = ? ORDER BY created DESC LIMIT ?)", (level.upper(), level.upper(), keep))
+    conn.commit()
+    conn.execute("VACUUM")
+    conn.close()
+
+def main():
+    parser = argparse.ArgumentParser(description="Log operations utility")
+    subparsers = parser.add_subparsers(title='subcommands', description='valid subcommands', help='additional help')
+
+    parser_show = subparsers.add_parser('view', help='Show logs')
+    parser_show.add_argument('db_file', type=str, help='Database file path')
+    parser_show.add_argument('-l', '--level', type=str, required=False, help='Log level')
+    parser_show.add_argument('--offset', type=int, default=0, help='Starting offset')
+    parser_show.add_argument('--limit', type=int, default=1000, help='Maximum number of entries to display')
+    parser_show.set_defaults(func=view)
+
+    parser_trim = subparsers.add_parser('trim', help='Trim logs')
+    parser_trim.add_argument('db_file', type=str, help='Database file path')
+    parser_trim.add_argument('-l', '--level', type=str, required=False, help='Log level')
+    parser_trim.add_argument('--keep', type=int, default=1000, help='Number of entries to keep')
+    parser_trim.set_defaults(func=trim)
+
+    args = parser.parse_args()
+    if hasattr(args, 'func'):
+        kwargs = vars(args)
+        func = kwargs.pop('func')
+        func(**kwargs)
+
+if __name__ == '__main__':
+    main()
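The new lfss.cli.log module reads a *.log.db SQLite file and either prints recent entries or deletes old ones. A small usage sketch calling the two functions directly; the database path is hypothetical:

    from lfss.cli.log import view, trim

    # print the newest 50 entries at WARNING or above
    view("/path/to/lfss.log.db", level="warning", limit=50)

    # keep only the newest 1000 rows, dropping the rest and compacting the file (trim runs VACUUM)
    trim("/path/to/lfss.log.db", keep=1000)

The same operations are exposed as the view and trim subcommands through the module's argparse main().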
lfss/cli/vacuum.py CHANGED
@@ -2,10 +2,11 @@
 Vacuum the database and external storage to ensure that the storage is consistent and minimal.
 """
 
-from lfss.eng.config import LARGE_BLOB_DIR
-import argparse, time
+from lfss.eng.config import LARGE_BLOB_DIR, THUMB_DB, LOG_DIR
+import argparse, time, itertools
 from functools import wraps
 from asyncio import Semaphore
+import aiosqlite
 import aiofiles, asyncio
 import aiofiles.os
 from contextlib import contextmanager
@@ -13,6 +14,7 @@ from lfss.eng.database import transaction, unique_cursor
 from lfss.svc.request_log import RequestDB
 from lfss.eng.utils import now_stamp
 from lfss.eng.connection_pool import global_entrance
+from lfss.cli.log import trim
 
 sem: Semaphore
 
@@ -32,7 +34,7 @@ def barriered(func):
     return wrapper
 
 @global_entrance()
-async def vacuum_main(index: bool = False, blobs: bool = False):
+async def vacuum_main(index: bool = False, blobs: bool = False, thumbs: bool = False, logs: bool = False, vacuum_all: bool = False):
 
     # check if any file in the Large Blob directory is not in the database
     # the reverse operation is not necessary, because by design, the database should be the source of truth...
@@ -49,23 +51,68 @@ async def vacuum_main(index: bool = False, blobs: bool = False):
 
     # create a temporary index to speed up the process...
     with indicator("Clearing un-referenced files in external storage"):
-        async with transaction() as c:
-            await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
-        for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
-            f_id = f.name
-            await ensure_external_consistency(f_id)
-            if (i+1) % 1_000 == 0:
-                print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
-        async with transaction() as c:
-            await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+        try:
+            async with transaction() as c:
+                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+            for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+                f_id = f.name
+                await ensure_external_consistency(f_id)
+                if (i+1) % 1_000 == 0:
+                    print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+        finally:
+            async with transaction() as c:
+                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
 
-    async with unique_cursor(is_write=True) as c:
-        if index:
-            with indicator("VACUUM-index"):
+    if index or vacuum_all:
+        with indicator("VACUUM-index"):
+            async with transaction() as c:
+                await c.execute("DELETE FROM dupcount WHERE count = 0")
+            async with unique_cursor(is_write=True) as c:
                 await c.execute("VACUUM main")
-        if blobs:
-            with indicator("VACUUM-blobs"):
+    if blobs or vacuum_all:
+        with indicator("VACUUM-blobs"):
+            async with unique_cursor(is_write=True) as c:
                 await c.execute("VACUUM blobs")
+
+    if logs or vacuum_all:
+        with indicator("VACUUM-logs"):
+            for log_file in LOG_DIR.glob("*.log.db"):
+                trim(str(log_file), keep=10_000)
+
+    if thumbs or vacuum_all:
+        try:
+            async with transaction() as c:
+                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+            with indicator("VACUUM-thumbs"):
+                if not THUMB_DB.exists():
+                    raise FileNotFoundError("Thumbnail database not found.")
+                async with unique_cursor() as db_c:
+                    async with aiosqlite.connect(THUMB_DB) as t_conn:
+                        batch_size = 10_000
+                        for batch_count in itertools.count(start=0):
+                            exceeded_rows = list(await (await t_conn.execute(
+                                "SELECT file_id FROM thumbs LIMIT ? OFFSET ?",
+                                (batch_size, batch_size * batch_count)
+                            )).fetchall())
+                            if not exceeded_rows:
+                                break
+                            batch_ids = [row[0] for row in exceeded_rows]
+                            for f_id in batch_ids:
+                                cursor = await db_c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                                if not await cursor.fetchone():
+                                    print(f"Thumbnail {f_id} not found in database, removing from thumb cache.")
+                                    await t_conn.execute("DELETE FROM thumbs WHERE file_id = ?", (f_id,))
+                            print(f"Checked {batch_count+1} batches of {batch_size} thumbnails.")
+
+                        await t_conn.commit()
+                        await t_conn.execute("VACUUM")
+        except FileNotFoundError as e:
+            if "Thumbnail database not found." in str(e):
+                print("Thumbnail database not found, skipping.")
+
+        finally:
+            async with transaction() as c:
+                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
 
 async def vacuum_requests():
     with indicator("VACUUM-requests"):
@@ -76,15 +123,18 @@ async def vacuum_requests():
 def main():
     global sem
     parser = argparse.ArgumentParser(description="Balance the storage by ensuring that large file thresholds are met.")
+    parser.add_argument("--all", action="store_true", help="Vacuum all")
     parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
     parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
     parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
+    parser.add_argument("-t", "--thumb", action="store_true", help="Vacuum thumbnails")
     parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs to only keep at most recent 1M rows in 7 days")
+    parser.add_argument("-l", "--logs", action="store_true", help="Trim log to keep at most recent 10k rows for each category")
     args = parser.parse_args()
     sem = Semaphore(args.jobs)
-    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data))
+    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data, thumbs=args.thumb, logs = args.logs, vacuum_all=args.all))
 
-    if args.requests:
+    if args.requests or args.all:
         asyncio.run(vacuum_requests())
 
 if __name__ == '__main__':
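vacuum_main now accepts thumbs, logs and vacuum_all in addition to index and blobs, and main() wires them to the new -t/--thumb, -l/--logs and --all switches. A hedged sketch of driving it from Python instead of the CLI; setting the module-level semaphore mirrors what main() does and is an assumption about intended use:

    import asyncio
    from asyncio import Semaphore
    import lfss.cli.vacuum as vacuum

    vacuum.sem = Semaphore(2)                          # main() normally sets this from --jobs
    asyncio.run(vacuum.vacuum_main(vacuum_all=True))   # roughly what --all triggers (main() additionally runs vacuum_requests())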
lfss/eng/config.py CHANGED
@@ -11,17 +11,19 @@ if not DATA_HOME.exists():
 DATA_HOME = DATA_HOME.resolve().absolute()
 LARGE_BLOB_DIR = DATA_HOME / 'large_blobs'
 LARGE_BLOB_DIR.mkdir(exist_ok=True)
+LOG_DIR = DATA_HOME / 'logs'
+
+DISABLE_LOGGING = os.environ.get('DISABLE_LOGGING', '0') == '1'
 
 # https://sqlite.org/fasterthanfs.html
 __env_large_file = os.environ.get('LFSS_LARGE_FILE', None)
 if __env_large_file is not None:
     LARGE_FILE_BYTES = parse_storage_size(__env_large_file)
 else:
-    LARGE_FILE_BYTES = 8 * 1024 * 1024 # 8MB
-MAX_MEM_FILE_BYTES = 128 * 1024 * 1024 # 128MB
-MAX_BUNDLE_BYTES = 512 * 1024 * 1024 # 512MB
+    LARGE_FILE_BYTES = 1 * 1024 * 1024 # 1MB
+MAX_MEM_FILE_BYTES = 128 * 1024 * 1024 # 128MB
 CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming (on large files)
 DEBUG_MODE = os.environ.get('LFSS_DEBUG', '0') == '1'
 
-THUMB_DB = DATA_HOME / 'thumbs.db'
-THUMB_SIZE = (48, 48)
+THUMB_DB = DATA_HOME / 'thumbs.v0-11.db'
+THUMB_SIZE = (64, 64)
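With this change the default large-file threshold drops from 8 MB to 1 MB, the thumbnail cache moves to a new thumbs.v0-11.db file with 64x64 thumbnails, and LOG_DIR plus DISABLE_LOGGING are introduced. Per the code above, the LFSS_LARGE_FILE environment variable still overrides the threshold through parse_storage_size; the exact size-string syntax it accepts ("8m" below) is an assumption:

    import os

    # restore the previous 8 MB large-file threshold; must be set before lfss.eng.config is imported
    os.environ["LFSS_LARGE_FILE"] = "8m"   # size-string format is assumed, not confirmed by the diff
    import lfss.eng.config as config
    print(config.LARGE_FILE_BYTES)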
@@ -8,7 +8,7 @@ from functools import wraps
 from typing import Callable, Awaitable
 
 from .log import get_logger
-from .error import DatabaseLockedError
+from .error import DatabaseLockedError, DatabaseTransactionError
 from .config import DATA_HOME
 
 async def execute_sql(conn: aiosqlite.Connection | aiosqlite.Cursor, name: str):
@@ -147,6 +147,14 @@ def global_entrance(n_read: int = 1):
         return wrapper
     return decorator
 
+def handle_sqlite_error(e: Exception):
+    if 'database is locked' in str(e):
+        raise DatabaseLockedError from e
+    if 'cannot start a transaction within a transaction' in str(e):
+        get_logger('database', global_instance=True).error(f"Unexpected error: {e}")
+        raise DatabaseTransactionError from e
+    raise e
+
 @asynccontextmanager
 async def unique_cursor(is_write: bool = False):
     if not is_write:
@@ -155,9 +163,7 @@ async def unique_cursor(is_write: bool = False):
         try:
             yield await connection_obj.conn.cursor()
         except Exception as e:
-            if 'database is locked' in str(e):
-                raise DatabaseLockedError from e
-            raise e
+            handle_sqlite_error(e)
         finally:
             await g_pool.release(connection_obj)
     else:
@@ -166,9 +172,7 @@ async def unique_cursor(is_write: bool = False):
         try:
             yield await connection_obj.conn.cursor()
         except Exception as e:
-            if 'database is locked' in str(e):
-                raise DatabaseLockedError from e
-            raise e
+            handle_sqlite_error(e)
         finally:
             await g_pool.release(connection_obj)
 