lfss 0.7.15__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lfss/src/database.py CHANGED
@@ -8,12 +8,16 @@ import zipfile, io, asyncio
8
8
 
9
9
  import aiosqlite, aiofiles
10
10
  import aiofiles.os
11
+ import mimetypes, mimesniff
11
12
 
12
13
  from .connection_pool import execute_sql, unique_cursor, transaction
13
- from .datatype import UserRecord, FileReadPermission, FileRecord, DirectoryRecord, PathContents
14
- from .config import LARGE_BLOB_DIR, CHUNK_SIZE
14
+ from .datatype import (
15
+ UserRecord, FileReadPermission, FileRecord, DirectoryRecord, PathContents,
16
+ FileSortKey, DirSortKey, isValidFileSortKey, isValidDirSortKey
17
+ )
18
+ from .config import LARGE_BLOB_DIR, CHUNK_SIZE, LARGE_FILE_BYTES, MAX_MEM_FILE_BYTES
15
19
  from .log import get_logger
16
- from .utils import decode_uri_compnents, hash_credential, concurrent_wrap
20
+ from .utils import decode_uri_compnents, hash_credential, concurrent_wrap, debounce_async
17
21
  from .error import *
18
22
 
19
23
  class DBObjectBase(ABC):
@@ -156,55 +160,108 @@ class FileConn(DBObjectBase):
156
160
  dirs = [await self.get_path_record(u) for u in dirnames] if not skim else [DirectoryRecord(u) for u in dirnames]
157
161
  return dirs
158
162
 
159
- async def list_path(self, url: str, flat: bool = False) -> PathContents:
160
- """
161
- List all files and directories under the given path
162
- if flat is True, list all files under the path, with out delimiting directories
163
- """
164
- self.logger.debug(f"Listing path {url}, flat={flat}")
165
- if not url.endswith('/'):
166
- url += '/'
167
- if url == '/':
168
- # users cannot be queried using '/', because we store them without '/' prefix,
169
- # so we need to handle this case separately,
170
- if flat:
171
- cursor = await self.cur.execute("SELECT * FROM fmeta")
172
- res = await cursor.fetchall()
173
- files = [self.parse_record(r) for r in res]
174
- return PathContents([], files)
163
+ async def count_path_dirs(self, url: str):
164
+ if not url.endswith('/'): url += '/'
165
+ if url == '/': url = ''
166
+ cursor = await self.cur.execute("""
167
+ SELECT COUNT(*) FROM (
168
+ SELECT DISTINCT SUBSTR(
169
+ url, LENGTH(?) + 1,
170
+ INSTR(SUBSTR(url, LENGTH(?) + 1), '/')
171
+ ) AS dirname
172
+ FROM fmeta WHERE url LIKE ? AND dirname != ''
173
+ )
174
+ """, (url, url, url + '%'))
175
+ res = await cursor.fetchone()
176
+ assert res is not None, "Error: count_path_dirs"
177
+ return res[0]
175
178
 
179
+ async def list_path_dirs(
180
+ self, url: str,
181
+ offset: int = 0, limit: int = int(1e5),
182
+ order_by: DirSortKey = '', order_desc: bool = False,
183
+ skim: bool = True
184
+ ) -> list[DirectoryRecord]:
185
+ if not isValidDirSortKey(order_by):
186
+ raise ValueError(f"Invalid order_by ({order_by})")
187
+
188
+ if not url.endswith('/'): url += '/'
189
+ if url == '/': url = ''
190
+
191
+ sql_qury = """
192
+ SELECT DISTINCT SUBSTR(
193
+ url,
194
+ 1 + LENGTH(?),
195
+ INSTR(SUBSTR(url, 1 + LENGTH(?)), '/')
196
+ ) AS dirname
197
+ FROM fmeta WHERE url LIKE ? AND dirname != ''
198
+ """ \
199
+ + (f"ORDER BY {order_by} {'DESC' if order_desc else 'ASC'}" if order_by else '') \
200
+ + " LIMIT ? OFFSET ?"
201
+ cursor = await self.cur.execute(sql_qury, (url, url, url + '%', limit, offset))
202
+ res = await cursor.fetchall()
203
+ dirs_str = [r[0] for r in res]
204
+ async def get_dir(dir_url):
205
+ if skim:
206
+ return DirectoryRecord(dir_url)
176
207
  else:
177
- return PathContents(await self.list_root_dirs(), [])
178
-
208
+ return await self.get_path_record(dir_url)
209
+ dirs = await asyncio.gather(*[get_dir(url + d) for d in dirs_str])
210
+ return dirs
211
+
212
+ async def count_path_files(self, url: str, flat: bool = False):
213
+ if not url.endswith('/'): url += '/'
214
+ if url == '/': url = ''
179
215
  if flat:
180
- cursor = await self.cur.execute("SELECT * FROM fmeta WHERE url LIKE ?", (url + '%', ))
181
- res = await cursor.fetchall()
182
- files = [self.parse_record(r) for r in res]
183
- return PathContents([], files)
216
+ cursor = await self.cur.execute("SELECT COUNT(*) FROM fmeta WHERE url LIKE ?", (url + '%', ))
217
+ else:
218
+ cursor = await self.cur.execute("SELECT COUNT(*) FROM fmeta WHERE url LIKE ? AND url NOT LIKE ?", (url + '%', url + '%/%'))
219
+ res = await cursor.fetchone()
220
+ assert res is not None, "Error: count_path_files"
221
+ return res[0]
184
222
 
185
- cursor = await self.cur.execute("SELECT * FROM fmeta WHERE url LIKE ? AND url NOT LIKE ?", (url + '%', url + '%/%'))
223
+ async def list_path_files(
224
+ self, url: str,
225
+ offset: int = 0, limit: int = int(1e5),
226
+ order_by: FileSortKey = '', order_desc: bool = False,
227
+ flat: bool = False,
228
+ ) -> list[FileRecord]:
229
+ if not isValidFileSortKey(order_by):
230
+ raise ValueError(f"Invalid order_by {order_by}")
231
+
232
+ if not url.endswith('/'): url += '/'
233
+ if url == '/': url = ''
234
+
235
+ sql_query = "SELECT * FROM fmeta WHERE url LIKE ?"
236
+ if not flat: sql_query += " AND url NOT LIKE ?"
237
+ if order_by: sql_query += f" ORDER BY {order_by} {'DESC' if order_desc else 'ASC'}"
238
+ sql_query += " LIMIT ? OFFSET ?"
239
+ if flat:
240
+ cursor = await self.cur.execute(sql_query, (url + '%', limit, offset))
241
+ else:
242
+ cursor = await self.cur.execute(sql_query, (url + '%', url + '%/%', limit, offset))
186
243
  res = await cursor.fetchall()
187
244
  files = [self.parse_record(r) for r in res]
188
-
189
- # substr indexing starts from 1
190
- cursor = await self.cur.execute(
191
- """
192
- SELECT DISTINCT
193
- SUBSTR(
194
- url,
195
- 1 + LENGTH(?),
196
- INSTR(SUBSTR(url, 1 + LENGTH(?)), '/') - 1
197
- ) AS subdir
198
- FROM fmeta WHERE url LIKE ?
199
- """,
200
- (url, url, url + '%')
245
+ return files
246
+
247
+ async def list_path(self, url: str) -> PathContents:
248
+ """
249
+ List all files and directories under the given path.
250
+ This method is a handy way of browsing files, but has limitations:
251
+ - It does not support pagination
252
+ - It does not support sorting
253
+ - It cannot flatten directories
254
+ - It cannot list directories with details
255
+ """
256
+ MAX_ITEMS = int(1e4)
257
+ dir_count = await self.count_path_dirs(url)
258
+ file_count = await self.count_path_files(url, flat=False)
259
+ if dir_count + file_count > MAX_ITEMS:
260
+ raise TooManyItemsError("Too many items, please paginate")
261
+ return PathContents(
262
+ dirs = await self.list_path_dirs(url, skim=True, limit=MAX_ITEMS),
263
+ files = await self.list_path_files(url, flat=False, limit=MAX_ITEMS)
201
264
  )
202
- res = await cursor.fetchall()
203
- dirs_str = [r[0] + '/' for r in res if r[0] != '/']
204
- async def get_dir(dir_url):
205
- return DirectoryRecord(dir_url, -1)
206
- dirs = await asyncio.gather(*[get_dir(url + d) for d in dirs_str])
207
- return PathContents(dirs, files)
208
265
 
209
266
  async def get_path_record(self, url: str) -> DirectoryRecord:
210
267
  """
@@ -229,8 +286,7 @@ class FileConn(DBObjectBase):
229
286
  async def user_size(self, user_id: int) -> int:
230
287
  cursor = await self.cur.execute("SELECT size FROM usize WHERE user_id = ?", (user_id, ))
231
288
  res = await cursor.fetchone()
232
- if res is None:
233
- return -1
289
+ if res is None: return 0
234
290
  return res[0]
235
291
  async def _user_size_inc(self, user_id: int, inc: int):
236
292
  self.logger.debug(f"Increasing user {user_id} size by {inc}")
@@ -361,7 +417,8 @@ class FileConn(DBObjectBase):
361
417
  async def set_file_blob(self, file_id: str, blob: bytes):
362
418
  await self.cur.execute("INSERT OR REPLACE INTO blobs.fdata (file_id, data) VALUES (?, ?)", (file_id, blob))
363
419
 
364
- async def set_file_blob_external(self, file_id: str, stream: AsyncIterable[bytes])->int:
420
+ @staticmethod
421
+ async def set_file_blob_external(file_id: str, stream: AsyncIterable[bytes])->int:
365
422
  size_sum = 0
366
423
  try:
367
424
  async with aiofiles.open(LARGE_BLOB_DIR / file_id, 'wb') as f:
@@ -389,7 +446,8 @@ class FileConn(DBObjectBase):
389
446
  if not chunk: break
390
447
  yield chunk
391
448
 
392
- async def delete_file_blob_external(self, file_id: str):
449
+ @staticmethod
450
+ async def delete_file_blob_external(file_id: str):
393
451
  if (LARGE_BLOB_DIR / file_id).exists():
394
452
  await aiofiles.os.remove(LARGE_BLOB_DIR / file_id)
395
453
 
@@ -399,6 +457,36 @@ class FileConn(DBObjectBase):
399
457
  async def delete_file_blobs(self, file_ids: list[str]):
400
458
  await self.cur.execute("DELETE FROM blobs.fdata WHERE file_id IN ({})".format(','.join(['?'] * len(file_ids))), file_ids)
401
459
 
460
+ _log_active_queue = []
461
+ _log_active_lock = asyncio.Lock()
462
+ @debounce_async()
463
+ async def _set_all_active():
464
+ async with transaction() as conn:
465
+ uconn = UserConn(conn)
466
+ async with _log_active_lock:
467
+ for u in _log_active_queue:
468
+ await uconn.set_active(u)
469
+ _log_active_queue.clear()
470
+ async def delayed_log_activity(username: str):
471
+ async with _log_active_lock:
472
+ _log_active_queue.append(username)
473
+ await _set_all_active()
474
+
475
+ _log_access_queue = []
476
+ _log_access_lock = asyncio.Lock()
477
+ @debounce_async()
478
+ async def _log_all_access():
479
+ async with transaction() as conn:
480
+ fconn = FileConn(conn)
481
+ async with _log_access_lock:
482
+ for r in _log_access_queue:
483
+ await fconn.log_access(r)
484
+ _log_access_queue.clear()
485
+ async def delayed_log_access(url: str):
486
+ async with _log_access_lock:
487
+ _log_access_queue.append(url)
488
+ await _log_all_access()
489
+
402
490
  def validate_url(url: str, is_file = True):
403
491
  prohibited_chars = ['..', ';', "'", '"', '\\', '\0', '\n', '\r', '\t', '\x0b', '\x0c']
404
492
  ret = not url.startswith('/') and not url.startswith('_') and not url.startswith('.')
@@ -433,11 +521,6 @@ class Database:
433
521
  await execute_sql(conn, 'init.sql')
434
522
  return self
435
523
 
436
- async def record_user_activity(self, u: str):
437
- async with transaction() as conn:
438
- uconn = UserConn(conn)
439
- await uconn.set_active(u)
440
-
441
524
  async def update_file_record(self, user: UserRecord, url: str, permission: FileReadPermission):
442
525
  validate_url(url)
443
526
  async with transaction() as conn:
@@ -451,53 +534,66 @@ class Database:
451
534
 
452
535
  async def save_file(
453
536
  self, u: int | str, url: str,
454
- blob: bytes | AsyncIterable[bytes],
537
+ blob_stream: AsyncIterable[bytes],
455
538
  permission: FileReadPermission = FileReadPermission.UNSET,
456
- mime_type: str = 'application/octet-stream'
457
- ):
539
+ mime_type: Optional[str] = None
540
+ ) -> int:
458
541
  """
459
- if file_size is not provided, the blob must be bytes
542
+ Save a file to the database.
543
+ Will check file size and user storage limit,
544
+ should check permission before calling this method.
460
545
  """
461
546
  validate_url(url)
462
- async with transaction() as cur:
463
- uconn = UserConn(cur)
464
- fconn = FileConn(cur)
547
+ async with unique_cursor() as cur:
465
548
  user = await get_user(cur, u)
466
- if user is None:
467
- return
468
-
469
- # check if the user is the owner of the path, or is admin
470
- if url.startswith('/'):
471
- url = url[1:]
472
- first_component = url.split('/')[0]
473
- if first_component != user.username:
474
- if not user.is_admin:
475
- raise PermissionDeniedError(f"Permission denied: {user.username} cannot write to {url}")
476
- else:
477
- if await get_user(cur, first_component) is None:
478
- raise PermissionDeniedError(f"Invalid path: {first_component} is not a valid username")
549
+ assert user is not None, f"User {u} not found"
479
550
 
480
- user_size_used = await fconn.user_size(user.id)
481
- if isinstance(blob, bytes):
482
- file_size = len(blob)
483
- if user_size_used + file_size > user.max_storage:
484
- raise StorageExceededError(f"Unable to save file, user {user.username} has storage limit of {user.max_storage}, used {user_size_used}, requested {file_size}")
485
- f_id = uuid.uuid4().hex
486
- await fconn.set_file_blob(f_id, blob)
487
- await fconn.set_file_record(
488
- url, owner_id=user.id, file_id=f_id, file_size=file_size,
489
- permission=permission, external=False, mime_type=mime_type)
490
- else:
491
- assert isinstance(blob, AsyncIterable)
492
- f_id = uuid.uuid4().hex
493
- file_size = await fconn.set_file_blob_external(f_id, blob)
551
+ fconn_r = FileConn(cur)
552
+ user_size_used = await fconn_r.user_size(user.id)
553
+
554
+ f_id = uuid.uuid4().hex
555
+ async with aiofiles.tempfile.SpooledTemporaryFile(max_size=MAX_MEM_FILE_BYTES) as f:
556
+ async for chunk in blob_stream:
557
+ await f.write(chunk)
558
+ file_size = await f.tell()
494
559
  if user_size_used + file_size > user.max_storage:
495
- await fconn.delete_file_blob_external(f_id)
496
560
  raise StorageExceededError(f"Unable to save file, user {user.username} has storage limit of {user.max_storage}, used {user_size_used}, requested {file_size}")
497
- await fconn.set_file_record(
498
- url, owner_id=user.id, file_id=f_id, file_size=file_size,
499
- permission=permission, external=True, mime_type=mime_type)
500
- await uconn.set_active(user.username)
561
+
562
+ # check mime type
563
+ if mime_type is None:
564
+ fname = url.split('/')[-1]
565
+ mime_type, _ = mimetypes.guess_type(fname)
566
+ if mime_type is None:
567
+ await f.seek(0)
568
+ mime_type = mimesniff.what(await f.read(1024))
569
+ if mime_type is None:
570
+ mime_type = 'application/octet-stream'
571
+ await f.seek(0)
572
+
573
+ if file_size < LARGE_FILE_BYTES:
574
+ blob = await f.read()
575
+ async with transaction() as w_cur:
576
+ fconn_w = FileConn(w_cur)
577
+ await fconn_w.set_file_blob(f_id, blob)
578
+ await fconn_w.set_file_record(
579
+ url, owner_id=user.id, file_id=f_id, file_size=file_size,
580
+ permission=permission, external=False, mime_type=mime_type)
581
+
582
+ else:
583
+ async def blob_stream_tempfile():
584
+ nonlocal f
585
+ while True:
586
+ chunk = await f.read(CHUNK_SIZE)
587
+ if not chunk: break
588
+ yield chunk
589
+ await FileConn.set_file_blob_external(f_id, blob_stream_tempfile())
590
+ async with transaction() as w_cur:
591
+ await FileConn(w_cur).set_file_record(
592
+ url, owner_id=user.id, file_id=f_id, file_size=file_size,
593
+ permission=permission, external=True, mime_type=mime_type)
594
+
595
+ await delayed_log_activity(user.username)
596
+ return file_size
501
597
 
502
598
  async def read_file_stream(self, url: str) -> AsyncIterable[bytes]:
503
599
  validate_url(url)
@@ -510,9 +606,7 @@ class Database:
510
606
  raise ValueError(f"File {url} is not stored externally, should use read_file instead")
511
607
  ret = fconn.get_file_blob_external(r.file_id)
512
608
 
513
- async with transaction() as w_cur:
514
- await FileConn(w_cur).log_access(url)
515
-
609
+ await delayed_log_access(url)
516
610
  return ret
517
611
 
518
612
 
@@ -532,9 +626,7 @@ class Database:
532
626
  if blob is None:
533
627
  raise FileNotFoundError(f"File {url} data not found")
534
628
 
535
- async with transaction() as w_cur:
536
- await FileConn(w_cur).log_access(url)
537
-
629
+ await delayed_log_access(url)
538
630
  return blob
539
631
 
540
632
  async def delete_file(self, url: str, assure_user: Optional[UserRecord] = None) -> Optional[FileRecord]:
@@ -653,7 +745,8 @@ class Database:
653
745
  async with unique_cursor() as cur:
654
746
  fconn = FileConn(cur)
655
747
  if urls is None:
656
- urls = [r.url for r in (await fconn.list_path(top_url, flat=True)).files]
748
+ fcount = await fconn.count_path_files(top_url, flat=True)
749
+ urls = [r.url for r in (await fconn.list_path_files(top_url, flat=True, limit=fcount))]
657
750
 
658
751
  for url in urls:
659
752
  if not url.startswith(top_url):
lfss/src/datatype.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from enum import IntEnum
2
+ from typing import Literal
2
3
  import dataclasses
3
4
 
4
5
  class FileReadPermission(IntEnum):
@@ -51,6 +52,10 @@ class DirectoryRecord:
51
52
 
52
53
  @dataclasses.dataclass
53
54
  class PathContents:
54
- dirs: list[DirectoryRecord]
55
- files: list[FileRecord]
56
-
55
+ dirs: list[DirectoryRecord] = dataclasses.field(default_factory=list)
56
+ files: list[FileRecord] = dataclasses.field(default_factory=list)
57
+
58
+ FileSortKey = Literal['', 'url', 'file_size', 'create_time', 'access_time', 'mime_type']
59
+ isValidFileSortKey = lambda x: x in ['', 'url', 'file_size', 'create_time', 'access_time', 'mime_type']
60
+ DirSortKey = Literal['', 'dirname']
61
+ isValidDirSortKey = lambda x: x in ['', 'dirname']
lfss/src/error.py CHANGED
@@ -7,4 +7,6 @@ class PermissionDeniedError(LFSSExceptionBase, PermissionError):...
7
7
 
8
8
  class InvalidPathError(LFSSExceptionBase, ValueError):...
9
9
 
10
- class StorageExceededError(LFSSExceptionBase):...
10
+ class StorageExceededError(LFSSExceptionBase):...
11
+
12
+ class TooManyItemsError(LFSSExceptionBase):...