lfss 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Readme.md CHANGED
@@ -1,7 +1,9 @@
  # Lightweight File Storage Service (LFSS)
  [![PyPI](https://img.shields.io/pypi/v/lfss)](https://pypi.org/project/lfss/)
 
- A lightweight file/object storage service!
+ My experiment on a lightweight file/object storage service.
+ It stores small files and metadata in sqlite, large files in the filesystem, and serves them through a simple REST API.
+ Tested on 2 million files, and it works fine... thanks to the sqlite database!
 
  Usage:
  ```sh
@@ -21,7 +23,7 @@ Or, you can start a web server at `/frontend` and open `index.html` in your brow
 
  The API usage is simple, just `GET`, `PUT`, `DELETE` to the `/<username>/file/url` path.
  Authentication is done via `Authorization` header with the value `Bearer <token>`, or through the `token` query parameter.
- You can refer to `frontend` as an application example, and `frontend/api.js` or `lfss.client.api.py` for the API usage.
+ You can refer to `frontend` as an application example, and `frontend/api.js` or `lfss/client/api.py` for the API usage.
 
  By default, the service exposes all files to the public for `GET` requests,
  but file-listing is restricted to the user's own files.
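For orientation, the `GET`/`PUT`/`DELETE` flow the readme describes might look like the sketch below from Python; the base URL, username, path, and token are illustrative assumptions, not values shipped with the package:

```python
# Hedged sketch of the REST flow from the readme.
# BASE, the username "alice", the file path, and the token are made up for illustration.
import requests

BASE = "http://localhost:8000"
HEADERS = {"Authorization": "Bearer <token>"}  # or pass ?token=<token> as a query parameter

# PUT uploads a file under the user's own prefix
requests.put(f"{BASE}/alice/docs/note.txt", data=b"hello", headers=HEADERS).raise_for_status()

# GET is public by default per the readme, so no auth header is needed here
print(requests.get(f"{BASE}/alice/docs/note.txt").text)

# DELETE removes the file again
requests.delete(f"{BASE}/alice/docs/note.txt", headers=HEADERS).raise_for_status()
```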
frontend/scripts.js CHANGED
@@ -132,7 +132,7 @@ uploadButton.addEventListener('click', () => {
      }
      path = path + fileName;
      showPopup('Uploading...', {level: 'info', timeout: 3000});
-     conn.put(path, file)
+     conn.put(path, file, {'conflict': 'overwrite'})
          .then(() => {
              refreshFileList();
              uploadFileNameInput.value = '';
lfss/cli/balance.py CHANGED
@@ -2,14 +2,15 @@
  Balance the storage by ensuring that large file thresholds are met.
  """
 
- from lfss.src.config import DATA_HOME, LARGE_BLOB_DIR, LARGE_FILE_BYTES
+ from lfss.src.config import LARGE_BLOB_DIR, LARGE_FILE_BYTES
  import argparse, time
  from functools import wraps
  from asyncio import Semaphore
- import aiosqlite, aiofiles, asyncio
+ import aiofiles, asyncio
+ from lfss.src.database import transaction, unique_cursor
+ from lfss.src.connection_pool import global_entrance
 
  sem = Semaphore(1)
- db_file = DATA_HOME / 'lfss.db'
 
  def _get_sem():
      return sem
@@ -23,51 +24,35 @@ def barriered(func):
 
  @barriered
  async def move_to_external(f_id: str, flag: str = ''):
-     async with aiosqlite.connect(db_file, timeout = 60) as c:
-         async with c.execute( "SELECT data FROM blobs.fdata WHERE file_id = ?", (f_id,)) as cursor:
-             blob_row = await cursor.fetchone()
-             if blob_row is None:
-                 print(f"{flag}File {f_id} not found in blobs.fdata")
-                 return
+     async with transaction() as c:
+         cursor = await c.execute( "SELECT data FROM blobs.fdata WHERE file_id = ?", (f_id,))
+         blob_row = await cursor.fetchone()
+         if blob_row is None:
+             print(f"{flag}File {f_id} not found in blobs.fdata")
+             return
 
          await c.execute("BEGIN")
          blob: bytes = blob_row[0]
-         try:
-             async with aiofiles.open(LARGE_BLOB_DIR / f_id, 'wb') as f:
-                 await f.write(blob)
-             await c.execute( "UPDATE fmeta SET external = 1 WHERE file_id = ?", (f_id,))
-             await c.execute( "DELETE FROM blobs.fdata WHERE file_id = ?", (f_id,))
-             await c.commit()
-             print(f"{flag}Moved {f_id} to external storage")
-         except Exception as e:
-             await c.rollback()
-             print(f"{flag}Error moving {f_id}: {e}")
- 
-             if isinstance(e, KeyboardInterrupt):
-                 raise e
+         async with aiofiles.open(LARGE_BLOB_DIR / f_id, 'wb') as f:
+             await f.write(blob)
+         await c.execute( "UPDATE fmeta SET external = 1 WHERE file_id = ?", (f_id,))
+         await c.execute( "DELETE FROM blobs.fdata WHERE file_id = ?", (f_id,))
+         print(f"{flag}Moved {f_id} to external storage")
 
  @barriered
  async def move_to_internal(f_id: str, flag: str = ''):
-     async with aiosqlite.connect(db_file, timeout = 60) as c:
+     async with transaction() as c:
          if not (LARGE_BLOB_DIR / f_id).exists():
              print(f"{flag}File {f_id} not found in external storage")
              return
          async with aiofiles.open(LARGE_BLOB_DIR / f_id, 'rb') as f:
              blob = await f.read()
 
-         await c.execute("BEGIN")
-         try:
-             await c.execute("INSERT INTO blobs.fdata (file_id, data) VALUES (?, ?)", (f_id, blob))
-             await c.execute("UPDATE fmeta SET external = 0 WHERE file_id = ?", (f_id,))
-             await c.commit()
-             (LARGE_BLOB_DIR / f_id).unlink(missing_ok=True)
-             print(f"{flag}Moved {f_id} to internal storage")
-         except Exception as e:
-             await c.rollback()
-             print(f"{flag}Error moving {f_id}: {e}")
-             if isinstance(e, KeyboardInterrupt):
-                 raise e
- 
+         await c.execute("INSERT INTO blobs.fdata (file_id, data) VALUES (?, ?)", (f_id, blob))
+         await c.execute("UPDATE fmeta SET external = 0 WHERE file_id = ?", (f_id,))
+         (LARGE_BLOB_DIR / f_id).unlink(missing_ok=True)
+         print(f"{flag}Moved {f_id} to internal storage")
 
+ @global_entrance()
  async def _main(batch_size: int = 10000):
 
      tasks = []
@@ -76,7 +61,7 @@ async def _main(batch_size: int = 10000):
      e_cout = 0
      batch_count = 0
      while True:
-         async with aiosqlite.connect(db_file) as conn:
+         async with unique_cursor() as conn:
              exceeded_rows = list(await (await conn.execute(
                  "SELECT file_id FROM fmeta WHERE file_size > ? AND external = 0 LIMIT ? OFFSET ?",
                  (LARGE_FILE_BYTES, batch_size, batch_size * batch_count)
@@ -93,7 +78,7 @@ async def _main(batch_size: int = 10000):
      i_count = 0
      batch_count = 0
      while True:
-         async with aiosqlite.connect(db_file) as conn:
+         async with unique_cursor() as conn:
              under_rows = list(await (await conn.execute(
                  "SELECT file_id, file_size, external FROM fmeta WHERE file_size <= ? AND external = 1 LIMIT ? OFFSET ?",
                  (LARGE_FILE_BYTES, batch_size, batch_size * batch_count)
lfss/cli/user.py CHANGED
@@ -1,5 +1,7 @@
  import argparse, asyncio
- from ..src.database import Database, FileReadPermission
+ from contextlib import asynccontextmanager
+ from ..src.database import Database, FileReadPermission, transaction, UserConn
+ from ..src.connection_pool import global_entrance
 
  def parse_storage_size(s: str) -> int:
      if s[-1] in 'Kk':
@@ -12,6 +14,7 @@ def parse_storage_size(s: str) -> int:
          return int(s[:-1]) * 1024 * 1024 * 1024 * 1024
      return int(s)
 
+ @global_entrance(1)
  async def _main():
      parser = argparse.ArgumentParser()
      sp = parser.add_subparsers(dest='subparser_name', required=True)
@@ -42,49 +45,48 @@ async def _main():
      sp_list.add_argument("-l", "--long", action="store_true")
 
      args = parser.parse_args()
-     conn = await Database().init()
+ 
+     @asynccontextmanager
+     async def get_uconn():
+         async with transaction() as conn:
+             yield UserConn(conn)
 
-     try:
-         if args.subparser_name == 'add':
-             await conn.user.create_user(args.username, args.password, args.admin, max_storage=args.max_storage, permission=args.permission)
-             user = await conn.user.get_user(args.username)
+     if args.subparser_name == 'add':
+         async with get_uconn() as uconn:
+             await uconn.create_user(args.username, args.password, args.admin, max_storage=args.max_storage, permission=args.permission)
+             user = await uconn.get_user(args.username)
              assert user is not None
              print('User created, credential:', user.credential)
- 
-         if args.subparser_name == 'delete':
-             user = await conn.user.get_user(args.username)
-             if user is None:
-                 print('User not found')
-                 exit(1)
-             else:
-                 await conn.delete_user(user.id)
-                 print('User deleted')
- 
-         if args.subparser_name == 'set':
-             user = await conn.user.get_user(args.username)
+ 
+     if args.subparser_name == 'delete':
+         async with get_uconn() as uconn:
+             user = await uconn.get_user(args.username)
+             if user is None:
+                 print('User not found')
+                 exit(1)
+             else:
+                 db = await Database().init()
+                 await db.delete_user(user.id)
+                 print('User deleted')
+ 
+     if args.subparser_name == 'set':
+         async with get_uconn() as uconn:
+             user = await uconn.get_user(args.username)
              if user is None:
                  print('User not found')
                  exit(1)
-             await conn.user.update_user(user.username, args.password, args.admin, max_storage=args.max_storage, permission=args.permission)
-             user = await conn.user.get_user(args.username)
+             await uconn.update_user(user.username, args.password, args.admin, max_storage=args.max_storage, permission=args.permission)
+             user = await uconn.get_user(args.username)
              assert user is not None
              print('User updated, credential:', user.credential)
- 
-         if args.subparser_name == 'list':
-             async for user in conn.user.all():
+ 
+     if args.subparser_name == 'list':
+         async with get_uconn() as uconn:
+             async for user in uconn.all():
                  print(user)
                  if args.long:
                      print(' ', user.credential)
 
-         await conn.commit()
- 
-     except Exception as e:
-         conn.logger.error(f'Error: {e}')
-         await conn.rollback()
- 
-     finally:
-         await conn.close()
- 
  def main():
      asyncio.run(_main())
 
lfss/sql/init.sql CHANGED
@@ -1,9 +1,3 @@
- PRAGMA journal_mode=MEMROY;
- PRAGMA temp_store=MEMORY;
- PRAGMA page_size=4096;
- PRAGMA synchronous=NORMAL;
- PRAGMA case_sensitive_like=ON;
- 
  CREATE TABLE IF NOT EXISTS user (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      username VARCHAR(256) UNIQUE NOT NULL,
lfss/src/config.py CHANGED
@@ -13,5 +13,5 @@ LARGE_BLOB_DIR.mkdir(exist_ok=True)
 
  # https://sqlite.org/fasterthanfs.html
  LARGE_FILE_BYTES = 8 * 1024 * 1024 # 8MB
- MAX_FILE_BYTES = 1024 * 1024 * 1024 # 1GB
- MAX_BUNDLE_BYTES = 1024 * 1024 * 1024 # 1GB
+ MAX_FILE_BYTES = 512 * 1024 * 1024 # 512MB
+ MAX_BUNDLE_BYTES = 512 * 1024 * 1024 # 512MB
lfss/src/connection_pool.py ADDED
@@ -0,0 +1,152 @@
+ import os
+ from pathlib import Path
+ import aiosqlite, aiofiles
+ from contextlib import asynccontextmanager
+ from dataclasses import dataclass
+ from asyncio import Semaphore, Lock
+ from functools import wraps
+ 
+ from .log import get_logger
+ from .config import DATA_HOME
+ 
+ async def execute_sql(conn: aiosqlite.Connection | aiosqlite.Cursor, name: str):
+     this_dir = Path(__file__).parent
+     sql_dir = this_dir.parent / 'sql'
+     async with aiofiles.open(sql_dir / name, 'r') as f:
+         sql = await f.read()
+     sql = sql.split(';')
+     for s in sql:
+         await conn.execute(s)
+ 
+ async def get_connection() -> aiosqlite.Connection:
+     if not os.environ.get('SQLITE_TEMPDIR'):
+         os.environ['SQLITE_TEMPDIR'] = str(DATA_HOME)
+     # large blobs are stored in a separate database, should be more efficient
+     conn = await aiosqlite.connect(DATA_HOME / 'index.db', timeout = 60)
+     async with conn.cursor() as c:
+         await c.execute(f"ATTACH DATABASE ? AS blobs", (str(DATA_HOME/'blobs.db'), ))
+     await execute_sql(conn, 'pragma.sql')
+     return conn
+ 
+ 
+ @dataclass
+ class SqlConnection:
+     conn: aiosqlite.Connection
+     is_available: bool = True
+ 
+ class SqlConnectionPool:
+     _sem: Semaphore
+     _w_sem: Semaphore
+     def __init__(self):
+         self._connections: list[SqlConnection] = []
+         self._w_connection: None | SqlConnection = None
+         self._lock = Lock()
+ 
+     async def init(self, n_read: int):
+         await self.close()
+         self._connections = []
+         for _ in range(n_read):
+             conn = await get_connection()
+             self._connections.append(SqlConnection(conn))
+         self._w_connection = SqlConnection(await get_connection())
+         self._sem = Semaphore(n_read)
+         self._w_sem = Semaphore(1)
+ 
+     @property
+     def n_read(self):
+         return len(self._connections)
+     @property
+     def sem(self):
+         return self._sem
+     @property
+     def w_sem(self):
+         return self._w_sem
+ 
+     async def get(self, w: bool = False) -> SqlConnection:
+         if len(self._connections) == 0:
+             raise Exception("No available connections, please init the pool first")
+ 
+         async with self._lock:
+             if w:
+                 assert self._w_connection
+                 if self._w_connection.is_available:
+                     self._w_connection.is_available = False
+                     return self._w_connection
+                 raise Exception("Write connection is not available")
+ 
+             else:
+                 for c in self._connections:
+                     if c.is_available:
+                         c.is_available = False
+                         return c
+                 raise Exception("No available connections, impossible?")
+ 
+     async def release(self, conn: SqlConnection):
+         async with self._lock:
+             if conn == self._w_connection:
+                 conn.is_available = True
+                 return
+ 
+             if not conn in self._connections:
+                 raise Exception("Connection not in pool")
+             conn.is_available = True
+ 
+     async def close(self):
+         for c in self._connections:
+             await c.conn.close()
+         if self._w_connection:
+             await self._w_connection.conn.close()
+ 
+ # these two functions should be called before and after the event loop
+ g_pool = SqlConnectionPool()
+ async def global_connection_init(n_read: int = 1):
+     await g_pool.init(n_read)
+ 
+ async def global_connection_close():
+     await g_pool.close()
+ 
+ @asynccontextmanager
+ async def global_connection(n_read: int = 1):
+     await global_connection_init(n_read)
+     try:
+         yield g_pool
+     finally:
+         await global_connection_close()
+ 
+ def global_entrance(n_read: int = 1):
+     def decorator(func):
+         @wraps(func)
+         async def wrapper(*args, **kwargs):
+             async with global_connection(n_read):
+                 return await func(*args, **kwargs)
+         return wrapper
+     return decorator
+ 
+ @asynccontextmanager
+ async def unique_cursor(is_write: bool = False):
+     if not is_write:
+         async with g_pool.sem:
+             connection_obj = await g_pool.get()
+             try:
+                 yield await connection_obj.conn.cursor()
+             finally:
+                 await g_pool.release(connection_obj)
+     else:
+         async with g_pool.w_sem:
+             connection_obj = await g_pool.get(w=True)
+             try:
+                 yield await connection_obj.conn.cursor()
+             finally:
+                 await g_pool.release(connection_obj)
+ 
+ @asynccontextmanager
+ async def transaction():
+     async with unique_cursor(is_write=True) as cur:
+         try:
+             await cur.execute('BEGIN')
+             yield cur
+             await cur.execute('COMMIT')
+         except Exception as e:
+             get_logger('database', global_instance=True).error(f"Error in transaction: {e}, rollback.")
+             await cur.execute('ROLLBACK')
+             raise e
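The new module routes reads through a semaphore-guarded pool and funnels all writes through a single connection wrapped in BEGIN/COMMIT/ROLLBACK. A minimal sketch of how the pieces compose, mirroring the imports used by `lfss/cli/balance.py`; the `file_id` value is made up for illustration, and this only runs inside an lfss install where the `fmeta` table exists:

```python
# Hedged sketch of the pooled read/write pattern introduced in 0.7.2.
import asyncio
from lfss.src.database import transaction, unique_cursor
from lfss.src.connection_pool import global_entrance

@global_entrance(2)  # opens 2 read connections plus the single write connection
async def demo():
    # reads borrow a cursor from the semaphore-guarded read pool
    async with unique_cursor() as cur:
        row = await (await cur.execute("SELECT COUNT(*) FROM fmeta")).fetchone()
        print("files:", row[0])

    # writes take the write connection; transaction() handles BEGIN/COMMIT/ROLLBACK
    async with transaction() as cur:
        await cur.execute("UPDATE fmeta SET external = 0 WHERE file_id = ?", ("some-id",))

asyncio.run(demo())
```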