lfss 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Readme.md +4 -2
- frontend/scripts.js +1 -1
- lfss/cli/balance.py +23 -38
- lfss/cli/user.py +34 -32
- lfss/sql/init.sql +0 -6
- lfss/src/config.py +2 -2
- lfss/src/connection_pool.py +152 -0
- lfss/src/database.py +259 -279
- lfss/src/server.py +69 -75
- {lfss-0.7.0.dist-info → lfss-0.7.2.dist-info}/METADATA +5 -3
- {lfss-0.7.0.dist-info → lfss-0.7.2.dist-info}/RECORD +13 -12
- {lfss-0.7.0.dist-info → lfss-0.7.2.dist-info}/WHEEL +0 -0
- {lfss-0.7.0.dist-info → lfss-0.7.2.dist-info}/entry_points.txt +0 -0
Readme.md
CHANGED
@@ -1,7 +1,9 @@
 # Lightweight File Storage Service (LFSS)
 [](https://pypi.org/project/lfss/)
 
-
+My experiment on a lightweight file/object storage service.
+It stores small files and metadata in sqlite, large files in the filesystem, and serves them through a simple REST API.
+Tested on 2 million files, and it works fine... thanks to the sqlite database!
 
 Usage:
 ```sh
@@ -21,7 +23,7 @@ Or, you can start a web server at `/frontend` and open `index.html` in your browser
 
 The API usage is simple, just `GET`, `PUT`, `DELETE` to the `/<username>/file/url` path.
 Authentication is done via `Authorization` header with the value `Bearer <token>`, or through the `token` query parameter.
-You can refer to `frontend` as an application example, and `frontend/api.js` or `lfss
+You can refer to `frontend` as an application example, and `frontend/api.js` or `lfss/client/api.py` for the API usage.
 
 By default, the service exposes all files to the public for `GET` requests,
 but file-listing is restricted to the user's own files.
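To make the three-verb API above concrete, here is a minimal sketch with a generic HTTP client; the host, username, and token are hypothetical and do not come from this diff:

```python
import requests  # any HTTP client works; requests keeps the sketch short

BASE = "http://localhost:8000"          # hypothetical deployment
AUTH = {"Authorization": "Bearer xxx"}  # or append ?token=xxx to the URL

# PUT uploads a file under the user's namespace
requests.put(f"{BASE}/alice/notes/hello.txt", data=b"hello", headers=AUTH)

# GET needs no auth by default, since files are public for GET requests
print(requests.get(f"{BASE}/alice/notes/hello.txt").content)

# DELETE removes the file (authenticated)
requests.delete(f"{BASE}/alice/notes/hello.txt", headers=AUTH)
```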
frontend/scripts.js
CHANGED
@@ -132,7 +132,7 @@ uploadButton.addEventListener('click', () => {
     }
     path = path + fileName;
     showPopup('Uploading...', {level: 'info', timeout: 3000});
-    conn.put(path, file)
+    conn.put(path, file, {'conflict': 'overwrite'})
     .then(() => {
         refreshFileList();
         uploadFileNameInput.value = '';
lfss/cli/balance.py
CHANGED
@@ -2,14 +2,15 @@
 Balance the storage by ensuring that large file thresholds are met.
 """
 
-from lfss.src.config import
+from lfss.src.config import LARGE_BLOB_DIR, LARGE_FILE_BYTES
 import argparse, time
 from functools import wraps
 from asyncio import Semaphore
-import
+import aiofiles, asyncio
+from lfss.src.database import transaction, unique_cursor
+from lfss.src.connection_pool import global_entrance
 
 sem = Semaphore(1)
-db_file = DATA_HOME / 'lfss.db'
 
 def _get_sem():
     return sem
@@ -23,51 +24,35 @@ def barriered(func):
 
 @barriered
 async def move_to_external(f_id: str, flag: str = ''):
-    async with
-
-
-
-
-
+    async with transaction() as c:
+        cursor = await c.execute( "SELECT data FROM blobs.fdata WHERE file_id = ?", (f_id,))
+        blob_row = await cursor.fetchone()
+        if blob_row is None:
+            print(f"{flag}File {f_id} not found in blobs.fdata")
+            return
         await c.execute("BEGIN")
         blob: bytes = blob_row[0]
-
-
-
-
-
-        await c.commit()
-        print(f"{flag}Moved {f_id} to external storage")
-    except Exception as e:
-        await c.rollback()
-        print(f"{flag}Error moving {f_id}: {e}")
-
-        if isinstance(e, KeyboardInterrupt):
-            raise e
+        async with aiofiles.open(LARGE_BLOB_DIR / f_id, 'wb') as f:
+            await f.write(blob)
+        await c.execute( "UPDATE fmeta SET external = 1 WHERE file_id = ?", (f_id,))
+        await c.execute( "DELETE FROM blobs.fdata WHERE file_id = ?", (f_id,))
+        print(f"{flag}Moved {f_id} to external storage")
 
 @barriered
 async def move_to_internal(f_id: str, flag: str = ''):
-    async with
+    async with transaction() as c:
         if not (LARGE_BLOB_DIR / f_id).exists():
             print(f"{flag}File {f_id} not found in external storage")
             return
         async with aiofiles.open(LARGE_BLOB_DIR / f_id, 'rb') as f:
            blob = await f.read()
 
-        await c.execute("
-
-
-
-        await c.commit()
-        (LARGE_BLOB_DIR / f_id).unlink(missing_ok=True)
-        print(f"{flag}Moved {f_id} to internal storage")
-    except Exception as e:
-        await c.rollback()
-        print(f"{flag}Error moving {f_id}: {e}")
-        if isinstance(e, KeyboardInterrupt):
-            raise e
-
+        await c.execute("INSERT INTO blobs.fdata (file_id, data) VALUES (?, ?)", (f_id, blob))
+        await c.execute("UPDATE fmeta SET external = 0 WHERE file_id = ?", (f_id,))
+        (LARGE_BLOB_DIR / f_id).unlink(missing_ok=True)
+        print(f"{flag}Moved {f_id} to internal storage")
 
+@global_entrance()
 async def _main(batch_size: int = 10000):
 
     tasks = []
@@ -76,7 +61,7 @@ async def _main(batch_size: int = 10000):
     e_cout = 0
     batch_count = 0
     while True:
-        async with
+        async with unique_cursor() as conn:
             exceeded_rows = list(await (await conn.execute(
                 "SELECT file_id FROM fmeta WHERE file_size > ? AND external = 0 LIMIT ? OFFSET ?",
                 (LARGE_FILE_BYTES, batch_size, batch_size * batch_count)
@@ -93,7 +78,7 @@ async def _main(batch_size: int = 10000):
     i_count = 0
     batch_count = 0
     while True:
-        async with
+        async with unique_cursor() as conn:
             under_rows = list(await (await conn.execute(
                 "SELECT file_id, file_size, external FROM fmeta WHERE file_size <= ? AND external = 1 LIMIT ? OFFSET ?",
                 (LARGE_FILE_BYTES, batch_size, batch_size * batch_count)
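Both movers are decorated with `@barriered`, whose definition falls outside the hunks shown. Judging from the `sem = Semaphore(1)` and `_get_sem()` lines it survives around, it plausibly serializes the wrapped coroutines behind the module semaphore; a sketch of that reading, not the package's actual code:

```python
from asyncio import Semaphore
from functools import wraps

sem = Semaphore(1)

def _get_sem():
    return sem

def barriered(func):
    # Serialize each wrapped coroutine behind the module semaphore,
    # so concurrent mover tasks never overlap on the write connection.
    @wraps(func)
    async def wrapper(*args, **kwargs):
        async with _get_sem():
            return await func(*args, **kwargs)
    return wrapper
```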
lfss/cli/user.py
CHANGED
@@ -1,5 +1,7 @@
 import argparse, asyncio
-from
+from contextlib import asynccontextmanager
+from ..src.database import Database, FileReadPermission, transaction, UserConn
+from ..src.connection_pool import global_entrance
 
 def parse_storage_size(s: str) -> int:
     if s[-1] in 'Kk':
@@ -12,6 +14,7 @@ def parse_storage_size(s: str) -> int:
         return int(s[:-1]) * 1024 * 1024 * 1024 * 1024
     return int(s)
 
+@global_entrance(1)
 async def _main():
     parser = argparse.ArgumentParser()
     sp = parser.add_subparsers(dest='subparser_name', required=True)
@@ -42,49 +45,48 @@ async def _main():
     sp_list.add_argument("-l", "--long", action="store_true")
 
     args = parser.parse_args()
-
+
+    @asynccontextmanager
+    async def get_uconn():
+        async with transaction() as conn:
+            yield UserConn(conn)
 
-
-
-    await
-    user = await
+    if args.subparser_name == 'add':
+        async with get_uconn() as uconn:
+            await uconn.create_user(args.username, args.password, args.admin, max_storage=args.max_storage, permission=args.permission)
+            user = await uconn.get_user(args.username)
            assert user is not None
            print('User created, credential:', user.credential)
-
-
-
-
-
-
-
-
-
-
-
-
+
+    if args.subparser_name == 'delete':
+        async with get_uconn() as uconn:
+            user = await uconn.get_user(args.username)
+            if user is None:
+                print('User not found')
+                exit(1)
+            else:
+                db = await Database().init()
+                await db.delete_user(user.id)
+                print('User deleted')
+
+    if args.subparser_name == 'set':
+        async with get_uconn() as uconn:
+            user = await uconn.get_user(args.username)
             if user is None:
                 print('User not found')
                 exit(1)
-            await
-            user = await
+            await uconn.update_user(user.username, args.password, args.admin, max_storage=args.max_storage, permission=args.permission)
+            user = await uconn.get_user(args.username)
             assert user is not None
             print('User updated, credential:', user.credential)
-
-
-
+
+    if args.subparser_name == 'list':
+        async with get_uconn() as uconn:
+            async for user in uconn.all():
                 print(user)
                 if args.long:
                     print(' ', user.credential)
 
-        await conn.commit()
-
-    except Exception as e:
-        conn.logger.error(f'Error: {e}')
-        await conn.rollback()
-
-    finally:
-        await conn.close()
-
 def main():
     asyncio.run(_main())
 
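As a usage note for the size parser in the first hunks: it accepts a plain byte count or a K/M/G/T suffix (only the `Kk` and terabyte branches are visible above; the M and G branches presumably follow the same pattern). A few illustrative values under that assumption:

```python
from lfss.cli.user import parse_storage_size

assert parse_storage_size("512") == 512           # plain bytes
assert parse_storage_size("4k") == 4 * 1024       # kilobytes
assert parse_storage_size("1T") == 1024 ** 4      # terabytes, per the hunk shown
assert parse_storage_size("2g") == 2 * 1024 ** 3  # assumed G branch
```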
lfss/sql/init.sql
CHANGED
lfss/src/config.py
CHANGED
@@ -13,5 +13,5 @@ LARGE_BLOB_DIR.mkdir(exist_ok=True)
 
 # https://sqlite.org/fasterthanfs.html
 LARGE_FILE_BYTES = 8 * 1024 * 1024 # 8MB
-MAX_FILE_BYTES =
-MAX_BUNDLE_BYTES =
+MAX_FILE_BYTES = 512 * 1024 * 1024 # 512MB
+MAX_BUNDLE_BYTES = 512 * 1024 * 1024 # 512MB
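These constants form a simple size ladder, enforced elsewhere in the diff: `cli/balance.py` keeps blobs up to `LARGE_FILE_BYTES` inside the sqlite `blobs.fdata` table and moves larger ones to files under `LARGE_BLOB_DIR`, while `MAX_FILE_BYTES` presumably caps a single upload. A sketch of the routing rule implied by the balance script; the helper name is illustrative, not from the package:

```python
LARGE_FILE_BYTES = 8 * 1024 * 1024   # internal/external threshold (8MB)
MAX_FILE_BYTES = 512 * 1024 * 1024   # per-file cap (512MB), per the hunk above

def storage_target(file_size: int) -> str:
    # Mirrors the fmeta.external flag toggled by cli/balance.py
    if file_size > MAX_FILE_BYTES:
        raise ValueError("exceeds MAX_FILE_BYTES")
    return "filesystem" if file_size > LARGE_FILE_BYTES else "sqlite"
```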
lfss/src/connection_pool.py
ADDED
@@ -0,0 +1,152 @@
+import os
+from pathlib import Path
+import aiosqlite, aiofiles
+from contextlib import asynccontextmanager
+from dataclasses import dataclass
+from asyncio import Semaphore, Lock
+from functools import wraps
+
+from .log import get_logger
+from .config import DATA_HOME
+
+async def execute_sql(conn: aiosqlite.Connection | aiosqlite.Cursor, name: str):
+    this_dir = Path(__file__).parent
+    sql_dir = this_dir.parent / 'sql'
+    async with aiofiles.open(sql_dir / name, 'r') as f:
+        sql = await f.read()
+    sql = sql.split(';')
+    for s in sql:
+        await conn.execute(s)
+
+async def get_connection() -> aiosqlite.Connection:
+    if not os.environ.get('SQLITE_TEMPDIR'):
+        os.environ['SQLITE_TEMPDIR'] = str(DATA_HOME)
+    # large blobs are stored in a separate database, should be more efficient
+    conn = await aiosqlite.connect(DATA_HOME / 'index.db', timeout = 60)
+    async with conn.cursor() as c:
+        await c.execute(f"ATTACH DATABASE ? AS blobs", (str(DATA_HOME/'blobs.db'), ))
+    await execute_sql(conn, 'pragma.sql')
+    return conn
+
+
+@dataclass
+class SqlConnection:
+    conn: aiosqlite.Connection
+    is_available: bool = True
+
+class SqlConnectionPool:
+    _sem: Semaphore
+    _w_sem: Semaphore
+    def __init__(self):
+        self._connections: list[SqlConnection] = []
+        self._w_connection: None | SqlConnection = None
+        self._lock = Lock()
+
+    async def init(self, n_read: int):
+        await self.close()
+        self._connections = []
+        for _ in range(n_read):
+            conn = await get_connection()
+            self._connections.append(SqlConnection(conn))
+        self._w_connection = SqlConnection(await get_connection())
+        self._sem = Semaphore(n_read)
+        self._w_sem = Semaphore(1)
+
+    @property
+    def n_read(self):
+        return len(self._connections)
+    @property
+    def sem(self):
+        return self._sem
+    @property
+    def w_sem(self):
+        return self._w_sem
+
+    async def get(self, w: bool = False) -> SqlConnection:
+        if len(self._connections) == 0:
+            raise Exception("No available connections, please init the pool first")
+
+        async with self._lock:
+            if w:
+                assert self._w_connection
+                if self._w_connection.is_available:
+                    self._w_connection.is_available = False
+                    return self._w_connection
+                raise Exception("Write connection is not available")
+
+            else:
+                for c in self._connections:
+                    if c.is_available:
+                        c.is_available = False
+                        return c
+                raise Exception("No available connections, impossible?")
+
+    async def release(self, conn: SqlConnection):
+        async with self._lock:
+            if conn == self._w_connection:
+                conn.is_available = True
+                return
+
+            if not conn in self._connections:
+                raise Exception("Connection not in pool")
+            conn.is_available = True
+
+    async def close(self):
+        for c in self._connections:
+            await c.conn.close()
+        if self._w_connection:
+            await self._w_connection.conn.close()
+
+# these two functions shold be called before and after the event loop
+g_pool = SqlConnectionPool()
+async def global_connection_init(n_read: int = 1):
+    await g_pool.init(n_read)
+
+async def global_connection_close():
+    await g_pool.close()
+
+@asynccontextmanager
+async def global_connection(n_read: int = 1):
+    await global_connection_init(n_read)
+    try:
+        yield g_pool
+    finally:
+        await global_connection_close()
+
+def global_entrance(n_read: int = 1):
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            async with global_connection(n_read):
+                return await func(*args, **kwargs)
+        return wrapper
+    return decorator
+
+@asynccontextmanager
+async def unique_cursor(is_write: bool = False):
+    if not is_write:
+        async with g_pool.sem:
+            connection_obj = await g_pool.get()
+            try:
+                yield await connection_obj.conn.cursor()
+            finally:
+                await g_pool.release(connection_obj)
+    else:
+        async with g_pool.w_sem:
+            connection_obj = await g_pool.get(w=True)
+            try:
+                yield await connection_obj.conn.cursor()
+            finally:
+                await g_pool.release(connection_obj)
+
+@asynccontextmanager
+async def transaction():
+    async with unique_cursor(is_write=True) as cur:
+        try:
+            await cur.execute('BEGIN')
+            yield cur
+            await cur.execute('COMMIT')
+        except Exception as e:
+            get_logger('database', global_instance=True).error(f"Error in transaction: {e}, rollback.")
+            await cur.execute('ROLLBACK')
+            raise e