lfss 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lfss-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,42 @@
1
+ Metadata-Version: 2.1
2
+ Name: lfss
3
+ Version: 0.1.0
4
+ Summary: Lightweight file storage service
5
+ Home-page: https://github.com/MenxLi/lfss
6
+ Author: li, mengxun
7
+ Author-email: limengxun45@outlook.com
8
+ Requires-Python: >=3.9
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: aiosqlite (==0.*)
15
+ Requires-Dist: fastapi (==0.*)
16
+ Requires-Dist: mimesniff (==1.*)
17
+ Project-URL: Repository, https://github.com/MenxLi/lfss
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Lightweight File Storage Service (LFSS)
21
+
22
+ A lightweight file/object storage service!
23
+
24
+ Usage:
25
+ ```sh
26
+ pip install .
27
+ lfss-user add <username> <password>
28
+ lfss-serve
29
+ ```
30
+
31
+ By default, the data will be stored in the `.storage_data` directory, in a sqlite database.
32
+ The data storage can be set via environment variable `LFSS_DATA`.
33
+
34
+ I provide a simple client to interact with the service.
35
+ Just start a web server at `/frontend` and open `index.html` in your browser.
36
+
37
+ Currently, there is no file access-control, anyone can access any file with `GET` request.
38
+ However, the path-listing is only available to the authenticated user (to their own files, under `<username>/`).
39
+
40
+ The API usage is simple, just `GET`, `PUT`, `DELETE` to the `/<username>/file/url` path.
41
+ Authentication is done via `Authorization` header, with the value `Bearer <token>`.
42
+ Please refer to `frontend` as an application example, and `frontend/api.js` for the API usage.
lfss-0.1.0/Readme.md ADDED
@@ -0,0 +1,23 @@
1
+ # Lightweight File Storage Service (LFSS)
2
+
3
+ A lightweight file/object storage service!
4
+
5
+ Usage:
6
+ ```sh
7
+ pip install .
8
+ lfss-user add <username> <password>
9
+ lfss-serve
10
+ ```
11
+
12
+ By default, the data will be stored in the `.storage_data` directory, in a sqlite database.
13
+ The data storage can be set via environment variable `LFSS_DATA`.
14
+
15
+ I provide a simple client to interact with the service.
16
+ Just start a web server at `/frontend` and open `index.html` in your browser.
17
+
18
+ Currently, there is no file access-control, anyone can access any file with `GET` request.
19
+ However, the path-listing is only available to the authenticated user (to their own files, under `<username>/`).
20
+
21
+ The API usage is simple, just `GET`, `PUT`, `DELETE` to the `/<username>/file/url` path.
22
+ Authentication is done via `Authorization` header, with the value `Bearer <token>`.
23
+ Please refer to `frontend` as an application example, and `frontend/api.js` for the API usage.
@@ -0,0 +1,31 @@
1
+ import argparse
2
+ from uvicorn import Config, Server
3
+ from uvicorn.config import LOGGING_CONFIG
4
+ from ..src.server import *
5
+
6
def main():
    """CLI entry point for `lfss-serve`: parse flags and run uvicorn."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', type=str, default='0.0.0.0')
    parser.add_argument('--port', type=int, default=8000)
    parser.add_argument('--workers', type=int, default=None)
    parser.add_argument('--enable-uvicorn-log', action='store_true')
    args = parser.parse_args()

    default_logging_config = LOGGING_CONFIG.copy()
    if not args.enable_uvicorn_log:
        # silence uvicorn's own logger unless explicitly requested
        default_logging_config["loggers"]["uvicorn"]["handlers"] = []

    if args.workers is not None and args.workers > 1:
        # fix: Server(config).run() always runs a single process; the
        # multi-worker supervisor is only engaged by uvicorn.run()/the
        # uvicorn CLI with an app import string. Warn instead of silently
        # ignoring the flag.
        logger.warning(f"--workers={args.workers} is ignored by Server.run(); running a single worker")

    config = Config(
        app=app,
        host=args.host,
        port=args.port,
        access_log=False,
        workers=args.workers,
        log_config=default_logging_config,
    )
    server = Server(config=config)
    logger.info(f"Starting server at {args.host}:{args.port}, with {args.workers} workers")
    server.run()

if __name__ == "__main__":
    main()
@@ -0,0 +1,77 @@
1
+ import argparse, asyncio
2
+ from ..src.database import Database
3
+
4
async def _main():
    """Async body of the `lfss-user` CLI: add / delete / set / list users."""
    parser = argparse.ArgumentParser()
    sp = parser.add_subparsers(dest='subparser_name', required=True)
    sp_add = sp.add_parser('add')
    sp_add.add_argument('username', type=str)
    sp_add.add_argument('password', type=str)
    sp_add.add_argument('--admin', action='store_true')

    sp_delete = sp.add_parser('delete')
    sp_delete.add_argument('username', type=str)

    def parse_bool(s):
        # argparse `type=` converter for explicit true/false flag values
        if s.lower() == 'true':
            return True
        if s.lower() == 'false':
            return False
        raise ValueError('Not a boolean')
    sp_set = sp.add_parser('set')
    sp_set.add_argument('username', type=str)
    sp_set.add_argument('-p', '--password', type=str, default=None)
    sp_set.add_argument('-a', '--admin', type=parse_bool, default=None)

    sp_list = sp.add_parser('list')
    sp_list.add_argument("-l", "--long", action="store_true")

    args = parser.parse_args()
    conn = await Database().init()

    try:
        if args.subparser_name == 'add':
            await conn.user.create_user(args.username, args.password, args.admin)
            user = await conn.user.get_user(args.username)
            assert user is not None
            print('User created, credential:', user.credential)

        if args.subparser_name == 'delete':
            user = await conn.user.get_user(args.username)
            if user is None:
                print('User not found')
                exit(1)
            else:
                await conn.delete_user(user.id)
                print('User deleted')

        if args.subparser_name == 'set':
            user = await conn.user.get_user(args.username)
            if user is None:
                print('User not found')
                exit(1)
            await conn.user.set_user(user.username, args.password, args.admin)
            user = await conn.user.get_user(args.username)
            assert user is not None
            print('User updated, credential:', user.credential)

        if args.subparser_name == 'list':
            async for user in conn.user.all():
                print(user)
                if args.long:
                    print(' ', user.credential)

        await conn.commit()

    except Exception as e:
        conn.logger.error(f'Error: {e}')
        await conn.rollback()
        # fix: previously fell through and exited 0 even when the command
        # failed; report failure to the shell (SystemExit skips the handler
        # above and still runs the finally block)
        exit(1)

    finally:
        await conn.close()
72
+
73
def main():
    """Synchronous entry point wrapping the async CLI body."""
    asyncio.run(_main())


if __name__ == '__main__':
    main()
File without changes
@@ -0,0 +1,11 @@
1
from pathlib import Path
import os

# default data location (relative to CWD) when LFSS_DATA is not set
__default_dir = '.storage_data'

DATA_HOME = Path(os.environ.get('LFSS_DATA', __default_dir))
if not DATA_HOME.exists():
    # fix: plain mkdir() fails when LFSS_DATA points at a nested path whose
    # parents are missing, and races when two processes start concurrently;
    # parents/exist_ok make startup robust to both
    DATA_HOME.mkdir(parents=True, exist_ok=True)
    print(f"[init] Created data home at {DATA_HOME}")

# upper bound for bundled (zip) downloads
MAX_BUNDLE_BYTES = 128 * 1024 * 1024 # 128MB
@@ -0,0 +1,523 @@
1
+
2
+ from typing import Optional, overload, Literal
3
+ from abc import ABC, abstractmethod
4
+
5
+ import urllib.parse
6
+ import dataclasses, hashlib, uuid
7
+ from contextlib import asynccontextmanager
8
+ from enum import IntEnum
9
+ import zipfile, io
10
+
11
+ import aiosqlite
12
+ from asyncio import Lock
13
+
14
+ from .config import DATA_HOME
15
+ from .log import get_logger
16
+ from .utils import decode_uri_compnents
17
+
18
+ _g_conn: Optional[aiosqlite.Connection] = None
19
+
20
def hash_credential(username, password):
    """Derive the stored credential: SHA-256 over username concatenated with password."""
    digest = hashlib.sha256()
    digest.update((username + password).encode())
    return digest.hexdigest()
22
+
23
class DBConnBase(ABC):
    """Base class for table accessors sharing one module-global aiosqlite connection."""
    logger = get_logger('database', global_instance=True)

    @property
    def conn(self)->aiosqlite.Connection:
        # the single module-wide connection; populated by init()
        global _g_conn
        if _g_conn is None:
            raise ValueError('Connection not initialized, did you forget to call super().init()?')
        return _g_conn

    @abstractmethod
    async def init(self):
        """Should return self"""
        # lazily opens the shared connection on the first init() call;
        # subsequent callers reuse the same connection object
        global _g_conn
        if _g_conn is None:
            _g_conn = await aiosqlite.connect(DATA_HOME / 'lfss.db')

    async def commit(self):
        # commits on the shared connection, so it affects every accessor
        await self.conn.commit()
42
+
43
@dataclasses.dataclass
class DBUserRecord:
    """One row of the `user` table."""
    id: int
    username: str
    credential: str
    is_admin: bool
    create_time: str
    last_active: str

    def __str__(self):
        details = ", ".join([
            f"id={self.id}",
            f"admin={self.is_admin}",
            f"created at {self.create_time}",
            f"last active at {self.last_active}",
        ])
        return f"User {self.username} ({details})"
54
+
55
DECOY_USER = DBUserRecord(0, 'decoy', 'decoy', False, '2021-01-01 00:00:00', '2021-01-01 00:00:00')

class UserConn(DBConnBase):
    """CRUD accessor for the `user` table."""

    @staticmethod
    def parse_record(record) -> DBUserRecord:
        return DBUserRecord(*record)

    async def init(self):
        await super().init()
        await self.conn.execute('''
        CREATE TABLE IF NOT EXISTS user (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username VARCHAR(255) UNIQUE NOT NULL,
            credential VARCHAR(255) NOT NULL,
            is_admin BOOLEAN DEFAULT FALSE,
            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            last_active TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        ''')
        return self

    async def get_user(self, username: str) -> Optional[DBUserRecord]:
        """Look up a user by name; None if absent."""
        async with self.conn.execute("SELECT * FROM user WHERE username = ?", (username, )) as cursor:
            res = await cursor.fetchone()

        if res is None: return None
        return self.parse_record(res)

    async def get_user_by_id(self, user_id: int) -> Optional[DBUserRecord]:
        """Look up a user by numeric id; None if absent."""
        async with self.conn.execute("SELECT * FROM user WHERE id = ?", (user_id, )) as cursor:
            res = await cursor.fetchone()

        if res is None: return None
        return self.parse_record(res)

    async def get_user_by_credential(self, credential: str) -> Optional[DBUserRecord]:
        """Look up a user by stored credential hash (bearer-token auth); None if absent."""
        async with self.conn.execute("SELECT * FROM user WHERE credential = ?", (credential, )) as cursor:
            res = await cursor.fetchone()

        if res is None: return None
        return self.parse_record(res)

    async def create_user(self, username: str, password: str, is_admin: bool = False) -> int:
        """Insert a new user; returns the new row id.

        NOTE(review): validation via `assert` disappears under `python -O`;
        consider raising ValueError instead (kept as-is so callers catching
        AssertionError keep working).
        """
        assert not username.startswith('_'), "Error: reserved username"
        assert not ('/' in username or len(username) > 255), "Invalid username"
        assert urllib.parse.quote(username) == username, "Invalid username, must be URL safe"
        self.logger.debug(f"Creating user {username}")
        credential = hash_credential(username, password)
        # the UNIQUE constraint is the real guard; this pre-check only gives
        # a friendlier error message (check-then-insert is not atomic)
        assert await self.get_user(username) is None, "Duplicate username"
        async with self.conn.execute("INSERT INTO user (username, credential, is_admin) VALUES (?, ?, ?)", (username, credential, is_admin)) as cursor:
            self.logger.info(f"User {username} created")
            assert cursor.lastrowid is not None
            return cursor.lastrowid

    async def set_user(self, username: str, password: Optional[str] = None, is_admin: Optional[bool] = None):
        """Update credential and/or admin flag; omitted fields keep their current value.

        Fix: the two per-field fallback SELECTs are merged into a single
        query (behavior unchanged, one round-trip instead of two).
        """
        assert not username.startswith('_'), "Error: reserved username"
        assert not ('/' in username or len(username) > 255), "Invalid username"
        assert urllib.parse.quote(username) == username, "Invalid username, must be URL safe"
        if password is None or is_admin is None:
            async with self.conn.execute("SELECT credential, is_admin FROM user WHERE username = ?", (username, )) as cursor:
                res = await cursor.fetchone()
            assert res is not None, f"User {username} not found"
            current_credential, current_admin = res
        if password is not None:
            credential = hash_credential(username, password)
        else:
            credential = current_credential
        if is_admin is None:
            is_admin = current_admin

        await self.conn.execute("UPDATE user SET credential = ?, is_admin = ? WHERE username = ?", (credential, is_admin, username))
        self.logger.info(f"User {username} updated")

    async def all(self):
        """Yield every user record."""
        async with self.conn.execute("SELECT * FROM user") as cursor:
            async for record in cursor:
                yield self.parse_record(record)

    async def set_active(self, username: str):
        """Bump last_active to now."""
        await self.conn.execute("UPDATE user SET last_active = CURRENT_TIMESTAMP WHERE username = ?", (username, ))

    async def delete_user(self, username: str):
        """Delete the user row (file records are handled by Database.delete_user)."""
        await self.conn.execute("DELETE FROM user WHERE username = ?", (username, ))
        self.logger.info(f"Delete user {username}")
141
+
142
class FileReadPermission(IntEnum):
    """Read-access level stored per file."""
    PUBLIC = 0      # accessible by anyone
    PROTECTED = 1   # accessible by any user
    PRIVATE = 2     # accessible by owner only (including admin)
146
+
147
@dataclasses.dataclass
class FileDBRecord:
    """One row of the `fmeta` table."""
    url: str
    owner_id: int
    file_id: str # defines mapping from fmata to fdata
    file_size: int
    create_time: str
    access_time: str
    permission: FileReadPermission

    def __str__(self):
        details = (
            f"owner={self.owner_id}, created at {self.create_time}, "
            f"accessed at {self.access_time}, file_id={self.file_id}, "
            f"permission={self.permission}, size={self.file_size}"
        )
        return f"File {self.url} ({details})"
160
+
161
@dataclasses.dataclass
class DirectoryRecord:
    """A virtual directory: url prefix plus the total byte size beneath it."""
    url: str
    size: int

    def __str__(self):
        return f"Directory {self.url} (size={self.size})"
168
+
169
class FileConn(DBConnBase):
    """Accessor for file metadata (`fmeta`) and file content blobs (`fdata`).

    NOTE(review): prefix queries use LIKE, so `%`/`_` inside a url act as
    wildcards -- confirm stored urls can never contain them.
    """

    @staticmethod
    def parse_record(record) -> FileDBRecord:
        return FileDBRecord(*record)

    async def init(self):
        await super().init()
        await self.conn.execute('''
        CREATE TABLE IF NOT EXISTS fmeta (
            url VARCHAR(512) PRIMARY KEY,
            owner_id INTEGER NOT NULL,
            file_id VARCHAR(256) NOT NULL,
            file_size INTEGER,
            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            access_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            permission INTEGER DEFAULT 0
        )
        ''')
        await self.conn.execute('''
        CREATE INDEX IF NOT EXISTS idx_fmeta_url ON fmeta(url)
        ''')

        await self.conn.execute('''
        CREATE TABLE IF NOT EXISTS fdata (
            file_id VARCHAR(256) PRIMARY KEY,
            data BLOB
        )
        ''')

        return self

    async def get_file_record(self, url: str) -> Optional[FileDBRecord]:
        """Return the metadata record for `url`, or None."""
        async with self.conn.execute("SELECT * FROM fmeta WHERE url = ?", (url, )) as cursor:
            res = await cursor.fetchone()
        if res is None:
            return None
        return self.parse_record(res)

    async def get_file_records(self, urls: list[str]) -> list[FileDBRecord]:
        """Batch lookup; urls that do not exist are simply absent from the result."""
        if not urls:
            # fix: an empty batch would render "IN ()", a SQLite syntax error
            return []
        async with self.conn.execute("SELECT * FROM fmeta WHERE url IN ({})".format(','.join(['?'] * len(urls))), urls) as cursor:
            res = await cursor.fetchall()
        if res is None:
            return []
        return [self.parse_record(r) for r in res]

    async def get_user_file_records(self, owner_id: int) -> list[FileDBRecord]:
        """All records owned by `owner_id`."""
        async with self.conn.execute("SELECT * FROM fmeta WHERE owner_id = ?", (owner_id, )) as cursor:
            res = await cursor.fetchall()
        return [self.parse_record(r) for r in res]

    async def get_path_records(self, url: str) -> list[FileDBRecord]:
        """All records whose url starts with `url` (recursive prefix match)."""
        async with self.conn.execute("SELECT * FROM fmeta WHERE url LIKE ?", (url + '%', )) as cursor:
            res = await cursor.fetchall()
        return [self.parse_record(r) for r in res]

    async def list_root(self, *usernames: str) -> list[DirectoryRecord]:
        """
        Efficiently list users' directories, if usernames is empty, list all users' directories.
        """
        if not usernames:
            # list all users
            async with self.conn.execute("SELECT username FROM user") as cursor:
                res = await cursor.fetchall()
            dirnames = [u[0] + '/' for u in res]
        else:
            # list specific users
            dirnames = [uname + '/' for uname in usernames]
        return [DirectoryRecord(u, await self.path_size(u, include_subpath=True)) for u in dirnames]

    @overload
    async def list_path(self, url: str, flat: Literal[True]) -> list[FileDBRecord]:...
    @overload
    async def list_path(self, url: str, flat: Literal[False]) -> tuple[list[DirectoryRecord], list[FileDBRecord]]:...

    async def list_path(self, url: str, flat: bool = False) -> list[FileDBRecord] | tuple[list[DirectoryRecord], list[FileDBRecord]]:
        """
        List all files and directories under the given path,
        if flat is True, return a list of FileDBRecord, recursively including all subdirectories.
        Otherwise, return a tuple of (dirs, files), where dirs is a list of DirectoryRecord,
        """
        if not url.endswith('/'):
            url += '/'
        if url == '/':
            # users cannot be queried using '/', because we store them without '/' prefix,
            # so we need to handle this case separately,
            if flat:
                async with self.conn.execute("SELECT * FROM fmeta") as cursor:
                    res = await cursor.fetchall()
                return [self.parse_record(r) for r in res]
            else:
                return (await self.list_root(), [])

        if flat:
            async with self.conn.execute("SELECT * FROM fmeta WHERE url LIKE ?", (url + '%', )) as cursor:
                res = await cursor.fetchall()
            return [self.parse_record(r) for r in res]

        # direct children only: match prefix but exclude deeper '/'-separated levels
        async with self.conn.execute("SELECT * FROM fmeta WHERE url LIKE ? AND url NOT LIKE ?", (url + '%', url + '%/%')) as cursor:
            res = await cursor.fetchall()
        files = [self.parse_record(r) for r in res]

        # substr indexing starts from 1
        async with self.conn.execute(
            """
            SELECT DISTINCT
                SUBSTR(
                    url,
                    1 + LENGTH(?),
                    INSTR(SUBSTR(url, 1 + LENGTH(?)), '/') - 1
                ) AS subdir
                FROM fmeta WHERE url LIKE ?
            """,
            (url, url, url + '%')
        ) as cursor:
            res = await cursor.fetchall()
        dirs_str = [r[0] + '/' for r in res if r[0] != '/']
        dirs = [DirectoryRecord(url + d, await self.path_size(url + d, include_subpath=True)) for d in dirs_str]

        return (dirs, files)

    async def path_size(self, url: str, include_subpath = False) -> int:
        """Total bytes directly under `url` (or recursively, with include_subpath)."""
        if not url.endswith('/'):
            url += '/'
        if not include_subpath:
            async with self.conn.execute("SELECT SUM(file_size) FROM fmeta WHERE url LIKE ? AND url NOT LIKE ?", (url + '%', url + '%/%')) as cursor:
                res = await cursor.fetchone()
        else:
            async with self.conn.execute("SELECT SUM(file_size) FROM fmeta WHERE url LIKE ?", (url + '%', )) as cursor:
                res = await cursor.fetchone()
        assert res is not None
        return res[0] or 0

    async def set_file_record(self, url: str, owner_id: int, file_id: str, file_size: int, permission: Optional[ FileReadPermission ] = None):
        """Insert or update a metadata record; None permission keeps/defaults it."""
        self.logger.debug(f"Updating fmeta {url}: user_id={owner_id}, file_id={file_id}")

        old = await self.get_file_record(url)
        if old is not None:
            # overwrite: owner must match; permission defaults to the old value
            assert old.owner_id == owner_id, f"User mismatch: {old.owner_id} != {owner_id}"
            if permission is None:
                permission = old.permission
            await self.conn.execute(
                """
                UPDATE fmeta SET file_id = ?, file_size = ?, permission = ?,
                access_time = CURRENT_TIMESTAMP WHERE url = ?
                """, (file_id, file_size, permission, url))
            self.logger.info(f"File {url} updated")
        else:
            if permission is None:
                permission = FileReadPermission.PUBLIC
            await self.conn.execute("INSERT INTO fmeta (url, owner_id, file_id, file_size, permission) VALUES (?, ?, ?, ?, ?)", (url, owner_id, file_id, file_size, permission))
            self.logger.info(f"File {url} created")

    async def log_access(self, url: str):
        """Bump access_time to now."""
        await self.conn.execute("UPDATE fmeta SET access_time = CURRENT_TIMESTAMP WHERE url = ?", (url, ))

    async def delete_file_record(self, url: str):
        """Delete one metadata record (no-op when absent); the blob is removed separately."""
        file_record = await self.get_file_record(url)
        if file_record is None: return
        await self.conn.execute("DELETE FROM fmeta WHERE url = ?", (url, ))
        self.logger.info(f"Deleted fmeta {url}")

    async def delete_user_file_records(self, owner_id: int):
        """Delete all metadata records owned by `owner_id`."""
        async with self.conn.execute("SELECT * FROM fmeta WHERE owner_id = ?", (owner_id, )) as cursor:
            res = await cursor.fetchall()
        await self.conn.execute("DELETE FROM fmeta WHERE owner_id = ?", (owner_id, ))
        self.logger.info(f"Deleted {len(res)} files for user {owner_id}") # type: ignore

    async def delete_path_records(self, path: str):
        """Delete all records with url starting with path"""
        async with self.conn.execute("SELECT * FROM fmeta WHERE url LIKE ?", (path + '%', )) as cursor:
            res = await cursor.fetchall()
        await self.conn.execute("DELETE FROM fmeta WHERE url LIKE ?", (path + '%', ))
        self.logger.info(f"Deleted {len(res)} files for path {path}") # type: ignore

    async def set_file_blob(self, file_id: str, blob: bytes) -> int:
        """Store (or replace) the blob; returns its size in bytes."""
        await self.conn.execute("INSERT OR REPLACE INTO fdata (file_id, data) VALUES (?, ?)", (file_id, blob))
        return len(blob)

    async def get_file_blob(self, file_id: str) -> Optional[bytes]:
        """Fetch the blob for `file_id`, or None."""
        async with self.conn.execute("SELECT data FROM fdata WHERE file_id = ?", (file_id, )) as cursor:
            res = await cursor.fetchone()
        if res is None:
            return None
        return res[0]

    async def delete_file_blob(self, file_id: str):
        await self.conn.execute("DELETE FROM fdata WHERE file_id = ?", (file_id, ))

    async def delete_file_blobs(self, file_ids: list[str]):
        if not file_ids:
            # fix: an empty batch would render "IN ()", a SQLite syntax error
            return
        await self.conn.execute("DELETE FROM fdata WHERE file_id IN ({})".format(','.join(['?'] * len(file_ids))), file_ids)
364
+
365
+ def _validate_url(url: str, is_file = True) -> bool:
366
+ ret = not url.startswith('/') and not ('..' in url) and ('/' in url) and not ('//' in url) \
367
+ and not ' ' in url and not url.startswith('\\') and not url.startswith('_') and not url.startswith('.')
368
+
369
+ if not ret:
370
+ return False
371
+
372
+ if is_file:
373
+ ret = ret and not url.endswith('/')
374
+ else:
375
+ ret = ret and url.endswith('/')
376
+ return ret
377
+
378
async def get_user(db: "Database", user: int | str) -> Optional[DBUserRecord]:
    """Resolve a user given either a username or a numeric id; None otherwise."""
    if isinstance(user, str):
        return await db.user.get_user(user)
    if isinstance(user, int):
        return await db.user.get_user_by_id(user)
    return None
385
+
386
_transaction_lock = Lock()
@asynccontextmanager
async def transaction(db: "Database"):
    """Serialize writes and commit (or roll back) around the managed body.

    Fix: the lock was previously acquired inside the ``try`` whose
    ``finally`` released it, so a failed/cancelled acquire would release a
    lock this task never held; ``async with`` pairs acquire/release safely.

    NOTE(review): exceptions from the body are logged, rolled back, and
    swallowed (the context manager suppresses them), so callers cannot tell
    failure from success -- confirm this is intended.
    """
    async with _transaction_lock:
        try:
            yield
            await db.commit()
        except Exception as e:
            db.logger.error(f"Error in transaction: {e}")
            await db.rollback()
398
+
399
class Database:
    """High-level facade combining user and file accessors over one connection.

    NOTE(review): `user` and `file` are class attributes, so every Database
    instance shares the same accessor objects (and the module-global
    connection) -- confirm a single-instance usage pattern is intended.
    """
    user: UserConn = UserConn()
    file: FileConn = FileConn()
    logger = get_logger('database', global_instance=True)

    async def init(self):
        # create tables / open the shared connection; returns self for chaining
        await self.user.init()
        await self.file.init()
        return self

    async def commit(self):
        global _g_conn
        if _g_conn is not None:
            await _g_conn.commit()

    async def close(self):
        global _g_conn
        if _g_conn: await _g_conn.close()

    async def rollback(self):
        global _g_conn
        if _g_conn is not None:
            await _g_conn.rollback()

    async def save_file(self, u: int | str, url: str, blob: bytes):
        """Store `blob` at `url` on behalf of user `u` (name or id).

        Writing is only allowed into the caller's own `<username>/` prefix,
        unless the caller is an admin writing under another valid username.
        NOTE(review): an unknown user silently no-ops -- confirm callers
        expect this rather than an error.
        """
        if not _validate_url(url):
            raise ValueError(f"Invalid URL: {url}")
        assert isinstance(blob, bytes), "blob must be bytes"

        user = await get_user(self, u)
        if user is None:
            return

        # check if the user is the owner of the path, or is admin
        # (_validate_url already rejects a leading '/', so this strip is
        # defensive only)
        if url.startswith('/'):
            url = url[1:]
        first_component = url.split('/')[0]
        if first_component != user.username:
            if not user.is_admin:
                raise ValueError(f"Permission denied: {user.username} cannot write to {url}")
            else:
                # admins may write anywhere, but only under a real username
                if await get_user(self, first_component) is None:
                    raise ValueError(f"Invalid path: {first_component} is not a valid username")

        # fresh random id links the fmeta row to its fdata blob
        f_id = uuid.uuid4().hex
        async with transaction(self):
            file_size = await self.file.set_file_blob(f_id, blob)
            await self.file.set_file_record(url, owner_id=user.id, file_id=f_id, file_size=file_size)
            await self.user.set_active(user.username)

    # async def read_file_stream(self, url: str): ...
    async def read_file(self, url: str) -> bytes:
        """Return the blob stored at `url`; raises FileNotFoundError when missing."""
        if not _validate_url(url): raise ValueError(f"Invalid URL: {url}")

        r = await self.file.get_file_record(url)
        if r is None:
            raise FileNotFoundError(f"File {url} not found")

        f_id = r.file_id
        blob = await self.file.get_file_blob(f_id)
        if blob is None:
            # metadata exists but blob is gone: dangling fmeta row
            raise FileNotFoundError(f"File {url} data not found")

        # record the read before returning the data
        async with transaction(self):
            await self.file.log_access(url)

        return blob

    async def delete_file(self, url: str) -> Optional[FileDBRecord]:
        """Delete one file (blob + metadata); returns the old record, or None."""
        if not _validate_url(url): raise ValueError(f"Invalid URL: {url}")

        async with transaction(self):
            r = await self.file.get_file_record(url)
            if r is None:
                return None
            f_id = r.file_id
            await self.file.delete_file_blob(f_id)
            await self.file.delete_file_record(url)
            return r

    async def delete_path(self, url: str):
        """Delete every file under directory `url`; returns the old records, or None."""
        if not _validate_url(url, is_file=False): raise ValueError(f"Invalid URL: {url}")

        async with transaction(self):
            records = await self.file.get_path_records(url)
            if not records:
                return None
            await self.file.delete_file_blobs([r.file_id for r in records])
            await self.file.delete_path_records(url)
            return records

    async def delete_user(self, u: str | int):
        """Remove a user and everything they own (blobs, metadata, user row)."""
        user = await get_user(self, u)
        if user is None:
            return

        async with transaction(self):
            records = await self.file.get_user_file_records(user.id)
            await self.file.delete_file_blobs([r.file_id for r in records])
            await self.file.delete_user_file_records(user.id)
            await self.user.delete_user(user.username)

    async def zip_path(self, top_url: str, urls: Optional[list[str]]) -> io.BytesIO:
        """Bundle the files under `top_url` (or the given subset) into a zip.

        NOTE(review): every blob is loaded into memory; the MAX_BUNDLE_BYTES
        limit appears to be enforced by the caller -- confirm.
        """
        if urls is None:
            urls = [r.url for r in await self.file.list_path(top_url, flat=True)]

        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, 'w') as zf:
            for url in urls:
                # skip anything outside the requested prefix or missing
                if not url.startswith(top_url):
                    continue
                r = await self.file.get_file_record(url)
                if r is None:
                    continue
                f_id = r.file_id
                blob = await self.file.get_file_blob(f_id)
                if blob is None:
                    continue

                # archive entries are relative to top_url, with uri escapes decoded
                rel_path = url[len(top_url):]
                rel_path = decode_uri_compnents(rel_path)
                zf.writestr(rel_path, blob)

        buffer.seek(0)
        return buffer
@@ -0,0 +1,157 @@
1
+ from .config import DATA_HOME
2
+ from typing import TypeVar, Callable, Literal, Optional
3
+ from concurrent.futures import ThreadPoolExecutor
4
+ from functools import wraps
5
+ import logging, pathlib, asyncio
6
+ from logging import handlers
7
+
8
class BCOLORS:
    """ANSI escape sequences used to colorize console log output."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    OKGRAY = '\033[90m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'        # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # plain foreground colors
    BLACK = '\033[30m'
    RED = '\033[31m'
    GREEN = '\033[32m'
    YELLOW = '\033[33m'
    BLUE = '\033[34m'
    MAGENTA = '\033[35m'
    CYAN = '\033[36m'
    WHITE = '\033[37m'
    LIGHTGRAY = '\033[37m'
    DARKGRAY = '\033[90m'
    LIGHTRED = '\033[91m'
    LIGHTGREEN = '\033[92m'
    LIGHTYELLOW = '\033[93m'
    LIGHTBLUE = '\033[94m'
    LIGHTMAGENTA = '\033[95m'
    LIGHTCYAN = '\033[96m'
37
+
38
_thread_pool = ThreadPoolExecutor(max_workers=1)
def thread_wrap(func):
    """Fire-and-forget: run `func` on the shared single worker thread.

    Fix: @wraps preserves the wrapped function's name/docstring.
    NOTE(review): the submitted Future is discarded, so exceptions raised in
    `func` vanish silently -- confirm that is acceptable here.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        _thread_pool.submit(func, *args, **kwargs)
    return wrapper

class BaseLogger(logging.Logger):
    """Logger whose record emission is offloaded to a background thread."""

    def finalize(self):
        """Flush, close and detach every handler.

        Fix: iterate over a copy -- removeHandler mutates `self.handlers`,
        and mutating the list while iterating skips every other handler.
        """
        for handler in list(self.handlers):
            handler.flush()
            handler.close()
            self.removeHandler(handler)

    @thread_wrap
    def debug(self, *args, **kwargs): super().debug(*args, **kwargs)
    @thread_wrap
    def info(self, *args, **kwargs): super().info(*args, **kwargs)
    @thread_wrap
    def warning(self, *args, **kwargs): super().warning(*args, **kwargs)
    @thread_wrap
    def error(self, *args, **kwargs): super().error(*args, **kwargs)
59
+
60
_fh_T = Literal['rotate', 'simple', 'daily']

__g_logger_dict: dict[str, BaseLogger] = {}
def get_logger(
    name = 'default',
    log_home = pathlib.Path(DATA_HOME) / 'logs',
    level = 'DEBUG',
    file_handler_type: _fh_T = 'rotate',
    global_instance = True
    )->BaseLogger:
    """Build (or return the cached) BaseLogger with console + file handlers.

    Console output is colorized; the file handler receives the same format
    with the ANSI codes stripped.
    """
    if global_instance and name in __g_logger_dict:
        return __g_logger_dict[name]

    def setupLogger(logger: BaseLogger):
        logger.setLevel(level)

        # colorized console handler
        format_str = BCOLORS.LIGHTMAGENTA + ' %(asctime)s ' +BCOLORS.OKCYAN + '[%(name)s][%(levelname)s] ' + BCOLORS.ENDC + ' %(message)s'
        formatter = logging.Formatter(format_str)
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

        # strip every ANSI escape declared on BCOLORS for the file format
        format_str_plain = format_str
        for color in BCOLORS.__dict__.values():
            if isinstance(color, str) and color.startswith('\033'):
                format_str_plain = format_str_plain.replace(color, '')

        formatter_plain = logging.Formatter(format_str_plain)
        # fix: create intermediate directories too (plain mkdir raised when
        # the parent of log_home was missing)
        log_home.mkdir(parents=True, exist_ok=True)
        log_file = log_home / f'{name}.log'
        if file_handler_type == 'simple':
            file_handler = logging.FileHandler(log_file)
        elif file_handler_type == 'daily':
            file_handler = handlers.TimedRotatingFileHandler(
                log_file, when='midnight', interval=1, backupCount=5
            )
        elif file_handler_type == 'rotate':
            file_handler = handlers.RotatingFileHandler(
                log_file, maxBytes=1000000, backupCount=5
            )
        else:
            # fix: an unknown type previously fell through to an
            # UnboundLocalError on file_handler below
            raise ValueError(f"Unknown file_handler_type: {file_handler_type}")

        file_handler.setFormatter(formatter_plain)
        logger.addHandler(file_handler)

    logger = BaseLogger(name)
    setupLogger(logger)
    if global_instance:
        __g_logger_dict[name] = logger

    return logger
111
+
112
def clear_handlers(logger: logging.Logger):
    """Flush, close and detach all handlers and drop the cached instance.

    Fix: iterate over a copy -- removing from `logger.handlers` while
    iterating it skips every other handler.
    """
    for handler in list(logger.handlers):
        handler.flush()
        handler.close()
        logger.removeHandler(handler)
    __g_logger_dict.pop(logger.name, None)
119
+
120
FUNCTION_T = TypeVar('FUNCTION_T', bound=Callable)
def log_access(
    include_args: bool = True,
    logger: Optional[BaseLogger] = None,
    ):
    """Decorator that logs every call of the wrapped function (sync or async).

    Fix: the sync wrapper previously re-bound `logger = get_logger()` on
    every call, silently discarding a logger explicitly passed to the
    decorator; both wrappers now use the decorator-level logger.
    """
    if logger is None:
        logger = get_logger()

    def _log_access(fn: FUNCTION_T) -> FUNCTION_T:
        if asyncio.iscoroutinefunction(fn):
            @wraps(fn)
            async def async_wrapper(*args, **kwargs):
                if include_args:
                    logger.info(f'[func] <{fn.__name__}> called with: {args}, {kwargs}')
                else:
                    logger.info(f'[func] <{fn.__name__}>')

                return await fn(*args, **kwargs)
            return async_wrapper # type: ignore
        else:
            @wraps(fn)
            def wrapper(*args, **kwargs):
                if include_args:
                    logger.info(f'[func] <{fn.__name__}> called with: {args}, {kwargs}')
                else:
                    logger.info(f'[func] <{fn.__name__}>')

                return fn(*args, **kwargs)
            return wrapper # type: ignore
    return _log_access
151
+
152
def get_dummy_logger() -> BaseLogger:
    """Return a fresh, handler-less BaseLogger (records go nowhere)."""
    return BaseLogger('dummy')
154
+
155
# NOTE(review): `__ALL__` has no special meaning to Python -- the export-list
# convention is lowercase `__all__`, so as written this does NOT restrict
# `from .log import *`. Confirm intent before renaming, since star-importers
# may currently rely on the unrestricted behavior.
__ALL__ = [
    'get_logger', 'clear_handlers', 'log_access', 'get_dummy_logger'
]
@@ -0,0 +1,219 @@
1
+ from typing import Optional
2
+
3
+ from fastapi import FastAPI, APIRouter, Depends, Request, Response
4
+ from fastapi.exceptions import HTTPException
5
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ import mimesniff
8
+
9
+ import json
10
+ import mimetypes
11
+ from contextlib import asynccontextmanager
12
+
13
+ from .log import get_logger
14
+ from .config import MAX_BUNDLE_BYTES
15
+ from .utils import ensure_uri_compnents
16
+ from .database import Database, DBUserRecord, DECOY_USER, FileReadPermission
17
+
18
# Module-wide logger for the HTTP server.
logger = get_logger("server")
# Single shared database facade; opened/closed by the `lifespan` hook below.
conn = Database()
20
+
21
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    FastAPI lifespan hook: open the shared database connection before the
    app starts serving requests and close it on shutdown.
    """
    # NOTE: the original declared `global conn`, but `conn` is only read
    # (never rebound) here, so the declaration was unnecessary and removed.
    await conn.init()
    yield
    await conn.close()
27
+
28
async def get_current_user(token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False))):
    """
    FastAPI dependency: resolve the request's bearer token to a user record.

    A missing token yields the anonymous DECOY_USER; a token matching no
    user is rejected with HTTP 401.
    """
    if not token:
        return DECOY_USER
    matched = await conn.user.get_user_by_credential(token.credentials)
    if not matched:
        raise HTTPException(status_code=401, detail="Invalid token")
    return matched
35
+
36
# Application instance; interactive docs endpoints are disabled.
app = FastAPI(docs_url=None, redoc_url=None, lifespan=lifespan)
# Allow cross-origin requests from any origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Catch-all filesystem router, mounted at the application root.
router_fs = APIRouter(prefix="")
46
+
47
@router_fs.get("/{path:path}")
async def get_file(path: str, asfile = False, user: DBUserRecord = Depends(get_current_user)):
    """
    Serve a file, or list a directory when `path` ends with '/'.

    - Directory listings require authentication and are restricted to the
      requesting user's own tree unless the user is an admin.
    - File reads are gated by the file's permission: PRIVATE (owner or
      admin only), PROTECTED (any authenticated user), PUBLIC (anyone).
    - `asfile` forces a download (attachment + octet-stream).
    """
    path = ensure_uri_compnents(path)
    if path == "": path = "/"
    if path.endswith("/"):
        # Directory listing: return sub-dirs and files as JSON.
        if user.id == 0:
            raise HTTPException(status_code=403, detail="Permission denied, credential required")
        if path == "/":
            # Root listing: admins see every top-level directory, others
            # only their own.
            return {
                "dirs": await conn.file.list_root(user.username) \
                    if not user.is_admin else await conn.file.list_root(),
                "files": []
            }

        if not path.startswith(f"{user.username}/") and not user.is_admin:
            raise HTTPException(status_code=403, detail="Permission denied, path must start with username")

        dirs, files = await conn.file.list_path(path, flat = False)
        return {
            "dirs": dirs,
            "files": files
        }

    file_record = await conn.file.get_file_record(path)
    if not file_record:
        raise HTTPException(status_code=404, detail="File not found")

    # Permission check.
    perm = file_record.permission
    if perm == FileReadPermission.PRIVATE:
        # Bug fix: the original asserted `path.startswith(f"{user.username}/")`
        # in the allowed branch, which raised AssertionError (HTTP 500)
        # whenever an *admin* read another user's private file. Being the
        # owner or an admin is sufficient.
        if not user.is_admin and user.id != file_record.owner_id:
            raise HTTPException(status_code=403, detail="Permission denied")
    elif perm == FileReadPermission.PROTECTED:
        # Any authenticated user may read; the anonymous decoy user may not.
        if user.id == 0:
            raise HTTPException(status_code=403, detail="Permission denied")
    else:
        assert perm == FileReadPermission.PUBLIC

    fname = path.split("/")[-1]
    async def send(media_type: Optional[str] = None, disposition = "attachment"):
        # Read the blob; when no media type is forced, guess from the file
        # name, then fall back to sniffing the content.
        fblob = await conn.read_file(path)
        if media_type is None:
            media_type, _ = mimetypes.guess_type(fname)
        if media_type is None:
            media_type = mimesniff.what(fblob)

        return Response(
            content=fblob, media_type=media_type, headers={
                "Content-Disposition": f"{disposition}; filename={fname}",
                "Content-Length": str(len(fblob))
            }
        )

    if asfile:
        return await send('application/octet-stream', "attachment")
    else:
        return await send(None, "inline")
107
+
108
@router_fs.put("/{path:path}")
async def put_file(request: Request, path: str, user: DBUserRecord = Depends(get_current_user)):
    """
    Create or overwrite the file at `path` with the request body.

    Only the authenticated owner (path under `<username>/`) or an admin may
    write. The payload is interpreted by Content-Type: JSON is re-serialized,
    urlencoded form data must carry a `file` field, anything else is stored
    as raw bytes.

    Returns 201 when a new file was created and 200 when an existing file
    was overwritten. Bug fix: the original had these two status codes
    swapped relative to the HTTP PUT semantics it cites (MDN / RFC 9110:
    201 Created for a new resource, 200 OK for a modification).
    """
    path = ensure_uri_compnents(path)
    if user.id == 0:
        logger.debug("Reject put request from DECOY_USER")
        raise HTTPException(status_code=403, detail="Permission denied")
    if not path.startswith(f"{user.username}/") and not user.is_admin:
        logger.debug(f"Reject put request from {user.username} to {path}")
        raise HTTPException(status_code=403, detail="Permission denied")

    logger.info(f"PUT {path}, user: {user.username}")
    exists_flag = False
    file_record = await conn.file.get_file_record(path)
    if file_record:
        exists_flag = True
        # remove the old file
        await conn.delete_file(path)

    # Dispatch on Content-Type to decide how to extract the payload.
    content_type = request.headers.get("Content-Type")
    logger.debug(f"Content-Type: {content_type}")
    if content_type == "application/json":
        body = await request.json()
        await conn.save_file(user.id, path, json.dumps(body).encode('utf-8'))
    elif content_type == "application/x-www-form-urlencoded":
        # may not work...
        body = await request.form()
        file = body.get("file")
        if isinstance(file, str) or file is None:
            raise HTTPException(status_code=400, detail="Invalid form data, file required")
        await conn.save_file(user.id, path, await file.read())
    else:
        # application/octet-stream and any other type: store the raw body.
        # (The original had two branches with identical bodies; merged.)
        body = await request.body()
        await conn.save_file(user.id, path, body)

    # https://developer.mozilla.org/zh-CN/docs/Web/HTTP/Methods/PUT
    return Response(
        status_code=200 if exists_flag else 201,
        headers={"Content-Type": "application/json"},
        content=json.dumps({"url": path}),
    )
155
+
156
@router_fs.delete("/{path:path}")
async def delete_file(path: str, user: DBUserRecord = Depends(get_current_user)):
    """
    Delete the file at `path`, or the whole subtree when `path` ends with '/'.

    Restricted to the path owner (path under `<username>/`) or an admin.
    Responds 200 on success, 404 when nothing was deleted.
    """
    path = ensure_uri_compnents(path)
    if user.id == 0:
        raise HTTPException(status_code=403, detail="Permission denied")
    if not user.is_admin and not path.startswith(f"{user.username}/"):
        raise HTTPException(status_code=403, detail="Permission denied")

    logger.info(f"DELETE {path}, user: {user.username}")

    # Trailing slash means "delete the directory subtree".
    deleted = await (conn.delete_path(path) if path.endswith("/") else conn.delete_file(path))

    if deleted:
        return Response(status_code=200, content="Deleted")
    return Response(status_code=404, content="Not found")
175
+
176
# Auxiliary API endpoints, mounted under the /_api prefix.
router_api = APIRouter(prefix="/_api")
177
+
178
@router_api.get("/bundle")
async def bundle_files(path: str, user: DBUserRecord = Depends(get_current_user)):
    """
    Return every file under the directory `path` as a single zip archive.

    `path` must name a directory ('' or ending with '/'); a leading '/'
    is tolerated. Refuses with 400 when the combined size exceeds
    MAX_BUNDLE_BYTES.
    """
    logger.info(f"GET bundle({path}), user: {user.username}")
    path = ensure_uri_compnents(path)
    # Bug fix: this validation was an `assert`, which is stripped under
    # `python -O` and otherwise surfaces as HTTP 500 instead of a client
    # error. Reject malformed paths with an explicit 400.
    if not (path.endswith("/") or path == ""):
        raise HTTPException(status_code=400, detail="Invalid path, must be a directory")

    if not path == "" and path[0] == "/":   # adapt to both /path and path
        path = path[1:]

    # TODO/SECURITY: no permission check here -- any caller that reaches
    # this endpoint can bundle any path. Should mirror get_file's rules.

    # return bundle of files
    files = await conn.file.list_path(path, flat = True)
    if len(files) == 0:
        raise HTTPException(status_code=404, detail="No files found")
    total_size = sum([f.file_size for f in files])
    if total_size > MAX_BUNDLE_BYTES:
        raise HTTPException(status_code=400, detail="Too large to zip")

    file_paths = [f.url for f in files]
    zip_buffer = await conn.zip_path(path, file_paths)
    return Response(
        content=zip_buffer.getvalue(), media_type="application/zip", headers={
            "Content-Disposition": "attachment; filename=bundle.zip",
            "Content-Length": str(zip_buffer.getbuffer().nbytes)
        }
    )
205
+
206
@router_api.get("/fmeta")
async def get_file_meta(path: str, user: DBUserRecord = Depends(get_current_user)):
    """Return the stored metadata record for the file at `path` (404 if absent)."""
    logger.info(f"GET meta({path}), user: {user.username}")
    # Trailing slash would denote a directory, which has no file record.
    if path.endswith("/"):
        raise HTTPException(status_code=400, detail="Invalid path")
    path = ensure_uri_compnents(path)
    record = await conn.file.get_file_record(path)
    if not record:
        raise HTTPException(status_code=404, detail="File not found")
    return record
216
+
217
# Order matters: /_api routes must be registered before the catch-all
# /{path:path} filesystem routes, or the latter would shadow them.
app.include_router(router_api)
app.include_router(router_fs)
@@ -0,0 +1,24 @@
1
+ import urllib.parse
2
+
3
def encode_uri_compnents(path: str):
    """
    Percent-encode each '/'-separated component of `path`.

    Encoding component-by-component keeps the '/' separators intact while
    escaping special characters inside each segment (also guards against
    path traversal via encoded input).
    """
    return "/".join(urllib.parse.quote(part) for part in path.split("/"))
11
+
12
def decode_uri_compnents(path: str):
    """
    Percent-decode each '/'-separated component of `path`,
    leaving the '/' separators untouched.
    """
    return "/".join(urllib.parse.unquote(part) for part in path.split("/"))
19
+
20
def ensure_uri_compnents(path: str):
    """
    Normalize `path` to a canonically percent-encoded form: decode first so
    already-encoded input is not double-encoded, then re-encode.
    """
    decoded = decode_uri_compnents(path)
    return encode_uri_compnents(decoded)
@@ -0,0 +1,22 @@
1
+ [tool.poetry]
2
+ name = "lfss"
3
+ version = "0.1.0"
4
+ description = "Lightweight file storage service"
5
+ authors = ["li, mengxun <limengxun45@outlook.com>"]
6
+ readme = "Readme.md"
7
+ homepage = "https://github.com/MenxLi/lfss"
8
+ repository = "https://github.com/MenxLi/lfss"
9
+
10
+ [tool.poetry.dependencies]
11
+ python = ">=3.9"
12
+ fastapi = "0.*"
13
+ aiosqlite = "0.*"
14
+ mimesniff = "1.*"
15
+
16
+ [tool.poetry.scripts]
17
+ lfss-serve = "lfss.cli.serve:main"
18
+ lfss-user = "lfss.cli.user:main"
19
+
20
+ [build-system]
21
+ requires = ["poetry-core>=1.0.0"]
22
+ build-backend = "poetry.core.masonry.api"