lfss 0.7.8__tar.gz → 0.7.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {lfss-0.7.8 → lfss-0.7.9}/PKG-INFO +1 -1
  2. {lfss-0.7.8 → lfss-0.7.9}/frontend/api.js +1 -0
  3. {lfss-0.7.8 → lfss-0.7.9}/frontend/info.js +6 -0
  4. {lfss-0.7.8 → lfss-0.7.9}/lfss/cli/balance.py +39 -13
  5. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/database.py +14 -8
  6. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/datatype.py +3 -2
  7. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/server.py +2 -2
  8. {lfss-0.7.8 → lfss-0.7.9}/pyproject.toml +1 -1
  9. {lfss-0.7.8 → lfss-0.7.9}/Readme.md +0 -0
  10. {lfss-0.7.8 → lfss-0.7.9}/docs/Known_issues.md +0 -0
  11. {lfss-0.7.8 → lfss-0.7.9}/docs/Permission.md +0 -0
  12. {lfss-0.7.8 → lfss-0.7.9}/frontend/index.html +0 -0
  13. {lfss-0.7.8 → lfss-0.7.9}/frontend/info.css +0 -0
  14. {lfss-0.7.8 → lfss-0.7.9}/frontend/popup.css +0 -0
  15. {lfss-0.7.8 → lfss-0.7.9}/frontend/popup.js +0 -0
  16. {lfss-0.7.8 → lfss-0.7.9}/frontend/scripts.js +0 -0
  17. {lfss-0.7.8 → lfss-0.7.9}/frontend/styles.css +0 -0
  18. {lfss-0.7.8 → lfss-0.7.9}/frontend/utils.js +0 -0
  19. {lfss-0.7.8 → lfss-0.7.9}/lfss/cli/cli.py +0 -0
  20. {lfss-0.7.8 → lfss-0.7.9}/lfss/cli/panel.py +0 -0
  21. {lfss-0.7.8 → lfss-0.7.9}/lfss/cli/serve.py +0 -0
  22. {lfss-0.7.8 → lfss-0.7.9}/lfss/cli/user.py +0 -0
  23. {lfss-0.7.8 → lfss-0.7.9}/lfss/client/__init__.py +0 -0
  24. {lfss-0.7.8 → lfss-0.7.9}/lfss/client/api.py +0 -0
  25. {lfss-0.7.8 → lfss-0.7.9}/lfss/sql/init.sql +0 -0
  26. {lfss-0.7.8 → lfss-0.7.9}/lfss/sql/pragma.sql +0 -0
  27. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/__init__.py +0 -0
  28. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/config.py +0 -0
  29. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/connection_pool.py +0 -0
  30. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/error.py +0 -0
  31. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/log.py +0 -0
  32. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/stat.py +0 -0
  33. {lfss-0.7.8 → lfss-0.7.9}/lfss/src/utils.py +0 -0

{lfss-0.7.8 → lfss-0.7.9}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lfss
-Version: 0.7.8
+Version: 0.7.9
 Summary: Lightweight file storage service
 Home-page: https://github.com/MenxLi/lfss
 Author: li, mengxun

{lfss-0.7.8 → lfss-0.7.9}/frontend/api.js
@@ -28,6 +28,7 @@
  * @property {string} size - the size of the directory, in bytes
  * @property {string} create_time - the time the directory was created
  * @property {string} access_time - the time the directory was last accessed
+ * @property {number} n_files - the number of total files in the directory, including subdirectories
  *
  * @typedef {Object} PathListResponse
  * @property {DirectoryRecord[]} dirs - the list of directories in the directory

{lfss-0.7.8 → lfss-0.7.9}/frontend/info.js
@@ -89,6 +89,10 @@ export function showDirInfoPanel(r, u, c){
             <td class="info-table-key">Size</td>
             <td class="info-table-value" id="info-table-pathsize">N/A</td>
         </tr>
+        <tr>
+            <td class="info-table-key">File-Count</td>
+            <td class="info-table-value" id="info-table-nfiles">N/A</td>
+        </tr>
         <tr>
             <td class="info-table-key">Access-Time</td>
             <td class="info-table-value" id="info-table-accesstime">1970-01-01 00:00:00</td>
@@ -124,6 +128,7 @@ export function showDirInfoPanel(r, u, c){
     const sizeValTd = document.querySelector('.info-table-value#info-table-pathsize');
     const createTimeValTd = document.querySelector('.info-table-value#info-table-createtime');
     const accessTimeValTd = document.querySelector('.info-table-value#info-table-accesstime');
+    const countValTd = document.querySelector('.info-table-value#info-table-nfiles');
     // console.log(sizeValTd, createTimeValTd, accessTimeValTd)
     c.getMetadata(ensureSlashEnd(r.url)).then((meta) => {
         if (!meta) {
@@ -133,5 +138,6 @@ export function showDirInfoPanel(r, u, c){
         sizeValTd.textContent = formatSize(meta.size);
         createTimeValTd.textContent = cvtGMT2Local(meta.create_time);
         accessTimeValTd.textContent = cvtGMT2Local(meta.access_time);
+        countValTd.textContent = meta.n_files;
     });
 }

{lfss-0.7.8 → lfss-0.7.9}/lfss/cli/balance.py
@@ -7,10 +7,18 @@ import argparse, time, itertools
 from functools import wraps
 from asyncio import Semaphore
 import aiofiles, asyncio
+import aiofiles.os
 from contextlib import contextmanager
 from lfss.src.database import transaction, unique_cursor
 from lfss.src.connection_pool import global_entrance
 
+@contextmanager
+def indicator(name: str):
+    print(f"\033[1;33mRunning {name}... \033[0m")
+    s = time.time()
+    yield
+    print(f"{name} took {time.time() - s:.2f} seconds.")
+
 sem = Semaphore(1)
 
 def _get_sem():
@@ -54,9 +62,7 @@ async def move_to_internal(f_id: str, flag: str = ''):
 
 @global_entrance()
 async def _main(batch_size: int = 10000):
-
     tasks = []
-    start_time = time.time()
 
     e_cout = 0
     for batch_count in itertools.count(start=0):
@@ -90,20 +96,38 @@ async def _main(batch_size: int = 10000):
             tasks.append(move_to_internal(f_id, flag=f"[b{batch_count+1}-i{i+1}/{len(under_rows)}] "))
     await asyncio.gather(*tasks)
 
-    end_time = time.time()
-    print(f"Balancing complete, took {end_time - start_time:.2f} seconds. "
-          f"{e_cout} files moved to external storage, {i_count} files moved to internal storage.")
+    print(f"Finished. {e_cout} files moved to external storage, {i_count} files moved to internal storage.")
 
 @global_entrance()
 async def vacuum(index: bool = False, blobs: bool = False):
-    @contextmanager
-    def indicator(name: str):
-        print(f"\033[1;33mRunning {name}... \033[0m")
-        s = time.time()
-        yield
-        print(f"{name} took {time.time() - s:.2f} seconds")
+
+    # check if any file in the Large Blob directory is not in the database
+    # the reverse operation is not necessary, because by design, the database should be the source of truth...
+    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
+    async def ensure_external_consistency(f_id: str):
+        @barriered
+        async def fn():
+            async with unique_cursor() as c:
+                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                if not await cursor.fetchone():
+                    print(f"File {f_id} not found in database, removing from external storage.")
+                    await aiofiles.os.remove(f)
+        await asyncio.create_task(fn())
+
+    # create a temporary index to speed up the process...
+    with indicator("Clearing un-referenced files in external storage"):
+        async with transaction() as c:
+            await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+        for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+            f_id = f.name
+            await ensure_external_consistency(f_id)
+            if (i+1) % 1_000 == 0:
+                print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+        async with transaction() as c:
+            await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
 
     async with unique_cursor(is_write=True) as c:
+
         if index:
             with indicator("VACUUM-index"):
                 await c.execute("VACUUM main")
@@ -120,8 +144,10 @@ def main():
     parser.add_argument("--vacuum-all", action="store_true", help="Run VACUUM on both index.db and blobs.db after balancing")
     args = parser.parse_args()
     sem = Semaphore(args.jobs)
-    asyncio.run(_main(args.batch_size))
-    asyncio.run(vacuum(index=args.vacuum or args.vacuum_all, blobs=args.vacuum_all))
+    with indicator("Balancing"):
+        asyncio.run(_main(args.batch_size))
+    if args.vacuum or args.vacuum_all:
+        asyncio.run(vacuum(index=args.vacuum or args.vacuum_all, blobs=args.vacuum_all))
 
 if __name__ == '__main__':
     main()
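
For reference, the indicator helper promoted to module level in this diff is an ordinary contextlib-based timer, which is what lets main() wrap the whole balancing run and lets vacuum() reuse it per step. A minimal, self-contained sketch of the same pattern (the sleep only stands in for the wrapped work):

    import time
    from contextlib import contextmanager

    @contextmanager
    def indicator(name: str):
        # announce the task, run the with-block body, then report the elapsed time
        print(f"Running {name}...")
        start = time.time()
        yield
        print(f"{name} took {time.time() - start:.2f} seconds.")

    with indicator("Balancing"):
        time.sleep(0.1)  # stand-in for asyncio.run(_main(batch_size))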

{lfss-0.7.8 → lfss-0.7.9}/lfss/src/database.py
@@ -139,7 +139,7 @@ class FileConn(DBObjectBase):
             return []
         return [self.parse_record(r) for r in res]
 
-    async def list_root_dirs(self, *usernames: str) -> list[DirectoryRecord]:
+    async def list_root_dirs(self, *usernames: str, skim = False) -> list[DirectoryRecord]:
         """
         Efficiently list users' directories, if usernames is empty, list all users' directories.
         """
@@ -148,12 +148,12 @@ class FileConn(DBObjectBase):
             await self.cur.execute("SELECT username FROM user")
             res = await self.cur.fetchall()
             dirnames = [u[0] + '/' for u in res]
-            dirs = [DirectoryRecord(u, await self.path_size(u, include_subpath=True)) for u in dirnames]
+            dirs = [await self.get_path_record(u) for u in dirnames] if not skim else [DirectoryRecord(u) for u in dirnames]
             return dirs
         else:
             # list specific users
             dirnames = [uname + '/' for uname in usernames]
-            dirs = [DirectoryRecord(u, await self.path_size(u, include_subpath=True)) for u in dirnames]
+            dirs = [await self.get_path_record(u) for u in dirnames] if not skim else [DirectoryRecord(u) for u in dirnames]
             return dirs
 
     async def list_path(self, url: str, flat: bool = False) -> PathContents:
@@ -207,20 +207,24 @@ class FileConn(DBObjectBase):
         return PathContents(dirs, files)
 
     async def get_path_record(self, url: str) -> DirectoryRecord:
+        """
+        Get the full record of a directory, including size, create_time, update_time, access_time etc.
+        """
         assert url.endswith('/'), "Path must end with /"
         cursor = await self.cur.execute("""
             SELECT MIN(create_time) as create_time,
                    MAX(create_time) as update_time,
-                   MAX(access_time) as access_time
+                   MAX(access_time) as access_time,
+                   COUNT(*) as n_files
             FROM fmeta
             WHERE url LIKE ?
             """, (url + '%', ))
         result = await cursor.fetchone()
         if result is None or any(val is None for val in result):
             raise PathNotFoundError(f"Path {url} not found")
-        create_time, update_time, access_time = result
+        create_time, update_time, access_time, n_files = result
         p_size = await self.path_size(url, include_subpath=True)
-        return DirectoryRecord(url, p_size, create_time=create_time, update_time=update_time, access_time=access_time)
+        return DirectoryRecord(url, p_size, create_time=create_time, update_time=update_time, access_time=access_time, n_files=n_files)
 
     async def user_size(self, user_id: int) -> int:
         cursor = await self.cur.execute("SELECT size FROM usize WHERE user_id = ?", (user_id, ))
@@ -513,7 +517,7 @@ class Database:
     async def read_file(self, url: str) -> bytes:
         validate_url(url)
 
-        async with transaction() as cur:
+        async with unique_cursor() as cur:
            fconn = FileConn(cur)
            r = await fconn.get_file_record(url)
            if r is None:
@@ -525,7 +529,9 @@ class Database:
        blob = await fconn.get_file_blob(f_id)
        if blob is None:
            raise FileNotFoundError(f"File {url} data not found")
-       await fconn.log_access(url)
+
+       async with transaction() as w_cur:
+           await FileConn(w_cur).log_access(url)
 
        return blob
 
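
For reference, the n_files value returned by get_path_record above comes from adding COUNT(*) to the aggregate query that already derives a directory's create, update, and access times from a url prefix. A rough standalone illustration of that query shape, using a toy in-memory table trimmed to the columns the query touches:

    import sqlite3

    con = sqlite3.connect(":memory:")
    con.execute("CREATE TABLE fmeta (url TEXT, create_time TEXT, access_time TEXT)")
    con.executemany("INSERT INTO fmeta VALUES (?, ?, ?)", [
        ("alice/a.txt",     "2024-01-01", "2024-03-01"),
        ("alice/sub/b.txt", "2024-02-01", "2024-02-15"),
        ("bob/c.txt",       "2024-01-05", "2024-01-06"),
    ])

    # one aggregate pass over the prefix yields the times and the total file count
    row = con.execute(
        "SELECT MIN(create_time), MAX(create_time), MAX(access_time), COUNT(*) "
        "FROM fmeta WHERE url LIKE ?", ("alice/%",)).fetchone()
    create_time, update_time, access_time, n_files = row
    print(n_files)  # 2: every file under alice/, subdirectories included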
 

{lfss-0.7.8 → lfss-0.7.9}/lfss/src/datatype.py
@@ -40,13 +40,14 @@ class FileRecord:
 @dataclasses.dataclass
 class DirectoryRecord:
     url: str
-    size: int
+    size: int = -1
     create_time: str = ""
     update_time: str = ""
     access_time: str = ""
+    n_files: int = -1
 
     def __str__(self):
-        return f"Directory {self.url} (size={self.size})"
+        return f"Directory {self.url} (size={self.size}, created at {self.create_time}, updated at {self.update_time}, accessed at {self.access_time}, n_files={self.n_files})"
 
 @dataclasses.dataclass
 class PathContents:
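
For reference, with defaults on every field except url, DirectoryRecord can now serve either as a lightweight placeholder (as the skim path of list_root_dirs builds it) or as a fully populated record (as get_path_record returns it). A small sketch of both uses, following the dataclass shown above:

    import dataclasses

    @dataclasses.dataclass
    class DirectoryRecord:
        url: str
        size: int = -1          # -1 marks "not computed", e.g. in a skim listing
        create_time: str = ""
        update_time: str = ""
        access_time: str = ""
        n_files: int = -1

    skim = DirectoryRecord("alice/")                         # only the name is known
    full = DirectoryRecord("alice/", size=2048, n_files=2)   # aggregates filled in
    print(skim.n_files, full.n_files)  # -1 2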

{lfss-0.7.8 → lfss-0.7.9}/lfss/src/server.py
@@ -135,8 +135,8 @@ async def get_file(path: str, download: bool = False, flat: bool = False, user:
     if flat:
         raise HTTPException(status_code=400, detail="Flat query not supported for root path")
     return PathContents(
-        dirs = await fconn.list_root_dirs(user.username) \
-            if not user.is_admin else await fconn.list_root_dirs(),
+        dirs = await fconn.list_root_dirs(user.username, skim=True) \
+            if not user.is_admin else await fconn.list_root_dirs(skim=True),
         files = []
     )
 

{lfss-0.7.8 → lfss-0.7.9}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lfss"
-version = "0.7.8"
+version = "0.7.9"
 description = "Lightweight file storage service"
 authors = ["li, mengxun <limengxun45@outlook.com>"]
 readme = "Readme.md"