lfss 0.10.0__tar.gz → 0.11.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {lfss-0.10.0 → lfss-0.11.1}/PKG-INFO +3 -3
  2. {lfss-0.10.0 → lfss-0.11.1}/frontend/scripts.js +43 -39
  3. lfss-0.11.1/frontend/utils.js +194 -0
  4. {lfss-0.10.0 → lfss-0.11.1}/lfss/api/__init__.py +7 -4
  5. {lfss-0.10.0 → lfss-0.11.1}/lfss/api/connector.py +7 -4
  6. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/cli.py +2 -2
  7. lfss-0.11.1/lfss/cli/vacuum.py +134 -0
  8. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/config.py +1 -1
  9. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/database.py +122 -46
  10. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/thumb.py +16 -23
  11. {lfss-0.10.0 → lfss-0.11.1}/lfss/sql/init.sql +9 -4
  12. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app.py +1 -1
  13. {lfss-0.10.0 → lfss-0.11.1}/pyproject.toml +2 -2
  14. lfss-0.10.0/frontend/utils.js +0 -96
  15. lfss-0.10.0/lfss/cli/vacuum.py +0 -91
  16. {lfss-0.10.0 → lfss-0.11.1}/Readme.md +0 -0
  17. {lfss-0.10.0 → lfss-0.11.1}/docs/Enviroment_variables.md +0 -0
  18. {lfss-0.10.0 → lfss-0.11.1}/docs/Known_issues.md +0 -0
  19. {lfss-0.10.0 → lfss-0.11.1}/docs/Permission.md +0 -0
  20. {lfss-0.10.0 → lfss-0.11.1}/docs/Webdav.md +0 -0
  21. /lfss-0.10.0/docs/Changelog.md → /lfss-0.11.1/docs/changelog.md +0 -0
  22. {lfss-0.10.0 → lfss-0.11.1}/frontend/api.js +0 -0
  23. {lfss-0.10.0 → lfss-0.11.1}/frontend/index.html +0 -0
  24. {lfss-0.10.0 → lfss-0.11.1}/frontend/info.css +0 -0
  25. {lfss-0.10.0 → lfss-0.11.1}/frontend/info.js +0 -0
  26. {lfss-0.10.0 → lfss-0.11.1}/frontend/login.css +0 -0
  27. {lfss-0.10.0 → lfss-0.11.1}/frontend/login.js +0 -0
  28. {lfss-0.10.0 → lfss-0.11.1}/frontend/popup.css +0 -0
  29. {lfss-0.10.0 → lfss-0.11.1}/frontend/popup.js +0 -0
  30. {lfss-0.10.0 → lfss-0.11.1}/frontend/state.js +0 -0
  31. {lfss-0.10.0 → lfss-0.11.1}/frontend/styles.css +0 -0
  32. {lfss-0.10.0 → lfss-0.11.1}/frontend/thumb.css +0 -0
  33. {lfss-0.10.0 → lfss-0.11.1}/frontend/thumb.js +0 -0
  34. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/__init__.py +0 -0
  35. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/balance.py +0 -0
  36. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/panel.py +0 -0
  37. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/serve.py +0 -0
  38. {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/user.py +0 -0
  39. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/__init__.py +0 -0
  40. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/bounded_pool.py +0 -0
  41. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/connection_pool.py +0 -0
  42. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/datatype.py +0 -0
  43. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/error.py +0 -0
  44. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/log.py +0 -0
  45. {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/utils.py +0 -0
  46. {lfss-0.10.0 → lfss-0.11.1}/lfss/sql/pragma.sql +0 -0
  47. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_base.py +0 -0
  48. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_dav.py +0 -0
  49. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_native.py +0 -0
  50. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/common_impl.py +0 -0
  51. {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/request_log.py +0 -0
{lfss-0.10.0 → lfss-0.11.1}/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 2.1
 Name: lfss
-Version: 0.10.0
+Version: 0.11.1
 Summary: Lightweight file storage service
 Home-page: https://github.com/MenxLi/lfss
-Author: li_mengxun
-Author-email: limengxun45@outlookc.com
+Author: Li, Mengxun
+Author-email: mengxunli@whu.edu.cn
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
{lfss-0.10.0 → lfss-0.11.1}/frontend/scripts.js
@@ -5,6 +5,7 @@ import { showInfoPanel, showDirInfoPanel } from './info.js';
 import { makeThumbHtml } from './thumb.js';
 import { store } from './state.js';
 import { maybeShowLoginPanel } from './login.js';
+import { forEachFile } from './utils.js';
 
 /** @type {import('./api.js').UserRecord}*/
 let userRecord = null;
@@ -158,55 +159,58 @@ uploadFileNameInput.addEventListener('input', debounce(onFileNameInpuChange, 500
         e.preventDefault();
         e.stopPropagation();
     });
-    window.addEventListener('drop', (e) => {
+    window.addEventListener('drop', async (e) => {
         e.preventDefault();
         e.stopPropagation();
-        const files = e.dataTransfer.files;
-        if (files.length == 1){
-            uploadFileSelector.files = files;
-            uploadFileNameInput.value = files[0].name;
+        const items = e.dataTransfer.items;
+        if (items.length == 1 && items[0].kind === 'file' && items[0].webkitGetAsEntry().isFile){
+            uploadFileSelector.files = e.dataTransfer.files;
+            uploadFileNameInput.value = e.dataTransfer.files[0].name;
             uploadFileNameInput.focus();
+            return;
         }
-        else if (files.length > 1){
-            let dstPath = store.dirpath + uploadFileNameInput.value;
-            if (!dstPath.endsWith('/')){ dstPath += '/'; }
-            if (!confirm(`
+
+        /** @type {[string, File][]} */
+        const uploadInputVal = uploadFileNameInput.value? uploadFileNameInput.value : '';
+        let dstPath = store.dirpath + uploadInputVal;
+        if (!dstPath.endsWith('/')){ dstPath += '/'; }
+
+        if (!confirm(`\
 You are trying to upload multiple files at once.
 This will directly upload the files to the [${dstPath}] directory without renaming.
 Note that same name files will be overwritten.
-Are you sure you want to proceed?
-`)){ return; }
-
-        let counter = 0;
-        async function uploadFileFn(...args){
-            const [file, path] = args;
-            try{
-                await uploadFile(conn, path, file, {conflict: 'overwrite'});
-            }
-            catch (err){
-                showPopup('Failed to upload file [' + file.name + ']: ' + err, {level: 'error', timeout: 5000});
-            }
-            counter += 1;
-            console.log("Uploading file: ", counter, "/", files.length);
+Are you sure you want to proceed?\
+`)){ return; }
+
+        let counter = 0;
+        async function uploadFileFn(path, file){
+            const this_count = counter;
+            try{
+                await uploadFile(conn, path, file, {conflict: 'overwrite'});
             }
-
-        let promises = [];
-        for (let i = 0; i < files.length; i++){
-            const file = files[i];
-            const path = dstPath + file.name;
-            promises.push(uploadFileFn(file, path));
+            catch (err){
+                showPopup('Failed to upload file [' + file.name + ']: ' + err, {level: 'error', timeout: 5000});
             }
-        showPopup('Uploading multiple files...', {level: 'info', timeout: 3000});
-        Promise.all(promises).then(
-            () => {
-                showPopup('Upload success.', {level: 'success', timeout: 3000});
-                refreshFileList();
-            },
-            (err) => {
-                showPopup('Failed to upload some files: ' + err, {level: 'error', timeout: 5000});
-            }
-        );
+            console.log(`[${this_count}/${counter}] Uploaded file: ${path}`);
         }
+
+        const promises = await forEachFile(e, async (relPath, filePromise) => {
+            counter += 1;
+            const file = await filePromise;
+            await uploadFileFn(dstPath + relPath, file);
+        });
+
+        showPopup('Uploading multiple files...', {level: 'info', timeout: 3000});
+        Promise.all(promises).then(
+            () => {
+                showPopup('Upload success.', {level: 'success', timeout: 3000});
+                refreshFileList();
+            },
+            (err) => {
+                showPopup('Failed to upload some files: ' + err, {level: 'error', timeout: 5000});
+            }
+        );
+
     });
 }
 
lfss-0.11.1/frontend/utils.js
@@ -0,0 +1,194 @@
+
+export function formatSize(size){
+    if (size < 0){
+        return '';
+    }
+    const sizeInKb = size / 1024;
+    const sizeInMb = sizeInKb / 1024;
+    const sizeInGb = sizeInMb / 1024;
+    if (sizeInGb > 1){
+        return sizeInGb.toFixed(2) + ' GB';
+    }
+    else if (sizeInMb > 1){
+        return sizeInMb.toFixed(2) + ' MB';
+    }
+    else if (sizeInKb > 1){
+        return sizeInKb.toFixed(2) + ' KB';
+    }
+    else {
+        return size + ' B';
+    }
+}
+
+export function copyToClipboard(text){
+    function secureCopy(text){
+        navigator.clipboard.writeText(text);
+    }
+    function unsecureCopy(text){
+        const el = document.createElement('textarea');
+        el.value = text;
+        document.body.appendChild(el);
+        el.select();
+        document.execCommand('copy');
+        document.body.removeChild(el);
+    }
+    if (navigator.clipboard){
+        secureCopy(text);
+    }
+    else {
+        unsecureCopy(text);
+    }
+}
+
+export function encodePathURI(path){
+    return path.split('/').map(encodeURIComponent).join('/');
+}
+
+export function decodePathURI(path){
+    return path.split('/').map(decodeURIComponent).join('/');
+}
+
+export function ensurePathURI(path){
+    return encodePathURI(decodePathURI(path));
+}
+
+export function getRandomString(n, additionalCharset='0123456789_-(=)[]{}'){
+    let result = '';
+    let charset = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
+    const firstChar = charset[Math.floor(Math.random() * charset.length)];
+    const lastChar = charset[Math.floor(Math.random() * charset.length)];
+    result += firstChar;
+    charset += additionalCharset;
+    for (let i = 0; i < n-2; i++){
+        result += charset[Math.floor(Math.random() * charset.length)];
+    }
+    result += lastChar;
+    return result;
+};
+
+/**
+ * @param {string} dateStr
+ * @returns {string}
+ */
+export function cvtGMT2Local(dateStr){
+    if (!dateStr || dateStr === 'N/A'){
+        return '';
+    }
+    const gmtdate = new Date(dateStr);
+    const localdate = new Date(gmtdate.getTime() + gmtdate.getTimezoneOffset() * 60000);
+    return localdate.toISOString().slice(0, 19).replace('T', ' ');
+}
+
+export function debounce(fn, wait){
+    let timeout;
+    return function(...args){
+        const context = this;
+        if (timeout) clearTimeout(timeout);
+        timeout = setTimeout(() => fn.apply(context, args), wait);
+    }
+}
+
+export function asHtmlText(text){
+    const anonElem = document.createElement('div');
+    anonElem.textContent = text;
+    const htmlText = anonElem.innerHTML;
+    return htmlText;
+}
+
+/**
+ * Iterates over all files dropped in the event,
+ * including files inside directories, and processes them
+ * using the provided callback with a concurrency limit.
+ *
+ * @param {Event} e The drop event.
+ * @param {(relPath: string, file: Promise<File>) => Promise<void>} callback A function
+ *   that receives the relative path and a promise for the File.
+ * @param {number} [maxConcurrent=16] Maximum number of concurrent callback executions.
+ * @returns {Promise<Promise<void>[]>} A promise resolving to an array of callback promises.
+ */
+export async function forEachFile(e, callback, maxConcurrent = 16) {
+    const results = []; // to collect callback promises
+
+    // Concurrency barrier variables.
+    let activeCount = 0;
+    const queue = [];
+
+    /**
+     * Runs the given async task when below the concurrency limit.
+     * If at limit, waits until a slot is free.
+     *
+     * @param {() => Promise<any>} task An async function returning a promise.
+     * @returns {Promise<any>}
+     */
+    async function runWithLimit(task) {
+        // If we reached the concurrency limit, wait for a free slot.
+        if (activeCount >= maxConcurrent) {
+            await new Promise(resolve => queue.push(resolve));
+        }
+        activeCount++;
+        try {
+            return await task();
+        } finally {
+            activeCount--;
+            // If there are waiting tasks, allow the next one to run.
+            if (queue.length) {
+                queue.shift()();
+            }
+        }
+    }
+
+    /**
+     * Recursively traverses a file system entry.
+     *
+     * @param {FileSystemEntry} entry The entry (file or directory).
+     * @param {string} path The current relative path.
+     */
+    async function traverse(entry, path) {
+        if (entry.isFile) {
+            // Wrap file retrieval in a promise.
+            const filePromise = new Promise((resolve, reject) => {
+                entry.file(resolve, reject);
+            });
+            // Use the concurrency barrier for the callback invocation.
+            results.push(runWithLimit(() => callback(path + entry.name, filePromise)));
+        } else if (entry.isDirectory) {
+            const reader = entry.createReader();
+
+            async function readAllEntries(reader) {
+                const entries = [];
+                while (true) {
+                    const chunk = await new Promise((resolve, reject) => {
+                        reader.readEntries(resolve, reject);
+                    });
+                    if (chunk.length === 0) break;
+                    entries.push(...chunk);
+                }
+                return entries;
+            }
+
+            const entries = await readAllEntries(reader);
+            await Promise.all(
+                entries.map(ent => traverse(ent, path + entry.name + '/'))
+            );
+        }
+    }
+
+    // Process using DataTransfer items if available.
+    if (e.dataTransfer && e.dataTransfer.items) {
+        await Promise.all(
+            Array.from(e.dataTransfer.items).map(async item => {
+                const entry = item.webkitGetAsEntry && item.webkitGetAsEntry();
+                if (entry) {
+                    await traverse(entry, '');
+                }
+            })
+        );
+    } else if (e.dataTransfer && e.dataTransfer.files) {
+        // Fallback for browsers that support only dataTransfer.files.
+        Array.from(e.dataTransfer.files).forEach(file => {
+            results.push(runWithLimit(() => callback(file.name, Promise.resolve(file))));
+        });
+    }
+    return results;
+}
+
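
The concurrency barrier that forEachFile hand-rolls with `activeCount` and a resolve queue is the same gating the Python side of this release expresses with `asyncio.Semaphore` (compare `barriered` in the new lfss/cli/vacuum.py below). A minimal sketch of the equivalent pattern in Python, with a placeholder task standing in for the upload callback — not lfss API:

    import asyncio

    async def run_with_limit(task_fns, max_concurrent=16):
        # Counting semaphore: at most max_concurrent tasks in flight;
        # the rest wait in line, like the resolve queue in runWithLimit.
        sem = asyncio.Semaphore(max_concurrent)
        async def gated(fn):
            async with sem:        # acquire a slot (activeCount++)
                return await fn()  # slot released on exit (activeCount--, wake next)
        return await asyncio.gather(*(gated(fn) for fn in task_fns))

    async def demo():
        async def fake_upload(i):          # placeholder for uploadFileFn
            await asyncio.sleep(0.01)
            return i
        done = await run_with_limit([lambda i=i: fake_upload(i) for i in range(100)])
        print(len(done), "uploads completed")

    asyncio.run(demo())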
{lfss-0.10.0 → lfss-0.11.1}/lfss/api/__init__.py
@@ -113,7 +113,7 @@ def download_file(
         print(f"File {file_path} already exists, skipping download.")
         return True, error_msg
     try:
-        fmeta = connector.get_metadata(src_url)
+        fmeta = connector.get_meta(src_url)
         if fmeta is None:
             error_msg = "File not found."
             return False, error_msg
@@ -170,14 +170,15 @@ def download_directory(
     _counter = 0
     _counter_lock = Lock()
     failed_items: list[tuple[str, str]] = []
+    file_count = 0
     def get_file(c, src_url):
-        nonlocal _counter, failed_items
+        nonlocal _counter, failed_items, file_count, verbose
         with _counter_lock:
             _counter += 1
             this_count = _counter
         dst_path = f"{directory}{os.path.relpath(decode_uri_compnents(src_url), decode_uri_compnents(src_path))}"
         if verbose:
-            print(f"[{this_count}] Downloading {src_url} to {dst_path}")
+            print(f"[{this_count}/{file_count}] Downloading {src_url} to {dst_path}")
 
         if not (res:=download_file(
             c, src_url, dst_path,
@@ -185,11 +186,13 @@ def download_directory(
         ))[0]:
             failed_items.append((src_url, res[1]))
 
-    batch_size = 10000
+    batch_size = 10_000
     file_list: list[FileRecord] = []
     with connector.session(n_concurrent) as c:
         file_count = c.count_files(src_path, flat=True)
         for offset in range(0, file_count, batch_size):
+            if verbose:
+                print(f"Retrieving file list... ({offset}/{file_count})", end='\r')
             file_list.extend(c.list_files(
                 src_path, offset=offset, limit=batch_size, flat=True
             ))
{lfss-0.10.0 → lfss-0.11.1}/lfss/api/connector.py
@@ -98,7 +98,7 @@ class Connector:
 
         # Skip ahead by checking if the file already exists
         if conflict == 'skip-ahead':
-            exists = self.get_metadata(path)
+            exists = self.get_meta(path)
             if exists is None:
                 conflict = 'skip'
             else:
@@ -122,7 +122,7 @@
 
         # Skip ahead by checking if the file already exists
         if conflict == 'skip-ahead':
-            exists = self.get_metadata(path)
+            exists = self.get_meta(path)
             if exists is None:
                 conflict = 'skip'
             else:
@@ -154,7 +154,7 @@
 
         # Skip ahead by checking if the file already exists
         if conflict == 'skip-ahead':
-            exists = self.get_metadata(path)
+            exists = self.get_meta(path)
             if exists is None:
                 conflict = 'skip'
             else:
@@ -211,7 +211,7 @@
         """Deletes the file at the specified path."""
         self._fetch_factory('DELETE', path)()
 
-    def get_metadata(self, path: str) -> Optional[FileRecord | DirectoryRecord]:
+    def get_meta(self, path: str) -> Optional[FileRecord | DirectoryRecord]:
         """Gets the metadata for the file at the specified path."""
         try:
             response = self._fetch_factory('GET', '_api/meta', {'path': path})()
@@ -223,6 +223,9 @@
             if e.response.status_code == 404:
                 return None
             raise e
+    # shorthand methods for type constraints
+    def get_fmeta(self, path: str) -> Optional[FileRecord]: assert (f:=self.get_meta(path)) is None or isinstance(f, FileRecord); return f
+    def get_dmeta(self, path: str) -> Optional[DirectoryRecord]: assert (d:=self.get_meta(path)) is None or isinstance(d, DirectoryRecord); return d
 
     def list_path(self, path: str) -> PathContents:
         """
{lfss-0.10.0 → lfss-0.11.1}/lfss/cli/cli.py
@@ -12,7 +12,7 @@ def parse_permission(s: str) -> FileReadPermission:
     raise ValueError(f"Invalid permission {s}")
 
 def parse_arguments():
-    parser = argparse.ArgumentParser(description="Command line interface, please set LFSS_ENDPOINT and LFSS_TOKEN environment variables.")
+    parser = argparse.ArgumentParser(description="Client-side command line interface, set LFSS_ENDPOINT and LFSS_TOKEN environment variables for authentication.")
 
     sp = parser.add_subparsers(dest="command", required=True)
 
@@ -126,7 +126,7 @@ def main():
     elif args.command == "query":
         for path in args.path:
             with catch_request_error():
-                res = connector.get_metadata(path)
+                res = connector.get_meta(path)
                 if res is None:
                     print(f"\033[31mNot found\033[0m ({path})")
                 else:
lfss-0.11.1/lfss/cli/vacuum.py
@@ -0,0 +1,134 @@
+"""
+Vacuum the database and external storage to ensure that the storage is consistent and minimal.
+"""
+
+from lfss.eng.config import LARGE_BLOB_DIR, THUMB_DB
+import argparse, time, itertools
+from functools import wraps
+from asyncio import Semaphore
+import aiosqlite
+import aiofiles, asyncio
+import aiofiles.os
+from contextlib import contextmanager
+from lfss.eng.database import transaction, unique_cursor
+from lfss.svc.request_log import RequestDB
+from lfss.eng.utils import now_stamp
+from lfss.eng.connection_pool import global_entrance
+
+sem: Semaphore
+
+@contextmanager
+def indicator(name: str):
+    print(f"\033[1;33mRunning {name}... \033[0m")
+    s = time.time()
+    yield
+    print(f"{name} took {time.time() - s:.2f} seconds.")
+
+def barriered(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        global sem
+        async with sem:
+            return await func(*args, **kwargs)
+    return wrapper
+
+@global_entrance()
+async def vacuum_main(index: bool = False, blobs: bool = False, thumbs: bool = False, vacuum_all: bool = False):
+
+    # check if any file in the Large Blob directory is not in the database
+    # the reverse operation is not necessary, because by design, the database should be the source of truth...
+    # we allow un-referenced files in the Large Blob directory on failure, but not the other way around (unless manually deleted)
+    async def ensure_external_consistency(f_id: str):
+        @barriered
+        async def fn():
+            async with unique_cursor() as c:
+                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                if not await cursor.fetchone():
+                    print(f"File {f_id} not found in database, removing from external storage.")
+                    await aiofiles.os.remove(f)
+        await asyncio.create_task(fn())
+
+    # create a temporary index to speed up the process...
+    with indicator("Clearing un-referenced files in external storage"):
+        try:
+            async with transaction() as c:
+                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+            for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
+                f_id = f.name
+                await ensure_external_consistency(f_id)
+                if (i+1) % 1_000 == 0:
+                    print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
+        finally:
+            async with transaction() as c:
+                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+
+    if index or vacuum_all:
+        with indicator("VACUUM-index"):
+            async with transaction() as c:
+                await c.execute("DELETE FROM dupcount WHERE count = 0")
+            async with unique_cursor(is_write=True) as c:
+                await c.execute("VACUUM main")
+    if blobs or vacuum_all:
+        with indicator("VACUUM-blobs"):
+            async with unique_cursor(is_write=True) as c:
+                await c.execute("VACUUM blobs")
+
+    if thumbs or vacuum_all:
+        try:
+            async with transaction() as c:
+                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
+            with indicator("VACUUM-thumbs"):
+                if not THUMB_DB.exists():
+                    raise FileNotFoundError("Thumbnail database not found.")
+                async with unique_cursor() as db_c:
+                    async with aiosqlite.connect(THUMB_DB) as t_conn:
+                        batch_size = 10_000
+                        for batch_count in itertools.count(start=0):
+                            exceeded_rows = list(await (await t_conn.execute(
+                                "SELECT file_id FROM thumbs LIMIT ? OFFSET ?",
+                                (batch_size, batch_size * batch_count)
+                            )).fetchall())
+                            if not exceeded_rows:
+                                break
+                            batch_ids = [row[0] for row in exceeded_rows]
+                            for f_id in batch_ids:
+                                cursor = await db_c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
+                                if not await cursor.fetchone():
+                                    print(f"Thumbnail {f_id} not found in database, removing from thumb cache.")
+                                    await t_conn.execute("DELETE FROM thumbs WHERE file_id = ?", (f_id,))
+                            print(f"Checked {batch_count+1} batches of {batch_size} thumbnails.")
+
+                        await t_conn.commit()
+                        await t_conn.execute("VACUUM")
+        except FileNotFoundError as e:
+            if "Thumbnail database not found." in str(e):
+                print("Thumbnail database not found, skipping.")
+
+        finally:
+            async with transaction() as c:
+                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
+
+async def vacuum_requests():
+    with indicator("VACUUM-requests"):
+        async with RequestDB().connect() as req_db:
+            await req_db.shrink(max_rows=1_000_000, time_before=now_stamp() - 7*24*60*60)
+            await req_db.conn.execute("VACUUM")
+
+def main():
+    global sem
+    parser = argparse.ArgumentParser(description="Vacuum the database and external storage to ensure that the storage is consistent and minimal.")
+    parser.add_argument("--all", action="store_true", help="Vacuum all")
+    parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
+    parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
+    parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
+    parser.add_argument("-t", "--thumb", action="store_true", help="Vacuum thumbnails")
+    parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs, keeping at most the most recent 1M rows from the last 7 days")
+    args = parser.parse_args()
+    sem = Semaphore(args.jobs)
+    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data, thumbs=args.thumb, vacuum_all=args.all))
+
+    if args.requests or args.all:
+        asyncio.run(vacuum_requests())
+
+if __name__ == '__main__':
+    main()
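
With the `__main__` guard above, the tool can be invoked as `python -m lfss.cli.vacuum --all -j 4`; whether this release also registers a console script would be decided in the pyproject.toml change, which this view does not show. A sketch of driving it programmatically, mirroring what `main()` does (the module-level `sem` must be set first, or the barriered tasks fail with a NameError):

    import asyncio
    from asyncio import Semaphore
    import lfss.cli.vacuum as vacuum

    vacuum.sem = Semaphore(4)  # concurrency for the external-storage check, as -j sets
    asyncio.run(vacuum.vacuum_main(index=True, blobs=True, thumbs=True))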
{lfss-0.10.0 → lfss-0.11.1}/lfss/eng/config.py
@@ -22,5 +22,5 @@ MAX_MEM_FILE_BYTES = 128 * 1024 * 1024 # 128MB
 CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming (on large files)
 DEBUG_MODE = os.environ.get('LFSS_DEBUG', '0') == '1'
 
-THUMB_DB = DATA_HOME / 'thumbs.db'
+THUMB_DB = DATA_HOME / 'thumbs.v0-11.db'
 THUMB_SIZE = (48, 48)