lfss 0.10.0__tar.gz → 0.11.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lfss-0.10.0 → lfss-0.11.1}/PKG-INFO +3 -3
- {lfss-0.10.0 → lfss-0.11.1}/frontend/scripts.js +43 -39
- lfss-0.11.1/frontend/utils.js +194 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/api/__init__.py +7 -4
- {lfss-0.10.0 → lfss-0.11.1}/lfss/api/connector.py +7 -4
- {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/cli.py +2 -2
- lfss-0.11.1/lfss/cli/vacuum.py +134 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/config.py +1 -1
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/database.py +122 -46
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/thumb.py +16 -23
- {lfss-0.10.0 → lfss-0.11.1}/lfss/sql/init.sql +9 -4
- {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app.py +1 -1
- {lfss-0.10.0 → lfss-0.11.1}/pyproject.toml +2 -2
- lfss-0.10.0/frontend/utils.js +0 -96
- lfss-0.10.0/lfss/cli/vacuum.py +0 -91
- {lfss-0.10.0 → lfss-0.11.1}/Readme.md +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/docs/Enviroment_variables.md +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/docs/Known_issues.md +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/docs/Permission.md +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/docs/Webdav.md +0 -0
- /lfss-0.10.0/docs/Changelog.md → /lfss-0.11.1/docs/changelog.md +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/api.js +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/index.html +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/info.css +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/info.js +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/login.css +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/login.js +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/popup.css +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/popup.js +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/state.js +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/styles.css +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/thumb.css +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/frontend/thumb.js +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/__init__.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/balance.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/panel.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/serve.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/cli/user.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/__init__.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/bounded_pool.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/connection_pool.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/datatype.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/error.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/log.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/eng/utils.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/sql/pragma.sql +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_base.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_dav.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/app_native.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/common_impl.py +0 -0
- {lfss-0.10.0 → lfss-0.11.1}/lfss/svc/request_log.py +0 -0
@@ -1,10 +1,10 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lfss
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.11.1
|
4
4
|
Summary: Lightweight file storage service
|
5
5
|
Home-page: https://github.com/MenxLi/lfss
|
6
|
-
Author:
|
7
|
-
Author-email:
|
6
|
+
Author: Li, Mengxun
|
7
|
+
Author-email: mengxunli@whu.edu.cn
|
8
8
|
Requires-Python: >=3.10
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
@@ -5,6 +5,7 @@ import { showInfoPanel, showDirInfoPanel } from './info.js';
|
|
5
5
|
import { makeThumbHtml } from './thumb.js';
|
6
6
|
import { store } from './state.js';
|
7
7
|
import { maybeShowLoginPanel } from './login.js';
|
8
|
+
import { forEachFile } from './utils.js';
|
8
9
|
|
9
10
|
/** @type {import('./api.js').UserRecord}*/
|
10
11
|
let userRecord = null;
|
@@ -158,55 +159,58 @@ uploadFileNameInput.addEventListener('input', debounce(onFileNameInpuChange, 500
|
|
158
159
|
e.preventDefault();
|
159
160
|
e.stopPropagation();
|
160
161
|
});
|
161
|
-
window.addEventListener('drop', (e) => {
|
162
|
+
window.addEventListener('drop', async (e) => {
|
162
163
|
e.preventDefault();
|
163
164
|
e.stopPropagation();
|
164
|
-
const
|
165
|
-
if (
|
166
|
-
uploadFileSelector.files = files;
|
167
|
-
uploadFileNameInput.value = files[0].name;
|
165
|
+
const items = e.dataTransfer.items;
|
166
|
+
if (items.length == 1 && items[0].kind === 'file' && items[0].webkitGetAsEntry().isFile){
|
167
|
+
uploadFileSelector.files = e.dataTransfer.files;
|
168
|
+
uploadFileNameInput.value = e.dataTransfer.files[0].name;
|
168
169
|
uploadFileNameInput.focus();
|
170
|
+
return;
|
169
171
|
}
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
172
|
+
|
173
|
+
/** @type {[string, File][]} */
|
174
|
+
const uploadInputVal = uploadFileNameInput.value? uploadFileNameInput.value : '';
|
175
|
+
let dstPath = store.dirpath + uploadInputVal;
|
176
|
+
if (!dstPath.endsWith('/')){ dstPath += '/'; }
|
177
|
+
|
178
|
+
if (!confirm(`\
|
174
179
|
You are trying to upload multiple files at once.
|
175
180
|
This will directly upload the files to the [${dstPath}] directory without renaming.
|
176
181
|
Note that same name files will be overwritten.
|
177
|
-
Are you sure you want to proceed
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
}
|
186
|
-
catch (err){
|
187
|
-
showPopup('Failed to upload file [' + file.name + ']: ' + err, {level: 'error', timeout: 5000});
|
188
|
-
}
|
189
|
-
counter += 1;
|
190
|
-
console.log("Uploading file: ", counter, "/", files.length);
|
182
|
+
Are you sure you want to proceed?\
|
183
|
+
`)){ return; }
|
184
|
+
|
185
|
+
let counter = 0;
|
186
|
+
async function uploadFileFn(path, file){
|
187
|
+
const this_count = counter;
|
188
|
+
try{
|
189
|
+
await uploadFile(conn, path, file, {conflict: 'overwrite'});
|
191
190
|
}
|
192
|
-
|
193
|
-
|
194
|
-
for (let i = 0; i < files.length; i++){
|
195
|
-
const file = files[i];
|
196
|
-
const path = dstPath + file.name;
|
197
|
-
promises.push(uploadFileFn(file, path));
|
191
|
+
catch (err){
|
192
|
+
showPopup('Failed to upload file [' + file.name + ']: ' + err, {level: 'error', timeout: 5000});
|
198
193
|
}
|
199
|
-
|
200
|
-
Promise.all(promises).then(
|
201
|
-
() => {
|
202
|
-
showPopup('Upload success.', {level: 'success', timeout: 3000});
|
203
|
-
refreshFileList();
|
204
|
-
},
|
205
|
-
(err) => {
|
206
|
-
showPopup('Failed to upload some files: ' + err, {level: 'error', timeout: 5000});
|
207
|
-
}
|
208
|
-
);
|
194
|
+
console.log(`[${this_count}/${counter}] Uploaded file: ${path}`);
|
209
195
|
}
|
196
|
+
|
197
|
+
const promises = await forEachFile(e, async (relPath, filePromise) => {
|
198
|
+
counter += 1;
|
199
|
+
const file = await filePromise;
|
200
|
+
await uploadFileFn(dstPath + relPath, file);
|
201
|
+
});
|
202
|
+
|
203
|
+
showPopup('Uploading multiple files...', {level: 'info', timeout: 3000});
|
204
|
+
Promise.all(promises).then(
|
205
|
+
() => {
|
206
|
+
showPopup('Upload success.', {level: 'success', timeout: 3000});
|
207
|
+
refreshFileList();
|
208
|
+
},
|
209
|
+
(err) => {
|
210
|
+
showPopup('Failed to upload some files: ' + err, {level: 'error', timeout: 5000});
|
211
|
+
}
|
212
|
+
);
|
213
|
+
|
210
214
|
});
|
211
215
|
}
|
212
216
|
|
@@ -0,0 +1,194 @@
|
|
1
|
+
|
2
|
+
/**
 * Format a byte count as a short human-readable string.
 *
 * Binary multiples are used (1 KB = 1024 B). Values at or above a unit
 * boundary are shown in that unit with two decimals, so exactly 1024 bytes
 * is rendered as '1.00 KB' rather than '1024 B'.
 *
 * @param {number} size Size in bytes.
 * @returns {string} Formatted size, or an empty string when size is negative.
 */
export function formatSize(size){
    if (size < 0){
        return '';
    }
    const sizeInKb = size / 1024;
    const sizeInMb = sizeInKb / 1024;
    const sizeInGb = sizeInMb / 1024;
    // `>=` (not `>`) so that exact boundaries are promoted to the larger unit.
    if (sizeInGb >= 1){
        return sizeInGb.toFixed(2) + ' GB';
    }
    if (sizeInMb >= 1){
        return sizeInMb.toFixed(2) + ' MB';
    }
    if (sizeInKb >= 1){
        return sizeInKb.toFixed(2) + ' KB';
    }
    return size + ' B';
}
|
22
|
+
|
23
|
+
/**
 * Copy text to the system clipboard.
 *
 * Uses the asynchronous Clipboard API when `navigator.clipboard` exists.
 * If that API is missing, or its write is rejected, the text is copied
 * through a temporary textarea and `document.execCommand('copy')`.
 *
 * @param {string} text Text to place on the clipboard.
 * @returns {void}
 */
export function copyToClipboard(text){
    function legacyCopy(){
        const el = document.createElement('textarea');
        el.value = text;
        document.body.appendChild(el);
        try {
            el.select();
            document.execCommand('copy');
        } finally {
            // Always detach the helper element, even if the copy command throws.
            document.body.removeChild(el);
        }
    }

    if (navigator.clipboard){
        // writeText returns a promise; handle its rejection instead of
        // leaving it floating, and retry with the legacy path.
        navigator.clipboard.writeText(text)
            .catch(() => legacyCopy())
            .catch((err) => console.error('Failed to copy to clipboard:', err));
        return;
    }
    legacyCopy();
}
|
42
|
+
|
43
|
+
/**
 * Percent-encode each '/'-separated segment of a path, keeping the
 * separators themselves untouched.
 *
 * @param {string} path Path whose segments are raw (unencoded) text.
 * @returns {string} The path with every segment passed through encodeURIComponent.
 */
export function encodePathURI(path){
    const encodedSegments = [];
    for (const segment of path.split('/')){
        encodedSegments.push(encodeURIComponent(segment));
    }
    return encodedSegments.join('/');
}
|
46
|
+
|
47
|
+
/**
 * Percent-decode each '/'-separated segment of a path and join the
 * decoded segments back together with '/'.
 *
 * @param {string} path Path whose segments are percent-encoded.
 * @returns {string} The path with every segment passed through decodeURIComponent.
 * @throws {URIError} When a segment contains a malformed escape sequence.
 */
export function decodePathURI(path){
    const decodedSegments = [];
    for (const segment of path.split('/')){
        decodedSegments.push(decodeURIComponent(segment));
    }
    return decodedSegments.join('/');
}
|
50
|
+
|
51
|
+
/**
 * Return the path with every '/'-separated segment percent-encoded exactly
 * once, whether or not the input was already encoded.
 *
 * Each segment is decoded and then re-encoded. A segment that is not valid
 * percent-encoding (for example a raw file name containing '%') is treated
 * as unencoded text instead of raising a URIError.
 *
 * @param {string} path Path that may or may not be percent-encoded.
 * @returns {string} The percent-encoded path.
 */
export function ensurePathURI(path){
    return path.split('/').map((segment) => {
        let rawSegment;
        try {
            rawSegment = decodeURIComponent(segment);
        } catch (err) {
            if (!(err instanceof URIError)){
                throw err;
            }
            // Malformed escape: the segment was never encoded to begin with.
            rawSegment = segment;
        }
        return encodeURIComponent(rawSegment);
    }).join('/');
}
|
54
|
+
|
55
|
+
/**
 * Build a random string of length n.
 *
 * The first and last characters are always ASCII letters; characters in
 * between are drawn from the letters plus `additionalCharset`.
 * Uses Math.random, so the result is not suitable for secrets.
 *
 * @param {number} n Desired length. Values below 1 yield an empty string.
 * @param {string} [additionalCharset] Extra characters allowed in the middle of the string.
 * @returns {string} The generated string.
 */
export function getRandomString(n, additionalCharset='0123456789_-(=)[]{}'){
    const letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    const pick = (chars) => chars[Math.floor(Math.random() * chars.length)];

    // Short lengths cannot hold both a leading and a trailing letter.
    if (n < 1){
        return '';
    }
    if (n < 2){
        return pick(letters);
    }

    const middleCharset = letters + additionalCharset;
    let result = pick(letters);
    for (let i = 0; i < n - 2; i++){
        result += pick(middleCharset);
    }
    return result + pick(letters);
}
|
68
|
+
|
69
|
+
/**
 * Convert a timestamp string into a 'YYYY-MM-DD HH:MM:SS' display string.
 *
 * The input is parsed with `new Date(dateStr)`, shifted by the timezone
 * offset (in minutes) that the runtime reports for that instant, and then
 * formatted from the first 19 characters of `toISOString()`.
 *
 * NOTE(review): whether adding the offset yields local wall-clock time
 * depends on the format the server sends (with or without an explicit
 * zone) - confirm against real timestamps.
 *
 * @param {string} dateStr Timestamp string; empty values and 'N/A' are treated as missing.
 * @returns {string} Formatted timestamp, or an empty string when the input is missing.
 */
export function cvtGMT2Local(dateStr){
    const isMissing = !dateStr || dateStr === 'N/A';
    if (isMissing){
        return '';
    }
    const parsed = new Date(dateStr);
    const offsetMs = parsed.getTimezoneOffset() * 60000;
    const shifted = new Date(parsed.getTime() + offsetMs);
    const isoText = shifted.toISOString();
    return isoText.slice(0, 19).replace('T', ' ');
}
|
81
|
+
|
82
|
+
/**
 * Wrap fn so that it runs only after `wait` milliseconds have passed
 * without another call to the wrapper.
 *
 * Every call cancels the pending timer (if any) and schedules a new one;
 * fn is eventually invoked with the `this` value and arguments of the
 * most recent call.
 *
 * @param {Function} fn Function to delay.
 * @param {number} wait Delay in milliseconds.
 * @returns {Function} The debounced wrapper.
 */
export function debounce(fn, wait){
    let pendingTimer;
    return function(...latestArgs){
        if (pendingTimer){
            clearTimeout(pendingTimer);
        }
        // The arrow function keeps the wrapper's `this` for the delayed call.
        pendingTimer = setTimeout(() => {
            fn.apply(this, latestArgs);
        }, wait);
    };
}
|
90
|
+
|
91
|
+
/**
 * Escape text for insertion into HTML by round-tripping it through a
 * detached <div>: the text is assigned to `textContent` and read back
 * from `innerHTML`.
 *
 * @param {string} text Raw text.
 * @returns {string} The div's innerHTML after the assignment.
 */
export function asHtmlText(text){
    const scratch = document.createElement('div');
    scratch.textContent = text;
    return scratch.innerHTML;
}
|
97
|
+
|
98
|
+
/**
 * Iterates over all files dropped in the event, including files nested
 * inside dropped directories, and invokes the callback for each of them
 * with a bound on how many callbacks run at the same time.
 *
 * The returned promise resolves once the traversal has finished and every
 * callback has been scheduled; the callbacks themselves may still be
 * running. Await the returned array (e.g. with Promise.all) to wait for them.
 *
 * @param {Event} e The drop event.
 * @param {(relPath: string, file: Promise<File>) => Promise<void>} callback A function
 * that receives the path relative to the drop root and a promise for the File.
 * @param {number} [maxConcurrent=16] Maximum number of concurrent callback executions.
 * Values below 1 are treated as 1.
 * @returns {Promise<Promise<void>[]>} A promise resolving to an array of callback promises.
 */
export async function forEachFile(e, callback, maxConcurrent = 16) {
    const results = []; // callback promises, in scheduling order

    // A limit below 1 (or NaN) would park every task forever; clamp it.
    const limit = maxConcurrent >= 1 ? maxConcurrent : 1;
    let activeCount = 0;
    const waiters = [];

    /**
     * Runs the task once a concurrency slot is available.
     *
     * A finishing task hands its slot directly to the oldest waiter
     * (activeCount is left unchanged), so a task arriving between the
     * release and the waiter's resumption cannot push the number of
     * running tasks above the limit.
     *
     * @param {() => Promise<any>} task An async function returning a promise.
     * @returns {Promise<any>}
     */
    async function runWithLimit(task) {
        if (activeCount >= limit) {
            await new Promise((resolve) => waiters.push(resolve));
        } else {
            activeCount++;
        }
        try {
            return await task();
        } finally {
            const wakeNext = waiters.shift();
            if (wakeNext) {
                wakeNext();
            } else {
                activeCount--;
            }
        }
    }

    /**
     * Collects every entry of a directory reader; readEntries is called
     * repeatedly until it reports an empty chunk.
     *
     * @param {FileSystemDirectoryReader} reader
     * @returns {Promise<FileSystemEntry[]>}
     */
    async function readAllEntries(reader) {
        const entries = [];
        while (true) {
            const chunk = await new Promise((resolve, reject) => {
                reader.readEntries(resolve, reject);
            });
            if (chunk.length === 0) {
                break;
            }
            entries.push(...chunk);
        }
        return entries;
    }

    /**
     * Recursively traverses a file system entry.
     *
     * @param {FileSystemEntry} entry The entry (file or directory).
     * @param {string} path The relative path of the entry's parent ('' or ending in '/').
     */
    async function traverse(entry, path) {
        if (entry.isFile) {
            const filePromise = new Promise((resolve, reject) => {
                entry.file(resolve, reject);
            });
            results.push(runWithLimit(() => callback(path + entry.name, filePromise)));
            return;
        }
        if (entry.isDirectory) {
            const children = await readAllEntries(entry.createReader());
            const childPrefix = path + entry.name + '/';
            await Promise.all(children.map((child) => traverse(child, childPrefix)));
        }
    }

    const dataTransfer = e.dataTransfer;
    if (!dataTransfer) {
        return results;
    }

    const items = dataTransfer.items ? Array.from(dataTransfer.items) : [];
    const canReadEntries = items.some((item) => typeof item.webkitGetAsEntry === 'function');
    if (canReadEntries) {
        // Entries are requested synchronously for every item before the first await.
        await Promise.all(
            items.map(async (item) => {
                const entry = item.webkitGetAsEntry && item.webkitGetAsEntry();
                if (entry) {
                    await traverse(entry, '');
                }
            })
        );
    } else if (dataTransfer.files) {
        // Fallback when entries cannot be read: flat list of files only.
        Array.from(dataTransfer.files).forEach((file) => {
            results.push(runWithLimit(() => callback(file.name, Promise.resolve(file))));
        });
    }
    return results;
}
|
194
|
+
|
@@ -113,7 +113,7 @@ def download_file(
|
|
113
113
|
print(f"File {file_path} already exists, skipping download.")
|
114
114
|
return True, error_msg
|
115
115
|
try:
|
116
|
-
fmeta = connector.
|
116
|
+
fmeta = connector.get_meta(src_url)
|
117
117
|
if fmeta is None:
|
118
118
|
error_msg = "File not found."
|
119
119
|
return False, error_msg
|
@@ -170,14 +170,15 @@ def download_directory(
|
|
170
170
|
_counter = 0
|
171
171
|
_counter_lock = Lock()
|
172
172
|
failed_items: list[tuple[str, str]] = []
|
173
|
+
file_count = 0
|
173
174
|
def get_file(c, src_url):
|
174
|
-
nonlocal _counter, failed_items
|
175
|
+
nonlocal _counter, failed_items, file_count, verbose
|
175
176
|
with _counter_lock:
|
176
177
|
_counter += 1
|
177
178
|
this_count = _counter
|
178
179
|
dst_path = f"{directory}{os.path.relpath(decode_uri_compnents(src_url), decode_uri_compnents(src_path))}"
|
179
180
|
if verbose:
|
180
|
-
print(f"[{this_count}] Downloading {src_url} to {dst_path}")
|
181
|
+
print(f"[{this_count}/{file_count}] Downloading {src_url} to {dst_path}")
|
181
182
|
|
182
183
|
if not (res:=download_file(
|
183
184
|
c, src_url, dst_path,
|
@@ -185,11 +186,13 @@ def download_directory(
|
|
185
186
|
))[0]:
|
186
187
|
failed_items.append((src_url, res[1]))
|
187
188
|
|
188
|
-
batch_size =
|
189
|
+
batch_size = 10_000
|
189
190
|
file_list: list[FileRecord] = []
|
190
191
|
with connector.session(n_concurrent) as c:
|
191
192
|
file_count = c.count_files(src_path, flat=True)
|
192
193
|
for offset in range(0, file_count, batch_size):
|
194
|
+
if verbose:
|
195
|
+
print(f"Retrieving file list... ({offset}/{file_count})", end='\r')
|
193
196
|
file_list.extend(c.list_files(
|
194
197
|
src_path, offset=offset, limit=batch_size, flat=True
|
195
198
|
))
|
@@ -98,7 +98,7 @@ class Connector:
|
|
98
98
|
|
99
99
|
# Skip ahead by checking if the file already exists
|
100
100
|
if conflict == 'skip-ahead':
|
101
|
-
exists = self.
|
101
|
+
exists = self.get_meta(path)
|
102
102
|
if exists is None:
|
103
103
|
conflict = 'skip'
|
104
104
|
else:
|
@@ -122,7 +122,7 @@ class Connector:
|
|
122
122
|
|
123
123
|
# Skip ahead by checking if the file already exists
|
124
124
|
if conflict == 'skip-ahead':
|
125
|
-
exists = self.
|
125
|
+
exists = self.get_meta(path)
|
126
126
|
if exists is None:
|
127
127
|
conflict = 'skip'
|
128
128
|
else:
|
@@ -154,7 +154,7 @@ class Connector:
|
|
154
154
|
|
155
155
|
# Skip ahead by checking if the file already exists
|
156
156
|
if conflict == 'skip-ahead':
|
157
|
-
exists = self.
|
157
|
+
exists = self.get_meta(path)
|
158
158
|
if exists is None:
|
159
159
|
conflict = 'skip'
|
160
160
|
else:
|
@@ -211,7 +211,7 @@ class Connector:
|
|
211
211
|
"""Deletes the file at the specified path."""
|
212
212
|
self._fetch_factory('DELETE', path)()
|
213
213
|
|
214
|
-
def
|
214
|
+
def get_meta(self, path: str) -> Optional[FileRecord | DirectoryRecord]:
|
215
215
|
"""Gets the metadata for the file at the specified path."""
|
216
216
|
try:
|
217
217
|
response = self._fetch_factory('GET', '_api/meta', {'path': path})()
|
@@ -223,6 +223,9 @@ class Connector:
|
|
223
223
|
if e.response.status_code == 404:
|
224
224
|
return None
|
225
225
|
raise e
|
226
|
+
# shorthand methods for type constraints
|
227
|
+
def get_fmeta(self, path: str) -> Optional[FileRecord]:
    """Type-narrowing shorthand for get_meta: returns the record for path, asserting it is a FileRecord (or None)."""
    # Perform the lookup outside the assert: with `python -O` assert statements
    # are stripped, which would otherwise drop the call and leave `f` unbound.
    f = self.get_meta(path)
    assert f is None or isinstance(f, FileRecord)
    return f
|
228
|
+
def get_dmeta(self, path: str) -> Optional[DirectoryRecord]:
    """Type-narrowing shorthand for get_meta: returns the record for path, asserting it is a DirectoryRecord (or None)."""
    # Perform the lookup outside the assert: with `python -O` assert statements
    # are stripped, which would otherwise drop the call and leave `d` unbound.
    d = self.get_meta(path)
    assert d is None or isinstance(d, DirectoryRecord)
    return d
|
226
229
|
|
227
230
|
def list_path(self, path: str) -> PathContents:
|
228
231
|
"""
|
@@ -12,7 +12,7 @@ def parse_permission(s: str) -> FileReadPermission:
|
|
12
12
|
raise ValueError(f"Invalid permission {s}")
|
13
13
|
|
14
14
|
def parse_arguments():
|
15
|
-
parser = argparse.ArgumentParser(description="
|
15
|
+
parser = argparse.ArgumentParser(description="Client-side command line interface, set LFSS_ENDPOINT and LFSS_TOKEN environment variables for authentication.")
|
16
16
|
|
17
17
|
sp = parser.add_subparsers(dest="command", required=True)
|
18
18
|
|
@@ -126,7 +126,7 @@ def main():
|
|
126
126
|
elif args.command == "query":
|
127
127
|
for path in args.path:
|
128
128
|
with catch_request_error():
|
129
|
-
res = connector.
|
129
|
+
res = connector.get_meta(path)
|
130
130
|
if res is None:
|
131
131
|
print(f"\033[31mNot found\033[0m ({path})")
|
132
132
|
else:
|
@@ -0,0 +1,134 @@
|
|
1
|
+
"""
|
2
|
+
Vacuum the database and external storage to ensure that the storage is consistent and minimal.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from lfss.eng.config import LARGE_BLOB_DIR, THUMB_DB
|
6
|
+
import argparse, time, itertools
|
7
|
+
from functools import wraps
|
8
|
+
from asyncio import Semaphore
|
9
|
+
import aiosqlite
|
10
|
+
import aiofiles, asyncio
|
11
|
+
import aiofiles.os
|
12
|
+
from contextlib import contextmanager
|
13
|
+
from lfss.eng.database import transaction, unique_cursor
|
14
|
+
from lfss.svc.request_log import RequestDB
|
15
|
+
from lfss.eng.utils import now_stamp
|
16
|
+
from lfss.eng.connection_pool import global_entrance
|
17
|
+
|
18
|
+
sem: Semaphore
|
19
|
+
|
20
|
+
@contextmanager
def indicator(name: str):
    """
    Context manager that announces a step and reports how long it took.

    Prints a highlighted "Running <name>..." line on entry and a
    "<name> took N seconds." line on normal exit. If the body raises,
    the exception propagates and the timing line is not printed.
    """
    print(f"\033[1;33mRunning {name}... \033[0m")
    started_at = time.time()
    yield
    elapsed = time.time() - started_at
    print(f"{name} took {elapsed:.2f} seconds.")
|
26
|
+
|
27
|
+
def barriered(func):
    """
    Decorator for coroutine functions: each call acquires the module-level
    semaphore `sem` before running `func` and releases it afterwards.

    `sem` is looked up at call time, so it must be assigned before the
    decorated coroutine is first awaited.
    """
    @wraps(func)
    async def limited(*args, **kwargs):
        global sem
        async with sem:
            outcome = await func(*args, **kwargs)
        return outcome
    return limited
|
34
|
+
|
35
|
+
@global_entrance()
async def vacuum_main(index: bool = False, blobs: bool = False, thumbs: bool = False, vacuum_all: bool = False):
    """
    Remove un-referenced files from external storage, then optionally
    VACUUM the databases.

    The external-storage cleanup always runs. The remaining steps run when
    their flag, or `vacuum_all`, is set:

    - index: delete `dupcount` rows whose count is 0 and VACUUM `main`.
    - blobs: VACUUM `blobs`.
    - thumbs: delete thumbnails whose file_id is no longer in `fmeta`,
      then VACUUM the thumbnail database (skipped when it does not exist).
    """

    # Check if any file in the Large Blob directory is not in the database.
    # The reverse operation is not necessary, because by design the database should be the source of truth:
    # un-referenced files in the Large Blob directory are allowed on failure, but not the other way
    # around (unless manually deleted).
    async def ensure_external_consistency(blob_path):
        # The path is passed in explicitly rather than read from the enclosing loop variable.
        @barriered
        async def fn():
            async with unique_cursor() as c:
                cursor = await c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (blob_path.name,))
                if not await cursor.fetchone():
                    print(f"File {blob_path.name} not found in database, removing from external storage.")
                    await aiofiles.os.remove(blob_path)
        # NOTE(review): each task is awaited immediately, so files are checked one at a time.
        await asyncio.create_task(fn())

    # create a temporary index to speed up the process...
    with indicator("Clearing un-referenced files in external storage"):
        try:
            async with transaction() as c:
                await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
            for i, f in enumerate(LARGE_BLOB_DIR.iterdir()):
                await ensure_external_consistency(f)
                if (i+1) % 1_000 == 0:
                    print(f"Checked {(i+1)//1000}k files in external storage.", end='\r')
        finally:
            async with transaction() as c:
                await c.execute("DROP INDEX IF EXISTS fmeta_file_id")

    if index or vacuum_all:
        with indicator("VACUUM-index"):
            async with transaction() as c:
                await c.execute("DELETE FROM dupcount WHERE count = 0")
            async with unique_cursor(is_write=True) as c:
                await c.execute("VACUUM main")
    if blobs or vacuum_all:
        with indicator("VACUUM-blobs"):
            async with unique_cursor(is_write=True) as c:
                await c.execute("VACUUM blobs")

    if thumbs or vacuum_all:
        if not THUMB_DB.exists():
            # Checked up front, so that an unrelated FileNotFoundError raised
            # further down is no longer swallowed by a catch-all handler.
            print("Thumbnail database not found, skipping.")
        else:
            try:
                async with transaction() as c:
                    await c.execute("CREATE INDEX IF NOT EXISTS fmeta_file_id ON fmeta (file_id)")
                with indicator("VACUUM-thumbs"):
                    async with unique_cursor() as db_c:
                        async with aiosqlite.connect(THUMB_DB) as t_conn:
                            batch_size = 10_000
                            # Stale ids are only collected while paging and deleted afterwards:
                            # deleting during the LIMIT/OFFSET scan would shift the remaining
                            # rows and make later pages skip entries.
                            stale_ids = []
                            for batch_count in itertools.count(start=0):
                                rows = list(await (await t_conn.execute(
                                    "SELECT file_id FROM thumbs LIMIT ? OFFSET ?",
                                    (batch_size, batch_size * batch_count)
                                )).fetchall())
                                if not rows:
                                    break
                                for row in rows:
                                    f_id = row[0]
                                    cursor = await db_c.execute("SELECT file_id FROM fmeta WHERE file_id = ?", (f_id,))
                                    if not await cursor.fetchone():
                                        print(f"Thumbnail {f_id} not found in database, removing from thumb cache.")
                                        stale_ids.append(f_id)
                                print(f"Checked {batch_count+1} batches of {batch_size} thumbnails.")

                            for f_id in stale_ids:
                                await t_conn.execute("DELETE FROM thumbs WHERE file_id = ?", (f_id,))
                            await t_conn.commit()
                            await t_conn.execute("VACUUM")
            finally:
                async with transaction() as c:
                    await c.execute("DROP INDEX IF EXISTS fmeta_file_id")
|
110
|
+
|
111
|
+
async def vacuum_requests():
    """
    Shrink the request log and VACUUM its database.

    Calls `shrink` with max_rows=1_000_000 and a `time_before` cutoff of
    seven days before the current stamp, then runs VACUUM on the same connection.
    """
    seven_days_s = 7*24*60*60
    with indicator("VACUUM-requests"):
        async with RequestDB().connect() as req_db:
            cutoff = now_stamp() - seven_days_s
            await req_db.shrink(max_rows=1_000_000, time_before=cutoff)
            await req_db.conn.execute("VACUUM")
|
116
|
+
|
117
|
+
def main():
    """
    Command line entry point for the vacuum tool.

    Parses the flags, installs the module-level semaphore used by
    `barriered`, runs `vacuum_main`, and finally runs `vacuum_requests`
    when --requests or --all was given.
    """
    global sem
    # The description previously read as the one for the storage-balancing tool.
    parser = argparse.ArgumentParser(description="Vacuum the database and external storage to ensure that the storage is consistent and minimal.")
    parser.add_argument("--all", action="store_true", help="Vacuum all")
    parser.add_argument("-j", "--jobs", type=int, default=2, help="Number of concurrent jobs")
    parser.add_argument("-m", "--metadata", action="store_true", help="Vacuum metadata")
    parser.add_argument("-d", "--data", action="store_true", help="Vacuum blobs")
    parser.add_argument("-t", "--thumb", action="store_true", help="Vacuum thumbnails")
    parser.add_argument("-r", "--requests", action="store_true", help="Vacuum request logs to only keep at most recent 1M rows in 7 days")
    args = parser.parse_args()
    if args.jobs < 1:
        # Semaphore(0) would block every job forever and a negative value raises ValueError.
        parser.error("--jobs must be at least 1")
    sem = Semaphore(args.jobs)
    asyncio.run(vacuum_main(index=args.metadata, blobs=args.data, thumbs=args.thumb, vacuum_all=args.all))

    if args.requests or args.all:
        asyncio.run(vacuum_requests())
|
132
|
+
|
133
|
+
if __name__ == '__main__':
|
134
|
+
main()
|
@@ -22,5 +22,5 @@ MAX_MEM_FILE_BYTES = 128 * 1024 * 1024 # 128MB
|
|
22
22
|
CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming (on large files)
|
23
23
|
DEBUG_MODE = os.environ.get('LFSS_DEBUG', '0') == '1'
|
24
24
|
|
25
|
-
THUMB_DB = DATA_HOME / 'thumbs.db'
|
25
|
+
THUMB_DB = DATA_HOME / 'thumbs.v0-11.db'
|
26
26
|
THUMB_SIZE = (48, 48)
|