lfss 0.7.14__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
 import os, time, pathlib
 from threading import Lock
-from .api import Connector
+from .connector import Connector
+from ..src.datatype import FileRecord
+from ..src.utils import decode_uri_compnents
 from ..src.bounded_pool import BoundedThreadPoolExecutor
 
 def upload_file(
@@ -11,8 +13,9 @@ def upload_file(
     interval: float = 0,
     verbose: bool = False,
     **put_kwargs
-    ):
+    ) -> tuple[bool, str]:
     this_try = 0
+    error_msg = ""
     while this_try <= n_retries:
         try:
             with open(file_path, 'rb') as f:
@@ -24,6 +27,7 @@ def upload_file(
                 raise e
             if verbose:
                 print(f"Error uploading {file_path}: {e}, retrying...")
+            error_msg = str(e)
             this_try += 1
         finally:
             time.sleep(interval)
@@ -31,8 +35,8 @@ def upload_file(
     if this_try > n_retries:
         if verbose:
             print(f"Failed to upload {file_path} after {n_retries} retries.")
-        return False
-    return True
+        return False, error_msg
+    return True, error_msg
 
 def upload_directory(
     connector: Connector,
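
upload_file (and download_file, further down) now reports why a transfer failed instead of returning a bare boolean: the result is a (success, error_msg) tuple. A minimal usage sketch using only the public lfss.api imports shown elsewhere in this diff; the file paths are placeholders:

    from lfss.api import Connector, upload_file

    connector = Connector()   # endpoint/token come from LFSS_ENDPOINT / LFSS_TOKEN

    # "report.pdf" and "myuser/docs/report.pdf" are placeholder paths
    ok, err = upload_file(
        connector, "report.pdf", "myuser/docs/report.pdf",
        n_retries=3, interval=0.5, verbose=True,
    )
    if not ok:
        print(f"upload failed: {err}")
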
@@ -43,7 +47,7 @@ def upload_directory(
     interval: float = 0,
     verbose: bool = False,
     **put_kwargs
-    ) -> list[str]:
+    ) -> list[tuple[str, str]]:
     assert path.endswith('/'), "Path must end with a slash."
     if path.startswith('/'):
         path = path[1:]
@@ -52,8 +56,8 @@ def upload_directory(
     _counter = 0
     _counter_lock = Lock()
 
-    faild_files = []
-    def put_file(file_path):
+    faild_items = []
+    def put_file(c: Connector, file_path):
         with _counter_lock:
             nonlocal _counter
             _counter += 1
@@ -62,18 +66,19 @@ def upload_directory(
         if verbose:
             print(f"[{this_count}] Uploading {file_path} to {dst_path}")
 
-        if not upload_file(
-            connector, file_path, dst_path,
+        if not (res:=upload_file(
+            c, file_path, dst_path,
             n_retries=n_retries, interval=interval, verbose=verbose, **put_kwargs
-        ):
-            faild_files.append(file_path)
+        ))[0]:
+            faild_items.append((file_path, res[1]))
 
-    with BoundedThreadPoolExecutor(n_concurrent) as executor:
-        for root, dirs, files in os.walk(directory):
-            for file in files:
-                executor.submit(put_file, os.path.join(root, file))
+    with connector.session(n_concurrent) as c:
+        with BoundedThreadPoolExecutor(n_concurrent) as executor:
+            for root, dirs, files in os.walk(directory):
+                for file in files:
+                    executor.submit(put_file, c, os.path.join(root, file))
 
-    return faild_files
+    return faild_items
 
 def download_file(
     connector: Connector,
@@ -83,17 +88,19 @@ def download_file(
     interval: float = 0,
     verbose: bool = False,
     overwrite: bool = False
-    ):
+    ) -> tuple[bool, str]:
     this_try = 0
+    error_msg = ""
     while this_try <= n_retries:
         if not overwrite and os.path.exists(file_path):
             if verbose:
                 print(f"File {file_path} already exists, skipping download.")
-            return True
+            return True, error_msg
         try:
             blob = connector.get(src_url)
-            if not blob:
-                return False
+            if blob is None:
+                error_msg = "File not found."
+                return False, error_msg
             pathlib.Path(file_path).parent.mkdir(parents=True, exist_ok=True)
             with open(file_path, 'wb') as f:
                 f.write(blob)
@@ -103,6 +110,7 @@ def download_file(
                 raise e
             if verbose:
                 print(f"Error downloading {src_url}: {e}, retrying...")
+            error_msg = str(e)
             this_try += 1
         finally:
             time.sleep(interval)
@@ -110,8 +118,8 @@ def download_file(
     if this_try > n_retries:
         if verbose:
             print(f"Failed to download {src_url} after {n_retries} retries.")
-        return False
-    return True
+        return False, error_msg
+    return True, error_msg
 
 def download_directory(
     connector: Connector,
@@ -122,7 +130,7 @@ def download_directory(
     interval: float = 0,
     verbose: bool = False,
     overwrite: bool = False
-    ) -> list[str]:
+    ) -> list[tuple[str, str]]:
 
     directory = str(directory)
 
@@ -133,23 +141,32 @@ def download_directory(
 
     _counter = 0
     _counter_lock = Lock()
-    failed_files = []
-    def get_file(src_url):
-        nonlocal _counter, failed_files
+    failed_items: list[tuple[str, str]] = []
+    def get_file(c, src_url):
+        nonlocal _counter, failed_items
         with _counter_lock:
             _counter += 1
             this_count = _counter
-        dst_path = f"{directory}{os.path.relpath(src_url, src_path)}"
+        dst_path = f"{directory}{os.path.relpath(decode_uri_compnents(src_url), decode_uri_compnents(src_path))}"
         if verbose:
             print(f"[{this_count}] Downloading {src_url} to {dst_path}")
 
-        if not download_file(
-            connector, src_url, dst_path,
+        if not (res:=download_file(
+            c, src_url, dst_path,
             n_retries=n_retries, interval=interval, verbose=verbose, overwrite=overwrite
-        ):
-            failed_files.append(src_url)
+        ))[0]:
+            failed_items.append((src_url, res[1]))
 
-    with BoundedThreadPoolExecutor(n_concurrent) as executor:
-        for file in connector.list_path(src_path, flat=True).files:
-            executor.submit(get_file, file.url)
-    return failed_files
+    batch_size = 10000
+    file_list: list[FileRecord] = []
+    with connector.session(n_concurrent) as c:
+        file_count = c.count_files(src_path, flat=True)
+        for offset in range(0, file_count, batch_size):
+            file_list.extend(c.list_files(
+                src_path, offset=offset, limit=batch_size, flat=True
+            ))
+
+        with BoundedThreadPoolExecutor(n_concurrent) as executor:
+            for file in file_list:
+                executor.submit(get_file, c, file.url)
+    return failed_items
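
Both directory helpers now route every worker through a single connector.session(n_concurrent), so the whole transfer shares one HTTP connection pool, and failures come back as (path, error message) pairs rather than bare paths; download_directory additionally pages through the listing in 10,000-record batches instead of a single list_path call. A usage sketch with placeholder locations (keyword names follow the parameters visible in the hunks above):

    from lfss.api import Connector, download_directory

    connector = Connector()

    # "myuser/photos/" and "./photos/" are placeholder paths
    failed = download_directory(
        connector, src_path="myuser/photos/", directory="./photos/",
        n_concurrent=8, n_retries=2, verbose=True,
    )
    for url, reason in failed:
        print(f"could not fetch {url}: {reason}")
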
@@ -1,9 +1,13 @@
-from typing import Optional, Literal
+from __future__ import annotations
+from typing import Optional, Literal, Iterator
 import os
 import requests
+import requests.adapters
 import urllib.parse
+from lfss.src.error import PathNotFoundError
 from lfss.src.datatype import (
-    FileReadPermission, FileRecord, DirectoryRecord, UserRecord, PathContents
+    FileReadPermission, FileRecord, DirectoryRecord, UserRecord, PathContents,
+    FileSortKey, DirSortKey
 )
 from lfss.src.utils import ensure_uri_compnents
 
@@ -11,12 +15,41 @@ _default_endpoint = os.environ.get('LFSS_ENDPOINT', 'http://localhost:8000')
 _default_token = os.environ.get('LFSS_TOKEN', '')
 
 class Connector:
+    class Session:
+        def __init__(self, connector: Connector, pool_size: int = 10):
+            self.connector = connector
+            self.pool_size = pool_size
+        def open(self):
+            self.close()
+            if self.connector._session is None:
+                s = requests.Session()
+                adapter = requests.adapters.HTTPAdapter(pool_connections=self.pool_size, pool_maxsize=self.pool_size)
+                s.mount('http://', adapter)
+                s.mount('https://', adapter)
+                self.connector._session = s
+        def close(self):
+            if self.connector._session is not None:
+                self.connector._session.close()
+                self.connector._session = None
+        def __call__(self):
+            return self.connector
+        def __enter__(self):
+            self.open()
+            return self.connector
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.close()
+
     def __init__(self, endpoint=_default_endpoint, token=_default_token):
         assert token, "No token provided. Please set LFSS_TOKEN environment variable."
         self.config = {
             "endpoint": endpoint,
             "token": token
         }
+        self._session: Optional[requests.Session] = None
+
+    def session(self, pool_size: int = 10):
+        """ avoid creating a new session for each request. """
+        return self.Session(self, pool_size)
 
     def _fetch_factory(
         self, method: Literal['GET', 'POST', 'PUT', 'DELETE'],
@@ -31,9 +64,13 @@ class Connector:
             headers.update({
                 'Authorization': f"Bearer {self.config['token']}",
             })
-            with requests.Session() as s:
-                response = s.request(method, url, headers=headers, **kwargs)
+            if self._session is not None:
+                response = self._session.request(method, url, headers=headers, **kwargs)
                 response.raise_for_status()
+            else:
+                with requests.Session() as s:
+                    response = s.request(method, url, headers=headers, **kwargs)
+                    response.raise_for_status()
             return response
         return f
 
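
The new Connector.Session helper mounts a requests HTTPAdapter with a configurable pool size onto a shared requests.Session, and _fetch_factory reuses that session whenever one is open, falling back to a throwaway requests.Session per call otherwise. Since __enter__ returns the connector itself, the context manager is used as below; a minimal sketch with placeholder remote paths:

    from lfss.api import Connector

    connector = Connector()

    # reuse one pooled HTTP session for a burst of requests
    with connector.session(pool_size=16) as c:
        for name in ("a.txt", "b.txt", "c.txt"):      # placeholder remote names
            blob = c.get(f"myuser/demo/{name}")       # returns None on 404
            print(name, "missing" if blob is None else f"{len(blob)} bytes")
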
@@ -80,9 +117,9 @@ class Connector:
         )
         return response.json()
 
-    def _get(self, path: str) -> Optional[requests.Response]:
+    def _get(self, path: str, stream: bool = False) -> Optional[requests.Response]:
         try:
-            response = self._fetch_factory('GET', path)()
+            response = self._fetch_factory('GET', path)(stream=stream)
         except requests.exceptions.HTTPError as e:
             if e.response.status_code == 404:
                 return None
@@ -94,6 +131,12 @@ class Connector:
         response = self._get(path)
         if response is None: return None
         return response.content
+
+    def get_stream(self, path: str) -> Iterator[bytes]:
+        """Downloads a file from the specified path, will raise PathNotFoundError if path not found."""
+        response = self._get(path, stream=True)
+        if response is None: raise PathNotFoundError("Path not found: " + path)
+        return response.iter_content(chunk_size=1024)
 
     def get_json(self, path: str) -> Optional[dict]:
         response = self._get(path)
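
Unlike get(), which buffers the whole body and returns None for a missing path, get_stream() passes stream=True down to _get and yields the body in 1 KiB chunks, raising PathNotFoundError on a 404. A sketch of streaming a large remote file to disk (paths are placeholders):

    from lfss.api import Connector
    from lfss.src.error import PathNotFoundError

    connector = Connector()
    try:
        # "myuser/videos/big.mp4" and "big.mp4" are placeholder paths
        with open("big.mp4", "wb") as f:
            for chunk in connector.get_stream("myuser/videos/big.mp4"):
                f.write(chunk)
    except PathNotFoundError:
        print("remote file does not exist")
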
@@ -118,12 +161,50 @@ class Connector:
             return None
         raise e
 
-    def list_path(self, path: str, flat: bool = False) -> PathContents:
+    def list_path(self, path: str) -> PathContents:
+        """
+        shorthand list with limited options,
+        for large directories / more options, use list_files and list_dirs instead.
+        """
         assert path.endswith('/')
-        response = self._fetch_factory('GET', path, {'flat': flat})()
+        response = self._fetch_factory('GET', path)()
         dirs = [DirectoryRecord(**d) for d in response.json()['dirs']]
         files = [FileRecord(**f) for f in response.json()['files']]
         return PathContents(dirs=dirs, files=files)
+
+    def count_files(self, path: str, flat: bool = False) -> int:
+        assert path.endswith('/')
+        response = self._fetch_factory('GET', '_api/count-files', {'path': path, 'flat': flat})()
+        return response.json()['count']
+
+    def list_files(
+        self, path: str, offset: int = 0, limit: int = 1000,
+        order_by: FileSortKey = '', order_desc: bool = False,
+        flat: bool = False
+        ) -> list[FileRecord]:
+        assert path.endswith('/')
+        response = self._fetch_factory('GET', "_api/list-files", {
+            'path': path,
+            'offset': offset, 'limit': limit, 'order_by': order_by, 'order_desc': order_desc, 'flat': flat
+        })()
+        return [FileRecord(**f) for f in response.json()]
+
+    def count_dirs(self, path: str) -> int:
+        assert path.endswith('/')
+        response = self._fetch_factory('GET', '_api/count-dirs', {'path': path})()
+        return response.json()['count']
+
+    def list_dirs(
+        self, path: str, offset: int = 0, limit: int = 1000,
+        order_by: DirSortKey = '', order_desc: bool = False,
+        skim: bool = True
+        ) -> list[DirectoryRecord]:
+        assert path.endswith('/')
+        response = self._fetch_factory('GET', "_api/list-dirs", {
+            'path': path,
+            'offset': offset, 'limit': limit, 'order_by': order_by, 'order_desc': order_desc, 'skim': skim
+        })()
+        return [DirectoryRecord(**d) for d in response.json()]
 
     def set_file_permission(self, path: str, permission: int | FileReadPermission):
         """Sets the file permission for the specified path."""
lfss/cli/cli.py CHANGED
@@ -1,4 +1,4 @@
-from lfss.client import Connector, upload_directory, upload_file, download_file, download_directory
+from lfss.api import Connector, upload_directory, upload_file, download_file, download_directory
 from pathlib import Path
 import argparse
 from lfss.src.datatype import FileReadPermission
lfss/cli/user.py CHANGED
@@ -29,7 +29,7 @@ async def _main():
     sp_set.add_argument('username', type=str)
     sp_set.add_argument('-p', '--password', type=str, default=None)
     sp_set.add_argument('-a', '--admin', type=parse_bool, default=None)
-    sp_set.add_argument('--permission', type=int, default=None)
+    sp_set.add_argument('--permission', type=parse_permission, default=None)
     sp_set.add_argument('--max-storage', type=parse_storage_size, default=None)
 
     sp_list = sp.add_parser('list')

@@ -46,7 +46,7 @@ class SqlConnection:
 
 class SqlConnectionPool:
     _r_sem: Semaphore
-    _w_sem: Semaphore
+    _w_sem: Lock | Semaphore
     def __init__(self):
         self._readers: list[SqlConnection] = []
         self._writer: None | SqlConnection = None
@@ -57,7 +57,8 @@ class SqlConnectionPool:
         self._readers = []
 
         self._writer = SqlConnection(await get_connection(read_only=False))
-        self._w_sem = Semaphore(1)
+        self._w_sem = Lock()
+        # self._w_sem = Semaphore(1)
 
         for _ in range(n_read):
            conn = await get_connection(read_only=True)
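
The writer guard switches from Semaphore(1) to a Lock, which states the single-writer intent directly (the annotation widens to Lock | Semaphore and the old line is kept as a comment). Assuming these are asyncio primitives, a minimal illustrative sketch of guarding one writer resource this way; TinyWritePool is hypothetical and not part of lfss:

    import asyncio

    class TinyWritePool:
        """Illustrative only: one writer resource guarded by asyncio.Lock."""
        def __init__(self, writer):
            self._writer = writer
            self._w_sem = asyncio.Lock()   # was Semaphore(1); Lock reads as plain mutual exclusion

        async def write(self, fn):
            async with self._w_sem:        # only one coroutine may use the writer at a time
                return await fn(self._writer)
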