rclone_api-1.5.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. rclone_api/__init__.py +951 -0
  2. rclone_api/assets/example.txt +1 -0
  3. rclone_api/cli.py +15 -0
  4. rclone_api/cmd/analyze.py +51 -0
  5. rclone_api/cmd/copy_large_s3.py +111 -0
  6. rclone_api/cmd/copy_large_s3_finish.py +81 -0
  7. rclone_api/cmd/list_files.py +27 -0
  8. rclone_api/cmd/save_to_db.py +77 -0
  9. rclone_api/completed_process.py +60 -0
  10. rclone_api/config.py +87 -0
  11. rclone_api/convert.py +31 -0
  12. rclone_api/db/__init__.py +3 -0
  13. rclone_api/db/db.py +277 -0
  14. rclone_api/db/models.py +57 -0
  15. rclone_api/deprecated.py +24 -0
  16. rclone_api/detail/copy_file_parts_resumable.py +42 -0
  17. rclone_api/detail/walk.py +116 -0
  18. rclone_api/diff.py +164 -0
  19. rclone_api/dir.py +113 -0
  20. rclone_api/dir_listing.py +66 -0
  21. rclone_api/exec.py +40 -0
  22. rclone_api/experimental/flags.py +89 -0
  23. rclone_api/experimental/flags_base.py +58 -0
  24. rclone_api/file.py +205 -0
  25. rclone_api/file_item.py +68 -0
  26. rclone_api/file_part.py +198 -0
  27. rclone_api/file_stream.py +52 -0
  28. rclone_api/filelist.py +30 -0
  29. rclone_api/group_files.py +256 -0
  30. rclone_api/http_server.py +244 -0
  31. rclone_api/install.py +95 -0
  32. rclone_api/log.py +44 -0
  33. rclone_api/mount.py +55 -0
  34. rclone_api/mount_util.py +247 -0
  35. rclone_api/process.py +187 -0
  36. rclone_api/rclone_impl.py +1285 -0
  37. rclone_api/remote.py +21 -0
  38. rclone_api/rpath.py +102 -0
  39. rclone_api/s3/api.py +109 -0
  40. rclone_api/s3/basic_ops.py +61 -0
  41. rclone_api/s3/chunk_task.py +187 -0
  42. rclone_api/s3/create.py +107 -0
  43. rclone_api/s3/multipart/file_info.py +7 -0
  44. rclone_api/s3/multipart/finished_piece.py +69 -0
  45. rclone_api/s3/multipart/info_json.py +239 -0
  46. rclone_api/s3/multipart/merge_state.py +147 -0
  47. rclone_api/s3/multipart/upload_info.py +62 -0
  48. rclone_api/s3/multipart/upload_parts_inline.py +356 -0
  49. rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
  50. rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
  51. rclone_api/s3/multipart/upload_state.py +165 -0
  52. rclone_api/s3/types.py +67 -0
  53. rclone_api/scan_missing_folders.py +153 -0
  54. rclone_api/types.py +402 -0
  55. rclone_api/util.py +324 -0
  56. rclone_api-1.5.8.dist-info/LICENSE +21 -0
  57. rclone_api-1.5.8.dist-info/METADATA +969 -0
  58. rclone_api-1.5.8.dist-info/RECORD +61 -0
  59. rclone_api-1.5.8.dist-info/WHEEL +5 -0
  60. rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
  61. rclone_api-1.5.8.dist-info/top_level.txt +1 -0
rclone_api/db/db.py ADDED
@@ -0,0 +1,277 @@
+ """
+ Database module for rclone_api.
+ """
+
+ import os
+ from threading import Lock
+ from typing import Optional
+
+ from sqlmodel import Session, SQLModel, create_engine, select
+
+ from rclone_api.db.models import RepositoryMeta, create_file_entry_model
+ from rclone_api.file import FileItem
+
+
+ def _to_table_name(remote_name: str) -> str:
+     return (
+         "files_"
+         + remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
+     )
+
+
+ class DB:
+     """Database class for rclone_api."""
+
+     def __init__(self, db_path_url: str):
+         """Initialize the database.
+
+         Args:
+             db_path_url: Database URL, passed to create_engine
+         """
+         self.db_path_url = db_path_url
+
+         # When running multiple commands in parallel, the database connection may fail once
+         # when the database is first populated.
+         retries = 2
+         for _ in range(retries):
+             try:
+                 self.engine = create_engine(db_path_url)
+                 SQLModel.metadata.create_all(self.engine)
+                 break
+             except Exception as e:
+                 print(f"Failed to connect to database. Retrying... {e}")
+         else:
+             raise Exception("Failed to connect to database.")
+         self._cache: dict[str, DBRepo] = {}
+         self._cache_lock = Lock()
+
+     def drop_all(self) -> None:
+         """Drop all tables in the database."""
+         SQLModel.metadata.drop_all(self.engine)
+
+     def close(self) -> None:
+         """Close the database connection and release resources."""
+         if hasattr(self, "engine") and self.engine is not None:
+             self.engine.dispose()
+
+     def add_files(self, files: list[FileItem]) -> None:
+         """Add files to the database.
+
+         Args:
+             files: List of file entries
+         """
+
+         partition: dict[str, list[FileItem]] = {}
+         for file in files:
+             partition.setdefault(file.remote, []).append(file)
+
+         for remote_name, files in partition.items():
+             repo = self.get_or_create_repo(remote_name)
+             repo.insert_files(files)
+
+     def query_all_files(self, remote_name: str) -> list[FileItem]:
+         """Query files from the database.
+
+         Args:
+             remote_name: Name of the remote
+         """
+         repo = self.get_or_create_repo(remote_name)
+         files = repo.get_all_files()
+         out: list[FileItem] = []
+         for file in files:
+             out.append(file)
+         return out
+
+     def get_or_create_repo(self, remote_name: str) -> "DBRepo":
+         """Get a table section for a remote.
+
+         Args:
+             remote_name: Name of the remote
+
+         Returns:
+             DBRepo: A table section for the remote
+         """
+         with self._cache_lock:
+             if remote_name in self._cache:
+                 return self._cache[remote_name]
+             table_name = _to_table_name(remote_name)
+             out = DBRepo(self.engine, remote_name, table_name)
+             self._cache[remote_name] = out
+             return out
+
+
+ class DBRepo:
+     """Table repository for a single remote."""
+
+     def __init__(self, engine, remote_name: str, table_name: Optional[str] = None):
+         """Initialize a table section.
+
+         Args:
+             engine: SQLAlchemy engine
+             remote_name: Name of the remote
+             table_name: Optional table name, will be derived from remote_name if not provided
+         """
+         self.engine = engine
+         self.remote_name = remote_name
+
+         # If table_name is not provided, derive one from the remote name.
+         if table_name is None:
+             # table_name = (
+             #     "file_entries_"
+             #     + remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
+             # )
+             table_name = _to_table_name(remote_name)
+         self.table_name = table_name
+
+         # Check if repository exists in RepositoryMeta; if not, create a new entry.
+         with Session(self.engine) as session:
+             existing_repo = session.exec(
+                 select(RepositoryMeta).where(
+                     RepositoryMeta.repo_name == self.remote_name
+                 )
+             ).first()
+             if not existing_repo:
+                 repo_meta = RepositoryMeta(
+                     repo_name=self.remote_name, file_table_name=self.table_name
+                 )
+                 session.add(repo_meta)
+                 session.commit()
+
+         # Dynamically create the file entry model and its table.
+         self.FileEntryModel = create_file_entry_model(self.table_name)
+         SQLModel.metadata.create_all(self.engine, tables=[self.FileEntryModel.__table__])  # type: ignore
+
+     def insert_file(self, file: FileItem) -> None:
+         """Insert a file entry into the table.
+
+         Args:
+             file: File entry
+         """
+         return self.insert_files([file])
+
+     def insert_files(self, files: list[FileItem]) -> None:
+         """
+         Insert multiple file entries into the table.
+
+         Three bulk operations are performed:
+           1. Select: Determine which files already exist.
+           2. Insert: Bulk-insert new file entries.
+           3. Update: Bulk-update existing file entries.
+
+         The FileEntryModel must define a unique constraint on (path, name) and have a primary key "id".
+         """
+         # Step 1: Bulk select existing records.
+         # get_exists() returns a set of FileItem objects (based on path_no_remote and name) that already exist.
+         existing_files = self.get_exists(files)
+
+         # Determine which files need to be updated vs. inserted.
+         needs_update = existing_files
+         is_new = set(files) - existing_files
+
+         # Step 2: Bulk insert new rows.
+         new_values = [
+             {
+                 "path": file.path_no_remote,
+                 "name": file.name,
+                 "size": file.size,
+                 "mime_type": file.mime_type,
+                 "mod_time": file.mod_time,
+                 "suffix": file.real_suffix,
+             }
+             for file in is_new
+         ]
+         with Session(self.engine) as session:
+             if new_values:
+                 session.bulk_insert_mappings(self.FileEntryModel, new_values)  # type: ignore
+                 session.commit()
+
+         # Step 3: Bulk update existing rows.
+         # First, query the database for the primary keys of rows that match the unique keys in needs_update.
+         with Session(self.engine) as session:
+             # Collect all unique paths from files needing update.
+             update_paths = [file.path_no_remote for file in needs_update]
+             # Query for existing rows matching any of these paths.
+             db_entries = session.exec(
+                 select(self.FileEntryModel).where(
+                     self.FileEntryModel.path.in_(update_paths)  # type: ignore
+                 )
+             ).all()
+
+             # Build a mapping from the unique key (path, name) to the primary key (id).
+             id_map = {(entry.path, entry.name): entry.id for entry in db_entries}
+
+             # Prepare bulk update mappings.
+             update_values = []
+             for file in needs_update:
+                 key = (file.path_no_remote, file.name)
+                 if key in id_map:
+                     update_values.append(
+                         {
+                             "id": id_map[key],
+                             "size": file.size,
+                             "mime_type": file.mime_type,
+                             "mod_time": file.mod_time,
+                             "suffix": file.real_suffix,
+                         }
+                     )
+             if update_values:
+                 session.bulk_update_mappings(self.FileEntryModel, update_values)  # type: ignore
+                 session.commit()
+
+     def get_exists(self, files: list[FileItem]) -> set[FileItem]:
+         """Get file entries from the table that exist among the given files.
+
+         Args:
+             files: List of file entries
+
+         Returns:
+             Set of FileItem instances whose 'path_no_remote' exists in the table.
+         """
+         # Extract unique paths from the input files.
+         paths = {file.path_no_remote for file in files}
+
+         with Session(self.engine) as session:
+             # Execute a single query to fetch all file paths in the table that match the input paths.
+             result = session.exec(
+                 select(self.FileEntryModel.path).where(
+                     self.FileEntryModel.path.in_(paths)  # type: ignore
+                 )
+             ).all()
+             # Convert the result to a set for fast membership tests.
+             existing_paths = set(result)
+
+         # Return the set of FileItem objects that have a path in the existing_paths.
+         return {file for file in files if file.path_no_remote in existing_paths}
+
+     def get_all_files(self) -> list[FileItem]:
+         """Get all files in the table.
+
+         Returns:
+             list: List of file entries
+         """
+         # with Session(self.engine) as session:
+         #     return session.exec(select(self.FileEntryModel)).all()
+         out: list[FileItem] = []
+         with Session(self.engine) as session:
+             query = session.exec(select(self.FileEntryModel)).all()
+             for item in query:
+                 name = item.name  # type: ignore
+                 size = item.size  # type: ignore
+                 mime_type = item.mime_type  # type: ignore
+                 mod_time = item.mod_time  # type: ignore
+                 path = item.path  # type: ignore
+                 parent = os.path.dirname(path)
+                 if parent == "/" or parent == ".":
+                     parent = ""
+                 o = FileItem(
+                     remote=self.remote_name,
+                     parent=parent,
+                     name=name,
+                     size=size,
+                     mime_type=mime_type,
+                     mod_time=mod_time,
+                 )
+                 out.append(o)
+         return out
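
A minimal usage sketch for the DB class above (illustrative only; the SQLite URL and the FileItem keyword arguments are assumptions inferred from the create_engine and get_all_files() usage in this file, not taken from the package documentation):

# Illustrative only: the SQLite URL and FileItem kwargs are assumptions based on this file.
from rclone_api.db.db import DB
from rclone_api.file import FileItem

db = DB("sqlite:///files.db")  # any SQLAlchemy/SQLModel connection URL
db.add_files(
    [
        FileItem(
            remote="dst:Bucket",
            parent="videos",
            name="clip.mp4",
            size=1024,
            mime_type="video/mp4",
            mod_time="2025-02-17T14:43:38Z",
        )
    ]
)
for item in db.query_all_files("dst:Bucket"):  # files come back partitioned per remote table
    print(item)
db.close()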
rclone_api/db/models.py ADDED
@@ -0,0 +1,57 @@
+ """
+ Database models for rclone_api.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Optional, Type
+
+ from sqlalchemy import BigInteger, Column
+ from sqlmodel import Field, SQLModel
+
+
+ # Meta table that indexes all repositories
+ class RepositoryMeta(SQLModel, table=True):
+     """Repository metadata table."""
+
+     id: Optional[int] = Field(default=None, primary_key=True)
+     repo_name: str
+     file_table_name: str  # The dedicated table name for file entries
+
+
+ # Base FileEntry model that will be extended
+ class FileEntry(SQLModel, ABC):
+     """Base file entry model with common fields."""
+
+     id: Optional[int] = Field(default=None, primary_key=True)
+     path: str = Field(index=True, unique=True)
+     suffix: str = Field(index=True)
+     name: str
+     size: int = Field(sa_column=Column(BigInteger))
+     mime_type: str
+     mod_time: str
+     hash: Optional[str] = Field(default=None)
+
+     @abstractmethod
+     def table_name(self) -> str:
+         """Return the table name for this file entry model."""
+         pass
+
+
+ # Factory to dynamically create a FileEntry model with a given table name
+ def create_file_entry_model(_table_name: str) -> Type[FileEntry]:
+     """Create a file entry model with a given table name.
+
+     Args:
+         _table_name: Table name
+
+     Returns:
+         Type[FileEntry]: File entry model class with specified table name
+     """
+
+     class FileEntryConcrete(FileEntry, table=True):
+         __tablename__ = _table_name  # type: ignore  # dynamically set table name
+
+         def table_name(self) -> str:
+             return _table_name
+
+     return FileEntryConcrete
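
A short sketch of the factory above: it returns a per-remote FileEntry subclass bound to a dynamic table name, whose table can then be created on an engine. The in-memory SQLite URL and the table name here are assumptions; the create_all pattern mirrors what DBRepo does in db.py:

# Sketch only; the engine URL and table name are illustrative assumptions.
from sqlmodel import SQLModel, create_engine
from rclone_api.db.models import create_file_entry_model

FilesS3 = create_file_entry_model("files_s3_bucket")  # class with dynamic __tablename__
engine = create_engine("sqlite://")                   # in-memory SQLite
SQLModel.metadata.create_all(engine, tables=[FilesS3.__table__])  # same pattern DBRepo uses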
rclone_api/deprecated.py ADDED
@@ -0,0 +1,24 @@
+ import functools
+ import warnings
+
+
+ def deprecated(new_func_name: str):
+     """Decorator to mark functions as deprecated.
+
+     Args:
+         new_func_name: The name of the function that should be used instead.
+     """
+
+     def decorator(func):
+         @functools.wraps(func)
+         def wrapper(*args, **kwargs):
+             warnings.warn(
+                 f"{func.__name__}() is deprecated; use {new_func_name}() instead.",
+                 DeprecationWarning,
+                 stacklevel=2,
+             )
+             return func(*args, **kwargs)
+
+         return wrapper
+
+     return decorator
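
Example use of the decorator above (the function names are hypothetical): the wrapped call emits a DeprecationWarning that points at the replacement, then delegates to the original function.

# Hypothetical names; shows the decorator's warn-then-delegate behavior.
from rclone_api.deprecated import deprecated

def copy_files() -> None:
    print("copying")

@deprecated("copy_files")
def old_copy() -> None:
    copy_files()

old_copy()  # warns: "old_copy() is deprecated; use copy_files() instead." then runs copy_files()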
rclone_api/detail/copy_file_parts_resumable.py ADDED
@@ -0,0 +1,42 @@
+ from rclone_api.rclone_impl import RcloneImpl
+ from rclone_api.types import (
+     PartInfo,
+ )
+
+
+ def copy_file_parts_resumable(
+     self: RcloneImpl,
+     src: str,  # src:/Bucket/path/myfile.large.zst
+     dst_dir: str,  # dst:/Bucket/path/myfile.large.zst-parts/
+     part_infos: list[PartInfo] | None = None,
+     upload_threads: int = 10,
+     merge_threads: int = 5,
+     verbose: bool | None = None,
+ ) -> Exception | None:
+     # _upload_parts
+     from rclone_api.s3.multipart.upload_parts_resumable import upload_parts_resumable
+     from rclone_api.s3.multipart.upload_parts_server_side_merge import (
+         s3_server_side_multi_part_merge,
+     )
+
+     if verbose is None:
+         verbose = self.get_verbose()
+
+     err: Exception | None = upload_parts_resumable(
+         self=self,
+         src=src,
+         dst_dir=dst_dir,
+         part_infos=part_infos,
+         threads=upload_threads,
+     )
+     if isinstance(err, Exception):
+         return err
+     if dst_dir.endswith("/"):
+         dst_dir = dst_dir[:-1]
+     dst_info = f"{dst_dir}/info.json"
+     err = s3_server_side_multi_part_merge(
+         rclone=self, info_path=dst_info, max_workers=merge_threads, verbose=verbose
+     )
+     if isinstance(err, Exception):
+         return err
+     return None
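
A rough call sketch for the helper above; `rclone` stands in for an already-configured RcloneImpl instance (its construction is outside this file) and the remote paths are placeholders, neither taken from the package documentation.

# Placeholder paths and instance; not from the package documentation.
from rclone_api.detail.copy_file_parts_resumable import copy_file_parts_resumable

err = copy_file_parts_resumable(
    rclone,  # an existing RcloneImpl instance (construction not shown here)
    src="src:Bucket/path/myfile.large.zst",
    dst_dir="dst:Bucket/path/myfile.large.zst-parts/",
    upload_threads=10,
    merge_threads=5,
)
if err is not None:
    raise err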
rclone_api/detail/walk.py ADDED
@@ -0,0 +1,116 @@
+ import random
+ from queue import Queue
+ from threading import Thread
+ from typing import Generator
+
+ from rclone_api import Dir
+ from rclone_api.dir_listing import DirListing
+ from rclone_api.remote import Remote
+ from rclone_api.types import Order
+
+ _MAX_OUT_QUEUE_SIZE = 50
+
+
+ def walk_runner_breadth_first(
+     dir: Dir,
+     max_depth: int,
+     out_queue: Queue[DirListing | None],
+     order: Order = Order.NORMAL,
+ ) -> None:
+     queue: Queue[Dir] = Queue()
+     queue.put(dir)
+     try:
+         while not queue.empty():
+             current_dir = queue.get()
+             dirlisting = current_dir.ls(max_depth=0, order=order)
+             out_queue.put(dirlisting)
+             dirs = dirlisting.dirs
+
+             if max_depth != 0 and len(dirs) > 0:
+                 for child in dirs:
+                     queue.put(child)
+             if max_depth < 0:
+                 continue
+             if max_depth > 0:
+                 max_depth -= 1
+         out_queue.put(None)
+     except KeyboardInterrupt:
+         import _thread
+
+         out_queue.put(None)
+
+         _thread.interrupt_main()
+
+
+ def walk_runner_depth_first(
+     dir: Dir,
+     max_depth: int,
+     out_queue: Queue[DirListing | None],
+     order: Order = Order.NORMAL,
+ ) -> None:
+     try:
+         stack = [(dir, max_depth)]
+         while stack:
+             current_dir, depth = stack.pop()
+             dirlisting = current_dir.ls()
+             if order == Order.REVERSE:
+                 dirlisting.dirs.reverse()
+             if order == Order.RANDOM:
+
+                 random.shuffle(dirlisting.dirs)
+             if depth != 0:
+                 for subdir in dirlisting.dirs:  # Process deeper directories first
+                     # stack.append((child, depth - 1 if depth > 0 else depth))
+                     next_depth = depth - 1 if depth > 0 else depth
+                     walk_runner_depth_first(subdir, next_depth, out_queue, order=order)
+             out_queue.put(dirlisting)
+         out_queue.put(None)
+     except KeyboardInterrupt:
+         import _thread
+
+         out_queue.put(None)
+         _thread.interrupt_main()
+
+
+ def walk(
+     dir: Dir | Remote,
+     breadth_first: bool,
+     max_depth: int = -1,
+     order: Order = Order.NORMAL,
+ ) -> Generator[DirListing, None, None]:
+     """Walk through the given directory recursively.
+
+     Args:
+         dir: Directory or Remote to walk through
+         max_depth: Maximum depth to traverse (-1 for unlimited)
+
+     Yields:
+         DirListing: Directory listing for each directory encountered
+     """
+     try:
+         # Convert Remote to Dir if needed
+         if isinstance(dir, Remote):
+             dir = Dir(dir)
+         out_queue: Queue[DirListing | None] = Queue(maxsize=_MAX_OUT_QUEUE_SIZE)
+
+         def _task() -> None:
+             if breadth_first:
+                 walk_runner_breadth_first(dir, max_depth, out_queue, order)
+             else:
+                 walk_runner_depth_first(dir, max_depth, out_queue, order)
+
+         # Start worker thread
+         worker = Thread(
+             target=_task,
+             daemon=True,
+         )
+         worker.start()
+
+         while dirlisting := out_queue.get():
+             if dirlisting is None:
+                 break
+             yield dirlisting
+
+         worker.join()
+     except KeyboardInterrupt:
+         pass
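
A consumption sketch for walk() above; `remote` stands in for a Dir or Remote obtained elsewhere from rclone_api (its construction is outside this file), and the depth and ordering values are illustrative.

# `remote` is a placeholder for an existing Dir/Remote instance.
from rclone_api.detail.walk import walk
from rclone_api.types import Order

for dirlisting in walk(remote, breadth_first=True, max_depth=2, order=Order.NORMAL):
    print(dirlisting)  # one DirListing per directory visited, produced by the worker thread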
rclone_api/diff.py ADDED
@@ -0,0 +1,164 @@
+ from dataclasses import dataclass
+ from enum import Enum
+ from queue import Queue
+ from threading import Thread
+ from typing import Generator
+
+ from rclone_api.process import Process
+
+
+ class DiffType(Enum):
+     EQUAL = "="
+     MISSING_ON_SRC = (
+         "-"  # means path was missing on the source, so only in the destination
+     )
+     MISSING_ON_DST = (
+         "+"  # means path was missing on the destination, so only in the source
+     )
+     DIFFERENT = "*"  # means path was present in source and destination but different.
+     ERROR = "!"  # means there was an error
+
+
+ class DiffOption(Enum):
+     COMBINED = "combined"
+     MISSING_ON_SRC = "missing-on-src"
+     MISSING_ON_DST = "missing-on-dst"
+     DIFFER = "differ"
+     MATCH = "match"
+     ERROR = "error"
+
+
+ @dataclass
+ class DiffItem:
+     type: DiffType
+     path: str
+     src_prefix: str
+     dst_prefix: str
+
+     def __str__(self) -> str:
+         return f"{self.type.value} {self.path}"
+
+     def __repr__(self) -> str:
+         return f"{self.type.name} {self.path}"
+
+     def full_str(self) -> str:
+         return f"{self.type.name} {self.src_prefix}/{self.path} {self.dst_prefix}/{self.path}"
+
+     def dst_path(self) -> str:
+         return f"{self.dst_prefix}/{self.path}"
+
+     def src_path(self) -> str:
+         return f"{self.src_prefix}/{self.path}"
+
+
+ def _parse_missing_on_src_dst(line: str) -> str | None:
+     if line.endswith("does-not-exist"):
+         # 2025/02/17 14:43:38 ERROR : zachs_video/breaking_ai_mind.mp4: file not in S3 bucket rclone-api-unit-test path does-not-exist
+         parts = line.split(" : ", 1)
+         if len(parts) < 2:
+             return None
+         right = parts[1]
+         file_path = right.split(":", 1)[0]
+         return file_path.strip()
+     return None
+
+
+ def _classify_diff(
+     line: str, src_slug: str, dst_slug: str, diff_option: DiffOption
+ ) -> DiffItem | None:
+     def _new(type: DiffType, path: str) -> DiffItem:
+         return DiffItem(type, path, src_prefix=src_slug, dst_prefix=dst_slug)
+
+     if diff_option == DiffOption.COMBINED:
+         suffix = line[1:].strip() if len(line) > 0 else ""
+         if line.startswith(DiffType.EQUAL.value):
+             return _new(DiffType.EQUAL, suffix)
+         if line.startswith(DiffType.MISSING_ON_SRC.value):
+             return _new(DiffType.MISSING_ON_SRC, suffix)
+         if line.startswith(DiffType.MISSING_ON_DST.value):
+             return _new(DiffType.MISSING_ON_DST, suffix)
+         if line.startswith(DiffType.DIFFERENT.value):
+             return _new(DiffType.DIFFERENT, suffix)
+         if line.startswith(DiffType.ERROR.value):
+             return _new(DiffType.ERROR, suffix)
+         return None
+     if diff_option == DiffOption.MISSING_ON_SRC:
+         filename_src: str | None = _parse_missing_on_src_dst(line)
+         if filename_src is not None:
+             return _new(DiffType.MISSING_ON_SRC, filename_src)
+         return None
+     if diff_option == DiffOption.MISSING_ON_DST:
+         filename_dst: str | None = _parse_missing_on_src_dst(line)
+         if filename_dst is not None:
+             return _new(DiffType.MISSING_ON_DST, filename_dst)
+         return None
+     else:
+         raise ValueError(f"Unknown diff_option: {diff_option}")
+
+
+ def _async_diff_stream_from_running_process(
+     running_process: Process,
+     src_slug: str,
+     dst_slug: str,
+     diff_option: DiffOption,
+     output: Queue[DiffItem | None],
+ ) -> None:
+     count = 0
+     first_few_lines: list[str] = []
+     try:
+         assert running_process.stdout is not None
+         n_max = 10
+         for line in iter(running_process.stdout.readline, b""):
+             try:
+                 line_str = line.decode("utf-8").strip()
+                 if len(first_few_lines) < n_max:
+                     first_few_lines.append(line_str)
+                 # _classify_line_type
+                 diff_item: DiffItem | None = _classify_diff(
+                     line_str, src_slug, dst_slug, diff_option
+                 )
+                 if diff_item is None:
+                     # Some other output that we don't care about, debug print etc.
+                     continue
+                 output.put(diff_item)
+                 count += 1
+                 # print(f"unhandled: {line_str}")
+             except UnicodeDecodeError:
+                 print("UnicodeDecodeError")
+                 continue
+     except KeyboardInterrupt:
+         import _thread
+
+         print("KeyboardInterrupt")
+         _thread.interrupt_main()
+     except Exception as e:
+         import _thread
+
+         print(f"Error: {e}")
+         _thread.interrupt_main()
+     finally:
+         output.put(None)
+
+
+ def diff_stream_from_running_process(
+     running_process: Process,
+     src_slug: str,
+     dst_slug: str,
+     diff_option: DiffOption,
+ ) -> Generator[DiffItem, None, None]:
+     output: Queue[DiffItem | None] = Queue()
+     # process_output_to_diff_stream(running_process, src_slug, dst_slug, output)
+
+     def _task() -> None:
+         _async_diff_stream_from_running_process(
+             running_process, src_slug, dst_slug, diff_option, output
+         )
+
+     thread = Thread(target=_task, daemon=True)
+     thread.start()
+     while True:
+         item = output.get()
+         if item is None:
+             break
+         yield item
+     thread.join(timeout=5)
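
A consumption sketch for the generator above; `proc` stands in for a Process started elsewhere (for example around `rclone check --combined -`), which this module does not create itself, and the remote slugs are placeholders.

# `proc` is a placeholder for a running rclone check Process; slugs are illustrative.
from rclone_api.diff import DiffOption, DiffType, diff_stream_from_running_process

for item in diff_stream_from_running_process(
    proc,
    src_slug="src:Bucket",
    dst_slug="dst:Bucket",
    diff_option=DiffOption.COMBINED,
):
    if item.type == DiffType.MISSING_ON_DST:
        print("missing on destination:", item.dst_path())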