ctxsync 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctxsync/__init__.py +0 -0
- ctxsync/chat_sync.py +186 -0
- ctxsync/cli/__init__.py +3 -0
- ctxsync/cli/auth.py +77 -0
- ctxsync/cli/category.py +71 -0
- ctxsync/cli/chat.py +357 -0
- ctxsync/cli/config.py +72 -0
- ctxsync/cli/file.py +29 -0
- ctxsync/cli/main.py +257 -0
- ctxsync/cli/organization.py +98 -0
- ctxsync/cli/project.py +422 -0
- ctxsync/cli/session.py +626 -0
- ctxsync/cli/submodule.py +148 -0
- ctxsync/cli/sync.py +79 -0
- ctxsync/compression.py +302 -0
- ctxsync/configmanager/__init__.py +5 -0
- ctxsync/configmanager/base_config_manager.py +255 -0
- ctxsync/configmanager/file_config_manager.py +362 -0
- ctxsync/configmanager/inmemory_config_manager.py +134 -0
- ctxsync/exceptions.py +22 -0
- ctxsync/provider_factory.py +38 -0
- ctxsync/providers/__init__.py +0 -0
- ctxsync/providers/base_claude_ai.py +537 -0
- ctxsync/providers/base_provider.py +109 -0
- ctxsync/providers/claude_ai.py +192 -0
- ctxsync/session_key_manager.py +129 -0
- ctxsync/syncmanager.py +328 -0
- ctxsync/utils.py +416 -0
- ctxsync-0.8.0.dist-info/METADATA +151 -0
- ctxsync-0.8.0.dist-info/RECORD +34 -0
- ctxsync-0.8.0.dist-info/WHEEL +5 -0
- ctxsync-0.8.0.dist-info/entry_points.txt +2 -0
- ctxsync-0.8.0.dist-info/licenses/LICENSE +21 -0
- ctxsync-0.8.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import urllib.request
|
|
2
|
+
import urllib.error
|
|
3
|
+
import urllib.parse
|
|
4
|
+
import json
|
|
5
|
+
import gzip
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from .base_claude_ai import BaseClaudeAIProvider
|
|
8
|
+
from ..exceptions import ProviderError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ClaudeAIProvider(BaseClaudeAIProvider):
    """HTTP transport for the claude.ai provider, built on ``urllib`` only.

    Every request carries the ``sessionKey`` cookie returned by
    ``self.config.get_session_key("claude.ai")``.  ``self.config``,
    ``self.logger`` and ``self.base_url`` are read here but assigned
    elsewhere (presumably by ``BaseClaudeAIProvider`` -- confirm there).
    """

    # Browser-like User-Agent sent with every request.
    _USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0"

    def __init__(self, config=None):
        """Initialise the provider; all setup is delegated to the base class."""
        super().__init__(config)

    @staticmethod
    def _read_body(response):
        """Return the raw body of *response*, gunzipped when it is gzip-encoded.

        Works for both successful responses and ``urllib.error.HTTPError``
        objects, since both expose ``headers`` and ``read()``.
        """
        if response.headers.get("Content-Encoding") == "gzip":
            return gzip.decompress(response.read())
        return response.read()

    def _v1_base_url(self):
        """Return ``self.base_url`` with ``/api`` removed from its *path* only.

        A plain ``str.replace`` over the whole URL also matches the ``/api``
        formed by the second slash of ``https://`` and a host starting with
        ``api`` (``https://api.example.com/api`` would become
        ``https:/.example.com``), so the scheme and host are left untouched.
        """
        parts = urllib.parse.urlsplit(self.base_url)
        return urllib.parse.urlunsplit(
            parts._replace(path=parts.path.replace("/api", ""))
        )

    def _make_request_internal(  # noqa: C901
        self, method, endpoint, data, base_url, extra_headers=None
    ):
        """Send one JSON request and return the decoded JSON response.

        Args:
            method: HTTP verb, e.g. ``"GET"``.
            endpoint: Path appended verbatim to *base_url*.
            data: JSON-serialisable payload; a falsy value sends no body.
            base_url: URL prefix for this request.
            extra_headers: Optional mapping merged over the default headers.

        Returns:
            The parsed JSON document, or ``None`` when the body is empty.

        Raises:
            ProviderError: On an HTTP error status (via ``handle_http_error``),
                on a network failure, or when the body is not valid JSON.
        """
        url = f"{base_url}{endpoint}"
        headers = {
            "User-Agent": self._USER_AGENT,
            "Content-Type": "application/json",
            "Accept-Encoding": "gzip",
        }
        if extra_headers:
            headers.update(extra_headers)

        session_key, _expiry = self.config.get_session_key("claude.ai")
        cookies = {"sessionKey": session_key}

        try:
            self.logger.debug(f"Making {method} request to {url}")
            self.logger.debug(f"Headers: {headers}")
            # The cookie value is the account credential: log the cookie
            # names only, never the values.
            redacted = {name: "<redacted>" for name in cookies}
            self.logger.debug(f"Cookies: {redacted}")
            if data:
                self.logger.debug(f"Request data: {data}")

            req = urllib.request.Request(url, method=method)
            for key, value in headers.items():
                req.add_header(key, value)
            req.add_header(
                "Cookie", "; ".join(f"{k}={v}" for k, v in cookies.items())
            )
            if data:
                req.data = json.dumps(data).encode("utf-8")

            with urllib.request.urlopen(req) as response:
                self.logger.debug(f"Response status code: {response.status}")
                self.logger.debug(f"Response headers: {response.headers}")
                content = self._read_body(response)

            content_str = content.decode("utf-8")
            self.logger.debug(f"Response content: {content_str[:1000]}...")

            if not content:
                return None
            return json.loads(content_str)

        except urllib.error.HTTPError as e:
            # Must come before URLError: HTTPError is a subclass of it.
            self.handle_http_error(e)
        except urllib.error.URLError as e:
            self.logger.error(f"URL Error: {str(e)}")
            raise ProviderError(f"API request failed: {str(e)}") from e
        except json.JSONDecodeError as json_err:
            self.logger.error(f"Failed to parse JSON response: {str(json_err)}")
            self.logger.error(f"Response content: {content_str}")
            raise ProviderError(
                f"Invalid JSON response from API: {str(json_err)}"
            ) from json_err

    def _make_request(self, method, endpoint, data=None):
        """Send a JSON request against ``self.base_url``."""
        return self._make_request_internal(method, endpoint, data, self.base_url)

    def handle_http_error(self, e):
        """Translate an ``urllib.error.HTTPError`` into a ``ProviderError``.

        This method never returns normally: every branch raises.

        Args:
            e: The ``HTTPError`` raised by ``urlopen``.

        Raises:
            ProviderError: Always.  The 403 message contains the text
                ``"403 Forbidden"``; 429 reports the reset time when the
                payload carries one.
        """
        self.logger.debug(f"Request failed: {str(e)}")
        self.logger.debug(f"Response status code: {e.code}")
        self.logger.debug(f"Response headers: {e.headers}")

        content = self._read_body(e)
        try:
            content_str = content.decode("utf-8")
        except UnicodeDecodeError:
            # ISO-8859-1 maps every byte, so this fallback cannot fail.
            content_str = content.decode("iso-8859-1")

        self.logger.debug(f"Response content: {content_str}")

        if e.code == 403:
            error_msg = "Received a 403 Forbidden error."
            raise ProviderError(error_msg) from e

        if e.code == 429:
            try:
                error_data = json.loads(content_str)
                # The "message" field is itself a JSON document.
                resets_at_unix = json.loads(error_data["error"]["message"])["resetsAt"]
                resets_at_local = datetime.fromtimestamp(
                    resets_at_unix, tz=timezone.utc
                ).astimezone()
                formatted_time = resets_at_local.strftime("%a %b %d %Y %H:%M:%S %Z%z")
                error_msg = f"Message limit exceeded. Try again after {formatted_time}"
            except (KeyError, TypeError, ValueError, OverflowError, OSError) as parse_error:
                # ValueError covers json.JSONDecodeError; the others cover a
                # payload with an unexpected shape or an out-of-range
                # timestamp, so the caller still receives a ProviderError.
                error_msg = f"HTTP 429: Too Many Requests. Failed to parse error response: {parse_error}"
            self.logger.error(error_msg)
            raise ProviderError(error_msg) from e

        error_msg = f"API request failed with status code {e.code}: {content_str}"
        self.logger.error(error_msg)
        raise ProviderError(error_msg) from e

    def _make_request_v1(self, method, endpoint, data=None, organization_id=None):
        """Make a request to the v1 API (not under the ``/api`` prefix).

        Args:
            method: HTTP verb.
            endpoint: Path appended to the v1 root URL.
            data: Optional JSON-serialisable payload.
            organization_id: When given, sent as ``x-organization-uuid``.
        """
        extra_headers = {"anthropic-version": "2023-06-01"}
        if organization_id:
            extra_headers["x-organization-uuid"] = organization_id

        return self._make_request_internal(
            method, endpoint, data, self._v1_base_url(), extra_headers
        )

    def _make_request_stream(self, method, endpoint, data=None):
        """Open a ``text/event-stream`` request and return the open response.

        The caller owns the returned response object and must close it.

        Raises:
            ProviderError: On an HTTP error status or a network failure.
        """
        url = f"{self.base_url}{endpoint}"
        session_key, _ = self.config.get_session_key("claude.ai")
        headers = {
            "User-Agent": self._USER_AGENT,
            "Content-Type": "application/json",
            "Accept": "text/event-stream",
            "Cookie": f"sessionKey={session_key}",
        }

        req = urllib.request.Request(url, method=method, headers=headers)
        if data:
            req.data = json.dumps(data).encode("utf-8")

        try:
            return urllib.request.urlopen(req)
        except urllib.error.HTTPError as e:
            self.handle_http_error(e)
        except urllib.error.URLError as e:
            raise ProviderError(f"API request failed: {str(e)}") from e

    def _make_request_stream_v1(self, method, endpoint, organization_id=None):
        """Open a streaming request against the v1 API root.

        The caller owns the returned response object and must close it.

        Raises:
            ProviderError: On an HTTP error status or a network failure.
        """
        url = f"{self._v1_base_url()}{endpoint}"

        session_key, _ = self.config.get_session_key("claude.ai")
        headers = {
            "User-Agent": self._USER_AGENT,
            "Accept": "text/event-stream",
            "anthropic-version": "2023-06-01",
            "Cookie": f"sessionKey={session_key}",
        }
        if organization_id:
            headers["x-organization-uuid"] = organization_id

        req = urllib.request.Request(url, method=method, headers=headers)

        try:
            return urllib.request.urlopen(req)
        except urllib.error.HTTPError as e:
            self.handle_http_error(e)
        except urllib.error.URLError as e:
            raise ProviderError(f"API request failed: {str(e)}") from e
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import base64
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from cryptography.fernet import Fernet
|
|
6
|
+
from cryptography.hazmat.primitives import hashes
|
|
7
|
+
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SessionKeyManager:
    """Encrypt and decrypt session keys with a key derived from an SSH private key.

    The bytes of the SSH private key file are run through PBKDF2-HMAC-SHA256
    to obtain a Fernet key; the session key is then encrypted symmetrically.
    """

    # Key types accepted by ``_get_key_type`` (compared in lower case).
    _SUPPORTED_KEY_TYPES = ("ecdsa", "ed25519")

    def __init__(self, ssh_key_path=None):
        """Locate the SSH key to use.

        Args:
            ssh_key_path: Optional key file or directory that guides the
                key discovery (see ``_find_ssh_key``).
        """
        self.logger = logging.getLogger(__name__)
        # Allow config-provided ssh_key_path to guide key discovery
        self.ssh_key_path = self._find_ssh_key(ssh_key_path)

    def _find_ssh_key(self, configured_path=None):
        """
        Locate an SSH private key for session encryption.

        Priority:
        1. If configured_path points to a specific file, use it
        2. If configured_path is a directory, search it before ~/.ssh
        3. Fall back to ~/.ssh with default key names
        4. Prompt the user as a last resort

        A leading ``~`` in configured_path (and in the prompted path) is
        expanded to the user's home directory.

        Returns:
            The key path as a string.
        """
        default_ssh_dir = Path.home() / ".ssh"
        key_names = ["id_ed25519", "id_ecdsa"]
        search_dirs = [default_ssh_dir]

        if configured_path:
            # Config files commonly contain "~/.ssh/..."; without expansion
            # such a path is never found.
            configured = Path(configured_path).expanduser()

            if configured.is_file():
                # Config points directly to a key file -- use it immediately
                return str(configured)

            if configured.is_dir():
                # Config is a directory -- search it in addition to ~/.ssh
                if configured != default_ssh_dir:
                    search_dirs.insert(0, configured)
            else:
                # Path doesn't exist -- warn and fall through to defaults
                self.logger.warning(
                    "Configured ssh_key_path not found: %s", configured_path
                )

        # Search all candidate directories for supported key names
        for search_dir in search_dirs:
            for key_name in key_names:
                key_path = search_dir / key_name
                # is_file() rather than exists(): a directory with a key-like
                # name is not a usable key.
                if key_path.is_file():
                    return str(key_path)

        # If no supported key is found, prompt the user to generate an Ed25519 key
        self.logger.warning(
            "* No supported SSH key found. RSA keys are no longer supported."
        )
        self.logger.warning(
            "* Please generate an Ed25519 key using the following command:"
        )
        self.logger.warning('  ssh-keygen -t ed25519 -C "your_email@example.com"')
        self.logger.warning(
            "* If you have NOT specified a custom ssh_key_path in config,"
        )
        self.logger.warning("* have created a key, and are still seeing this message,")
        self.logger.warning(
            "  be sure to name your key 'id_ed25519' or 'id_ecdsa' so it's found automatically."
        )
        self.logger.warning(
            "* Or set ssh_key_path with the full key name in your .ctxsync/config.local.json"
        )
        entered = input("Enter the full path to your new Ed25519 private key: ").strip()
        return str(Path(entered).expanduser()) if entered else entered

    @classmethod
    def _parse_key_type(cls, fingerprint_output):
        """Extract the key type from ``ssh-keygen -l`` output.

        The type is read from the trailing ``(TYPE)`` field of the first
        line, so a comment or file name that merely contains "ecdsa" or
        "ed25519" cannot make an unsupported key look supported.  When the
        line has no trailing parenthesised field, the whole output is
        searched instead.

        Returns:
            ``"ecdsa"``, ``"ed25519"``, or ``None`` for any other type.
        """
        text = fingerprint_output.lower()
        lines = text.strip().splitlines()
        first_line = lines[0].strip() if lines else ""

        if first_line.endswith(")") and "(" in first_line:
            declared = first_line[first_line.rfind("(") + 1 : -1]
            for supported in cls._SUPPORTED_KEY_TYPES:
                # startswith keeps variants such as "ed25519-sk" accepted.
                if declared.startswith(supported):
                    return supported
            return None

        for supported in cls._SUPPORTED_KEY_TYPES:
            if supported in text:
                return supported
        return None

    def _get_key_type(self):
        """Return the type of the configured SSH key.

        Runs ``ssh-keygen -l -f <key>`` and inspects its output.

        Returns:
            ``"ecdsa"`` or ``"ed25519"``.

        Raises:
            ValueError: If the key is of any other type.
            RuntimeError: If ``ssh-keygen`` exits with a non-zero status.
        """
        try:
            result = subprocess.run(
                ["ssh-keygen", "-l", "-f", self.ssh_key_path],
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            self.logger.error(f"Failed to determine key type: {e}")
            raise RuntimeError(
                "Failed to determine SSH key type. Make sure the key file is valid and accessible."
            ) from e

        key_type = self._parse_key_type(result.stdout)
        if key_type is None:
            raise ValueError(f"Unsupported key type for {self.ssh_key_path}")
        return key_type

    def _derive_key_from_ssh_key(self):
        """Derive a Fernet key from the raw bytes of the SSH key file.

        Returns:
            The URL-safe base64 encoding of a 32-byte PBKDF2-HMAC-SHA256
            output, as bytes.
        """
        with open(self.ssh_key_path, "rb") as key_file:
            ssh_key_data = key_file.read()

        # NOTE: salt and iteration count are fixed.  Changing either yields a
        # different key, so values encrypted earlier could no longer be
        # decrypted.
        kdf = PBKDF2HMAC(
            algorithm=hashes.SHA256(),
            length=32,
            salt=b"ctxsync",  # Using a fixed salt; consider using a secure random salt in production
            iterations=100000,
        )
        return base64.urlsafe_b64encode(kdf.derive(ssh_key_data))

    def encrypt_session_key(self, provider, session_key):
        """Encrypt *session_key*.

        Args:
            provider: Unused by this implementation.
            session_key: The plain-text session key (str).

        Returns:
            A ``(ciphertext, "symmetric")`` tuple.

        Raises:
            ValueError, RuntimeError: Propagated from ``_get_key_type`` when
                the SSH key is unsupported or cannot be inspected.
        """
        # Called for its validation side effect only; the type is not used.
        self._get_key_type()
        return self._encrypt_symmetric(session_key)

    def _encrypt_symmetric(self, session_key):
        """Fernet-encrypt *session_key*; returns ``(token_str, "symmetric")``."""
        cipher = Fernet(self._derive_key_from_ssh_key())
        encrypted_session_key = cipher.encrypt(session_key.encode()).decode()
        return encrypted_session_key, "symmetric"

    def decrypt_session_key(self, provider, encryption_method, encrypted_session_key):
        """Decrypt a stored session key.

        Args:
            provider: Unused by this implementation.
            encryption_method: Must be ``"symmetric"``.
            encrypted_session_key: The token produced by ``encrypt_session_key``.

        Returns:
            The plain-text session key, or ``None`` when either the token or
            the method is missing.

        Raises:
            ValueError: For an unknown *encryption_method*.
        """
        if not encrypted_session_key or not encryption_method:
            return None

        if encryption_method == "symmetric":
            return self._decrypt_symmetric(encrypted_session_key)
        raise ValueError(f"Unknown encryption method: {encryption_method}")

    def _decrypt_symmetric(self, encrypted_session_key):
        """Fernet-decrypt *encrypted_session_key* and return it as str."""
        cipher = Fernet(self._derive_key_from_ssh_key())
        return cipher.decrypt(encrypted_session_key.encode()).decode()
|
ctxsync/syncmanager.py
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
import logging
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
import io
|
|
7
|
+
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
from ctxsync.utils import compute_md5_hash
|
|
11
|
+
from ctxsync.exceptions import ProviderError
|
|
12
|
+
from .compression import compress_content, decompress_content
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def retry_on_403(max_retries=3, delay=1):
    """Build a decorator that retries a call failing with a 403 ``ProviderError``.

    Args:
        max_retries: Maximum number of attempts.  Values below 1 are treated
            as 1, so the wrapped function is always called at least once.
        delay: Seconds to sleep between attempts.

    Returns:
        A decorator.  The wrapped function re-raises the ``ProviderError``
        when it is not a 403 (its message lacks ``"403 Forbidden"``) or when
        the attempts are exhausted.
    """
    attempts = max(1, max_retries)

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # When decorating a method, args[0] is the instance; prefer its
            # logger when it has one.
            self = args[0] if len(args) > 0 else None
            log = self.logger if (self and hasattr(self, "logger")) else logger
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except ProviderError as e:
                    if "403 Forbidden" not in str(e) or attempt >= attempts - 1:
                        raise
                    log.warning(
                        f"Received 403 error. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})"
                    )
                    time.sleep(delay)

        return wrapper

    return decorator


class SyncManager:
    """Synchronise a local directory with the files of a remote project.

    Two strategies exist: file-by-file (``compression_algorithm == "none"``)
    and a single packed, compressed archive (any other value).
    """

    # Markers written by ``_pack_files`` and parsed by ``_unpack_files``.
    _BEGIN_MARKER = "--- BEGIN FILE:"
    _END_MARKER = "--- END FILE:"
    _MARKER_TAIL = "---"
    _PACKED_PREFIX = "ctxsync_packed_"

    def __init__(self, provider, config, local_path):
        """Store the collaborators and read the sync settings from *config*.

        Args:
            provider: Object exposing ``upload_file``, ``delete_file`` and
                ``list_files``.
            config: Object exposing ``get(key, default=None)``.
            local_path: Root directory of the local files.
        """
        self.provider = provider
        self.config = config
        self.active_organization_id = config.get("active_organization_id")
        self.active_project_id = config.get("active_project_id")
        self.local_path = local_path
        self.upload_delay = config.get("upload_delay", 0.5)
        self.two_way_sync = config.get("two_way_sync", False)
        self.max_retries = 3
        self.retry_delay = 1
        self.compression_algorithm = config.get("compression_algorithm", "none")
        self.synced_files = {}

    def _is_inside_local_path(self, path):
        """Return True when *path* resolves to a location under ``local_path``.

        File names received from the remote side are not trusted: a name
        such as ``../x`` or an absolute path would otherwise be written
        outside the sync directory.
        """
        root = os.path.normcase(os.path.abspath(self.local_path))
        target = os.path.normcase(os.path.abspath(path))
        try:
            return os.path.commonpath([root, target]) == root
        except ValueError:
            # Raised e.g. for paths on different Windows drives.
            return False

    def sync(self, local_files, remote_files):
        """Synchronise using the strategy selected by ``compression_algorithm``.

        Args:
            local_files: Mapping of relative file path to checksum.
            remote_files: List of dicts; the keys read in this class are
                ``file_name``, ``content``, ``uuid`` and ``created_at``.
        """
        self.synced_files = {}  # Reset synced files at the start of sync
        if self.compression_algorithm == "none":
            self._sync_without_compression(local_files, remote_files)
        else:
            self._sync_with_compression(local_files, remote_files)

    def _sync_without_compression(self, local_files, remote_files):
        """Sync file by file: push local changes, optionally pull, then prune."""
        remote_files_to_delete = {rf["file_name"] for rf in remote_files}
        synced_files = set()

        # Index once instead of scanning the list for every local file; the
        # first entry wins for duplicate names, as a linear scan would.
        remote_by_name = {}
        for rf in remote_files:
            remote_by_name.setdefault(rf["file_name"], rf)

        with tqdm(total=len(local_files), desc="Local → Remote") as pbar:
            for local_file, local_checksum in local_files.items():
                remote_file = remote_by_name.get(local_file)
                if remote_file:
                    self.update_existing_file(
                        local_file,
                        local_checksum,
                        remote_file,
                        remote_files_to_delete,
                        synced_files,
                    )
                else:
                    self.upload_new_file(local_file, synced_files)
                pbar.update(1)

        self.update_local_timestamps(remote_files, synced_files)

        if self.two_way_sync:
            with tqdm(total=len(remote_files), desc="Local ← Remote") as pbar:
                for remote_file in remote_files:
                    self.sync_remote_to_local(
                        remote_file, remote_files_to_delete, synced_files
                    )
                    pbar.update(1)

        self.prune_remote_files(remote_files, remote_files_to_delete)

    def _sync_with_compression(self, local_files, remote_files):
        """Sync by uploading one packed, compressed archive of all local files."""
        packed_content = self._pack_files(local_files)
        compressed_content = compress_content(
            packed_content, self.compression_algorithm
        )

        remote_file_name = (
            f"ctxsync_packed_{datetime.now().strftime('%Y%m%d%H%M%S')}.dat"
        )
        self._upload_compressed_file(compressed_content, remote_file_name)

        if self.two_way_sync:
            remote_compressed_content = self._download_compressed_file()
            if remote_compressed_content:
                remote_packed_content = decompress_content(
                    remote_compressed_content, self.compression_algorithm
                )
                self._unpack_files(remote_packed_content)

        # remote_files was listed before the upload, so only archives from
        # earlier runs are removed.
        self._cleanup_old_remote_files(remote_files)

    def _pack_files(self, local_files):
        """Concatenate all local files into one marker-delimited string.

        Each file is written as a BEGIN marker line, the file content, a
        newline, and an END marker line.
        """
        packed_content = io.StringIO()
        for file_path in local_files:
            full_path = os.path.join(self.local_path, file_path)
            with open(full_path, "r", encoding="utf-8") as f:
                content = f.read()
            packed_content.write(f"--- BEGIN FILE: {file_path} ---\n")
            packed_content.write(content)
            packed_content.write(f"\n--- END FILE: {file_path} ---\n")
        return packed_content.getvalue()

    @retry_on_403()
    def _upload_compressed_file(self, compressed_content, file_name):
        """Upload the packed archive, then wait ``upload_delay`` seconds."""
        logger.debug(f"Uploading compressed file {file_name} to remote...")
        self.provider.upload_file(
            self.active_organization_id,
            self.active_project_id,
            file_name,
            compressed_content,
        )
        time.sleep(self.upload_delay)

    @retry_on_403()
    def _download_compressed_file(self):
        """Return the content of the newest packed archive, or ``None``.

        "Newest" is the greatest file name; the names embed a timestamp.
        """
        logger.debug("Downloading latest compressed file from remote...")
        remote_files = self.provider.list_files(
            self.active_organization_id, self.active_project_id
        )
        compressed_files = [
            rf for rf in remote_files if rf["file_name"].startswith("ctxsync_packed_")
        ]
        if compressed_files:
            latest_file = max(compressed_files, key=lambda x: x["file_name"])
            return latest_file["content"]
        return None

    def _marker_file_name(self, line, marker):
        """Extract the file name from a ``--- BEGIN FILE: <name> ---`` line."""
        name = line[len(marker):].strip()
        # Drop the closing " ---" written by _pack_files; otherwise the file
        # would be created as "<name> ---".
        if name.endswith(self._MARKER_TAIL):
            name = name[: -len(self._MARKER_TAIL)].rstrip()
        return name

    def _unpack_files(self, packed_content):
        """Write every file contained in *packed_content* below ``local_path``.

        This is the inverse of ``_pack_files``: the lines between a BEGIN and
        an END marker, joined by newlines, are exactly the original content.
        A section that is not terminated by an END marker is still written.
        Content lines that themselves start with a marker cannot be
        represented by this format.
        """
        current_file = None
        lines = []

        # split("\n") rather than splitlines(): the latter also splits on
        # characters such as form feed and would alter file content.
        for line in packed_content.split("\n"):
            if line.startswith(self._BEGIN_MARKER):
                if current_file:
                    self._write_file(current_file, "\n".join(lines))
                current_file = self._marker_file_name(line, self._BEGIN_MARKER)
                lines = []
            elif line.startswith(self._END_MARKER):
                if current_file:
                    self._write_file(current_file, "\n".join(lines))
                current_file = None
                lines = []
            elif current_file is not None:
                lines.append(line)

        if current_file:
            self._write_file(current_file, "\n".join(lines))

    def _write_file(self, file_path, content):
        """Write *content* to *file_path* below ``local_path``.

        Missing parent directories are created.  A path that would land
        outside ``local_path`` is skipped with a warning.
        """
        full_path = os.path.join(self.local_path, file_path)
        if not self._is_inside_local_path(full_path):
            logger.warning(
                "Skipping %s: path is outside the local sync directory", file_path
            )
            return
        parent = os.path.dirname(full_path)
        if parent:  # os.makedirs("") raises
            os.makedirs(parent, exist_ok=True)
        with open(full_path, "w", encoding="utf-8") as f:
            f.write(content)

    def _cleanup_old_remote_files(self, remote_files):
        """Delete every packed archive listed in *remote_files*."""
        for remote_file in remote_files:
            if remote_file["file_name"].startswith("ctxsync_packed_"):
                self.provider.delete_file(
                    self.active_organization_id,
                    self.active_project_id,
                    remote_file["uuid"],
                )

    @retry_on_403()
    def update_existing_file(
        self,
        local_file,
        local_checksum,
        remote_file,
        remote_files_to_delete,
        synced_files,
    ):
        """Replace the remote copy of *local_file* when the checksums differ.

        In every case the file is removed from *remote_files_to_delete*,
        because it still exists locally.

        NOTE(review): the whole method is retried on a 403, so a retry after
        a successful delete issues the delete again -- confirm the provider
        tolerates that.
        """
        remote_content = remote_file["content"]
        remote_checksum = compute_md5_hash(remote_content)
        if local_checksum != remote_checksum:
            logger.debug(f"Updating {local_file} on remote...")
            # Read the local file BEFORE deleting the remote one: if it
            # cannot be read, the remote copy must not be lost.
            with open(
                os.path.join(self.local_path, local_file), "r", encoding="utf-8"
            ) as file:
                content = file.read()
            with tqdm(total=2, desc=f"Updating {local_file}", leave=False) as pbar:
                self.provider.delete_file(
                    self.active_organization_id,
                    self.active_project_id,
                    remote_file["uuid"],
                )
                pbar.update(1)
                self.provider.upload_file(
                    self.active_organization_id,
                    self.active_project_id,
                    local_file,
                    content,
                )
                pbar.update(1)
            time.sleep(self.upload_delay)
            synced_files.add(local_file)
        # Guarded removal: the name may already be gone (e.g. after a retry).
        if local_file in remote_files_to_delete:
            remote_files_to_delete.remove(local_file)

    @retry_on_403()
    def upload_new_file(self, local_file, synced_files):
        """Upload a file that has no remote counterpart yet."""
        logger.debug(f"Uploading new file {local_file} to remote...")
        with open(
            os.path.join(self.local_path, local_file), "r", encoding="utf-8"
        ) as file:
            content = file.read()
        with tqdm(total=1, desc=f"Uploading {local_file}", leave=False) as pbar:
            self.provider.upload_file(
                self.active_organization_id, self.active_project_id, local_file, content
            )
            pbar.update(1)
        time.sleep(self.upload_delay)
        synced_files.add(local_file)

    def update_local_timestamps(self, remote_files, synced_files):
        """Set the mtime of synced local files to the remote ``created_at``."""
        for remote_file in remote_files:
            if remote_file["file_name"] not in synced_files:
                continue
            local_file_path = os.path.join(self.local_path, remote_file["file_name"])
            if os.path.exists(local_file_path):
                # fromisoformat() on older Pythons rejects a trailing "Z".
                remote_timestamp = datetime.fromisoformat(
                    remote_file["created_at"].replace("Z", "+00:00")
                ).timestamp()
                os.utime(local_file_path, (remote_timestamp, remote_timestamp))
                logger.debug(f"Updated timestamp on local file {local_file_path}")

    def sync_remote_to_local(self, remote_file, remote_files_to_delete, synced_files):
        """Pull one remote file: update the local copy or create it.

        A remote file whose name would resolve outside ``local_path`` is
        skipped with a warning and is not scheduled for deletion.
        """
        file_name = remote_file["file_name"]
        local_file_path = os.path.join(self.local_path, file_name)
        if not self._is_inside_local_path(local_file_path):
            logger.warning(
                "Skipping %s: path is outside the local sync directory", file_name
            )
            if file_name in remote_files_to_delete:
                remote_files_to_delete.remove(file_name)
            return

        if os.path.exists(local_file_path):
            self.update_existing_local_file(
                local_file_path, remote_file, remote_files_to_delete, synced_files
            )
        else:
            self.create_new_local_file(
                local_file_path, remote_file, remote_files_to_delete, synced_files
            )

    def update_existing_local_file(
        self, local_file_path, remote_file, remote_files_to_delete, synced_files
    ):
        """Overwrite the local file when the remote copy is newer."""
        local_mtime = datetime.fromtimestamp(
            os.path.getmtime(local_file_path), tz=timezone.utc
        )
        remote_mtime = datetime.fromisoformat(
            remote_file["created_at"].replace("Z", "+00:00")
        )
        if remote_mtime > local_mtime:
            logger.debug(
                f"Updating local file {remote_file['file_name']} from remote..."
            )
            content = remote_file["content"]
            with open(local_file_path, "w", encoding="utf-8") as file:
                file.write(content)
            synced_files.add(remote_file["file_name"])
            if remote_file["file_name"] in remote_files_to_delete:
                remote_files_to_delete.remove(remote_file["file_name"])

    def create_new_local_file(
        self, local_file_path, remote_file, remote_files_to_delete, synced_files
    ):
        """Create a local file from a remote one, creating parent directories."""
        logger.debug(
            f"Creating new local file {remote_file['file_name']} from remote..."
        )
        content = remote_file["content"]
        parent = os.path.dirname(local_file_path)
        if parent:
            # Remote names may contain sub-directories that do not exist yet.
            os.makedirs(parent, exist_ok=True)
        with tqdm(
            total=1, desc=f"Creating {remote_file['file_name']}", leave=False
        ) as pbar:
            with open(local_file_path, "w", encoding="utf-8") as file:
                file.write(content)
            pbar.update(1)
        synced_files.add(remote_file["file_name"])
        if remote_file["file_name"] in remote_files_to_delete:
            remote_files_to_delete.remove(remote_file["file_name"])

    def prune_remote_files(self, remote_files, remote_files_to_delete):
        """Delete the remaining remote-only files when pruning is enabled."""
        if not self.config.get("prune_remote_files"):
            logger.info("Remote pruning is not enabled.")
            return

        # Iterate over a copy so the collection may change while deleting.
        for file_to_delete in list(remote_files_to_delete):
            self.delete_remote_files(file_to_delete, remote_files)

    @retry_on_403()
    def delete_remote_files(self, file_to_delete, remote_files):
        """Delete the remote file named *file_to_delete*.

        A name that is not present in *remote_files* is skipped with a
        warning.
        """
        logger.debug(f"Deleting {file_to_delete} from remote...")
        remote_file = next(
            (rf for rf in remote_files if rf["file_name"] == file_to_delete), None
        )
        if remote_file is None:
            logger.warning("Cannot delete %s: not found on remote", file_to_delete)
            return
        with tqdm(total=1, desc=f"Deleting {file_to_delete}", leave=False) as pbar:
            self.provider.delete_file(
                self.active_organization_id, self.active_project_id, remote_file["uuid"]
            )
            pbar.update(1)
        time.sleep(self.upload_delay)

    def embedding(self, local_files):
        """Return the packed, compressed representation of *local_files*."""
        packed_content = self._pack_files(local_files)
        return compress_content(packed_content, self.compression_algorithm)
|