decompressed-sdk 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- decompressed_sdk/__init__.py +139 -0
- decompressed_sdk/auth.py +33 -0
- decompressed_sdk/client.py +165 -0
- decompressed_sdk/errors.py +62 -0
- decompressed_sdk/resources/__init__.py +24 -0
- decompressed_sdk/resources/connectors.py +39 -0
- decompressed_sdk/resources/datasets.py +501 -0
- decompressed_sdk/resources/embeddings.py +285 -0
- decompressed_sdk/resources/materializations.py +139 -0
- decompressed_sdk/resources/syncs.py +196 -0
- decompressed_sdk/resources/versions.py +342 -0
- decompressed_sdk/types/__init__.py +93 -0
- decompressed_sdk/types/base.py +19 -0
- decompressed_sdk/types/connectors.py +18 -0
- decompressed_sdk/types/datasets.py +113 -0
- decompressed_sdk/types/embeddings.py +65 -0
- decompressed_sdk/types/materializations.py +65 -0
- decompressed_sdk/types/search.py +32 -0
- decompressed_sdk/types/syncs.py +64 -0
- decompressed_sdk/types/versions.py +243 -0
- decompressed_sdk-0.2.0.dist-info/METADATA +37 -0
- decompressed_sdk-0.2.0.dist-info/RECORD +25 -0
- decompressed_sdk-0.2.0.dist-info/WHEEL +5 -0
- decompressed_sdk-0.2.0.dist-info/licenses/LICENSE +21 -0
- decompressed_sdk-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Decompressed SDK - Version-First Vector Data Platform.
|
|
3
|
+
|
|
4
|
+
This SDK implements "Snowflake for vectors" semantics where datasets
|
|
5
|
+
are version-aware and all operations require explicit version pinning.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
from decompressed_sdk import DecompressedClient
|
|
9
|
+
|
|
10
|
+
client = DecompressedClient(
|
|
11
|
+
base_url="https://api.decompressed.io",
|
|
12
|
+
api_key="your-api-key"
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Get dataset reference
|
|
16
|
+
dataset = client.datasets.get("my-dataset")
|
|
17
|
+
|
|
18
|
+
# Pin to specific version for reproducible operations
|
|
19
|
+
v1 = dataset.version(1)
|
|
20
|
+
info = v1.info()
|
|
21
|
+
results = v1.search([[0.1, 0.2, 0.3]], top_k=10)
|
|
22
|
+
|
|
23
|
+
# Compare versions (LLM regression testing)
|
|
24
|
+
diff = v1.compare_to(2)
|
|
25
|
+
|
|
26
|
+
# Time travel
|
|
27
|
+
yesterday = dataset.at("2024-01-28T00:00:00Z")
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from .client import DecompressedClient
|
|
31
|
+
from .auth import ApiKeyAuth, JwtAuth, AuthStrategy
|
|
32
|
+
from .errors import (
|
|
33
|
+
DecompressedError,
|
|
34
|
+
AuthenticationError,
|
|
35
|
+
NotFoundError,
|
|
36
|
+
ValidationError,
|
|
37
|
+
RateLimitError,
|
|
38
|
+
ServerError,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Re-export types for convenience
|
|
42
|
+
from .types import (
|
|
43
|
+
# Datasets
|
|
44
|
+
Dataset,
|
|
45
|
+
DatasetInfo,
|
|
46
|
+
DatasetQueryResponse,
|
|
47
|
+
DatasetVersion,
|
|
48
|
+
UploadSession,
|
|
49
|
+
AppendSession,
|
|
50
|
+
JobStatus,
|
|
51
|
+
UploadResult,
|
|
52
|
+
AppendResult,
|
|
53
|
+
# Versions (NEW - core of version-first design)
|
|
54
|
+
DatasetRef,
|
|
55
|
+
VersionedDataset,
|
|
56
|
+
DatasetEvent,
|
|
57
|
+
VersionInfo,
|
|
58
|
+
VersionComparison,
|
|
59
|
+
# Materializations
|
|
60
|
+
Materialization,
|
|
61
|
+
MaterializationType,
|
|
62
|
+
MaterializationStatus,
|
|
63
|
+
CreateMaterializationResponse,
|
|
64
|
+
MaterializationEstimate,
|
|
65
|
+
MaterializationDownloadFile,
|
|
66
|
+
MaterializationDownloadResponse,
|
|
67
|
+
# Connectors
|
|
68
|
+
Connector,
|
|
69
|
+
# Syncs
|
|
70
|
+
SyncJob,
|
|
71
|
+
SyncResult,
|
|
72
|
+
SyncValidation,
|
|
73
|
+
SyncState,
|
|
74
|
+
# Embeddings
|
|
75
|
+
SourceReference,
|
|
76
|
+
EmbeddingJob,
|
|
77
|
+
EmbeddingJobStatus,
|
|
78
|
+
EmbeddingResult,
|
|
79
|
+
# Search
|
|
80
|
+
SearchMatch,
|
|
81
|
+
SearchResponse,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# Version string of this SDK release.
__version__ = "0.2.0"

# Public names re-exported at package level, grouped by area.
__all__ = [
    # Client
    "DecompressedClient",
    # Auth
    "ApiKeyAuth", "JwtAuth", "AuthStrategy",
    # Errors
    "DecompressedError", "AuthenticationError", "NotFoundError",
    "ValidationError", "RateLimitError", "ServerError",
    # Types - Datasets
    "Dataset", "DatasetInfo", "DatasetQueryResponse", "DatasetVersion",
    "UploadSession", "AppendSession", "JobStatus", "UploadResult",
    "AppendResult",
    # Types - Versions (NEW)
    "DatasetRef", "VersionedDataset", "DatasetEvent", "VersionInfo",
    "VersionComparison",
    # Types - Materializations
    "Materialization", "MaterializationType", "MaterializationStatus",
    "CreateMaterializationResponse", "MaterializationEstimate",
    "MaterializationDownloadFile", "MaterializationDownloadResponse",
    # Types - Connectors
    "Connector",
    # Types - Syncs
    "SyncJob", "SyncResult", "SyncValidation", "SyncState",
    # Types - Embeddings
    "SourceReference", "EmbeddingJob", "EmbeddingJobStatus",
    "EmbeddingResult",
    # Types - Search
    "SearchMatch", "SearchResponse",
]
|
decompressed_sdk/auth.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Dict, Protocol
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AuthStrategy(Protocol):
    """Structural interface for objects that supply authentication headers."""

    def headers(self) -> Dict[str, str]:
        """Return the header name/value pairs to add to a request."""
        ...
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class ApiKeyAuth:
    """Authenticate with an API key sent as a bearer token.

    Attributes:
        api_key: The secret API key; must be non-empty.

    Raises:
        ValueError: If ``api_key`` is empty.
    """

    api_key: str

    def __post_init__(self) -> None:
        if not self.api_key:
            raise ValueError("API key is required")

    def __repr__(self) -> str:
        # The repr that dataclass would generate prints the key verbatim,
        # which leaks the secret into logs and tracebacks. dataclass keeps an
        # explicitly defined __repr__ instead of generating its own.
        return f"{type(self).__name__}(api_key='***')"

    def headers(self) -> Dict[str, str]:
        """Return the ``Authorization`` header carrying the API key."""
        return {"Authorization": f"Bearer {self.api_key}"}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class JwtAuth:
    """Authenticate with a JWT sent as a bearer token.

    Attributes:
        jwt: The encoded token; must be non-empty.

    Raises:
        ValueError: If ``jwt`` is empty.
    """

    jwt: str

    def __post_init__(self) -> None:
        if not self.jwt:
            raise ValueError("JWT is required")

    def __repr__(self) -> str:
        # The repr that dataclass would generate prints the token verbatim,
        # which leaks the credential into logs and tracebacks. dataclass keeps
        # an explicitly defined __repr__ instead of generating its own.
        return f"{type(self).__name__}(jwt='***')"

    def headers(self) -> Dict[str, str]:
        """Return the ``Authorization`` header carrying the token."""
        return {"Authorization": f"Bearer {self.jwt}"}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Decompressed SDK Client - Version-First Design.
|
|
3
|
+
|
|
4
|
+
This client implements "Snowflake for vectors" semantics where datasets
|
|
5
|
+
are version-aware and all operations require explicit version pinning.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
from decompressed_sdk import DecompressedClient
|
|
9
|
+
|
|
10
|
+
client = DecompressedClient(
|
|
11
|
+
base_url="https://api.decompressed.io",
|
|
12
|
+
api_key="your-api-key"
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Get dataset reference
|
|
16
|
+
dataset = client.datasets.get("my-dataset")
|
|
17
|
+
|
|
18
|
+
# Pin to specific version for reproducible operations
|
|
19
|
+
v1 = dataset.version(1)
|
|
20
|
+
info = v1.info()
|
|
21
|
+
results = v1.search([[0.1, 0.2, 0.3]], top_k=10)
|
|
22
|
+
|
|
23
|
+
# Or use latest explicitly
|
|
24
|
+
latest = dataset.latest()
|
|
25
|
+
|
|
26
|
+
# Time travel to a specific point
|
|
27
|
+
yesterday = dataset.at("2024-01-28T00:00:00Z")
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
from typing import Any, Dict, Optional
|
|
33
|
+
|
|
34
|
+
import requests
|
|
35
|
+
|
|
36
|
+
from .auth import ApiKeyAuth, AuthStrategy, JwtAuth
|
|
37
|
+
from .errors import raise_for_status
|
|
38
|
+
from .resources.datasets import DatasetsResource
|
|
39
|
+
from .resources.materializations import MaterializationsResource
|
|
40
|
+
from .resources.connectors import ConnectorsResource
|
|
41
|
+
from .resources.syncs import SyncsResource
|
|
42
|
+
from .resources.embeddings import EmbeddingsResource
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class DecompressedClient:
    """
    Decompressed SDK client with version-first design.

    All dataset operations require explicit version pinning to ensure
    reproducibility and auditability.

    The client can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.

    Args:
        base_url: API base URL (e.g., "https://api.decompressed.io")
        api_key: API key for authentication
        jwt: JWT token for authentication (alternative to api_key)
        session: Optional requests.Session for connection pooling
        timeout_seconds: Request timeout in seconds

    Raises:
        ValueError: If both ``api_key`` and ``jwt`` are provided.

    Example:
        client = DecompressedClient(
            base_url="https://api.decompressed.io",
            api_key="dcp_..."
        )

        # Get dataset and pin to version
        dataset = client.datasets.get("embeddings-v2")
        v3 = dataset.version(3)

        # All operations use version 3
        info = v3.info()
        results = v3.search([[0.1, 0.2, 0.3]])
    """

    def __init__(
        self,
        *,
        base_url: str,
        api_key: Optional[str] = None,
        jwt: Optional[str] = None,
        session: Optional[requests.Session] = None,
        timeout_seconds: float = 60.0,
    ) -> None:
        if api_key and jwt:
            raise ValueError("Provide only one of api_key or jwt")

        self._base_url = base_url.rstrip("/")
        self._timeout_seconds = timeout_seconds

        # At most one of the two credentials is set at this point.
        self._auth: Optional[AuthStrategy] = None
        if api_key:
            self._auth = ApiKeyAuth(api_key)
        elif jwt:
            self._auth = JwtAuth(jwt)

        # Only a session created here is closed by close(); a session passed
        # in by the caller stays under the caller's control.
        self._owns_session = not session
        self._session = session or requests.Session()

        # Resource accessors
        self.datasets = DatasetsResource(self)
        self.connectors = ConnectorsResource(self)
        self.syncs = SyncsResource(self)
        self.embeddings = EmbeddingsResource(self)

    def __enter__(self) -> "DecompressedClient":
        return self

    def __exit__(self, *exc_info: Any) -> None:
        self.close()

    def close(self) -> None:
        """Close the HTTP session if this client created it."""
        if self._owns_session:
            self._session.close()

    def materializations(self, dataset_id: str) -> MaterializationsResource:
        """
        Get a MaterializationsResource for managing materializations of a dataset.

        Args:
            dataset_id: The dataset ID or name

        Returns:
            MaterializationsResource for the dataset
        """
        return MaterializationsResource(self, dataset_id)

    def _build_headers(self, base: Dict[str, str]) -> Dict[str, str]:
        """Return ``base`` plus the auth headers, when auth is configured."""
        headers = dict(base)
        if self._auth:
            headers.update(self._auth.headers())
        return headers

    @staticmethod
    def _raise_on_error(resp: requests.Response) -> None:
        """Raise the matching SDK error when ``resp`` has a status >= 400."""
        if resp.status_code < 400:
            return
        request_id = resp.headers.get("x-request-id")
        try:
            body = resp.json()
        except ValueError:
            # Error body is not JSON; keep the raw text as the error details.
            body = resp.text
        raise_for_status(resp.status_code, body, request_id)

    def request(self, method: str, path: str, json: Any = None) -> Any:
        """Make an authenticated request to the API.

        Args:
            method: HTTP method name.
            path: Path appended to the base URL.
            json: Optional JSON-serializable request body.

        Returns:
            The decoded JSON response, or ``None`` for a 204 response or a
            response with an empty body.

        Raises:
            DecompressedError: (or a subclass) when the status is >= 400.
        """
        resp = self._session.request(
            method=method,
            url=f"{self._base_url}{path}",
            headers=self._build_headers({"Accept": "application/json"}),
            json=json,
            timeout=self._timeout_seconds,
        )
        self._raise_on_error(resp)

        # An empty body cannot be decoded as JSON; treat it like 204.
        if resp.status_code == 204 or not resp.content:
            return None

        return resp.json()

    def request_raw(self, method: str, path: str) -> requests.Response:
        """Make an authenticated request and return raw response (for binary data).

        Raises:
            DecompressedError: (or a subclass) when the status is >= 400.
        """
        resp = self._session.request(
            method=method,
            url=f"{self._base_url}{path}",
            headers=self._build_headers({}),
            timeout=self._timeout_seconds,
        )
        self._raise_on_error(resp)
        return resp
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# eq=False keeps identity-based equality and hashing. The dataclass default
# (eq=True) sets __hash__ to None, which makes exception instances unhashable.
@dataclass(eq=False)
class DecompressedError(Exception):
    """Base class for errors raised by the SDK.

    Attributes:
        message: Error message; this is what ``str(error)`` shows.
        status: HTTP status code, when the error comes from a response.
        request_id: Request identifier, when one is available.
        details: Additional error payload, when one is available.
    """

    message: str
    status: Optional[int] = None
    request_id: Optional[str] = None
    details: Any = None

    def __str__(self) -> str:
        # __str__ must return a str; coerce in case a caller passed a
        # structured message such as a list or dict.
        return str(self.message)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AuthenticationError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 401 responses."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PermissionError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 403 responses.

    Note: within this module the name shadows the builtin ``PermissionError``.
    """
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class NotFoundError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 404 responses."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ValidationError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 422 responses."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RateLimitError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 429 responses."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ServerError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP statuses of 500 and above."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def raise_for_status(status: int, body: Any, request_id: Optional[str]) -> None:
    """Raise the SDK error that corresponds to an HTTP error status.

    This function always raises; it never returns normally.

    Args:
        status: HTTP status code of the response.
        body: Decoded response body (any type); attached to the error as
            ``details``. When it is a dict, its ``"detail"`` or ``"message"``
            entry is used as the error message.
        request_id: Request identifier to attach to the error, if any.

    Raises:
        AuthenticationError: For status 401.
        PermissionError: For status 403 (the SDK class, not the builtin).
        NotFoundError: For status 404.
        ValidationError: For status 422.
        RateLimitError: For status 429.
        ServerError: For any status of 500 or above.
        DecompressedError: For every other status.
    """
    message = None
    if isinstance(body, dict):
        message = body.get("detail") or body.get("message")
    if not message:
        message = f"HTTP {status}"
    elif not isinstance(message, str):
        # The body entry may be structured (for example a list or dict).
        # The error's __str__ returns the message as-is, so it must be a str.
        message = str(message)

    exact_matches = {
        401: AuthenticationError,
        403: PermissionError,
        404: NotFoundError,
        422: ValidationError,
        429: RateLimitError,
    }
    error_cls = exact_matches.get(status)
    if error_cls is None:
        error_cls = ServerError if status >= 500 else DecompressedError

    raise error_cls(message, status=status, request_id=request_id, details=body)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Decompressed SDK resource modules.
|
|
2
|
+
|
|
3
|
+
Version-first design:
|
|
4
|
+
- DraftVersion: Mutable staging area before commit
|
|
5
|
+
- CommittedVersion: Immutable, addressable forever
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .datasets import DatasetsResource
|
|
9
|
+
from .versions import DraftVersion, CommittedVersion, VersionedDatasetResource
|
|
10
|
+
from .materializations import MaterializationsResource
|
|
11
|
+
from .connectors import ConnectorsResource
|
|
12
|
+
from .syncs import SyncsResource
|
|
13
|
+
from .embeddings import EmbeddingsResource
|
|
14
|
+
|
|
15
|
+
# Public names of the resources package.
__all__ = [
    "DatasetsResource",
    "DraftVersion", "CommittedVersion",
    "VersionedDatasetResource",  # Backward compat alias
    "MaterializationsResource",
    "ConnectorsResource",
    "SyncsResource",
    "EmbeddingsResource",
]
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Connectors resource."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, List, TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from ..client import DecompressedClient
|
|
9
|
+
|
|
10
|
+
from ..types.connectors import Connector
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _filter_dataclass_kwargs(model: type, data: Dict[str, Any]) -> Dict[str, Any]:
|
|
14
|
+
"""Filter dict to only include fields defined in the dataclass."""
|
|
15
|
+
allowed = getattr(model, "__dataclass_fields__", None)
|
|
16
|
+
if not allowed:
|
|
17
|
+
return data
|
|
18
|
+
return {k: v for k, v in data.items() if k in allowed}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ConnectorsResource:
    """Access connectors (vector databases, external services) through the API."""

    def __init__(self, client: "DecompressedClient") -> None:
        # Client used to issue every request made by this resource.
        self._client = client

    @staticmethod
    def _to_connector(raw: Dict[str, Any]) -> Connector:
        """Build a Connector from a response item, dropping unknown keys."""
        known = _filter_dataclass_kwargs(Connector, raw)
        return Connector(**known)

    def list(self) -> List[Connector]:
        """Fetch every connector and return them as Connector objects."""
        payload = self._client.request("GET", "/api/v1/connectors")
        return [self._to_connector(entry) for entry in payload]

    def get(self, connector_id: str) -> Connector:
        """Fetch the connector with the given ID."""
        endpoint = f"/api/v1/connectors/{connector_id}"
        payload = self._client.request("GET", endpoint)
        return self._to_connector(payload)

    def test(self, connector_id: str) -> Dict[str, Any]:
        """Ask the API to test the connector's connection; return its response."""
        endpoint = f"/api/v1/connectors/{connector_id}/test"
        return self._client.request("POST", endpoint)
|