decompressed-sdk 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. decompressed_sdk-0.2.0/LICENSE +21 -0
  2. decompressed_sdk-0.2.0/PKG-INFO +37 -0
  3. decompressed_sdk-0.2.0/README.md +22 -0
  4. decompressed_sdk-0.2.0/pyproject.toml +26 -0
  5. decompressed_sdk-0.2.0/setup.cfg +4 -0
  6. decompressed_sdk-0.2.0/src/decompressed_sdk/__init__.py +139 -0
  7. decompressed_sdk-0.2.0/src/decompressed_sdk/auth.py +33 -0
  8. decompressed_sdk-0.2.0/src/decompressed_sdk/client.py +165 -0
  9. decompressed_sdk-0.2.0/src/decompressed_sdk/errors.py +62 -0
  10. decompressed_sdk-0.2.0/src/decompressed_sdk/resources/__init__.py +24 -0
  11. decompressed_sdk-0.2.0/src/decompressed_sdk/resources/connectors.py +39 -0
  12. decompressed_sdk-0.2.0/src/decompressed_sdk/resources/datasets.py +501 -0
  13. decompressed_sdk-0.2.0/src/decompressed_sdk/resources/embeddings.py +285 -0
  14. decompressed_sdk-0.2.0/src/decompressed_sdk/resources/materializations.py +139 -0
  15. decompressed_sdk-0.2.0/src/decompressed_sdk/resources/syncs.py +196 -0
  16. decompressed_sdk-0.2.0/src/decompressed_sdk/resources/versions.py +342 -0
  17. decompressed_sdk-0.2.0/src/decompressed_sdk/types/__init__.py +93 -0
  18. decompressed_sdk-0.2.0/src/decompressed_sdk/types/base.py +19 -0
  19. decompressed_sdk-0.2.0/src/decompressed_sdk/types/connectors.py +18 -0
  20. decompressed_sdk-0.2.0/src/decompressed_sdk/types/datasets.py +113 -0
  21. decompressed_sdk-0.2.0/src/decompressed_sdk/types/embeddings.py +65 -0
  22. decompressed_sdk-0.2.0/src/decompressed_sdk/types/materializations.py +65 -0
  23. decompressed_sdk-0.2.0/src/decompressed_sdk/types/search.py +32 -0
  24. decompressed_sdk-0.2.0/src/decompressed_sdk/types/syncs.py +64 -0
  25. decompressed_sdk-0.2.0/src/decompressed_sdk/types/versions.py +243 -0
  26. decompressed_sdk-0.2.0/src/decompressed_sdk.egg-info/PKG-INFO +37 -0
  27. decompressed_sdk-0.2.0/src/decompressed_sdk.egg-info/SOURCES.txt +28 -0
  28. decompressed_sdk-0.2.0/src/decompressed_sdk.egg-info/dependency_links.txt +1 -0
  29. decompressed_sdk-0.2.0/src/decompressed_sdk.egg-info/requires.txt +1 -0
  30. decompressed_sdk-0.2.0/src/decompressed_sdk.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,37 @@
1
+ Metadata-Version: 2.4
2
+ Name: decompressed-sdk
3
+ Version: 0.2.0
4
+ Summary: Decompressed public SDK (Python)
5
+ Author: Decompressed
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/decompressed/decompressed
8
+ Project-URL: Repository, https://github.com/decompressed/decompressed
9
+ Project-URL: Issues, https://github.com/decompressed/decompressed/issues
10
+ Requires-Python: >=3.9
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: requests>=2.31.0
14
+ Dynamic: license-file
15
+
16
+ # decompressed-sdk
17
+
18
+ Public Python SDK for Decompressed.
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ pip install decompressed-sdk
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ```python
29
+ from decompressed_sdk import DecompressedClient
30
+
31
+ client = DecompressedClient(
32
+ base_url="https://api.decompressed.ai",
33
+ api_key="dsk_test_123",
34
+ )
35
+
36
+ datasets = client.datasets.list()
37
+ ```
@@ -0,0 +1,22 @@
1
+ # decompressed-sdk
2
+
3
+ Public Python SDK for Decompressed.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install decompressed-sdk
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```python
14
+ from decompressed_sdk import DecompressedClient
15
+
16
+ client = DecompressedClient(
17
+ base_url="https://api.decompressed.ai",
18
+ api_key="dsk_test_123",
19
+ )
20
+
21
+ datasets = client.datasets.list()
22
+ ```
@@ -0,0 +1,26 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "decompressed-sdk"
7
+ version = "0.2.0"
8
+ description = "Decompressed public SDK (Python)"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {text = "MIT"}
12
+ authors = [{name = "Decompressed"}]
13
+ dependencies = [
14
+ "requests>=2.31.0",
15
+ ]
16
+
17
+ [project.urls]
18
+ Homepage = "https://github.com/decompressed/decompressed"
19
+ Repository = "https://github.com/decompressed/decompressed"
20
+ Issues = "https://github.com/decompressed/decompressed/issues"
21
+
22
+ [tool.setuptools]
23
+ package-dir = {"" = "src"}
24
+
25
+ [tool.setuptools.packages.find]
26
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,139 @@
1
+ """
2
+ Decompressed SDK - Version-First Vector Data Platform.
3
+
4
+ This SDK implements "Snowflake for vectors" semantics where datasets
5
+ are version-aware and all operations require explicit version pinning.
6
+
7
+ Example:
8
+ from decompressed_sdk import DecompressedClient
9
+
10
+ client = DecompressedClient(
11
+ base_url="https://api.decompressed.io",
12
+ api_key="your-api-key"
13
+ )
14
+
15
+ # Get dataset reference
16
+ dataset = client.datasets.get("my-dataset")
17
+
18
+ # Pin to specific version for reproducible operations
19
+ v1 = dataset.version(1)
20
+ info = v1.info()
21
+ results = v1.search([[0.1, 0.2, 0.3]], top_k=10)
22
+
23
+ # Compare versions (LLM regression testing)
24
+ diff = v1.compare_to(2)
25
+
26
+ # Time travel
27
+ yesterday = dataset.at("2024-01-28T00:00:00Z")
28
+ """
29
+
30
+ from .client import DecompressedClient
31
+ from .auth import ApiKeyAuth, JwtAuth, AuthStrategy
32
+ from .errors import (
33
+ DecompressedError,
34
+ AuthenticationError,
35
+ NotFoundError,
36
+ ValidationError,
37
+ RateLimitError,
38
+ ServerError,
39
+ )
40
+
41
+ # Re-export types for convenience
42
+ from .types import (
43
+ # Datasets
44
+ Dataset,
45
+ DatasetInfo,
46
+ DatasetQueryResponse,
47
+ DatasetVersion,
48
+ UploadSession,
49
+ AppendSession,
50
+ JobStatus,
51
+ UploadResult,
52
+ AppendResult,
53
+ # Versions (NEW - core of version-first design)
54
+ DatasetRef,
55
+ VersionedDataset,
56
+ DatasetEvent,
57
+ VersionInfo,
58
+ VersionComparison,
59
+ # Materializations
60
+ Materialization,
61
+ MaterializationType,
62
+ MaterializationStatus,
63
+ CreateMaterializationResponse,
64
+ MaterializationEstimate,
65
+ MaterializationDownloadFile,
66
+ MaterializationDownloadResponse,
67
+ # Connectors
68
+ Connector,
69
+ # Syncs
70
+ SyncJob,
71
+ SyncResult,
72
+ SyncValidation,
73
+ SyncState,
74
+ # Embeddings
75
+ SourceReference,
76
+ EmbeddingJob,
77
+ EmbeddingJobStatus,
78
+ EmbeddingResult,
79
+ # Search
80
+ SearchMatch,
81
+ SearchResponse,
82
+ )
83
+
84
+ __version__ = "0.2.0"
85
+
86
+ __all__ = [
87
+ # Client
88
+ "DecompressedClient",
89
+ # Auth
90
+ "ApiKeyAuth",
91
+ "JwtAuth",
92
+ "AuthStrategy",
93
+ # Errors
94
+ "DecompressedError",
95
+ "AuthenticationError",
96
+ "NotFoundError",
97
+ "ValidationError",
98
+ "RateLimitError",
99
+ "ServerError",
100
+ # Types - Datasets
101
+ "Dataset",
102
+ "DatasetInfo",
103
+ "DatasetQueryResponse",
104
+ "DatasetVersion",
105
+ "UploadSession",
106
+ "AppendSession",
107
+ "JobStatus",
108
+ "UploadResult",
109
+ "AppendResult",
110
+ # Types - Versions (NEW)
111
+ "DatasetRef",
112
+ "VersionedDataset",
113
+ "DatasetEvent",
114
+ "VersionInfo",
115
+ "VersionComparison",
116
+ # Types - Materializations
117
+ "Materialization",
118
+ "MaterializationType",
119
+ "MaterializationStatus",
120
+ "CreateMaterializationResponse",
121
+ "MaterializationEstimate",
122
+ "MaterializationDownloadFile",
123
+ "MaterializationDownloadResponse",
124
+ # Types - Connectors
125
+ "Connector",
126
+ # Types - Syncs
127
+ "SyncJob",
128
+ "SyncResult",
129
+ "SyncValidation",
130
+ "SyncState",
131
+ # Types - Embeddings
132
+ "SourceReference",
133
+ "EmbeddingJob",
134
+ "EmbeddingJobStatus",
135
+ "EmbeddingResult",
136
+ # Types - Search
137
+ "SearchMatch",
138
+ "SearchResponse",
139
+ ]
@@ -0,0 +1,33 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Protocol
5
+
6
+
7
class AuthStrategy(Protocol):
    """Structural interface for objects that supply authentication headers."""

    def headers(self) -> Dict[str, str]:
        """Return the HTTP headers to merge into an outgoing request."""
10
+
11
+
12
@dataclass(frozen=True)
class ApiKeyAuth:
    """Authenticate requests with an API key sent as a bearer token.

    Attributes are immutable once constructed.

    Raises:
        ValueError: If ``api_key`` is not a non-blank string.
    """

    api_key: str

    def __post_init__(self) -> None:
        # A blank or non-string key would otherwise produce a malformed
        # "Bearer ..." header that only fails once a request is sent.
        if not isinstance(self.api_key, str) or not self.api_key.strip():
            raise ValueError("API key is required")

    def __repr__(self) -> str:
        # The auto-generated dataclass repr would print the key verbatim,
        # leaking the secret into logs and tracebacks.
        return f"{type(self).__name__}(api_key='***')"

    def headers(self) -> Dict[str, str]:
        """Return the ``Authorization`` header carrying the API key."""
        return {"Authorization": f"Bearer {self.api_key}"}
22
+
23
+
24
@dataclass(frozen=True)
class JwtAuth:
    """Authenticate requests with a JWT sent as a bearer token.

    Attributes are immutable once constructed.

    Raises:
        ValueError: If ``jwt`` is not a non-blank string.
    """

    jwt: str

    def __post_init__(self) -> None:
        # A blank or non-string token would otherwise produce a malformed
        # "Bearer ..." header that only fails once a request is sent.
        if not isinstance(self.jwt, str) or not self.jwt.strip():
            raise ValueError("JWT is required")

    def __repr__(self) -> str:
        # The auto-generated dataclass repr would print the token verbatim,
        # leaking the credential into logs and tracebacks.
        return f"{type(self).__name__}(jwt='***')"

    def headers(self) -> Dict[str, str]:
        """Return the ``Authorization`` header carrying the JWT."""
        return {"Authorization": f"Bearer {self.jwt}"}
@@ -0,0 +1,165 @@
1
+ """
2
+ Decompressed SDK Client - Version-First Design.
3
+
4
+ This client implements "Snowflake for vectors" semantics where datasets
5
+ are version-aware and all operations require explicit version pinning.
6
+
7
+ Example:
8
+ from decompressed_sdk import DecompressedClient
9
+
10
+ client = DecompressedClient(
11
+ base_url="https://api.decompressed.io",
12
+ api_key="your-api-key"
13
+ )
14
+
15
+ # Get dataset reference
16
+ dataset = client.datasets.get("my-dataset")
17
+
18
+ # Pin to specific version for reproducible operations
19
+ v1 = dataset.version(1)
20
+ info = v1.info()
21
+ results = v1.search([[0.1, 0.2, 0.3]], top_k=10)
22
+
23
+ # Or use latest explicitly
24
+ latest = dataset.latest()
25
+
26
+ # Time travel to a specific point
27
+ yesterday = dataset.at("2024-01-28T00:00:00Z")
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ from typing import Any, Dict, Optional
33
+
34
+ import requests
35
+
36
+ from .auth import ApiKeyAuth, AuthStrategy, JwtAuth
37
+ from .errors import raise_for_status
38
+ from .resources.datasets import DatasetsResource
39
+ from .resources.materializations import MaterializationsResource
40
+ from .resources.connectors import ConnectorsResource
41
+ from .resources.syncs import SyncsResource
42
+ from .resources.embeddings import EmbeddingsResource
43
+
44
+
45
class DecompressedClient:
    """
    Decompressed SDK client with version-first design.

    All dataset operations require explicit version pinning to ensure
    reproducibility and auditability.

    Args:
        base_url: API base URL (e.g., "https://api.decompressed.io")
        api_key: API key for authentication
        jwt: JWT token for authentication (alternative to api_key)
        session: Optional requests.Session for connection pooling
        timeout_seconds: Request timeout in seconds

    Raises:
        ValueError: If both ``api_key`` and ``jwt`` are provided.

    Example:
        client = DecompressedClient(
            base_url="https://api.decompressed.io",
            api_key="dcp_..."
        )

        # Get dataset and pin to version
        dataset = client.datasets.get("embeddings-v2")
        v3 = dataset.version(3)

        # All operations use version 3
        info = v3.info()
        results = v3.search([[0.1, 0.2, 0.3]])
    """

    def __init__(
        self,
        *,
        base_url: str,
        api_key: Optional[str] = None,
        jwt: Optional[str] = None,
        session: Optional[requests.Session] = None,
        timeout_seconds: float = 60.0,
    ) -> None:
        if api_key and jwt:
            raise ValueError("Provide only one of api_key or jwt")

        # Paths passed to request() start with "/", so drop any trailing
        # slash here to avoid "//" in the joined URL.
        self._base_url = base_url.rstrip("/")
        self._timeout_seconds = timeout_seconds

        # With neither credential supplied the client sends no
        # Authorization header at all.
        self._auth: Optional[AuthStrategy] = None
        if api_key:
            self._auth = ApiKeyAuth(api_key)
        if jwt:
            self._auth = JwtAuth(jwt)

        self._session = session or requests.Session()

        # Resource accessors
        self.datasets = DatasetsResource(self)
        self.connectors = ConnectorsResource(self)
        self.syncs = SyncsResource(self)
        self.embeddings = EmbeddingsResource(self)

    def materializations(self, dataset_id: str) -> MaterializationsResource:
        """
        Get a MaterializationsResource for managing materializations of a dataset.

        Args:
            dataset_id: The dataset ID or name

        Returns:
            MaterializationsResource for the dataset
        """
        return MaterializationsResource(self, dataset_id)

    def _auth_headers(self) -> Dict[str, str]:
        """Return the configured auth headers, or an empty dict when unauthenticated."""
        return dict(self._auth.headers()) if self._auth else {}

    @staticmethod
    def _raise_on_error(resp: requests.Response) -> None:
        """Raise the mapped SDK error when ``resp`` has a 4xx/5xx status."""
        if resp.status_code < 400:
            return
        request_id = resp.headers.get("x-request-id")
        try:
            body = resp.json()
        except ValueError:
            # Error body is not JSON; keep the raw text as the details.
            body = resp.text
        raise_for_status(resp.status_code, body, request_id)

    def request(self, method: str, path: str, json: Any = None) -> Any:
        """
        Make an authenticated request to the API.

        Args:
            method: HTTP method name
            path: Path appended verbatim to the base URL
            json: Optional JSON-serializable request body

        Returns:
            The decoded JSON body, or None for a 204 response or any
            successful response with an empty body.

        Raises:
            DecompressedError: (or a subclass) for 4xx/5xx responses.
        """
        headers: Dict[str, str] = {"Accept": "application/json"}
        headers.update(self._auth_headers())

        resp = self._session.request(
            method=method,
            url=f"{self._base_url}{path}",
            headers=headers,
            json=json,
            timeout=self._timeout_seconds,
        )
        self._raise_on_error(resp)

        # An empty successful body has nothing to decode; calling
        # resp.json() on it would raise instead of returning.
        if resp.status_code == 204 or not resp.content:
            return None

        return resp.json()

    def request_raw(self, method: str, path: str) -> requests.Response:
        """
        Make an authenticated request and return raw response (for binary data).

        Args:
            method: HTTP method name
            path: Path appended verbatim to the base URL

        Returns:
            The response object, undecoded.

        Raises:
            DecompressedError: (or a subclass) for 4xx/5xx responses.
        """
        resp = self._session.request(
            method=method,
            url=f"{self._base_url}{path}",
            headers=self._auth_headers(),
            timeout=self._timeout_seconds,
        )
        self._raise_on_error(resp)
        return resp
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Optional
5
+
6
+
7
@dataclass(eq=False)
class DecompressedError(Exception):
    """Base class for all errors raised by the SDK.

    ``eq=False`` keeps the identity-based equality and hashing that
    ordinary exceptions have; the default dataclass ``__eq__`` would set
    ``__hash__`` to ``None`` and make instances unhashable.

    Attributes:
        message: Human-readable description of the failure.
        status: HTTP status code, when the error came from a response.
        request_id: Request identifier supplied by the caller, if any.
        details: Extra payload describing the failure, if any.
    """

    message: str
    status: Optional[int] = None
    request_id: Optional[str] = None
    details: Any = None

    def __post_init__(self) -> None:
        # The generated __init__ never calls Exception.__init__, so `args`
        # would stay empty when the error is built with keywords only.
        super().__init__(self.message)

    def __str__(self) -> str:
        # __str__ must return a str; a structured message (list/dict)
        # would otherwise raise TypeError when the error is printed.
        if isinstance(self.message, str):
            return self.message
        return str(self.message)
16
+
17
+
18
class AuthenticationError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 401 responses."""
20
+
21
+
22
class PermissionError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 403 responses.

    NOTE(review): this name shadows Python's builtin ``PermissionError``
    inside this module; import it explicitly from here to catch it.
    """
24
+
25
+
26
class NotFoundError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 404 responses."""
28
+
29
+
30
class ValidationError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 422 responses."""
32
+
33
+
34
class RateLimitError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP 429 responses."""
36
+
37
+
38
class ServerError(DecompressedError):
    """Error raised by ``raise_for_status`` for HTTP statuses of 500 and above."""
40
+
41
+
42
def raise_for_status(status: int, body: Any, request_id: Optional[str]) -> None:
    """Raise the SDK error that corresponds to an HTTP error status.

    This function always raises; it never returns normally.

    Args:
        status: HTTP status code of the response.
        body: Decoded response body. When it is a dict, its ``detail`` or
            ``message`` entry is used as the error message.
        request_id: Request identifier to attach to the error, if any.

    Raises:
        AuthenticationError: For status 401.
        PermissionError: For status 403 (the SDK class, not the builtin).
        NotFoundError: For status 404.
        ValidationError: For status 422.
        RateLimitError: For status 429.
        ServerError: For status 500 and above.
        DecompressedError: For any other status.
    """
    message = None
    if isinstance(body, dict):
        message = body.get("detail") or body.get("message")
    if not message:
        message = f"HTTP {status}"
    elif not isinstance(message, str):
        # The entry may be structured (e.g. a list of validation issues);
        # the error's message must be a string for str(err) to work.
        message = str(message)

    if status >= 500:
        error_cls = ServerError
    else:
        error_cls = {
            401: AuthenticationError,
            403: PermissionError,
            404: NotFoundError,
            422: ValidationError,
            429: RateLimitError,
        }.get(status, DecompressedError)

    raise error_cls(message, status=status, request_id=request_id, details=body)
@@ -0,0 +1,24 @@
1
+ """Decompressed SDK resource modules.
2
+
3
+ Version-first design:
4
+ - DraftVersion: Mutable staging area before commit
5
+ - CommittedVersion: Immutable, addressable forever
6
+ """
7
+
8
+ from .datasets import DatasetsResource
9
+ from .versions import DraftVersion, CommittedVersion, VersionedDatasetResource
10
+ from .materializations import MaterializationsResource
11
+ from .connectors import ConnectorsResource
12
+ from .syncs import SyncsResource
13
+ from .embeddings import EmbeddingsResource
14
+
15
+ __all__ = [
16
+ "DatasetsResource",
17
+ "DraftVersion",
18
+ "CommittedVersion",
19
+ "VersionedDatasetResource", # Backward compat alias
20
+ "MaterializationsResource",
21
+ "ConnectorsResource",
22
+ "SyncsResource",
23
+ "EmbeddingsResource",
24
+ ]
@@ -0,0 +1,39 @@
1
+ """Connectors resource."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from ..client import DecompressedClient
9
+
10
+ from ..types.connectors import Connector
11
+
12
+
13
+ def _filter_dataclass_kwargs(model: type, data: Dict[str, Any]) -> Dict[str, Any]:
14
+ """Filter dict to only include fields defined in the dataclass."""
15
+ allowed = getattr(model, "__dataclass_fields__", None)
16
+ if not allowed:
17
+ return data
18
+ return {k: v for k, v in data.items() if k in allowed}
19
+
20
+
21
class ConnectorsResource:
    """Manage connectors (vector databases, external services)."""

    def __init__(self, client: "DecompressedClient") -> None:
        self._client = client

    @staticmethod
    def _connector_path(connector_id: str, suffix: str = "") -> str:
        """Build the API path for one connector, percent-encoding its ID."""
        from urllib.parse import quote

        # safe="" also encodes "/", so an ID can never add path segments
        # or a query string to the request URL.
        return f"/api/v1/connectors/{quote(str(connector_id), safe='')}{suffix}"

    def list(self) -> List[Connector]:
        """List all connectors.

        Returns:
            One Connector per item in the response; response keys that are
            not Connector fields are dropped.
        """
        data = self._client.request("GET", "/api/v1/connectors")
        return [Connector(**_filter_dataclass_kwargs(Connector, item)) for item in data]

    def get(self, connector_id: str) -> Connector:
        """Get a connector by ID.

        Args:
            connector_id: Identifier of the connector to fetch.

        Returns:
            The Connector built from the response; response keys that are
            not Connector fields are dropped.
        """
        data = self._client.request("GET", self._connector_path(connector_id))
        return Connector(**_filter_dataclass_kwargs(Connector, data))

    def test(self, connector_id: str) -> Dict[str, Any]:
        """Test a connector's connection.

        Args:
            connector_id: Identifier of the connector to test.

        Returns:
            The response body exactly as returned by the client.
        """
        return self._client.request("POST", self._connector_path(connector_id, "/test"))