nomic 3.5.2__tar.gz → 3.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nomic might be problematic. Click here for more details.
- {nomic-3.5.2 → nomic-3.6.0}/PKG-INFO +35 -5
- {nomic-3.5.2 → nomic-3.6.0}/nomic/__init__.py +1 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/cli.py +1 -1
- nomic-3.6.0/nomic/client.py +388 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/dataset.py +40 -1
- {nomic-3.5.2 → nomic-3.6.0}/nomic/embed.py +1 -1
- {nomic-3.5.2 → nomic-3.6.0}/nomic.egg-info/PKG-INFO +35 -5
- {nomic-3.5.2 → nomic-3.6.0}/nomic.egg-info/SOURCES.txt +6 -1
- {nomic-3.5.2 → nomic-3.6.0}/nomic.egg-info/entry_points.txt +0 -1
- {nomic-3.5.2 → nomic-3.6.0}/nomic.egg-info/requires.txt +2 -1
- {nomic-3.5.2 → nomic-3.6.0}/setup.py +4 -2
- nomic-3.6.0/tests/__init__.py +0 -0
- nomic-3.6.0/tests/test_atlas_client.py +789 -0
- nomic-3.6.0/tests/test_documents.py +52 -0
- nomic-3.6.0/tests/test_embed.py +83 -0
- {nomic-3.5.2 → nomic-3.6.0}/README.md +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/atlas.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/aws/__init__.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/aws/sagemaker.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/data_inference.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/data_operations.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/pl_callbacks/__init__.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/pl_callbacks/pl_callback.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/settings.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic/utils.py +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic.egg-info/dependency_links.txt +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/nomic.egg-info/top_level.txt +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/pyproject.toml +0 -0
- {nomic-3.5.2 → nomic-3.6.0}/setup.cfg +0 -0
|
@@ -1,19 +1,51 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nomic
|
|
3
|
-
Version: 3.5.2
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: The official Nomic python client.
|
|
5
5
|
Home-page: https://github.com/nomic-ai/nomic
|
|
6
6
|
Author: nomic.ai
|
|
7
7
|
Author-email: support@nomic.ai
|
|
8
|
-
License: UNKNOWN
|
|
9
|
-
Platform: UNKNOWN
|
|
10
8
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
9
|
Classifier: Programming Language :: Python :: 3
|
|
12
10
|
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: click
|
|
12
|
+
Requires-Dist: jsonlines
|
|
13
|
+
Requires-Dist: jsonschema<5,>=4.23.0
|
|
14
|
+
Requires-Dist: loguru
|
|
15
|
+
Requires-Dist: rich
|
|
16
|
+
Requires-Dist: requests
|
|
17
|
+
Requires-Dist: numpy
|
|
18
|
+
Requires-Dist: pandas
|
|
19
|
+
Requires-Dist: pydantic<3,>=2
|
|
20
|
+
Requires-Dist: tqdm
|
|
21
|
+
Requires-Dist: pyarrow
|
|
22
|
+
Requires-Dist: pillow
|
|
23
|
+
Requires-Dist: pyjwt
|
|
13
24
|
Provides-Extra: local
|
|
25
|
+
Requires-Dist: gpt4all<3,>=2.5.0; extra == "local"
|
|
14
26
|
Provides-Extra: aws
|
|
27
|
+
Requires-Dist: boto3; extra == "aws"
|
|
28
|
+
Requires-Dist: sagemaker; extra == "aws"
|
|
15
29
|
Provides-Extra: all
|
|
30
|
+
Requires-Dist: nomic[aws,local]; extra == "all"
|
|
16
31
|
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: nomic[all]; extra == "dev"
|
|
33
|
+
Requires-Dist: black==24.3.0; extra == "dev"
|
|
34
|
+
Requires-Dist: coverage; extra == "dev"
|
|
35
|
+
Requires-Dist: pylint; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest; extra == "dev"
|
|
37
|
+
Requires-Dist: isort; extra == "dev"
|
|
38
|
+
Requires-Dist: pyright<=1.1.377; extra == "dev"
|
|
39
|
+
Requires-Dist: myst-parser; extra == "dev"
|
|
40
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
|
41
|
+
Requires-Dist: mkautodoc; extra == "dev"
|
|
42
|
+
Requires-Dist: twine; extra == "dev"
|
|
43
|
+
Requires-Dist: mkdocstrings[python]; extra == "dev"
|
|
44
|
+
Requires-Dist: mkdocs-jupyter; extra == "dev"
|
|
45
|
+
Requires-Dist: pillow; extra == "dev"
|
|
46
|
+
Requires-Dist: cairosvg; extra == "dev"
|
|
47
|
+
Requires-Dist: pytorch-lightning; extra == "dev"
|
|
48
|
+
Requires-Dist: pandas; extra == "dev"
|
|
17
49
|
|
|
18
50
|
<h1 align="center">Nomic Atlas Python Client</h1>
|
|
19
51
|
<h3 align="center">Explore, label, search and share massive datasets in your web browser.</h3>
|
|
@@ -243,5 +275,3 @@ Join the discussion on our [:hut: Discord](https://discord.gg/myY5YDR8z8) to ask
|
|
|
243
275
|
<br>
|
|
244
276
|
|
|
245
277
|
[Go to top](#)
|
|
246
|
-
|
|
247
|
-
|
|
@@ -53,7 +53,7 @@ def login(token, tenant="production", domain=None):
|
|
|
53
53
|
console.print("Authenticate with the Nomic API", style=style, justify="center")
|
|
54
54
|
console.print(auth0_auth_endpoint, style=style, justify="center")
|
|
55
55
|
console.print(
|
|
56
|
-
"Click the above link to retrieve your access token and then run `nomic login [token]`",
|
|
56
|
+
"Click the above link to retrieve your access token and then run `nomic login \\[token]`",
|
|
57
57
|
style=style,
|
|
58
58
|
justify="center",
|
|
59
59
|
)
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum, auto
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Generic, Literal, TypeVar, overload
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
|
|
11
|
+
import jsonschema
|
|
12
|
+
import requests
|
|
13
|
+
|
|
14
|
+
from nomic.dataset import AtlasClass
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"NomicClient",
|
|
18
|
+
"PlatformTask",
|
|
19
|
+
"TaskFailed",
|
|
20
|
+
"TaskPending",
|
|
21
|
+
"UploadedFile",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
MAX_FAILRESP_LENGTH = 1_000 # chars
|
|
25
|
+
|
|
26
|
+
T = TypeVar("T")
|
|
27
|
+
|
|
28
|
+
client: "AtlasClass | None" = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_client():
    """Return the module-level ``AtlasClass`` instance, creating it on first use."""
    global client
    if client is not None:
        return client
    # First call: build the shared instance and remember it for later callers.
    client = AtlasClass()
    return client
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class UploadedFile:
    """Immutable handle to a file that was uploaded to the Nomic Platform."""

    # URL under which the platform refers to the uploaded file.
    url: str
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class Sentinel(Enum):
    """Marker values that cannot be confused with a real task result."""

    # Placeholder meaning "no result has been stored yet".
    Nothing = auto()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class TaskPending(Exception):
    """Raised when a task's result is requested but the task has not completed yet."""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class TaskFailed(Exception):
    """Raised when the platform reports that a task finished with status ``FAILED``."""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class PlatformTask(Generic[T]):
    """
    An object representing a task on the Nomic Platform.

    Attributes:
        id: The ID of the task.
    """

    _id: str
    _result: "T | Sentinel"

    def __init__(self, id: str):
        self._id = id
        # Sentinel.Nothing means "not fetched yet"; once fetched, the result is cached here.
        self._result = Sentinel.Nothing

    @property
    def id(self) -> str:
        return self._id

    def get(self, timeout: "float | None" = None, *, block: bool = True) -> T:
        """
        Waits for the task to complete and returns the result.

        The result is cached after the first successful call, so later calls
        return immediately without contacting the platform.

        Args:
            timeout: The maximum time (in seconds) to wait for the task to complete.
                ``None`` waits indefinitely.
            block: Whether to block until the task is complete.

        Returns:
            The result of the task.

        Raises:
            TaskPending: If the task is not complete and either block is False
                or the timeout has elapsed.
            TaskFailed: If the task fails.
            requests.HTTPError: If a status or result request returns a 4xx/5xx response.
        """
        if self._result is not Sentinel.Nothing:
            return self._result
        client = get_client()
        # The deadline is fixed for the whole call, so compute it once instead of
        # on every poll iteration.
        deadline = None if timeout is None else time.time() + timeout
        while True:
            response = client._get(f"/v1/status/{self._id}")
            raise_for_status_with_body(response)
            status_resp = response.json()
            if status_resp["status"] == "COMPLETED":
                break
            if status_resp["status"] == "FAILED":
                raise TaskFailed(status_resp["error"])
            if not block:
                raise TaskPending
            sleeptime = 1  # poll interval
            if deadline is not None:
                now = time.time()
                if deadline < now:
                    raise TaskPending
                # Never sleep past the deadline.
                sleeptime = min(sleeptime, deadline - now)
            time.sleep(sleeptime)

        completed_response = requests.get(status_resp["result_url"])
        raise_for_status_with_body(completed_response)

        # Start from any "result" object in the status payload, then replace its
        # "result" entry with the body downloaded from result_url.
        result = status_resp.pop("result", {})
        result.pop("result_url", None)
        result.pop("result", None)
        result["result"] = completed_response.json()
        # Status bookkeeping is not part of the payload handed back to the caller.
        result["result"].pop("status", None)
        result["result"].pop("error", None)
        self._result = result
        return result
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class NomicClient:
    """Client for the Nomic Platform API."""

    def upload_file(self, path: "str | os.PathLike[str]") -> UploadedFile:
        """
        Uploads a file to the Nomic Platform.

        Args:
            path: The path to the file to upload. Only ``.pdf`` files are
                currently accepted.

        Returns:
            An UploadedFile object representing the uploaded file.

        Raises:
            ValueError: If the file's suffix is not a supported file type.
            requests.HTTPError: If the upload request or the transfer to the
                pre-signed URL returns a 4xx/5xx response.
        """
        client = get_client()

        path = Path(path)

        with path.open("rb") as pdf_file:
            file_type = path.suffix.lower()
            # Only PDF uploads are accepted at present.
            if file_type == ".pdf":
                content_type = "application/pdf"
            else:
                raise ValueError(f"Unsupported file type: {file_type}")

            response = client._post(
                "/v1/upload",
                json=dict(files=[{"id": path.name, "size": path.stat().st_size, "content_type": content_type}]),
            )
            raise_for_status_with_body(response)

            values = response.json()

            # Extract from the files array
            file_info = values["files"][0]
            upload_url = file_info["upload_url"]
            nomic_url = file_info["nomic_url"]

            # upload the file to the designated pre-signed url
            resp = requests.put(upload_url, data=pdf_file, headers={"x-amz-server-side-encryption": "AES256"})

            raise_for_status_with_body(resp)
            return UploadedFile(url=nomic_url)

    @overload
    def parse(self, file: "str | UploadedFile", *, block: Literal[True] = ...) -> "dict[str, Any]": ...
    @overload
    def parse(self, file: "str | UploadedFile", *, block: Literal[False]) -> PlatformTask["dict[str, Any]"]: ...
    @overload
    def parse(self, file: "str | UploadedFile", *, block: bool) -> Any: ...

    def parse(self, file: "str | UploadedFile", *, block: bool = True) -> Any:
        """
        Parses a document into a structured JSON representation.

        Args:
            file: The file to parse. Can be a string URL or an UploadedFile object.
            block: Whether to block until the task is complete.

        Returns:
            By default, returns the parsed document. If block is False, returns a PlatformTask that can be used to get
            the result.

        Raises:
            ValueError: If an invalid URL is passed.
            TaskFailed: If block is True and the task fails.
            requests.HTTPError: If the platform returns a 4xx/5xx response.

        Example:
            Complete end-to-end workflow with upload and parsing:

            ```python
            from nomic.client import NomicClient

            client = NomicClient()

            # Upload a PDF file
            file = client.upload_file("my_document.pdf")

            # Parse the document
            result = client.parse(file)
            print(result)
            ```
        """
        client = get_client()

        response = client._post("/v1/parse", json={"file_url": self._file_to_url(file)})
        raise_for_status_with_body(response)
        task = PlatformTask(response.json()["task_id"])
        if block:
            return task.get()
        return task

    @overload
    def extract(
        self,
        files: "str | UploadedFile | Sequence[str | UploadedFile]",
        schema: "dict[str, Any]",
        *,
        block: Literal[True] = ...,
    ) -> Any: ...
    @overload
    def extract(
        self,
        files: "str | UploadedFile | Sequence[str | UploadedFile]",
        schema: "dict[str, Any]",
        *,
        block: Literal[False],
    ) -> PlatformTask[Any]: ...
    @overload
    def extract(
        self,
        files: "str | UploadedFile | Sequence[str | UploadedFile]",
        schema: "dict[str, Any]",
        *,
        block: bool,
    ) -> Any: ...

    def extract(
        self,
        files: "str | UploadedFile | Sequence[str | UploadedFile]",
        schema: "dict[str, Any]",
        *,
        block: bool = True,
    ) -> Any:
        """
        Extracts structured data from documents.

        Args:
            files: The file(s) to extract from. Either a single string URL or
                UploadedFile, or a sequence of them.
            schema: A JSON schema defining the structure of data to extract.
            block: Whether to block until the task is complete.

        Returns:
            By default, returns the extracted data matching the provided schema. If block is False, returns a PlatformTask
            that can be used to get the result.

        Raises:
            jsonschema.exceptions.SchemaError: If schema is not a valid Draft 7 JSON schema.
            ValueError: If an invalid URL is passed.
            TaskFailed: If block is True and the task fails.
            requests.HTTPError: If the platform returns a 4xx/5xx response.

        Example:
            Complete end-to-end workflow with upload and extraction:

            ```python
            from nomic.client import NomicClient

            client = NomicClient()

            # Upload a PDF file
            file = client.upload_file("my_document.pdf")

            # Define extraction schema
            schema = {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "speaker": {"type": "string"},
                        "content": {"type": "string"},
                    }
                },
            }

            # Extract structured data
            result = client.extract(file, schema)
            print(result)
            ```
        """
        # Reject a malformed schema locally before any request is sent.
        jsonschema.Draft7Validator.check_schema(schema)

        # Normalise a single file to a one-element list.
        if isinstance(files, (str, UploadedFile)):
            files = [files]

        client = get_client()

        response = client._post(
            "/v1/extract", json={"file_urls": list(map(self._file_to_url, files)), "extraction_schema": schema}
        )
        raise_for_status_with_body(response)
        task = PlatformTask(response.json()["task_id"])
        if block:
            return task.get()
        return task

    @staticmethod
    def _file_to_url(file: "str | UploadedFile") -> str:
        """
        Resolves a file argument to the URL string sent to the platform.

        Args:
            file: An UploadedFile, or a URL string with a ``nomic``, ``http`` or ``https`` scheme.

        Returns:
            The URL string.

        Raises:
            ValueError: If the string is a local path or ``file://`` URL, has no
                scheme, or uses an unsupported scheme.
        """
        if isinstance(file, UploadedFile):
            return file.url
        parsed = urlparse(file)
        if parsed.scheme in ("nomic", "http", "https"):
            return file
        # Local files are called out separately so the error can point at upload_file().
        if parsed.scheme == "file" or (not parsed.scheme and Path(file).exists()):
            raise ValueError(
                f"Cannot directly pass local file to platform: {file!r}\nPlease use upload_file() to upload it first."
            )
        if not parsed.scheme:
            raise ValueError(f"Invalid URL: {file!r}")
        raise ValueError(f"Unsupported scheme {parsed.scheme!r} for URL {file!r}")
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def raise_for_status_with_body(resp: requests.Response) -> None:
    """
    Raise ``requests.HTTPError`` for a 4xx/5xx response; do nothing otherwise.

    Works like ``Response.raise_for_status`` but appends the response's
    Content-Type (when present) and its truncated body to the exception
    message for improved diagnostics.

    Args:
        resp: The response to check

    Raises:
        requests.HTTPError: If the response is not successful
    """
    reason = resp.reason
    if isinstance(reason, bytes):
        # We attempt to decode utf-8 first because some servers
        # choose to localize their reason strings. If the string
        # isn't utf-8, we fall back to iso-8859-1 for all other
        # encodings. (See PR #3538)
        try:
            reason = reason.decode("utf-8")
        except UnicodeDecodeError:
            reason = reason.decode("iso-8859-1")

    code = resp.status_code
    if 400 <= code < 500:
        headline = f"{code} Client Error: {reason} for url: {resp.url}"
    elif 500 <= code < 600:
        headline = f"{code} Server Error: {reason} for url: {resp.url}"
    else:
        # Not an error status: nothing to raise.
        return

    message_lines = [headline]
    content_type = resp.headers.get("content-type")
    if content_type is not None:
        message_lines.append(f"Content-Type: {content_type}")
    message_lines.append(f"Body: {format_body(resp)}")
    raise requests.HTTPError("\n".join(message_lines), response=resp)
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def format_body(resp: requests.Response) -> str:
    """
    Render a response body for inclusion in an error message.

    A body whose Content-Type mentions ``application/json`` is pretty-printed
    when it can be decoded; otherwise the raw text is used. Output longer than
    ``MAX_FAILRESP_LENGTH`` characters is cut and a truncation note is appended.

    Args:
        resp: The response whose body should be rendered

    Returns:
        The (possibly truncated) body text
    """
    body = None
    content_type = resp.headers.get("content-type")
    if content_type and "application/json" in content_type.lower():
        try:
            body = json.dumps(resp.json(), indent=2, ensure_ascii=False)
        except Exception:
            # Best effort only: an undecodable JSON body falls back to raw text.
            body = None
    if body is None:
        body = resp.text

    overflow = len(body) - MAX_FAILRESP_LENGTH
    if overflow <= 0:
        return body
    return body[:MAX_FAILRESP_LENGTH] + f"\n… [truncated {overflow} chars]"
|
|
@@ -8,11 +8,12 @@ import os
|
|
|
8
8
|
import re
|
|
9
9
|
import time
|
|
10
10
|
import unicodedata
|
|
11
|
+
from collections.abc import Iterable, Mapping
|
|
11
12
|
from contextlib import contextmanager
|
|
12
13
|
from datetime import datetime
|
|
13
14
|
from io import BytesIO
|
|
14
15
|
from pathlib import Path
|
|
15
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
16
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Protocol, Tuple, TypeVar, Union
|
|
16
17
|
|
|
17
18
|
import numpy as np
|
|
18
19
|
import pandas as pd
|
|
@@ -37,6 +38,20 @@ from .data_operations import AtlasMapData, AtlasMapDuplicates, AtlasMapEmbedding
|
|
|
37
38
|
from .settings import *
|
|
38
39
|
from .utils import assert_valid_project_id, download_feather
|
|
39
40
|
|
|
41
|
+
if TYPE_CHECKING:
|
|
42
|
+
from typing_extensions import TypeAlias
|
|
43
|
+
|
|
44
|
+
T_co = TypeVar("T_co", covariant=True)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class SupportsRead(Protocol[T_co]):
    """Structural type for any object with a ``read([length])`` method returning ``T_co``."""

    def read(self, length: int = ..., /) -> T_co: ...
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
_Data: "TypeAlias" = (
|
|
52
|
+
"Iterable[bytes] | str | bytes | SupportsRead[str | bytes] | list[tuple[Any, Any]] | tuple[tuple[Any, Any], ...] | Mapping[Any, Any]"
|
|
53
|
+
)
|
|
54
|
+
|
|
40
55
|
|
|
41
56
|
class AtlasUser:
|
|
42
57
|
def __init__(self):
|
|
@@ -410,6 +425,30 @@ class AtlasClass(object):
|
|
|
410
425
|
|
|
411
426
|
return organization_slug, organization_id
|
|
412
427
|
|
|
428
|
+
def _post(self, endpoint: str, *, json: "Any | None" = None) -> requests.Response:
    """Send a POST to ``endpoint`` (appended to ``self.atlas_api_path``) with ``self.header`` and a JSON body."""
    url = self.atlas_api_path + endpoint
    return requests.post(url, headers=self.header, json=json)
|
|
435
|
+
|
|
436
|
+
def _get(self, endpoint: str) -> requests.Response:
    """Send a GET to ``endpoint`` (appended to ``self.atlas_api_path``) with ``self.header``."""
    url = self.atlas_api_path + endpoint
    return requests.get(url, headers=self.header)
|
|
442
|
+
|
|
443
|
+
def _put(self, endpoint: str, *, data: "_Data | None" = None, json: "Any | None" = None) -> requests.Response:
    """Send a PUT to ``endpoint`` (appended to ``self.atlas_api_path``) with ``self.header``, passing ``data`` and ``json`` through to ``requests.put``."""
    url = self.atlas_api_path + endpoint
    return requests.put(url, headers=self.header, data=data, json=json)
|
|
451
|
+
|
|
413
452
|
|
|
414
453
|
class AtlasIndex:
|
|
415
454
|
"""
|
|
@@ -1,19 +1,51 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nomic
|
|
3
|
-
Version: 3.5.2
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: The official Nomic python client.
|
|
5
5
|
Home-page: https://github.com/nomic-ai/nomic
|
|
6
6
|
Author: nomic.ai
|
|
7
7
|
Author-email: support@nomic.ai
|
|
8
|
-
License: UNKNOWN
|
|
9
|
-
Platform: UNKNOWN
|
|
10
8
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
9
|
Classifier: Programming Language :: Python :: 3
|
|
12
10
|
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: click
|
|
12
|
+
Requires-Dist: jsonlines
|
|
13
|
+
Requires-Dist: jsonschema<5,>=4.23.0
|
|
14
|
+
Requires-Dist: loguru
|
|
15
|
+
Requires-Dist: rich
|
|
16
|
+
Requires-Dist: requests
|
|
17
|
+
Requires-Dist: numpy
|
|
18
|
+
Requires-Dist: pandas
|
|
19
|
+
Requires-Dist: pydantic<3,>=2
|
|
20
|
+
Requires-Dist: tqdm
|
|
21
|
+
Requires-Dist: pyarrow
|
|
22
|
+
Requires-Dist: pillow
|
|
23
|
+
Requires-Dist: pyjwt
|
|
13
24
|
Provides-Extra: local
|
|
25
|
+
Requires-Dist: gpt4all<3,>=2.5.0; extra == "local"
|
|
14
26
|
Provides-Extra: aws
|
|
27
|
+
Requires-Dist: boto3; extra == "aws"
|
|
28
|
+
Requires-Dist: sagemaker; extra == "aws"
|
|
15
29
|
Provides-Extra: all
|
|
30
|
+
Requires-Dist: nomic[aws,local]; extra == "all"
|
|
16
31
|
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: nomic[all]; extra == "dev"
|
|
33
|
+
Requires-Dist: black==24.3.0; extra == "dev"
|
|
34
|
+
Requires-Dist: coverage; extra == "dev"
|
|
35
|
+
Requires-Dist: pylint; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest; extra == "dev"
|
|
37
|
+
Requires-Dist: isort; extra == "dev"
|
|
38
|
+
Requires-Dist: pyright<=1.1.377; extra == "dev"
|
|
39
|
+
Requires-Dist: myst-parser; extra == "dev"
|
|
40
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
|
41
|
+
Requires-Dist: mkautodoc; extra == "dev"
|
|
42
|
+
Requires-Dist: twine; extra == "dev"
|
|
43
|
+
Requires-Dist: mkdocstrings[python]; extra == "dev"
|
|
44
|
+
Requires-Dist: mkdocs-jupyter; extra == "dev"
|
|
45
|
+
Requires-Dist: pillow; extra == "dev"
|
|
46
|
+
Requires-Dist: cairosvg; extra == "dev"
|
|
47
|
+
Requires-Dist: pytorch-lightning; extra == "dev"
|
|
48
|
+
Requires-Dist: pandas; extra == "dev"
|
|
17
49
|
|
|
18
50
|
<h1 align="center">Nomic Atlas Python Client</h1>
|
|
19
51
|
<h3 align="center">Explore, label, search and share massive datasets in your web browser.</h3>
|
|
@@ -243,5 +275,3 @@ Join the discussion on our [:hut: Discord](https://discord.gg/myY5YDR8z8) to ask
|
|
|
243
275
|
<br>
|
|
244
276
|
|
|
245
277
|
[Go to top](#)
|
|
246
|
-
|
|
247
|
-
|
|
@@ -4,6 +4,7 @@ setup.py
|
|
|
4
4
|
nomic/__init__.py
|
|
5
5
|
nomic/atlas.py
|
|
6
6
|
nomic/cli.py
|
|
7
|
+
nomic/client.py
|
|
7
8
|
nomic/data_inference.py
|
|
8
9
|
nomic/data_operations.py
|
|
9
10
|
nomic/dataset.py
|
|
@@ -19,4 +20,8 @@ nomic.egg-info/top_level.txt
|
|
|
19
20
|
nomic/aws/__init__.py
|
|
20
21
|
nomic/aws/sagemaker.py
|
|
21
22
|
nomic/pl_callbacks/__init__.py
|
|
22
|
-
nomic/pl_callbacks/pl_callback.py
|
|
23
|
+
nomic/pl_callbacks/pl_callback.py
|
|
24
|
+
tests/__init__.py
|
|
25
|
+
tests/test_atlas_client.py
|
|
26
|
+
tests/test_documents.py
|
|
27
|
+
tests/test_embed.py
|
|
@@ -23,7 +23,7 @@ with open("README.md") as f:
|
|
|
23
23
|
|
|
24
24
|
setup(
|
|
25
25
|
name="nomic",
|
|
26
|
-
version="3.5.2",
|
|
26
|
+
version="3.6.0",
|
|
27
27
|
url="https://github.com/nomic-ai/nomic",
|
|
28
28
|
description=description,
|
|
29
29
|
long_description=long_description,
|
|
@@ -35,15 +35,17 @@ setup(
|
|
|
35
35
|
"License :: OSI Approved :: Apache Software License",
|
|
36
36
|
"Programming Language :: Python :: 3",
|
|
37
37
|
],
|
|
38
|
+
requires_python='>=3.8',
|
|
38
39
|
install_requires=[
|
|
39
40
|
"click",
|
|
40
41
|
"jsonlines",
|
|
42
|
+
"jsonschema>=4.23.0,<5",
|
|
41
43
|
"loguru",
|
|
42
44
|
"rich",
|
|
43
45
|
"requests",
|
|
44
46
|
"numpy",
|
|
45
47
|
"pandas",
|
|
46
|
-
"pydantic",
|
|
48
|
+
"pydantic>=2,<3",
|
|
47
49
|
"tqdm",
|
|
48
50
|
"pyarrow",
|
|
49
51
|
"pillow",
|
|
File without changes
|