athena-intelligence 0.1.122__tar.gz → 0.1.123__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/PKG-INFO +1 -1
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/pyproject.toml +1 -1
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/__init__.py +22 -2
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/client_wrapper.py +1 -1
- athena_intelligence-0.1.123/src/athena/tools/structured_data_extractor/client.py +264 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/__init__.py +20 -2
- athena_intelligence-0.1.123/src/athena/types/chunk.py +36 -0
- athena_intelligence-0.1.123/src/athena/types/chunk_content_item.py +58 -0
- athena_intelligence-0.1.123/src/athena/types/chunk_result.py +35 -0
- athena_intelligence-0.1.123/src/athena/types/chunk_result_chunk_id.py +5 -0
- athena_intelligence-0.1.123/src/athena/types/image_url_content.py +33 -0
- athena_intelligence-0.1.123/src/athena/types/prompt_message.py +35 -0
- athena_intelligence-0.1.122/src/athena/types/structured_data_extractor_reponse.py → athena_intelligence-0.1.123/src/athena/types/structured_data_extractor_response.py +9 -3
- athena_intelligence-0.1.123/src/athena/types/text_content.py +33 -0
- athena_intelligence-0.1.123/src/athena/types/type.py +25 -0
- athena_intelligence-0.1.122/src/athena/tools/structured_data_extractor/client.py +0 -150
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/README.md +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/drive/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/drive/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/general/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/general/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/research/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/research/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/sql/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/sql/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/base_client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/api_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/datetime_utils.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/file.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/http_client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/jsonable_encoder.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/pydantic_utilities.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/query_encoder.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/remove_none_from_dict.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/request_options.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/environment.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/bad_request_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/content_too_large_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/internal_server_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/not_found_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/unauthorized_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/unprocessable_entity_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/unsupported_media_type_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/py.typed +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/query/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/query/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/query/types/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/query/types/query_execute_request_database_asset_ids.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/calendar/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/calendar/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/email/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/email/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/structured_data_extractor/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/tasks/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/tasks/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/types/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/types/tools_data_frame_request_columns_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/asset_not_found_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/custom_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/data_frame_request_out.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/data_frame_request_out_columns_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/data_frame_request_out_data_item_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/data_frame_request_out_index_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/data_frame_unknown_format_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/document_chunk.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/drive_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/file_chunk_request_out.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/file_too_large_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/general_agent_config.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/general_agent_config_enabled_tools_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/general_agent_request.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/general_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/parent_folder_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/research_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/save_asset_request_out.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/sql_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/tool.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/version.py +0 -0
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
from .types import (
|
4
4
|
AssetNotFoundError,
|
5
|
+
Chunk,
|
6
|
+
ChunkContentItem,
|
7
|
+
ChunkContentItem_ImageUrl,
|
8
|
+
ChunkContentItem_Text,
|
9
|
+
ChunkResult,
|
10
|
+
ChunkResultChunkId,
|
5
11
|
CustomAgentResponse,
|
6
12
|
DataFrameRequestOut,
|
7
13
|
DataFrameRequestOutColumnsItem,
|
@@ -16,12 +22,16 @@ from .types import (
|
|
16
22
|
GeneralAgentConfigEnabledToolsItem,
|
17
23
|
GeneralAgentRequest,
|
18
24
|
GeneralAgentResponse,
|
25
|
+
ImageUrlContent,
|
19
26
|
ParentFolderError,
|
27
|
+
PromptMessage,
|
20
28
|
ResearchAgentResponse,
|
21
29
|
SaveAssetRequestOut,
|
22
30
|
SqlAgentResponse,
|
23
|
-
|
31
|
+
StructuredDataExtractorResponse,
|
32
|
+
TextContent,
|
24
33
|
Tool,
|
34
|
+
Type,
|
25
35
|
)
|
26
36
|
from .errors import (
|
27
37
|
BadRequestError,
|
@@ -42,6 +52,12 @@ __all__ = [
|
|
42
52
|
"AssetNotFoundError",
|
43
53
|
"AthenaEnvironment",
|
44
54
|
"BadRequestError",
|
55
|
+
"Chunk",
|
56
|
+
"ChunkContentItem",
|
57
|
+
"ChunkContentItem_ImageUrl",
|
58
|
+
"ChunkContentItem_Text",
|
59
|
+
"ChunkResult",
|
60
|
+
"ChunkResultChunkId",
|
45
61
|
"ContentTooLargeError",
|
46
62
|
"CustomAgentResponse",
|
47
63
|
"DataFrameRequestOut",
|
@@ -57,16 +73,20 @@ __all__ = [
|
|
57
73
|
"GeneralAgentConfigEnabledToolsItem",
|
58
74
|
"GeneralAgentRequest",
|
59
75
|
"GeneralAgentResponse",
|
76
|
+
"ImageUrlContent",
|
60
77
|
"InternalServerError",
|
61
78
|
"NotFoundError",
|
62
79
|
"ParentFolderError",
|
80
|
+
"PromptMessage",
|
63
81
|
"QueryExecuteRequestDatabaseAssetIds",
|
64
82
|
"ResearchAgentResponse",
|
65
83
|
"SaveAssetRequestOut",
|
66
84
|
"SqlAgentResponse",
|
67
|
-
"
|
85
|
+
"StructuredDataExtractorResponse",
|
86
|
+
"TextContent",
|
68
87
|
"Tool",
|
69
88
|
"ToolsDataFrameRequestColumnsItem",
|
89
|
+
"Type",
|
70
90
|
"UnauthorizedError",
|
71
91
|
"UnprocessableEntityError",
|
72
92
|
"UnsupportedMediaTypeError",
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/client_wrapper.py
RENAMED
@@ -17,7 +17,7 @@ class BaseClientWrapper:
|
|
17
17
|
headers: typing.Dict[str, str] = {
|
18
18
|
"X-Fern-Language": "Python",
|
19
19
|
"X-Fern-SDK-Name": "athena-intelligence",
|
20
|
-
"X-Fern-SDK-Version": "0.1.
|
20
|
+
"X-Fern-SDK-Version": "0.1.123",
|
21
21
|
}
|
22
22
|
headers["X-API-KEY"] = self.api_key
|
23
23
|
return headers
|
@@ -0,0 +1,264 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import typing
|
4
|
+
from json.decoder import JSONDecodeError
|
5
|
+
|
6
|
+
from ...core.api_error import ApiError
|
7
|
+
from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
8
|
+
from ...core.pydantic_utilities import pydantic_v1
|
9
|
+
from ...core.request_options import RequestOptions
|
10
|
+
from ...errors.unprocessable_entity_error import UnprocessableEntityError
|
11
|
+
from ...types.chunk import Chunk
|
12
|
+
from ...types.prompt_message import PromptMessage
|
13
|
+
from ...types.structured_data_extractor_response import StructuredDataExtractorResponse
|
14
|
+
|
15
|
+
# this is used as the default value for optional parameters
|
16
|
+
OMIT = typing.cast(typing.Any, ...)
|
17
|
+
|
18
|
+
|
19
|
+
class StructuredDataExtractorClient:
|
20
|
+
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
21
|
+
self._client_wrapper = client_wrapper
|
22
|
+
|
23
|
+
def invoke(
|
24
|
+
self,
|
25
|
+
*,
|
26
|
+
chunks: typing.Sequence[Chunk],
|
27
|
+
json_schema: typing.Dict[str, typing.Any],
|
28
|
+
chunk_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
|
29
|
+
reduce: typing.Optional[bool] = OMIT,
|
30
|
+
reduce_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
|
31
|
+
request_options: typing.Optional[RequestOptions] = None
|
32
|
+
) -> StructuredDataExtractorResponse:
|
33
|
+
"""
|
34
|
+
Extract structured data.
|
35
|
+
|
36
|
+
tl;dr:
|
37
|
+
|
38
|
+
- pass a valid JSON schema in `json_schema`
|
39
|
+
- pass the page chunks as a list of `Chunk` objects, by default: {"type": "text", "content": "..."}
|
40
|
+
- leave all other fields as default
|
41
|
+
|
42
|
+
Detailed configuration (only relevant for complex use cases):
|
43
|
+
|
44
|
+
The structured data extractor's architecture follows the map-reduce pattern,
|
45
|
+
where the asset is divided into chunks, the schema is extracted from each chunk,
|
46
|
+
and the chunks are then reduced to a single structured data object.
|
47
|
+
|
48
|
+
In some applications, you may not want to: - map (if your input asset is small enough) - reduce (if your output object is large enough that it will overflow the output length;
|
49
|
+
if you're extracting a long list of entities; or if you want
|
50
|
+
to extract all instances of the schema).
|
51
|
+
|
52
|
+
You can configure these behaviors with the `map` and `reduce` fields.
|
53
|
+
|
54
|
+
Parameters
|
55
|
+
----------
|
56
|
+
chunks : typing.Sequence[Chunk]
|
57
|
+
The chunks from which to extract structured data.
|
58
|
+
|
59
|
+
json_schema : typing.Dict[str, typing.Any]
|
60
|
+
The JSON schema to use for validation (version draft 2020-12). See the docs [here](https://json-schema.org/learn/getting-started-step-by-step).
|
61
|
+
|
62
|
+
chunk_messages : typing.Optional[typing.Sequence[PromptMessage]]
|
63
|
+
The prompt to use for the data extraction over *each individual chunk*. It must be a list of messages. The chunk content will be appended as a list of human messages.
|
64
|
+
|
65
|
+
reduce : typing.Optional[bool]
|
66
|
+
If `map`, whether to reduce the chunks to a single structured object (true) or return the full list (false). Use True unless you want to preserve duplicates from each page or expect the object to overflow the output context.
|
67
|
+
|
68
|
+
reduce_messages : typing.Optional[typing.Sequence[PromptMessage]]
|
69
|
+
The prompt to use for the reduce steps. It must be a list of messages. The two extraction attempts will be appended as a list of human messages.
|
70
|
+
|
71
|
+
request_options : typing.Optional[RequestOptions]
|
72
|
+
Request-specific configuration.
|
73
|
+
|
74
|
+
Returns
|
75
|
+
-------
|
76
|
+
StructuredDataExtractorResponse
|
77
|
+
Successful Response
|
78
|
+
|
79
|
+
Examples
|
80
|
+
--------
|
81
|
+
from athena import Chunk, ChunkContentItem_Text
|
82
|
+
from athena.client import Athena
|
83
|
+
|
84
|
+
client = Athena(
|
85
|
+
api_key="YOUR_API_KEY",
|
86
|
+
)
|
87
|
+
client.tools.structured_data_extractor.invoke(
|
88
|
+
chunks=[
|
89
|
+
Chunk(
|
90
|
+
chunk_id="1",
|
91
|
+
content=[
|
92
|
+
ChunkContentItem_Text(
|
93
|
+
text="John Smith is a 35 year old developer. You can reach him at john.smith@example.com",
|
94
|
+
)
|
95
|
+
],
|
96
|
+
),
|
97
|
+
Chunk(
|
98
|
+
chunk_id="2",
|
99
|
+
content=[
|
100
|
+
ChunkContentItem_Text(
|
101
|
+
text="Jane Doe is a 25 year old developer. You can reach her at jane@example.com",
|
102
|
+
)
|
103
|
+
],
|
104
|
+
),
|
105
|
+
],
|
106
|
+
json_schema={
|
107
|
+
"description": "A person",
|
108
|
+
"properties": {
|
109
|
+
"age": {"type": "integer"},
|
110
|
+
"email": {"type": "string"},
|
111
|
+
"name": {"type": "string"},
|
112
|
+
},
|
113
|
+
"required": ["name"],
|
114
|
+
"title": "Person",
|
115
|
+
"type": "object",
|
116
|
+
},
|
117
|
+
)
|
118
|
+
"""
|
119
|
+
_response = self._client_wrapper.httpx_client.request(
|
120
|
+
"api/v0/tools/structured-data-extractor/invoke",
|
121
|
+
method="POST",
|
122
|
+
json={
|
123
|
+
"chunk_messages": chunk_messages,
|
124
|
+
"chunks": chunks,
|
125
|
+
"json_schema": json_schema,
|
126
|
+
"reduce": reduce,
|
127
|
+
"reduce_messages": reduce_messages,
|
128
|
+
},
|
129
|
+
request_options=request_options,
|
130
|
+
omit=OMIT,
|
131
|
+
)
|
132
|
+
if 200 <= _response.status_code < 300:
|
133
|
+
return pydantic_v1.parse_obj_as(StructuredDataExtractorResponse, _response.json()) # type: ignore
|
134
|
+
if _response.status_code == 422:
|
135
|
+
raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
136
|
+
try:
|
137
|
+
_response_json = _response.json()
|
138
|
+
except JSONDecodeError:
|
139
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
140
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
141
|
+
|
142
|
+
|
143
|
+
class AsyncStructuredDataExtractorClient:
|
144
|
+
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
145
|
+
self._client_wrapper = client_wrapper
|
146
|
+
|
147
|
+
async def invoke(
|
148
|
+
self,
|
149
|
+
*,
|
150
|
+
chunks: typing.Sequence[Chunk],
|
151
|
+
json_schema: typing.Dict[str, typing.Any],
|
152
|
+
chunk_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
|
153
|
+
reduce: typing.Optional[bool] = OMIT,
|
154
|
+
reduce_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
|
155
|
+
request_options: typing.Optional[RequestOptions] = None
|
156
|
+
) -> StructuredDataExtractorResponse:
|
157
|
+
"""
|
158
|
+
Extract structured data.
|
159
|
+
|
160
|
+
tl;dr:
|
161
|
+
|
162
|
+
- pass a valid JSON schema in `json_schema`
|
163
|
+
- pass the page chunks as a list of `Chunk` objects, by default: {"type": "text", "content": "..."}
|
164
|
+
- leave all other fields as default
|
165
|
+
|
166
|
+
Detailed configuration (only relevant for complex use cases):
|
167
|
+
|
168
|
+
The structured data extractor's architecture follows the map-reduce pattern,
|
169
|
+
where the asset is divided into chunks, the schema is extracted from each chunk,
|
170
|
+
and the chunks are then reduced to a single structured data object.
|
171
|
+
|
172
|
+
In some applications, you may not want to: - map (if your input asset is small enough) - reduce (if your output object is large enough that it will overflow the output length;
|
173
|
+
if you're extracting a long list of entities; or if you want
|
174
|
+
to extract all instances of the schema).
|
175
|
+
|
176
|
+
You can configure these behaviors with the `map` and `reduce` fields.
|
177
|
+
|
178
|
+
Parameters
|
179
|
+
----------
|
180
|
+
chunks : typing.Sequence[Chunk]
|
181
|
+
The chunks from which to extract structured data.
|
182
|
+
|
183
|
+
json_schema : typing.Dict[str, typing.Any]
|
184
|
+
The JSON schema to use for validation (version draft 2020-12). See the docs [here](https://json-schema.org/learn/getting-started-step-by-step).
|
185
|
+
|
186
|
+
chunk_messages : typing.Optional[typing.Sequence[PromptMessage]]
|
187
|
+
The prompt to use for the data extraction over *each individual chunk*. It must be a list of messages. The chunk content will be appended as a list of human messages.
|
188
|
+
|
189
|
+
reduce : typing.Optional[bool]
|
190
|
+
If `map`, whether to reduce the chunks to a single structured object (true) or return the full list (false). Use True unless you want to preserve duplicates from each page or expect the object to overflow the output context.
|
191
|
+
|
192
|
+
reduce_messages : typing.Optional[typing.Sequence[PromptMessage]]
|
193
|
+
The prompt to use for the reduce steps. It must be a list of messages. The two extraction attempts will be appended as a list of human messages.
|
194
|
+
|
195
|
+
request_options : typing.Optional[RequestOptions]
|
196
|
+
Request-specific configuration.
|
197
|
+
|
198
|
+
Returns
|
199
|
+
-------
|
200
|
+
StructuredDataExtractorResponse
|
201
|
+
Successful Response
|
202
|
+
|
203
|
+
Examples
|
204
|
+
--------
|
205
|
+
from athena import Chunk, ChunkContentItem_Text
|
206
|
+
from athena.client import AsyncAthena
|
207
|
+
|
208
|
+
client = AsyncAthena(
|
209
|
+
api_key="YOUR_API_KEY",
|
210
|
+
)
|
211
|
+
await client.tools.structured_data_extractor.invoke(
|
212
|
+
chunks=[
|
213
|
+
Chunk(
|
214
|
+
chunk_id="1",
|
215
|
+
content=[
|
216
|
+
ChunkContentItem_Text(
|
217
|
+
text="John Smith is a 35 year old developer. You can reach him at john.smith@example.com",
|
218
|
+
)
|
219
|
+
],
|
220
|
+
),
|
221
|
+
Chunk(
|
222
|
+
chunk_id="2",
|
223
|
+
content=[
|
224
|
+
ChunkContentItem_Text(
|
225
|
+
text="Jane Doe is a 25 year old developer. You can reach her at jane@example.com",
|
226
|
+
)
|
227
|
+
],
|
228
|
+
),
|
229
|
+
],
|
230
|
+
json_schema={
|
231
|
+
"description": "A person",
|
232
|
+
"properties": {
|
233
|
+
"age": {"type": "integer"},
|
234
|
+
"email": {"type": "string"},
|
235
|
+
"name": {"type": "string"},
|
236
|
+
},
|
237
|
+
"required": ["name"],
|
238
|
+
"title": "Person",
|
239
|
+
"type": "object",
|
240
|
+
},
|
241
|
+
)
|
242
|
+
"""
|
243
|
+
_response = await self._client_wrapper.httpx_client.request(
|
244
|
+
"api/v0/tools/structured-data-extractor/invoke",
|
245
|
+
method="POST",
|
246
|
+
json={
|
247
|
+
"chunk_messages": chunk_messages,
|
248
|
+
"chunks": chunks,
|
249
|
+
"json_schema": json_schema,
|
250
|
+
"reduce": reduce,
|
251
|
+
"reduce_messages": reduce_messages,
|
252
|
+
},
|
253
|
+
request_options=request_options,
|
254
|
+
omit=OMIT,
|
255
|
+
)
|
256
|
+
if 200 <= _response.status_code < 300:
|
257
|
+
return pydantic_v1.parse_obj_as(StructuredDataExtractorResponse, _response.json()) # type: ignore
|
258
|
+
if _response.status_code == 422:
|
259
|
+
raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
260
|
+
try:
|
261
|
+
_response_json = _response.json()
|
262
|
+
except JSONDecodeError:
|
263
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
264
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
@@ -1,6 +1,10 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
3
|
from .asset_not_found_error import AssetNotFoundError
|
4
|
+
from .chunk import Chunk
|
5
|
+
from .chunk_content_item import ChunkContentItem, ChunkContentItem_ImageUrl, ChunkContentItem_Text
|
6
|
+
from .chunk_result import ChunkResult
|
7
|
+
from .chunk_result_chunk_id import ChunkResultChunkId
|
4
8
|
from .custom_agent_response import CustomAgentResponse
|
5
9
|
from .data_frame_request_out import DataFrameRequestOut
|
6
10
|
from .data_frame_request_out_columns_item import DataFrameRequestOutColumnsItem
|
@@ -15,15 +19,25 @@ from .general_agent_config import GeneralAgentConfig
|
|
15
19
|
from .general_agent_config_enabled_tools_item import GeneralAgentConfigEnabledToolsItem
|
16
20
|
from .general_agent_request import GeneralAgentRequest
|
17
21
|
from .general_agent_response import GeneralAgentResponse
|
22
|
+
from .image_url_content import ImageUrlContent
|
18
23
|
from .parent_folder_error import ParentFolderError
|
24
|
+
from .prompt_message import PromptMessage
|
19
25
|
from .research_agent_response import ResearchAgentResponse
|
20
26
|
from .save_asset_request_out import SaveAssetRequestOut
|
21
27
|
from .sql_agent_response import SqlAgentResponse
|
22
|
-
from .
|
28
|
+
from .structured_data_extractor_response import StructuredDataExtractorResponse
|
29
|
+
from .text_content import TextContent
|
23
30
|
from .tool import Tool
|
31
|
+
from .type import Type
|
24
32
|
|
25
33
|
__all__ = [
|
26
34
|
"AssetNotFoundError",
|
35
|
+
"Chunk",
|
36
|
+
"ChunkContentItem",
|
37
|
+
"ChunkContentItem_ImageUrl",
|
38
|
+
"ChunkContentItem_Text",
|
39
|
+
"ChunkResult",
|
40
|
+
"ChunkResultChunkId",
|
27
41
|
"CustomAgentResponse",
|
28
42
|
"DataFrameRequestOut",
|
29
43
|
"DataFrameRequestOutColumnsItem",
|
@@ -38,10 +52,14 @@ __all__ = [
|
|
38
52
|
"GeneralAgentConfigEnabledToolsItem",
|
39
53
|
"GeneralAgentRequest",
|
40
54
|
"GeneralAgentResponse",
|
55
|
+
"ImageUrlContent",
|
41
56
|
"ParentFolderError",
|
57
|
+
"PromptMessage",
|
42
58
|
"ResearchAgentResponse",
|
43
59
|
"SaveAssetRequestOut",
|
44
60
|
"SqlAgentResponse",
|
45
|
-
"
|
61
|
+
"StructuredDataExtractorResponse",
|
62
|
+
"TextContent",
|
46
63
|
"Tool",
|
64
|
+
"Type",
|
47
65
|
]
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import datetime as dt
|
4
|
+
import typing
|
5
|
+
|
6
|
+
from ..core.datetime_utils import serialize_datetime
|
7
|
+
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
8
|
+
from .chunk_content_item import ChunkContentItem
|
9
|
+
|
10
|
+
|
11
|
+
class Chunk(pydantic_v1.BaseModel):
|
12
|
+
"""
|
13
|
+
A chunk of content to extract data from.
|
14
|
+
"""
|
15
|
+
|
16
|
+
chunk_id: str
|
17
|
+
content: typing.List[ChunkContentItem]
|
18
|
+
metadata: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None
|
19
|
+
|
20
|
+
def json(self, **kwargs: typing.Any) -> str:
|
21
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
22
|
+
return super().json(**kwargs_with_defaults)
|
23
|
+
|
24
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
25
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
26
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
27
|
+
|
28
|
+
return deep_union_pydantic_dicts(
|
29
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
30
|
+
)
|
31
|
+
|
32
|
+
class Config:
|
33
|
+
frozen = True
|
34
|
+
smart_union = True
|
35
|
+
extra = pydantic_v1.Extra.allow
|
36
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import datetime as dt
|
6
|
+
import typing
|
7
|
+
|
8
|
+
from ..core.datetime_utils import serialize_datetime
|
9
|
+
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
10
|
+
|
11
|
+
|
12
|
+
class ChunkContentItem_Text(pydantic_v1.BaseModel):
|
13
|
+
text: str
|
14
|
+
type: typing.Literal["text"] = "text"
|
15
|
+
|
16
|
+
def json(self, **kwargs: typing.Any) -> str:
|
17
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
18
|
+
return super().json(**kwargs_with_defaults)
|
19
|
+
|
20
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
21
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
22
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
23
|
+
|
24
|
+
return deep_union_pydantic_dicts(
|
25
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
26
|
+
)
|
27
|
+
|
28
|
+
class Config:
|
29
|
+
frozen = True
|
30
|
+
smart_union = True
|
31
|
+
extra = pydantic_v1.Extra.allow
|
32
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
33
|
+
|
34
|
+
|
35
|
+
class ChunkContentItem_ImageUrl(pydantic_v1.BaseModel):
|
36
|
+
image_url: typing.Dict[str, str]
|
37
|
+
type: typing.Literal["image_url"] = "image_url"
|
38
|
+
|
39
|
+
def json(self, **kwargs: typing.Any) -> str:
|
40
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
41
|
+
return super().json(**kwargs_with_defaults)
|
42
|
+
|
43
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
44
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
45
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
46
|
+
|
47
|
+
return deep_union_pydantic_dicts(
|
48
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
49
|
+
)
|
50
|
+
|
51
|
+
class Config:
|
52
|
+
frozen = True
|
53
|
+
smart_union = True
|
54
|
+
extra = pydantic_v1.Extra.allow
|
55
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
56
|
+
|
57
|
+
|
58
|
+
ChunkContentItem = typing.Union[ChunkContentItem_Text, ChunkContentItem_ImageUrl]
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import datetime as dt
|
4
|
+
import typing
|
5
|
+
|
6
|
+
from ..core.datetime_utils import serialize_datetime
|
7
|
+
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
8
|
+
from .chunk_result_chunk_id import ChunkResultChunkId
|
9
|
+
|
10
|
+
|
11
|
+
class ChunkResult(pydantic_v1.BaseModel):
|
12
|
+
"""
|
13
|
+
The result of a chunk extraction.
|
14
|
+
"""
|
15
|
+
|
16
|
+
chunk_id: ChunkResultChunkId
|
17
|
+
chunk_result: typing.Optional[typing.Dict[str, typing.Any]] = None
|
18
|
+
|
19
|
+
def json(self, **kwargs: typing.Any) -> str:
|
20
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
21
|
+
return super().json(**kwargs_with_defaults)
|
22
|
+
|
23
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
24
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
25
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
26
|
+
|
27
|
+
return deep_union_pydantic_dicts(
|
28
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
29
|
+
)
|
30
|
+
|
31
|
+
class Config:
|
32
|
+
frozen = True
|
33
|
+
smart_union = True
|
34
|
+
extra = pydantic_v1.Extra.allow
|
35
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import datetime as dt
|
4
|
+
import typing
|
5
|
+
|
6
|
+
from ..core.datetime_utils import serialize_datetime
|
7
|
+
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
8
|
+
|
9
|
+
|
10
|
+
class ImageUrlContent(pydantic_v1.BaseModel):
|
11
|
+
"""
|
12
|
+
An image content item.
|
13
|
+
"""
|
14
|
+
|
15
|
+
image_url: typing.Dict[str, str]
|
16
|
+
|
17
|
+
def json(self, **kwargs: typing.Any) -> str:
|
18
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
19
|
+
return super().json(**kwargs_with_defaults)
|
20
|
+
|
21
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
22
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
23
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
24
|
+
|
25
|
+
return deep_union_pydantic_dicts(
|
26
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
27
|
+
)
|
28
|
+
|
29
|
+
class Config:
|
30
|
+
frozen = True
|
31
|
+
smart_union = True
|
32
|
+
extra = pydantic_v1.Extra.allow
|
33
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import datetime as dt
|
4
|
+
import typing
|
5
|
+
|
6
|
+
from ..core.datetime_utils import serialize_datetime
|
7
|
+
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
8
|
+
from .type import Type
|
9
|
+
|
10
|
+
|
11
|
+
class PromptMessage(pydantic_v1.BaseModel):
|
12
|
+
"""
|
13
|
+
A message to use for the structured data extractor.
|
14
|
+
"""
|
15
|
+
|
16
|
+
content: str
|
17
|
+
type: Type
|
18
|
+
|
19
|
+
def json(self, **kwargs: typing.Any) -> str:
|
20
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
21
|
+
return super().json(**kwargs_with_defaults)
|
22
|
+
|
23
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
24
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
25
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
26
|
+
|
27
|
+
return deep_union_pydantic_dicts(
|
28
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
29
|
+
)
|
30
|
+
|
31
|
+
class Config:
|
32
|
+
frozen = True
|
33
|
+
smart_union = True
|
34
|
+
extra = pydantic_v1.Extra.allow
|
35
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
@@ -5,16 +5,22 @@ import typing
|
|
5
5
|
|
6
6
|
from ..core.datetime_utils import serialize_datetime
|
7
7
|
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
8
|
+
from .chunk_result import ChunkResult
|
8
9
|
|
9
10
|
|
10
|
-
class
|
11
|
+
class StructuredDataExtractorResponse(pydantic_v1.BaseModel):
|
11
12
|
"""
|
12
13
|
The agent's response.
|
13
14
|
"""
|
14
15
|
|
15
|
-
|
16
|
+
chunk_by_chunk_data: typing.Optional[typing.List[ChunkResult]] = pydantic_v1.Field(default=None)
|
16
17
|
"""
|
17
|
-
The extracted structured data.
|
18
|
+
The extracted structured data for each chunk. A list where each element is guaranteed to match `json_schema`.
|
19
|
+
"""
|
20
|
+
|
21
|
+
reduced_data: typing.Optional[typing.Dict[str, typing.Any]] = pydantic_v1.Field(default=None)
|
22
|
+
"""
|
23
|
+
If reduce is True, the reduced structured data, otherwise null. Guaranteed to match `json_schema`.
|
18
24
|
"""
|
19
25
|
|
20
26
|
def json(self, **kwargs: typing.Any) -> str:
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import datetime as dt
|
4
|
+
import typing
|
5
|
+
|
6
|
+
from ..core.datetime_utils import serialize_datetime
|
7
|
+
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
8
|
+
|
9
|
+
|
10
|
+
class TextContent(pydantic_v1.BaseModel):
|
11
|
+
"""
|
12
|
+
A text content item in a multimodal message content.
|
13
|
+
"""
|
14
|
+
|
15
|
+
text: str
|
16
|
+
|
17
|
+
def json(self, **kwargs: typing.Any) -> str:
|
18
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
19
|
+
return super().json(**kwargs_with_defaults)
|
20
|
+
|
21
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
22
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
23
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
24
|
+
|
25
|
+
return deep_union_pydantic_dicts(
|
26
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
27
|
+
)
|
28
|
+
|
29
|
+
class Config:
|
30
|
+
frozen = True
|
31
|
+
smart_union = True
|
32
|
+
extra = pydantic_v1.Extra.allow
|
33
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import enum
|
4
|
+
import typing
|
5
|
+
|
6
|
+
T_Result = typing.TypeVar("T_Result")
|
7
|
+
|
8
|
+
|
9
|
+
class Type(str, enum.Enum):
|
10
|
+
SYSTEM = "system"
|
11
|
+
HUMAN = "human"
|
12
|
+
USER = "user"
|
13
|
+
|
14
|
+
def visit(
|
15
|
+
self,
|
16
|
+
system: typing.Callable[[], T_Result],
|
17
|
+
human: typing.Callable[[], T_Result],
|
18
|
+
user: typing.Callable[[], T_Result],
|
19
|
+
) -> T_Result:
|
20
|
+
if self is Type.SYSTEM:
|
21
|
+
return system()
|
22
|
+
if self is Type.HUMAN:
|
23
|
+
return human()
|
24
|
+
if self is Type.USER:
|
25
|
+
return user()
|
@@ -1,150 +0,0 @@
|
|
1
|
-
# This file was auto-generated by Fern from our API Definition.
|
2
|
-
|
3
|
-
import typing
|
4
|
-
from json.decoder import JSONDecodeError
|
5
|
-
|
6
|
-
from ...core.api_error import ApiError
|
7
|
-
from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
8
|
-
from ...core.pydantic_utilities import pydantic_v1
|
9
|
-
from ...core.request_options import RequestOptions
|
10
|
-
from ...errors.unprocessable_entity_error import UnprocessableEntityError
|
11
|
-
from ...types.structured_data_extractor_reponse import StructuredDataExtractorReponse
|
12
|
-
|
13
|
-
# this is used as the default value for optional parameters
|
14
|
-
OMIT = typing.cast(typing.Any, ...)
|
15
|
-
|
16
|
-
|
17
|
-
class StructuredDataExtractorClient:
|
18
|
-
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
19
|
-
self._client_wrapper = client_wrapper
|
20
|
-
|
21
|
-
def invoke(
|
22
|
-
self,
|
23
|
-
*,
|
24
|
-
asset_ids: typing.Sequence[str],
|
25
|
-
json_schema: typing.Dict[str, typing.Any],
|
26
|
-
map_: typing.Optional[bool] = OMIT,
|
27
|
-
reduce: typing.Optional[bool] = OMIT,
|
28
|
-
request_options: typing.Optional[RequestOptions] = None
|
29
|
-
) -> StructuredDataExtractorReponse:
|
30
|
-
"""
|
31
|
-
Coming soon! Extract structured data from assets of arbitrary length.
|
32
|
-
|
33
|
-
Parameters
|
34
|
-
----------
|
35
|
-
asset_ids : typing.Sequence[str]
|
36
|
-
The IDs of the assets from which to extract structured data matching `json_schema`.
|
37
|
-
|
38
|
-
json_schema : typing.Dict[str, typing.Any]
|
39
|
-
The JSON schema to use for validation (version draft 2020-12). See the docs [here](https://json-schema.org/learn/getting-started-step-by-step).
|
40
|
-
|
41
|
-
map_ : typing.Optional[bool]
|
42
|
-
Whether to split the asset into chunks and attempt to extract the schema from each chunk. Set to false if you know the asset is small.
|
43
|
-
|
44
|
-
reduce : typing.Optional[bool]
|
45
|
-
If `map`, whether to reduce the chunks to a single structured object (true) or return the full list (false). Use True unless you want to preserve duplicates from each page or expect the object to overflow the output context.
|
46
|
-
|
47
|
-
request_options : typing.Optional[RequestOptions]
|
48
|
-
Request-specific configuration.
|
49
|
-
|
50
|
-
Returns
|
51
|
-
-------
|
52
|
-
StructuredDataExtractorReponse
|
53
|
-
Successful Response
|
54
|
-
|
55
|
-
Examples
|
56
|
-
--------
|
57
|
-
from athena.client import Athena
|
58
|
-
|
59
|
-
client = Athena(
|
60
|
-
api_key="YOUR_API_KEY",
|
61
|
-
)
|
62
|
-
client.tools.structured_data_extractor.invoke(
|
63
|
-
asset_ids=["asset_ids"],
|
64
|
-
json_schema={"key": "value"},
|
65
|
-
)
|
66
|
-
"""
|
67
|
-
_response = self._client_wrapper.httpx_client.request(
|
68
|
-
"api/v0/tools/structured-data-extractor/invoke",
|
69
|
-
method="POST",
|
70
|
-
json={"asset_ids": asset_ids, "json_schema": json_schema, "map": map_, "reduce": reduce},
|
71
|
-
request_options=request_options,
|
72
|
-
omit=OMIT,
|
73
|
-
)
|
74
|
-
if 200 <= _response.status_code < 300:
|
75
|
-
return pydantic_v1.parse_obj_as(StructuredDataExtractorReponse, _response.json()) # type: ignore
|
76
|
-
if _response.status_code == 422:
|
77
|
-
raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
78
|
-
try:
|
79
|
-
_response_json = _response.json()
|
80
|
-
except JSONDecodeError:
|
81
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
82
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
83
|
-
|
84
|
-
|
85
|
-
class AsyncStructuredDataExtractorClient:
|
86
|
-
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
87
|
-
self._client_wrapper = client_wrapper
|
88
|
-
|
89
|
-
async def invoke(
|
90
|
-
self,
|
91
|
-
*,
|
92
|
-
asset_ids: typing.Sequence[str],
|
93
|
-
json_schema: typing.Dict[str, typing.Any],
|
94
|
-
map_: typing.Optional[bool] = OMIT,
|
95
|
-
reduce: typing.Optional[bool] = OMIT,
|
96
|
-
request_options: typing.Optional[RequestOptions] = None
|
97
|
-
) -> StructuredDataExtractorReponse:
|
98
|
-
"""
|
99
|
-
Coming soon! Extract structured data from assets of arbitrary length.
|
100
|
-
|
101
|
-
Parameters
|
102
|
-
----------
|
103
|
-
asset_ids : typing.Sequence[str]
|
104
|
-
The IDs of the assets from which to extract structured data matching `json_schema`.
|
105
|
-
|
106
|
-
json_schema : typing.Dict[str, typing.Any]
|
107
|
-
The JSON schema to use for validation (version draft 2020-12). See the docs [here](https://json-schema.org/learn/getting-started-step-by-step).
|
108
|
-
|
109
|
-
map_ : typing.Optional[bool]
|
110
|
-
Whether to split the asset into chunks and attempt to extract the schema from each chunk. Set to false if you know the asset is small.
|
111
|
-
|
112
|
-
reduce : typing.Optional[bool]
|
113
|
-
If `map`, whether to reduce the chunks to a single structured object (true) or return the full list (false). Use True unless you want to preserve duplicates from each page or expect the object to overflow the output context.
|
114
|
-
|
115
|
-
request_options : typing.Optional[RequestOptions]
|
116
|
-
Request-specific configuration.
|
117
|
-
|
118
|
-
Returns
|
119
|
-
-------
|
120
|
-
StructuredDataExtractorReponse
|
121
|
-
Successful Response
|
122
|
-
|
123
|
-
Examples
|
124
|
-
--------
|
125
|
-
from athena.client import AsyncAthena
|
126
|
-
|
127
|
-
client = AsyncAthena(
|
128
|
-
api_key="YOUR_API_KEY",
|
129
|
-
)
|
130
|
-
await client.tools.structured_data_extractor.invoke(
|
131
|
-
asset_ids=["asset_ids"],
|
132
|
-
json_schema={"key": "value"},
|
133
|
-
)
|
134
|
-
"""
|
135
|
-
_response = await self._client_wrapper.httpx_client.request(
|
136
|
-
"api/v0/tools/structured-data-extractor/invoke",
|
137
|
-
method="POST",
|
138
|
-
json={"asset_ids": asset_ids, "json_schema": json_schema, "map": map_, "reduce": reduce},
|
139
|
-
request_options=request_options,
|
140
|
-
omit=OMIT,
|
141
|
-
)
|
142
|
-
if 200 <= _response.status_code < 300:
|
143
|
-
return pydantic_v1.parse_obj_as(StructuredDataExtractorReponse, _response.json()) # type: ignore
|
144
|
-
if _response.status_code == 422:
|
145
|
-
raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
146
|
-
try:
|
147
|
-
_response_json = _response.json()
|
148
|
-
except JSONDecodeError:
|
149
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
150
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/drive/__init__.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/drive/client.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/general/__init__.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/general/client.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/research/__init__.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/research/client.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/agents/sql/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/datetime_utils.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/jsonable_encoder.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/pydantic_utilities.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/query_encoder.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/remove_none_from_dict.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/core/request_options.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/bad_request_error.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/not_found_error.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/errors/unauthorized_error.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/query/types/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/calendar/__init__.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/calendar/client.py
RENAMED
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/email/__init__.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/email/client.py
RENAMED
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/tasks/__init__.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/tasks/client.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/tools/types/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/document_chunk.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/drive_agent_response.py
RENAMED
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/file_too_large_error.py
RENAMED
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/general_agent_config.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/parent_folder_error.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.123}/src/athena/types/sql_agent_response.py
RENAMED
File without changes
|
File without changes
|
File without changes
|