athena-intelligence 0.1.122__tar.gz → 0.1.124__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/PKG-INFO +1 -1
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/pyproject.toml +1 -1
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/__init__.py +26 -2
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/client_wrapper.py +1 -1
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/client.py +125 -0
- athena_intelligence-0.1.124/src/athena/tools/structured_data_extractor/client.py +270 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/__init__.py +24 -2
- athena_intelligence-0.1.124/src/athena/types/asset_node.py +42 -0
- athena_intelligence-0.1.124/src/athena/types/chunk.py +36 -0
- athena_intelligence-0.1.124/src/athena/types/chunk_content_item.py +58 -0
- athena_intelligence-0.1.124/src/athena/types/chunk_result.py +35 -0
- athena_intelligence-0.1.124/src/athena/types/chunk_result_chunk_id.py +5 -0
- athena_intelligence-0.1.124/src/athena/types/folder_response.py +35 -0
- athena_intelligence-0.1.124/src/athena/types/image_url_content.py +33 -0
- athena_intelligence-0.1.124/src/athena/types/prompt_message.py +35 -0
- athena_intelligence-0.1.122/src/athena/types/structured_data_extractor_reponse.py → athena_intelligence-0.1.124/src/athena/types/structured_data_extractor_response.py +9 -3
- athena_intelligence-0.1.124/src/athena/types/text_content.py +33 -0
- athena_intelligence-0.1.124/src/athena/types/type.py +25 -0
- athena_intelligence-0.1.122/src/athena/tools/structured_data_extractor/client.py +0 -150
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/README.md +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/drive/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/drive/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/general/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/general/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/research/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/research/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/sql/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/sql/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/base_client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/api_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/datetime_utils.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/file.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/http_client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/jsonable_encoder.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/pydantic_utilities.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/query_encoder.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/remove_none_from_dict.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/request_options.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/environment.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/bad_request_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/content_too_large_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/internal_server_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/not_found_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/unauthorized_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/unprocessable_entity_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/unsupported_media_type_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/py.typed +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/query/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/query/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/query/types/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/query/types/query_execute_request_database_asset_ids.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/calendar/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/calendar/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/email/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/email/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/structured_data_extractor/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/tasks/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/tasks/client.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/types/__init__.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/types/tools_data_frame_request_columns_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/asset_not_found_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/custom_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_request_out.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_request_out_columns_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_request_out_data_item_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_request_out_index_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_unknown_format_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/document_chunk.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/drive_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/file_chunk_request_out.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/file_too_large_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/general_agent_config.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/general_agent_config_enabled_tools_item.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/general_agent_request.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/general_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/parent_folder_error.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/research_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/save_asset_request_out.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/sql_agent_response.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/tool.py +0 -0
- {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/version.py +0 -0
@@ -1,7 +1,14 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
3
|
from .types import (
|
4
|
+
AssetNode,
|
4
5
|
AssetNotFoundError,
|
6
|
+
Chunk,
|
7
|
+
ChunkContentItem,
|
8
|
+
ChunkContentItem_ImageUrl,
|
9
|
+
ChunkContentItem_Text,
|
10
|
+
ChunkResult,
|
11
|
+
ChunkResultChunkId,
|
5
12
|
CustomAgentResponse,
|
6
13
|
DataFrameRequestOut,
|
7
14
|
DataFrameRequestOutColumnsItem,
|
@@ -12,16 +19,21 @@ from .types import (
|
|
12
19
|
DriveAgentResponse,
|
13
20
|
FileChunkRequestOut,
|
14
21
|
FileTooLargeError,
|
22
|
+
FolderResponse,
|
15
23
|
GeneralAgentConfig,
|
16
24
|
GeneralAgentConfigEnabledToolsItem,
|
17
25
|
GeneralAgentRequest,
|
18
26
|
GeneralAgentResponse,
|
27
|
+
ImageUrlContent,
|
19
28
|
ParentFolderError,
|
29
|
+
PromptMessage,
|
20
30
|
ResearchAgentResponse,
|
21
31
|
SaveAssetRequestOut,
|
22
32
|
SqlAgentResponse,
|
23
|
-
StructuredDataExtractorReponse,
|
33
|
+
StructuredDataExtractorResponse,
|
34
|
+
TextContent,
|
24
35
|
Tool,
|
36
|
+
Type,
|
25
37
|
)
|
26
38
|
from .errors import (
|
27
39
|
BadRequestError,
|
@@ -39,9 +51,16 @@ from .tools import ToolsDataFrameRequestColumnsItem
|
|
39
51
|
from .version import __version__
|
40
52
|
|
41
53
|
__all__ = [
|
54
|
+
"AssetNode",
|
42
55
|
"AssetNotFoundError",
|
43
56
|
"AthenaEnvironment",
|
44
57
|
"BadRequestError",
|
58
|
+
"Chunk",
|
59
|
+
"ChunkContentItem",
|
60
|
+
"ChunkContentItem_ImageUrl",
|
61
|
+
"ChunkContentItem_Text",
|
62
|
+
"ChunkResult",
|
63
|
+
"ChunkResultChunkId",
|
45
64
|
"ContentTooLargeError",
|
46
65
|
"CustomAgentResponse",
|
47
66
|
"DataFrameRequestOut",
|
@@ -53,20 +72,25 @@ __all__ = [
|
|
53
72
|
"DriveAgentResponse",
|
54
73
|
"FileChunkRequestOut",
|
55
74
|
"FileTooLargeError",
|
75
|
+
"FolderResponse",
|
56
76
|
"GeneralAgentConfig",
|
57
77
|
"GeneralAgentConfigEnabledToolsItem",
|
58
78
|
"GeneralAgentRequest",
|
59
79
|
"GeneralAgentResponse",
|
80
|
+
"ImageUrlContent",
|
60
81
|
"InternalServerError",
|
61
82
|
"NotFoundError",
|
62
83
|
"ParentFolderError",
|
84
|
+
"PromptMessage",
|
63
85
|
"QueryExecuteRequestDatabaseAssetIds",
|
64
86
|
"ResearchAgentResponse",
|
65
87
|
"SaveAssetRequestOut",
|
66
88
|
"SqlAgentResponse",
|
67
|
-
"StructuredDataExtractorReponse",
|
89
|
+
"StructuredDataExtractorResponse",
|
90
|
+
"TextContent",
|
68
91
|
"Tool",
|
69
92
|
"ToolsDataFrameRequestColumnsItem",
|
93
|
+
"Type",
|
70
94
|
"UnauthorizedError",
|
71
95
|
"UnprocessableEntityError",
|
72
96
|
"UnsupportedMediaTypeError",
|
{athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/client_wrapper.py
RENAMED
@@ -17,7 +17,7 @@ class BaseClientWrapper:
|
|
17
17
|
headers: typing.Dict[str, str] = {
|
18
18
|
"X-Fern-Language": "Python",
|
19
19
|
"X-Fern-SDK-Name": "athena-intelligence",
|
20
|
-
"X-Fern-SDK-Version": "0.1.122",
|
20
|
+
"X-Fern-SDK-Version": "0.1.124",
|
21
21
|
}
|
22
22
|
headers["X-API-KEY"] = self.api_key
|
23
23
|
return headers
|
@@ -20,6 +20,7 @@ from ..types.data_frame_request_out import DataFrameRequestOut
|
|
20
20
|
from ..types.data_frame_unknown_format_error import DataFrameUnknownFormatError
|
21
21
|
from ..types.file_chunk_request_out import FileChunkRequestOut
|
22
22
|
from ..types.file_too_large_error import FileTooLargeError
|
23
|
+
from ..types.folder_response import FolderResponse
|
23
24
|
from ..types.parent_folder_error import ParentFolderError
|
24
25
|
from ..types.save_asset_request_out import SaveAssetRequestOut
|
25
26
|
from .calendar.client import AsyncCalendarClient, CalendarClient
|
@@ -94,6 +95,68 @@ class ToolsClient:
|
|
94
95
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
95
96
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
96
97
|
|
98
|
+
def list_contents(
|
99
|
+
self,
|
100
|
+
*,
|
101
|
+
folder_id: typing.Optional[str] = None,
|
102
|
+
include_asset_details: typing.Optional[bool] = None,
|
103
|
+
include_system_files: typing.Optional[bool] = None,
|
104
|
+
request_options: typing.Optional[RequestOptions] = None
|
105
|
+
) -> FolderResponse:
|
106
|
+
"""
|
107
|
+
List contents of a folder or entire workspace in a tree structure.
|
108
|
+
|
109
|
+
Parameters
|
110
|
+
----------
|
111
|
+
folder_id : typing.Optional[str]
|
112
|
+
|
113
|
+
include_asset_details : typing.Optional[bool]
|
114
|
+
|
115
|
+
include_system_files : typing.Optional[bool]
|
116
|
+
|
117
|
+
request_options : typing.Optional[RequestOptions]
|
118
|
+
Request-specific configuration.
|
119
|
+
|
120
|
+
Returns
|
121
|
+
-------
|
122
|
+
FolderResponse
|
123
|
+
Successful Response
|
124
|
+
|
125
|
+
Examples
|
126
|
+
--------
|
127
|
+
from athena.client import Athena
|
128
|
+
|
129
|
+
client = Athena(
|
130
|
+
api_key="YOUR_API_KEY",
|
131
|
+
)
|
132
|
+
client.tools.list_contents()
|
133
|
+
"""
|
134
|
+
_response = self._client_wrapper.httpx_client.request(
|
135
|
+
"api/v0/tools/contents",
|
136
|
+
method="GET",
|
137
|
+
params={
|
138
|
+
"folder_id": folder_id,
|
139
|
+
"include_asset_details": include_asset_details,
|
140
|
+
"include_system_files": include_system_files,
|
141
|
+
},
|
142
|
+
request_options=request_options,
|
143
|
+
)
|
144
|
+
if 200 <= _response.status_code < 300:
|
145
|
+
return pydantic_v1.parse_obj_as(FolderResponse, _response.json()) # type: ignore
|
146
|
+
if _response.status_code == 400:
|
147
|
+
raise BadRequestError(pydantic_v1.parse_obj_as(ParentFolderError, _response.json())) # type: ignore
|
148
|
+
if _response.status_code == 401:
|
149
|
+
raise UnauthorizedError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
150
|
+
if _response.status_code == 404:
|
151
|
+
raise NotFoundError(pydantic_v1.parse_obj_as(AssetNotFoundError, _response.json())) # type: ignore
|
152
|
+
if _response.status_code == 422:
|
153
|
+
raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
154
|
+
try:
|
155
|
+
_response_json = _response.json()
|
156
|
+
except JSONDecodeError:
|
157
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
158
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
159
|
+
|
97
160
|
def data_frame(
|
98
161
|
self,
|
99
162
|
*,
|
@@ -347,6 +410,68 @@ class AsyncToolsClient:
|
|
347
410
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
348
411
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
349
412
|
|
413
|
+
async def list_contents(
|
414
|
+
self,
|
415
|
+
*,
|
416
|
+
folder_id: typing.Optional[str] = None,
|
417
|
+
include_asset_details: typing.Optional[bool] = None,
|
418
|
+
include_system_files: typing.Optional[bool] = None,
|
419
|
+
request_options: typing.Optional[RequestOptions] = None
|
420
|
+
) -> FolderResponse:
|
421
|
+
"""
|
422
|
+
List contents of a folder or entire workspace in a tree structure.
|
423
|
+
|
424
|
+
Parameters
|
425
|
+
----------
|
426
|
+
folder_id : typing.Optional[str]
|
427
|
+
|
428
|
+
include_asset_details : typing.Optional[bool]
|
429
|
+
|
430
|
+
include_system_files : typing.Optional[bool]
|
431
|
+
|
432
|
+
request_options : typing.Optional[RequestOptions]
|
433
|
+
Request-specific configuration.
|
434
|
+
|
435
|
+
Returns
|
436
|
+
-------
|
437
|
+
FolderResponse
|
438
|
+
Successful Response
|
439
|
+
|
440
|
+
Examples
|
441
|
+
--------
|
442
|
+
from athena.client import AsyncAthena
|
443
|
+
|
444
|
+
client = AsyncAthena(
|
445
|
+
api_key="YOUR_API_KEY",
|
446
|
+
)
|
447
|
+
await client.tools.list_contents()
|
448
|
+
"""
|
449
|
+
_response = await self._client_wrapper.httpx_client.request(
|
450
|
+
"api/v0/tools/contents",
|
451
|
+
method="GET",
|
452
|
+
params={
|
453
|
+
"folder_id": folder_id,
|
454
|
+
"include_asset_details": include_asset_details,
|
455
|
+
"include_system_files": include_system_files,
|
456
|
+
},
|
457
|
+
request_options=request_options,
|
458
|
+
)
|
459
|
+
if 200 <= _response.status_code < 300:
|
460
|
+
return pydantic_v1.parse_obj_as(FolderResponse, _response.json()) # type: ignore
|
461
|
+
if _response.status_code == 400:
|
462
|
+
raise BadRequestError(pydantic_v1.parse_obj_as(ParentFolderError, _response.json())) # type: ignore
|
463
|
+
if _response.status_code == 401:
|
464
|
+
raise UnauthorizedError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
465
|
+
if _response.status_code == 404:
|
466
|
+
raise NotFoundError(pydantic_v1.parse_obj_as(AssetNotFoundError, _response.json())) # type: ignore
|
467
|
+
if _response.status_code == 422:
|
468
|
+
raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
469
|
+
try:
|
470
|
+
_response_json = _response.json()
|
471
|
+
except JSONDecodeError:
|
472
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
473
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
474
|
+
|
350
475
|
async def data_frame(
|
351
476
|
self,
|
352
477
|
*,
|
@@ -0,0 +1,270 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import typing
|
4
|
+
from json.decoder import JSONDecodeError
|
5
|
+
|
6
|
+
from ...core.api_error import ApiError
|
7
|
+
from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
8
|
+
from ...core.pydantic_utilities import pydantic_v1
|
9
|
+
from ...core.request_options import RequestOptions
|
10
|
+
from ...errors.unprocessable_entity_error import UnprocessableEntityError
|
11
|
+
from ...types.chunk import Chunk
|
12
|
+
from ...types.prompt_message import PromptMessage
|
13
|
+
from ...types.structured_data_extractor_response import StructuredDataExtractorResponse
|
14
|
+
|
15
|
+
# this is used as the default value for optional parameters
|
16
|
+
OMIT = typing.cast(typing.Any, ...)
|
17
|
+
|
18
|
+
|
19
|
+
class StructuredDataExtractorClient:
|
20
|
+
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
21
|
+
self._client_wrapper = client_wrapper
|
22
|
+
|
23
|
+
def invoke(
|
24
|
+
self,
|
25
|
+
*,
|
26
|
+
chunks: typing.Sequence[Chunk],
|
27
|
+
json_schema: typing.Dict[str, typing.Any],
|
28
|
+
chunk_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
|
29
|
+
reduce: typing.Optional[bool] = OMIT,
|
30
|
+
reduce_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
|
31
|
+
request_options: typing.Optional[RequestOptions] = None
|
32
|
+
) -> StructuredDataExtractorResponse:
|
33
|
+
"""
|
34
|
+
Extract structured data.
|
35
|
+
|
36
|
+
tl;dr:
|
37
|
+
|
38
|
+
- pass a valid JSON schema in `json_schema`
|
39
|
+
- pass the page chunks as a list of `Chunk` objects, by default: `{"type": "text", "content": "..."}`
|
40
|
+
- leave all other fields as default
|
41
|
+
|
42
|
+
Detailed configuration (only relevant for complex use cases):
|
43
|
+
|
44
|
+
The structured data extractor's architecture follows the map-reduce pattern,
|
45
|
+
where the asset is divided into chunks, the schema is extracted from each chunk,
|
46
|
+
and the chunks are then reduced to a single structured data object.
|
47
|
+
|
48
|
+
In some applications, you may not want to:
|
49
|
+
|
50
|
+
- map (if your input asset is small enough)
|
51
|
+
- reduce (if your output object is large enough that it will overflow the output length;
|
52
|
+
if you're extracting a long list of entities; if youre )
|
53
|
+
to extract all instances of the schema).
|
54
|
+
|
55
|
+
You can configure these behaviors with the `map` and `reduce` fields.
|
56
|
+
|
57
|
+
Parameters
|
58
|
+
----------
|
59
|
+
chunks : typing.Sequence[Chunk]
|
60
|
+
The chunks from which to extract structured data.
|
61
|
+
|
62
|
+
json_schema : typing.Dict[str, typing.Any]
|
63
|
+
The JSON schema to use for validation (version draft 2020-12). See the docs [here](https://json-schema.org/learn/getting-started-step-by-step).
|
64
|
+
|
65
|
+
chunk_messages : typing.Optional[typing.Sequence[PromptMessage]]
|
66
|
+
The prompt to use for the data extraction over *each individual chunk*. It must be a list of messages. The chunk content will be appended as a list of human messages.
|
67
|
+
|
68
|
+
reduce : typing.Optional[bool]
|
69
|
+
If `map`, whether to reduce the chunks to a single structured object (true) or return the full list (false). Use True unless you want to preserve duplicates from each page or expect the object to overflow the output context.
|
70
|
+
|
71
|
+
reduce_messages : typing.Optional[typing.Sequence[PromptMessage]]
|
72
|
+
The prompt to use for the reduce steps. It must be a list of messages. The two extraction attempts will be appended as a list of human messages.
|
73
|
+
|
74
|
+
request_options : typing.Optional[RequestOptions]
|
75
|
+
Request-specific configuration.
|
76
|
+
|
77
|
+
Returns
|
78
|
+
-------
|
79
|
+
StructuredDataExtractorResponse
|
80
|
+
Successful Response
|
81
|
+
|
82
|
+
Examples
|
83
|
+
--------
|
84
|
+
from athena import Chunk, ChunkContentItem_Text
|
85
|
+
from athena.client import Athena
|
86
|
+
|
87
|
+
client = Athena(
|
88
|
+
api_key="YOUR_API_KEY",
|
89
|
+
)
|
90
|
+
client.tools.structured_data_extractor.invoke(
|
91
|
+
chunks=[
|
92
|
+
Chunk(
|
93
|
+
chunk_id="1",
|
94
|
+
content=[
|
95
|
+
ChunkContentItem_Text(
|
96
|
+
text="John Smith is a 35 year old developer. You can reach him at john.smith@example.com",
|
97
|
+
)
|
98
|
+
],
|
99
|
+
),
|
100
|
+
Chunk(
|
101
|
+
chunk_id="2",
|
102
|
+
content=[
|
103
|
+
ChunkContentItem_Text(
|
104
|
+
text="Jane Doe is a 25 year old developer. You can reach her at jane@example.com",
|
105
|
+
)
|
106
|
+
],
|
107
|
+
),
|
108
|
+
],
|
109
|
+
json_schema={
|
110
|
+
"description": "A person",
|
111
|
+
"properties": {
|
112
|
+
"age": {"type": "integer"},
|
113
|
+
"email": {"type": "string"},
|
114
|
+
"name": {"type": "string"},
|
115
|
+
},
|
116
|
+
"required": ["name"],
|
117
|
+
"title": "Person",
|
118
|
+
"type": "object",
|
119
|
+
},
|
120
|
+
)
|
121
|
+
"""
|
122
|
+
_response = self._client_wrapper.httpx_client.request(
|
123
|
+
"api/v0/tools/structured-data-extractor/invoke",
|
124
|
+
method="POST",
|
125
|
+
json={
|
126
|
+
"chunk_messages": chunk_messages,
|
127
|
+
"chunks": chunks,
|
128
|
+
"json_schema": json_schema,
|
129
|
+
"reduce": reduce,
|
130
|
+
"reduce_messages": reduce_messages,
|
131
|
+
},
|
132
|
+
request_options=request_options,
|
133
|
+
omit=OMIT,
|
134
|
+
)
|
135
|
+
if 200 <= _response.status_code < 300:
|
136
|
+
return pydantic_v1.parse_obj_as(StructuredDataExtractorResponse, _response.json()) # type: ignore
|
137
|
+
if _response.status_code == 422:
|
138
|
+
raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
139
|
+
try:
|
140
|
+
_response_json = _response.json()
|
141
|
+
except JSONDecodeError:
|
142
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
143
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
144
|
+
|
145
|
+
|
146
|
+
class AsyncStructuredDataExtractorClient:
|
147
|
+
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
148
|
+
self._client_wrapper = client_wrapper
|
149
|
+
|
150
|
+
async def invoke(
|
151
|
+
self,
|
152
|
+
*,
|
153
|
+
chunks: typing.Sequence[Chunk],
|
154
|
+
json_schema: typing.Dict[str, typing.Any],
|
155
|
+
chunk_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
|
156
|
+
reduce: typing.Optional[bool] = OMIT,
|
157
|
+
reduce_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
|
158
|
+
request_options: typing.Optional[RequestOptions] = None
|
159
|
+
) -> StructuredDataExtractorResponse:
|
160
|
+
"""
|
161
|
+
Extract structured data.
|
162
|
+
|
163
|
+
tl;dr:
|
164
|
+
|
165
|
+
- pass a valid JSON schema in `json_schema`
|
166
|
+
- pass the page chunks as a list of `Chunk` objects, by default: `{"type": "text", "content": "..."}`
|
167
|
+
- leave all other fields as default
|
168
|
+
|
169
|
+
Detailed configuration (only relevant for complex use cases):
|
170
|
+
|
171
|
+
The structured data extractor's architecture follows the map-reduce pattern,
|
172
|
+
where the asset is divided into chunks, the schema is extracted from each chunk,
|
173
|
+
and the chunks are then reduced to a single structured data object.
|
174
|
+
|
175
|
+
In some applications, you may not want to:
|
176
|
+
|
177
|
+
- map (if your input asset is small enough)
|
178
|
+
- reduce (if your output object is large enough that it will overflow the output length;
|
179
|
+
if you're extracting a long list of entities; if youre )
|
180
|
+
to extract all instances of the schema).
|
181
|
+
|
182
|
+
You can configure these behaviors with the `map` and `reduce` fields.
|
183
|
+
|
184
|
+
Parameters
|
185
|
+
----------
|
186
|
+
chunks : typing.Sequence[Chunk]
|
187
|
+
The chunks from which to extract structured data.
|
188
|
+
|
189
|
+
json_schema : typing.Dict[str, typing.Any]
|
190
|
+
The JSON schema to use for validation (version draft 2020-12). See the docs [here](https://json-schema.org/learn/getting-started-step-by-step).
|
191
|
+
|
192
|
+
chunk_messages : typing.Optional[typing.Sequence[PromptMessage]]
|
193
|
+
The prompt to use for the data extraction over *each individual chunk*. It must be a list of messages. The chunk content will be appended as a list of human messages.
|
194
|
+
|
195
|
+
reduce : typing.Optional[bool]
|
196
|
+
If `map`, whether to reduce the chunks to a single structured object (true) or return the full list (false). Use True unless you want to preserve duplicates from each page or expect the object to overflow the output context.
|
197
|
+
|
198
|
+
reduce_messages : typing.Optional[typing.Sequence[PromptMessage]]
|
199
|
+
The prompt to use for the reduce steps. It must be a list of messages. The two extraction attempts will be appended as a list of human messages.
|
200
|
+
|
201
|
+
request_options : typing.Optional[RequestOptions]
|
202
|
+
Request-specific configuration.
|
203
|
+
|
204
|
+
Returns
|
205
|
+
-------
|
206
|
+
StructuredDataExtractorResponse
|
207
|
+
Successful Response
|
208
|
+
|
209
|
+
Examples
|
210
|
+
--------
|
211
|
+
from athena import Chunk, ChunkContentItem_Text
|
212
|
+
from athena.client import AsyncAthena
|
213
|
+
|
214
|
+
client = AsyncAthena(
|
215
|
+
api_key="YOUR_API_KEY",
|
216
|
+
)
|
217
|
+
await client.tools.structured_data_extractor.invoke(
|
218
|
+
chunks=[
|
219
|
+
Chunk(
|
220
|
+
chunk_id="1",
|
221
|
+
content=[
|
222
|
+
ChunkContentItem_Text(
|
223
|
+
text="John Smith is a 35 year old developer. You can reach him at john.smith@example.com",
|
224
|
+
)
|
225
|
+
],
|
226
|
+
),
|
227
|
+
Chunk(
|
228
|
+
chunk_id="2",
|
229
|
+
content=[
|
230
|
+
ChunkContentItem_Text(
|
231
|
+
text="Jane Doe is a 25 year old developer. You can reach her at jane@example.com",
|
232
|
+
)
|
233
|
+
],
|
234
|
+
),
|
235
|
+
],
|
236
|
+
json_schema={
|
237
|
+
"description": "A person",
|
238
|
+
"properties": {
|
239
|
+
"age": {"type": "integer"},
|
240
|
+
"email": {"type": "string"},
|
241
|
+
"name": {"type": "string"},
|
242
|
+
},
|
243
|
+
"required": ["name"],
|
244
|
+
"title": "Person",
|
245
|
+
"type": "object",
|
246
|
+
},
|
247
|
+
)
|
248
|
+
"""
|
249
|
+
_response = await self._client_wrapper.httpx_client.request(
|
250
|
+
"api/v0/tools/structured-data-extractor/invoke",
|
251
|
+
method="POST",
|
252
|
+
json={
|
253
|
+
"chunk_messages": chunk_messages,
|
254
|
+
"chunks": chunks,
|
255
|
+
"json_schema": json_schema,
|
256
|
+
"reduce": reduce,
|
257
|
+
"reduce_messages": reduce_messages,
|
258
|
+
},
|
259
|
+
request_options=request_options,
|
260
|
+
omit=OMIT,
|
261
|
+
)
|
262
|
+
if 200 <= _response.status_code < 300:
|
263
|
+
return pydantic_v1.parse_obj_as(StructuredDataExtractorResponse, _response.json()) # type: ignore
|
264
|
+
if _response.status_code == 422:
|
265
|
+
raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
|
266
|
+
try:
|
267
|
+
_response_json = _response.json()
|
268
|
+
except JSONDecodeError:
|
269
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
270
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
@@ -1,6 +1,11 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
|
+
from .asset_node import AssetNode
|
3
4
|
from .asset_not_found_error import AssetNotFoundError
|
5
|
+
from .chunk import Chunk
|
6
|
+
from .chunk_content_item import ChunkContentItem, ChunkContentItem_ImageUrl, ChunkContentItem_Text
|
7
|
+
from .chunk_result import ChunkResult
|
8
|
+
from .chunk_result_chunk_id import ChunkResultChunkId
|
4
9
|
from .custom_agent_response import CustomAgentResponse
|
5
10
|
from .data_frame_request_out import DataFrameRequestOut
|
6
11
|
from .data_frame_request_out_columns_item import DataFrameRequestOutColumnsItem
|
@@ -11,19 +16,31 @@ from .document_chunk import DocumentChunk
|
|
11
16
|
from .drive_agent_response import DriveAgentResponse
|
12
17
|
from .file_chunk_request_out import FileChunkRequestOut
|
13
18
|
from .file_too_large_error import FileTooLargeError
|
19
|
+
from .folder_response import FolderResponse
|
14
20
|
from .general_agent_config import GeneralAgentConfig
|
15
21
|
from .general_agent_config_enabled_tools_item import GeneralAgentConfigEnabledToolsItem
|
16
22
|
from .general_agent_request import GeneralAgentRequest
|
17
23
|
from .general_agent_response import GeneralAgentResponse
|
24
|
+
from .image_url_content import ImageUrlContent
|
18
25
|
from .parent_folder_error import ParentFolderError
|
26
|
+
from .prompt_message import PromptMessage
|
19
27
|
from .research_agent_response import ResearchAgentResponse
|
20
28
|
from .save_asset_request_out import SaveAssetRequestOut
|
21
29
|
from .sql_agent_response import SqlAgentResponse
|
22
|
-
from .structured_data_extractor_reponse import StructuredDataExtractorReponse
|
30
|
+
from .structured_data_extractor_response import StructuredDataExtractorResponse
|
31
|
+
from .text_content import TextContent
|
23
32
|
from .tool import Tool
|
33
|
+
from .type import Type
|
24
34
|
|
25
35
|
__all__ = [
|
36
|
+
"AssetNode",
|
26
37
|
"AssetNotFoundError",
|
38
|
+
"Chunk",
|
39
|
+
"ChunkContentItem",
|
40
|
+
"ChunkContentItem_ImageUrl",
|
41
|
+
"ChunkContentItem_Text",
|
42
|
+
"ChunkResult",
|
43
|
+
"ChunkResultChunkId",
|
27
44
|
"CustomAgentResponse",
|
28
45
|
"DataFrameRequestOut",
|
29
46
|
"DataFrameRequestOutColumnsItem",
|
@@ -34,14 +51,19 @@ __all__ = [
|
|
34
51
|
"DriveAgentResponse",
|
35
52
|
"FileChunkRequestOut",
|
36
53
|
"FileTooLargeError",
|
54
|
+
"FolderResponse",
|
37
55
|
"GeneralAgentConfig",
|
38
56
|
"GeneralAgentConfigEnabledToolsItem",
|
39
57
|
"GeneralAgentRequest",
|
40
58
|
"GeneralAgentResponse",
|
59
|
+
"ImageUrlContent",
|
41
60
|
"ParentFolderError",
|
61
|
+
"PromptMessage",
|
42
62
|
"ResearchAgentResponse",
|
43
63
|
"SaveAssetRequestOut",
|
44
64
|
"SqlAgentResponse",
|
45
|
-
"StructuredDataExtractorReponse",
|
65
|
+
"StructuredDataExtractorResponse",
|
66
|
+
"TextContent",
|
46
67
|
"Tool",
|
68
|
+
"Type",
|
47
69
|
]
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import datetime as dt
|
6
|
+
import typing
|
7
|
+
|
8
|
+
from ..core.datetime_utils import serialize_datetime
|
9
|
+
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
10
|
+
|
11
|
+
|
12
|
+
class AssetNode(pydantic_v1.BaseModel):
|
13
|
+
"""
|
14
|
+
Model representing a node in the folder tree.
|
15
|
+
"""
|
16
|
+
|
17
|
+
children: typing.Optional[typing.Dict[str, typing.Optional[AssetNode]]] = None
|
18
|
+
id: str
|
19
|
+
media_type: str
|
20
|
+
name: str
|
21
|
+
type: str
|
22
|
+
|
23
|
+
def json(self, **kwargs: typing.Any) -> str:
|
24
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
25
|
+
return super().json(**kwargs_with_defaults)
|
26
|
+
|
27
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
28
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
29
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
30
|
+
|
31
|
+
return deep_union_pydantic_dicts(
|
32
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
33
|
+
)
|
34
|
+
|
35
|
+
class Config:
|
36
|
+
frozen = True
|
37
|
+
smart_union = True
|
38
|
+
extra = pydantic_v1.Extra.allow
|
39
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
40
|
+
|
41
|
+
|
42
|
+
AssetNode.update_forward_refs()
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import datetime as dt
|
4
|
+
import typing
|
5
|
+
|
6
|
+
from ..core.datetime_utils import serialize_datetime
|
7
|
+
from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
|
8
|
+
from .chunk_content_item import ChunkContentItem
|
9
|
+
|
10
|
+
|
11
|
+
class Chunk(pydantic_v1.BaseModel):
|
12
|
+
"""
|
13
|
+
A chunk of content to extract data from.
|
14
|
+
"""
|
15
|
+
|
16
|
+
chunk_id: str
|
17
|
+
content: typing.List[ChunkContentItem]
|
18
|
+
metadata: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None
|
19
|
+
|
20
|
+
def json(self, **kwargs: typing.Any) -> str:
|
21
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
22
|
+
return super().json(**kwargs_with_defaults)
|
23
|
+
|
24
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
25
|
+
kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
26
|
+
kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
|
27
|
+
|
28
|
+
return deep_union_pydantic_dicts(
|
29
|
+
super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
|
30
|
+
)
|
31
|
+
|
32
|
+
class Config:
|
33
|
+
frozen = True
|
34
|
+
smart_union = True
|
35
|
+
extra = pydantic_v1.Extra.allow
|
36
|
+
json_encoders = {dt.datetime: serialize_datetime}
|