athena-intelligence 0.1.122__tar.gz → 0.1.124__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/PKG-INFO +1 -1
  2. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/pyproject.toml +1 -1
  3. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/__init__.py +26 -2
  4. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/client_wrapper.py +1 -1
  5. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/client.py +125 -0
  6. athena_intelligence-0.1.124/src/athena/tools/structured_data_extractor/client.py +270 -0
  7. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/__init__.py +24 -2
  8. athena_intelligence-0.1.124/src/athena/types/asset_node.py +42 -0
  9. athena_intelligence-0.1.124/src/athena/types/chunk.py +36 -0
  10. athena_intelligence-0.1.124/src/athena/types/chunk_content_item.py +58 -0
  11. athena_intelligence-0.1.124/src/athena/types/chunk_result.py +35 -0
  12. athena_intelligence-0.1.124/src/athena/types/chunk_result_chunk_id.py +5 -0
  13. athena_intelligence-0.1.124/src/athena/types/folder_response.py +35 -0
  14. athena_intelligence-0.1.124/src/athena/types/image_url_content.py +33 -0
  15. athena_intelligence-0.1.124/src/athena/types/prompt_message.py +35 -0
  16. athena_intelligence-0.1.122/src/athena/types/structured_data_extractor_reponse.py → athena_intelligence-0.1.124/src/athena/types/structured_data_extractor_response.py +9 -3
  17. athena_intelligence-0.1.124/src/athena/types/text_content.py +33 -0
  18. athena_intelligence-0.1.124/src/athena/types/type.py +25 -0
  19. athena_intelligence-0.1.122/src/athena/tools/structured_data_extractor/client.py +0 -150
  20. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/README.md +0 -0
  21. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/__init__.py +0 -0
  22. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/client.py +0 -0
  23. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/drive/__init__.py +0 -0
  24. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/drive/client.py +0 -0
  25. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/general/__init__.py +0 -0
  26. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/general/client.py +0 -0
  27. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/research/__init__.py +0 -0
  28. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/research/client.py +0 -0
  29. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/sql/__init__.py +0 -0
  30. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/agents/sql/client.py +0 -0
  31. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/base_client.py +0 -0
  32. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/client.py +0 -0
  33. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/__init__.py +0 -0
  34. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/api_error.py +0 -0
  35. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/datetime_utils.py +0 -0
  36. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/file.py +0 -0
  37. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/http_client.py +0 -0
  38. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/jsonable_encoder.py +0 -0
  39. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/pydantic_utilities.py +0 -0
  40. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/query_encoder.py +0 -0
  41. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/remove_none_from_dict.py +0 -0
  42. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/core/request_options.py +0 -0
  43. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/environment.py +0 -0
  44. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/__init__.py +0 -0
  45. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/bad_request_error.py +0 -0
  46. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/content_too_large_error.py +0 -0
  47. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/internal_server_error.py +0 -0
  48. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/not_found_error.py +0 -0
  49. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/unauthorized_error.py +0 -0
  50. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/unprocessable_entity_error.py +0 -0
  51. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/errors/unsupported_media_type_error.py +0 -0
  52. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/py.typed +0 -0
  53. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/query/__init__.py +0 -0
  54. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/query/client.py +0 -0
  55. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/query/types/__init__.py +0 -0
  56. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/query/types/query_execute_request_database_asset_ids.py +0 -0
  57. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/__init__.py +0 -0
  58. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/calendar/__init__.py +0 -0
  59. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/calendar/client.py +0 -0
  60. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/email/__init__.py +0 -0
  61. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/email/client.py +0 -0
  62. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/structured_data_extractor/__init__.py +0 -0
  63. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/tasks/__init__.py +0 -0
  64. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/tasks/client.py +0 -0
  65. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/types/__init__.py +0 -0
  66. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/tools/types/tools_data_frame_request_columns_item.py +0 -0
  67. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/asset_not_found_error.py +0 -0
  68. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/custom_agent_response.py +0 -0
  69. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_request_out.py +0 -0
  70. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_request_out_columns_item.py +0 -0
  71. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_request_out_data_item_item.py +0 -0
  72. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_request_out_index_item.py +0 -0
  73. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/data_frame_unknown_format_error.py +0 -0
  74. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/document_chunk.py +0 -0
  75. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/drive_agent_response.py +0 -0
  76. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/file_chunk_request_out.py +0 -0
  77. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/file_too_large_error.py +0 -0
  78. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/general_agent_config.py +0 -0
  79. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/general_agent_config_enabled_tools_item.py +0 -0
  80. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/general_agent_request.py +0 -0
  81. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/general_agent_response.py +0 -0
  82. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/parent_folder_error.py +0 -0
  83. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/research_agent_response.py +0 -0
  84. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/save_asset_request_out.py +0 -0
  85. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/sql_agent_response.py +0 -0
  86. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/types/tool.py +0 -0
  87. {athena_intelligence-0.1.122 → athena_intelligence-0.1.124}/src/athena/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: athena-intelligence
3
- Version: 0.1.122
3
+ Version: 0.1.124
4
4
  Summary: Athena Intelligence Python Library
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Intended Audience :: Developers
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "athena-intelligence"
3
- version = "0.1.122"
3
+ version = "0.1.124"
4
4
  description = "Athena Intelligence Python Library"
5
5
  readme = "README.md"
6
6
  authors = []
@@ -1,7 +1,14 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  from .types import (
4
+ AssetNode,
4
5
  AssetNotFoundError,
6
+ Chunk,
7
+ ChunkContentItem,
8
+ ChunkContentItem_ImageUrl,
9
+ ChunkContentItem_Text,
10
+ ChunkResult,
11
+ ChunkResultChunkId,
5
12
  CustomAgentResponse,
6
13
  DataFrameRequestOut,
7
14
  DataFrameRequestOutColumnsItem,
@@ -12,16 +19,21 @@ from .types import (
12
19
  DriveAgentResponse,
13
20
  FileChunkRequestOut,
14
21
  FileTooLargeError,
22
+ FolderResponse,
15
23
  GeneralAgentConfig,
16
24
  GeneralAgentConfigEnabledToolsItem,
17
25
  GeneralAgentRequest,
18
26
  GeneralAgentResponse,
27
+ ImageUrlContent,
19
28
  ParentFolderError,
29
+ PromptMessage,
20
30
  ResearchAgentResponse,
21
31
  SaveAssetRequestOut,
22
32
  SqlAgentResponse,
23
- StructuredDataExtractorReponse,
33
+ StructuredDataExtractorResponse,
34
+ TextContent,
24
35
  Tool,
36
+ Type,
25
37
  )
26
38
  from .errors import (
27
39
  BadRequestError,
@@ -39,9 +51,16 @@ from .tools import ToolsDataFrameRequestColumnsItem
39
51
  from .version import __version__
40
52
 
41
53
  __all__ = [
54
+ "AssetNode",
42
55
  "AssetNotFoundError",
43
56
  "AthenaEnvironment",
44
57
  "BadRequestError",
58
+ "Chunk",
59
+ "ChunkContentItem",
60
+ "ChunkContentItem_ImageUrl",
61
+ "ChunkContentItem_Text",
62
+ "ChunkResult",
63
+ "ChunkResultChunkId",
45
64
  "ContentTooLargeError",
46
65
  "CustomAgentResponse",
47
66
  "DataFrameRequestOut",
@@ -53,20 +72,25 @@ __all__ = [
53
72
  "DriveAgentResponse",
54
73
  "FileChunkRequestOut",
55
74
  "FileTooLargeError",
75
+ "FolderResponse",
56
76
  "GeneralAgentConfig",
57
77
  "GeneralAgentConfigEnabledToolsItem",
58
78
  "GeneralAgentRequest",
59
79
  "GeneralAgentResponse",
80
+ "ImageUrlContent",
60
81
  "InternalServerError",
61
82
  "NotFoundError",
62
83
  "ParentFolderError",
84
+ "PromptMessage",
63
85
  "QueryExecuteRequestDatabaseAssetIds",
64
86
  "ResearchAgentResponse",
65
87
  "SaveAssetRequestOut",
66
88
  "SqlAgentResponse",
67
- "StructuredDataExtractorReponse",
89
+ "StructuredDataExtractorResponse",
90
+ "TextContent",
68
91
  "Tool",
69
92
  "ToolsDataFrameRequestColumnsItem",
93
+ "Type",
70
94
  "UnauthorizedError",
71
95
  "UnprocessableEntityError",
72
96
  "UnsupportedMediaTypeError",
@@ -17,7 +17,7 @@ class BaseClientWrapper:
17
17
  headers: typing.Dict[str, str] = {
18
18
  "X-Fern-Language": "Python",
19
19
  "X-Fern-SDK-Name": "athena-intelligence",
20
- "X-Fern-SDK-Version": "0.1.122",
20
+ "X-Fern-SDK-Version": "0.1.124",
21
21
  }
22
22
  headers["X-API-KEY"] = self.api_key
23
23
  return headers
@@ -20,6 +20,7 @@ from ..types.data_frame_request_out import DataFrameRequestOut
20
20
  from ..types.data_frame_unknown_format_error import DataFrameUnknownFormatError
21
21
  from ..types.file_chunk_request_out import FileChunkRequestOut
22
22
  from ..types.file_too_large_error import FileTooLargeError
23
+ from ..types.folder_response import FolderResponse
23
24
  from ..types.parent_folder_error import ParentFolderError
24
25
  from ..types.save_asset_request_out import SaveAssetRequestOut
25
26
  from .calendar.client import AsyncCalendarClient, CalendarClient
@@ -94,6 +95,68 @@ class ToolsClient:
94
95
  raise ApiError(status_code=_response.status_code, body=_response.text)
95
96
  raise ApiError(status_code=_response.status_code, body=_response_json)
96
97
 
98
+ def list_contents(
99
+ self,
100
+ *,
101
+ folder_id: typing.Optional[str] = None,
102
+ include_asset_details: typing.Optional[bool] = None,
103
+ include_system_files: typing.Optional[bool] = None,
104
+ request_options: typing.Optional[RequestOptions] = None
105
+ ) -> FolderResponse:
106
+ """
107
+ List contents of a folder or entire workspace in a tree structure.
108
+
109
+ Parameters
110
+ ----------
111
+ folder_id : typing.Optional[str]
112
+
113
+ include_asset_details : typing.Optional[bool]
114
+
115
+ include_system_files : typing.Optional[bool]
116
+
117
+ request_options : typing.Optional[RequestOptions]
118
+ Request-specific configuration.
119
+
120
+ Returns
121
+ -------
122
+ FolderResponse
123
+ Successful Response
124
+
125
+ Examples
126
+ --------
127
+ from athena.client import Athena
128
+
129
+ client = Athena(
130
+ api_key="YOUR_API_KEY",
131
+ )
132
+ client.tools.list_contents()
133
+ """
134
+ _response = self._client_wrapper.httpx_client.request(
135
+ "api/v0/tools/contents",
136
+ method="GET",
137
+ params={
138
+ "folder_id": folder_id,
139
+ "include_asset_details": include_asset_details,
140
+ "include_system_files": include_system_files,
141
+ },
142
+ request_options=request_options,
143
+ )
144
+ if 200 <= _response.status_code < 300:
145
+ return pydantic_v1.parse_obj_as(FolderResponse, _response.json()) # type: ignore
146
+ if _response.status_code == 400:
147
+ raise BadRequestError(pydantic_v1.parse_obj_as(ParentFolderError, _response.json())) # type: ignore
148
+ if _response.status_code == 401:
149
+ raise UnauthorizedError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
150
+ if _response.status_code == 404:
151
+ raise NotFoundError(pydantic_v1.parse_obj_as(AssetNotFoundError, _response.json())) # type: ignore
152
+ if _response.status_code == 422:
153
+ raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
154
+ try:
155
+ _response_json = _response.json()
156
+ except JSONDecodeError:
157
+ raise ApiError(status_code=_response.status_code, body=_response.text)
158
+ raise ApiError(status_code=_response.status_code, body=_response_json)
159
+
97
160
  def data_frame(
98
161
  self,
99
162
  *,
@@ -347,6 +410,68 @@ class AsyncToolsClient:
347
410
  raise ApiError(status_code=_response.status_code, body=_response.text)
348
411
  raise ApiError(status_code=_response.status_code, body=_response_json)
349
412
 
413
+ async def list_contents(
414
+ self,
415
+ *,
416
+ folder_id: typing.Optional[str] = None,
417
+ include_asset_details: typing.Optional[bool] = None,
418
+ include_system_files: typing.Optional[bool] = None,
419
+ request_options: typing.Optional[RequestOptions] = None
420
+ ) -> FolderResponse:
421
+ """
422
+ List contents of a folder or entire workspace in a tree structure.
423
+
424
+ Parameters
425
+ ----------
426
+ folder_id : typing.Optional[str]
427
+
428
+ include_asset_details : typing.Optional[bool]
429
+
430
+ include_system_files : typing.Optional[bool]
431
+
432
+ request_options : typing.Optional[RequestOptions]
433
+ Request-specific configuration.
434
+
435
+ Returns
436
+ -------
437
+ FolderResponse
438
+ Successful Response
439
+
440
+ Examples
441
+ --------
442
+ from athena.client import AsyncAthena
443
+
444
+ client = AsyncAthena(
445
+ api_key="YOUR_API_KEY",
446
+ )
447
+ await client.tools.list_contents()
448
+ """
449
+ _response = await self._client_wrapper.httpx_client.request(
450
+ "api/v0/tools/contents",
451
+ method="GET",
452
+ params={
453
+ "folder_id": folder_id,
454
+ "include_asset_details": include_asset_details,
455
+ "include_system_files": include_system_files,
456
+ },
457
+ request_options=request_options,
458
+ )
459
+ if 200 <= _response.status_code < 300:
460
+ return pydantic_v1.parse_obj_as(FolderResponse, _response.json()) # type: ignore
461
+ if _response.status_code == 400:
462
+ raise BadRequestError(pydantic_v1.parse_obj_as(ParentFolderError, _response.json())) # type: ignore
463
+ if _response.status_code == 401:
464
+ raise UnauthorizedError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
465
+ if _response.status_code == 404:
466
+ raise NotFoundError(pydantic_v1.parse_obj_as(AssetNotFoundError, _response.json())) # type: ignore
467
+ if _response.status_code == 422:
468
+ raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
469
+ try:
470
+ _response_json = _response.json()
471
+ except JSONDecodeError:
472
+ raise ApiError(status_code=_response.status_code, body=_response.text)
473
+ raise ApiError(status_code=_response.status_code, body=_response_json)
474
+
350
475
  async def data_frame(
351
476
  self,
352
477
  *,
@@ -0,0 +1,270 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+ from json.decoder import JSONDecodeError
5
+
6
+ from ...core.api_error import ApiError
7
+ from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
8
+ from ...core.pydantic_utilities import pydantic_v1
9
+ from ...core.request_options import RequestOptions
10
+ from ...errors.unprocessable_entity_error import UnprocessableEntityError
11
+ from ...types.chunk import Chunk
12
+ from ...types.prompt_message import PromptMessage
13
+ from ...types.structured_data_extractor_response import StructuredDataExtractorResponse
14
+
15
+ # this is used as the default value for optional parameters
16
+ OMIT = typing.cast(typing.Any, ...)
17
+
18
+
19
+ class StructuredDataExtractorClient:
20
+ def __init__(self, *, client_wrapper: SyncClientWrapper):
21
+ self._client_wrapper = client_wrapper
22
+
23
+ def invoke(
24
+ self,
25
+ *,
26
+ chunks: typing.Sequence[Chunk],
27
+ json_schema: typing.Dict[str, typing.Any],
28
+ chunk_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
29
+ reduce: typing.Optional[bool] = OMIT,
30
+ reduce_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
31
+ request_options: typing.Optional[RequestOptions] = None
32
+ ) -> StructuredDataExtractorResponse:
33
+ """
34
+ Extract structured data.
35
+
36
+ tl;dr:
37
+
38
+ - pass a valid JSON schema in `json_schema`
39
+ - pass the page chunks as a list of `Chunk` objects, by default: `{"type": "text", "content": "..."}`
40
+ - leave all other fields as default
41
+
42
+ Detailed configuration (only relevant for complex use cases):
43
+
44
+ The structured data extractor's architecture follows the map-reduce pattern,
45
+ where the asset is divided into chunks, the schema is extracted from each chunk,
46
+ and the chunks are then reduced to a single structured data object.
47
+
48
+ In some applications, you may not want to:
49
+
50
+ - map (if your input asset is small enough)
51
+ - reduce (if your output object is large enough that it will overflow the output length;
52
+ if you're extracting a long list of entities; or if you're trying
53
+ to extract all instances of the schema).
54
+
55
+ You can configure these behaviors with the `map` and `reduce` fields.
56
+
57
+ Parameters
58
+ ----------
59
+ chunks : typing.Sequence[Chunk]
60
+ The chunks from which to extract structured data.
61
+
62
+ json_schema : typing.Dict[str, typing.Any]
63
+ The JSON schema to use for validation (version draft 2020-12). See the docs [here](https://json-schema.org/learn/getting-started-step-by-step).
64
+
65
+ chunk_messages : typing.Optional[typing.Sequence[PromptMessage]]
66
+ The prompt to use for the data extraction over *each individual chunk*. It must be a list of messages. The chunk content will be appended as a list of human messages.
67
+
68
+ reduce : typing.Optional[bool]
69
+ If `map`, whether to reduce the chunks to a single structured object (true) or return the full list (false). Use True unless you want to preserve duplicates from each page or expect the object to overflow the output context.
70
+
71
+ reduce_messages : typing.Optional[typing.Sequence[PromptMessage]]
72
+ The prompt to use for the reduce steps. It must be a list of messages. The two extraction attempts will be appended as a list of human messages.
73
+
74
+ request_options : typing.Optional[RequestOptions]
75
+ Request-specific configuration.
76
+
77
+ Returns
78
+ -------
79
+ StructuredDataExtractorResponse
80
+ Successful Response
81
+
82
+ Examples
83
+ --------
84
+ from athena import Chunk, ChunkContentItem_Text
85
+ from athena.client import Athena
86
+
87
+ client = Athena(
88
+ api_key="YOUR_API_KEY",
89
+ )
90
+ client.tools.structured_data_extractor.invoke(
91
+ chunks=[
92
+ Chunk(
93
+ chunk_id="1",
94
+ content=[
95
+ ChunkContentItem_Text(
96
+ text="John Smith is a 35 year old developer. You can reach him at john.smith@example.com",
97
+ )
98
+ ],
99
+ ),
100
+ Chunk(
101
+ chunk_id="2",
102
+ content=[
103
+ ChunkContentItem_Text(
104
+ text="Jane Doe is a 25 year old developer. You can reach her at jane@example.com",
105
+ )
106
+ ],
107
+ ),
108
+ ],
109
+ json_schema={
110
+ "description": "A person",
111
+ "properties": {
112
+ "age": {"type": "integer"},
113
+ "email": {"type": "string"},
114
+ "name": {"type": "string"},
115
+ },
116
+ "required": ["name"],
117
+ "title": "Person",
118
+ "type": "object",
119
+ },
120
+ )
121
+ """
122
+ _response = self._client_wrapper.httpx_client.request(
123
+ "api/v0/tools/structured-data-extractor/invoke",
124
+ method="POST",
125
+ json={
126
+ "chunk_messages": chunk_messages,
127
+ "chunks": chunks,
128
+ "json_schema": json_schema,
129
+ "reduce": reduce,
130
+ "reduce_messages": reduce_messages,
131
+ },
132
+ request_options=request_options,
133
+ omit=OMIT,
134
+ )
135
+ if 200 <= _response.status_code < 300:
136
+ return pydantic_v1.parse_obj_as(StructuredDataExtractorResponse, _response.json()) # type: ignore
137
+ if _response.status_code == 422:
138
+ raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
139
+ try:
140
+ _response_json = _response.json()
141
+ except JSONDecodeError:
142
+ raise ApiError(status_code=_response.status_code, body=_response.text)
143
+ raise ApiError(status_code=_response.status_code, body=_response_json)
144
+
145
+
146
+ class AsyncStructuredDataExtractorClient:
147
+ def __init__(self, *, client_wrapper: AsyncClientWrapper):
148
+ self._client_wrapper = client_wrapper
149
+
150
+ async def invoke(
151
+ self,
152
+ *,
153
+ chunks: typing.Sequence[Chunk],
154
+ json_schema: typing.Dict[str, typing.Any],
155
+ chunk_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
156
+ reduce: typing.Optional[bool] = OMIT,
157
+ reduce_messages: typing.Optional[typing.Sequence[PromptMessage]] = OMIT,
158
+ request_options: typing.Optional[RequestOptions] = None
159
+ ) -> StructuredDataExtractorResponse:
160
+ """
161
+ Extract structured data.
162
+
163
+ tl;dr:
164
+
165
+ - pass a valid JSON schema in `json_schema`
166
+ - pass the page chunks as a list of `Chunk` objects, by default: `{"type": "text", "content": "..."}`
167
+ - leave all other fields as default
168
+
169
+ Detailed configuration (only relevant for complex use cases):
170
+
171
+ The structured data extractor's architecture follows the map-reduce pattern,
172
+ where the asset is divided into chunks, the schema is extracted from each chunk,
173
+ and the chunks are then reduced to a single structured data object.
174
+
175
+ In some applications, you may not want to:
176
+
177
+ - map (if your input asset is small enough)
178
+ - reduce (if your output object is large enough that it will overflow the output length;
179
+ if you're extracting a long list of entities; or if you're trying
180
+ to extract all instances of the schema).
181
+
182
+ You can configure these behaviors with the `map` and `reduce` fields.
183
+
184
+ Parameters
185
+ ----------
186
+ chunks : typing.Sequence[Chunk]
187
+ The chunks from which to extract structured data.
188
+
189
+ json_schema : typing.Dict[str, typing.Any]
190
+ The JSON schema to use for validation (version draft 2020-12). See the docs [here](https://json-schema.org/learn/getting-started-step-by-step).
191
+
192
+ chunk_messages : typing.Optional[typing.Sequence[PromptMessage]]
193
+ The prompt to use for the data extraction over *each individual chunk*. It must be a list of messages. The chunk content will be appended as a list of human messages.
194
+
195
+ reduce : typing.Optional[bool]
196
+ If `map`, whether to reduce the chunks to a single structured object (true) or return the full list (false). Use True unless you want to preserve duplicates from each page or expect the object to overflow the output context.
197
+
198
+ reduce_messages : typing.Optional[typing.Sequence[PromptMessage]]
199
+ The prompt to use for the reduce steps. It must be a list of messages. The two extraction attempts will be appended as a list of human messages.
200
+
201
+ request_options : typing.Optional[RequestOptions]
202
+ Request-specific configuration.
203
+
204
+ Returns
205
+ -------
206
+ StructuredDataExtractorResponse
207
+ Successful Response
208
+
209
+ Examples
210
+ --------
211
+ from athena import Chunk, ChunkContentItem_Text
212
+ from athena.client import AsyncAthena
213
+
214
+ client = AsyncAthena(
215
+ api_key="YOUR_API_KEY",
216
+ )
217
+ await client.tools.structured_data_extractor.invoke(
218
+ chunks=[
219
+ Chunk(
220
+ chunk_id="1",
221
+ content=[
222
+ ChunkContentItem_Text(
223
+ text="John Smith is a 35 year old developer. You can reach him at john.smith@example.com",
224
+ )
225
+ ],
226
+ ),
227
+ Chunk(
228
+ chunk_id="2",
229
+ content=[
230
+ ChunkContentItem_Text(
231
+ text="Jane Doe is a 25 year old developer. You can reach her at jane@example.com",
232
+ )
233
+ ],
234
+ ),
235
+ ],
236
+ json_schema={
237
+ "description": "A person",
238
+ "properties": {
239
+ "age": {"type": "integer"},
240
+ "email": {"type": "string"},
241
+ "name": {"type": "string"},
242
+ },
243
+ "required": ["name"],
244
+ "title": "Person",
245
+ "type": "object",
246
+ },
247
+ )
248
+ """
249
+ _response = await self._client_wrapper.httpx_client.request(
250
+ "api/v0/tools/structured-data-extractor/invoke",
251
+ method="POST",
252
+ json={
253
+ "chunk_messages": chunk_messages,
254
+ "chunks": chunks,
255
+ "json_schema": json_schema,
256
+ "reduce": reduce,
257
+ "reduce_messages": reduce_messages,
258
+ },
259
+ request_options=request_options,
260
+ omit=OMIT,
261
+ )
262
+ if 200 <= _response.status_code < 300:
263
+ return pydantic_v1.parse_obj_as(StructuredDataExtractorResponse, _response.json()) # type: ignore
264
+ if _response.status_code == 422:
265
+ raise UnprocessableEntityError(pydantic_v1.parse_obj_as(typing.Any, _response.json())) # type: ignore
266
+ try:
267
+ _response_json = _response.json()
268
+ except JSONDecodeError:
269
+ raise ApiError(status_code=_response.status_code, body=_response.text)
270
+ raise ApiError(status_code=_response.status_code, body=_response_json)
@@ -1,6 +1,11 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
+ from .asset_node import AssetNode
3
4
  from .asset_not_found_error import AssetNotFoundError
5
+ from .chunk import Chunk
6
+ from .chunk_content_item import ChunkContentItem, ChunkContentItem_ImageUrl, ChunkContentItem_Text
7
+ from .chunk_result import ChunkResult
8
+ from .chunk_result_chunk_id import ChunkResultChunkId
4
9
  from .custom_agent_response import CustomAgentResponse
5
10
  from .data_frame_request_out import DataFrameRequestOut
6
11
  from .data_frame_request_out_columns_item import DataFrameRequestOutColumnsItem
@@ -11,19 +16,31 @@ from .document_chunk import DocumentChunk
11
16
  from .drive_agent_response import DriveAgentResponse
12
17
  from .file_chunk_request_out import FileChunkRequestOut
13
18
  from .file_too_large_error import FileTooLargeError
19
+ from .folder_response import FolderResponse
14
20
  from .general_agent_config import GeneralAgentConfig
15
21
  from .general_agent_config_enabled_tools_item import GeneralAgentConfigEnabledToolsItem
16
22
  from .general_agent_request import GeneralAgentRequest
17
23
  from .general_agent_response import GeneralAgentResponse
24
+ from .image_url_content import ImageUrlContent
18
25
  from .parent_folder_error import ParentFolderError
26
+ from .prompt_message import PromptMessage
19
27
  from .research_agent_response import ResearchAgentResponse
20
28
  from .save_asset_request_out import SaveAssetRequestOut
21
29
  from .sql_agent_response import SqlAgentResponse
22
- from .structured_data_extractor_reponse import StructuredDataExtractorReponse
30
+ from .structured_data_extractor_response import StructuredDataExtractorResponse
31
+ from .text_content import TextContent
23
32
  from .tool import Tool
33
+ from .type import Type
24
34
 
25
35
  __all__ = [
36
+ "AssetNode",
26
37
  "AssetNotFoundError",
38
+ "Chunk",
39
+ "ChunkContentItem",
40
+ "ChunkContentItem_ImageUrl",
41
+ "ChunkContentItem_Text",
42
+ "ChunkResult",
43
+ "ChunkResultChunkId",
27
44
  "CustomAgentResponse",
28
45
  "DataFrameRequestOut",
29
46
  "DataFrameRequestOutColumnsItem",
@@ -34,14 +51,19 @@ __all__ = [
34
51
  "DriveAgentResponse",
35
52
  "FileChunkRequestOut",
36
53
  "FileTooLargeError",
54
+ "FolderResponse",
37
55
  "GeneralAgentConfig",
38
56
  "GeneralAgentConfigEnabledToolsItem",
39
57
  "GeneralAgentRequest",
40
58
  "GeneralAgentResponse",
59
+ "ImageUrlContent",
41
60
  "ParentFolderError",
61
+ "PromptMessage",
42
62
  "ResearchAgentResponse",
43
63
  "SaveAssetRequestOut",
44
64
  "SqlAgentResponse",
45
- "StructuredDataExtractorReponse",
65
+ "StructuredDataExtractorResponse",
66
+ "TextContent",
46
67
  "Tool",
68
+ "Type",
47
69
  ]
@@ -0,0 +1,42 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime as dt
6
+ import typing
7
+
8
+ from ..core.datetime_utils import serialize_datetime
9
+ from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
10
+
11
+
12
+ class AssetNode(pydantic_v1.BaseModel):
13
+ """
14
+ Model representing a node in the folder tree.
15
+ """
16
+
17
+ children: typing.Optional[typing.Dict[str, typing.Optional[AssetNode]]] = None
18
+ id: str
19
+ media_type: str
20
+ name: str
21
+ type: str
22
+
23
+ def json(self, **kwargs: typing.Any) -> str:
24
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
25
+ return super().json(**kwargs_with_defaults)
26
+
27
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
28
+ kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
29
+ kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
30
+
31
+ return deep_union_pydantic_dicts(
32
+ super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
33
+ )
34
+
35
+ class Config:
36
+ frozen = True
37
+ smart_union = True
38
+ extra = pydantic_v1.Extra.allow
39
+ json_encoders = {dt.datetime: serialize_datetime}
40
+
41
+
42
+ AssetNode.update_forward_refs()
@@ -0,0 +1,36 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+ from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
8
+ from .chunk_content_item import ChunkContentItem
9
+
10
+
11
+ class Chunk(pydantic_v1.BaseModel):
12
+ """
13
+ A chunk of content to extract data from.
14
+ """
15
+
16
+ chunk_id: str
17
+ content: typing.List[ChunkContentItem]
18
+ metadata: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None
19
+
20
+ def json(self, **kwargs: typing.Any) -> str:
21
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
22
+ return super().json(**kwargs_with_defaults)
23
+
24
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
25
+ kwargs_with_defaults_exclude_unset: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
26
+ kwargs_with_defaults_exclude_none: typing.Any = {"by_alias": True, "exclude_none": True, **kwargs}
27
+
28
+ return deep_union_pydantic_dicts(
29
+ super().dict(**kwargs_with_defaults_exclude_unset), super().dict(**kwargs_with_defaults_exclude_none)
30
+ )
31
+
32
+ class Config:
33
+ frozen = True
34
+ smart_union = True
35
+ extra = pydantic_v1.Extra.allow
36
+ json_encoders = {dt.datetime: serialize_datetime}