everyrow 0.1.10__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. everyrow/__init__.py +8 -1
  2. everyrow/api_utils.py +5 -1
  3. everyrow/billing.py +29 -0
  4. everyrow/constants.py +1 -1
  5. everyrow/generated/__init__.py +1 -1
  6. everyrow/generated/api/{default/interrupt_chat_task_tasks_chat_interrupt_post.py → artifacts/create_artifact_artifacts_post.py} +38 -32
  7. everyrow/generated/api/billing/__init__.py +1 -0
  8. everyrow/generated/api/{default/get_queues_stats_jobs_queues_get.py → billing/get_billing_balance_billing_get.py} +25 -17
  9. everyrow/generated/api/operations/__init__.py +1 -0
  10. everyrow/generated/api/{default/re_execute_task_endpoint_tasks_re_execute_post.py → operations/agent_map_operations_agent_map_post.py} +53 -41
  11. everyrow/generated/api/{default/create_workflow_from_artifact_workflows_from_artifact_post.py → operations/dedupe_operations_dedupe_post.py} +39 -33
  12. everyrow/generated/api/{default/submit_task_tasks_post.py → operations/merge_operations_merge_post.py} +37 -29
  13. everyrow/generated/api/{default/copy_workflow_endpoint_workflows_copy_post.py → operations/rank_operations_rank_post.py} +43 -29
  14. everyrow/generated/api/{default/export_to_google_sheets_export_post.py → operations/screen_operations_screen_post.py} +43 -31
  15. everyrow/generated/api/operations/single_agent_operations_single_agent_post.py +236 -0
  16. everyrow/generated/api/sessions/__init__.py +1 -0
  17. everyrow/generated/api/{default/copy_artifacts_artifacts_copy_post.py → sessions/create_session_endpoint_sessions_post.py} +35 -27
  18. everyrow/generated/api/tasks/__init__.py +1 -0
  19. everyrow/generated/api/{default/get_job_progress_for_task_jobs_progress_get.py → tasks/get_task_result_tasks_task_id_result_get.py} +45 -33
  20. everyrow/generated/api/{default/get_task_status_endpoint_tasks_task_id_status_get.py → tasks/get_task_status_tasks_task_id_status_get.py} +24 -42
  21. everyrow/generated/models/__init__.py +82 -266
  22. everyrow/generated/models/agent_map_operation.py +315 -0
  23. everyrow/generated/models/{artifact_group_record_metadata_type_0.py → agent_map_operation_input_type_1_item.py} +5 -5
  24. everyrow/generated/models/agent_map_operation_input_type_2.py +46 -0
  25. everyrow/generated/models/{standalone_artifact_record_analysis_type_0.py → agent_map_operation_response_schema_type_0.py} +5 -5
  26. everyrow/generated/models/{create_query_params.py → billing_response.py} +13 -12
  27. everyrow/generated/models/{continue_task_request.py → create_artifact_request.py} +43 -43
  28. everyrow/generated/models/create_artifact_request_data_type_0_item.py +46 -0
  29. everyrow/generated/models/{task_metadata_cols_to_rename_type_0.py → create_artifact_request_data_type_1.py} +5 -5
  30. everyrow/generated/models/{copy_artifacts_response.py → create_artifact_response.py} +12 -12
  31. everyrow/generated/models/{create_session_request.py → create_session.py} +6 -7
  32. everyrow/generated/models/dedupe_operation.py +151 -0
  33. everyrow/generated/models/dedupe_operation_input_type_1_item.py +46 -0
  34. everyrow/generated/models/{import_request_token_data.py → dedupe_operation_input_type_2.py} +5 -5
  35. everyrow/generated/models/error_response.py +109 -0
  36. everyrow/generated/models/{task_insert_query_params.py → error_response_details_type_0.py} +5 -5
  37. everyrow/generated/models/insufficient_balance_error.py +8 -0
  38. everyrow/generated/models/{llm_enum.py → llm_enum_public.py} +1 -20
  39. everyrow/generated/models/merge_operation.py +278 -0
  40. everyrow/generated/models/merge_operation_left_input_type_1_item.py +46 -0
  41. everyrow/generated/models/{chat_completion_message_tool_call.py → merge_operation_left_input_type_2.py} +5 -5
  42. everyrow/generated/models/merge_operation_right_input_type_1_item.py +46 -0
  43. everyrow/generated/models/merge_operation_right_input_type_2.py +46 -0
  44. everyrow/generated/models/merge_operation_use_web_search_type_0.py +10 -0
  45. everyrow/generated/models/operation_response.py +131 -0
  46. everyrow/generated/models/{multi_agent_effort_level.py → public_effort_level.py} +1 -1
  47. everyrow/generated/models/public_task_type.py +12 -0
  48. everyrow/generated/models/rank_operation.py +203 -0
  49. everyrow/generated/models/rank_operation_input_type_1_item.py +46 -0
  50. everyrow/generated/models/{export_request_token_data.py → rank_operation_input_type_2.py} +5 -5
  51. everyrow/generated/models/{artifact_group_record_analysis_type_0.py → rank_operation_response_schema_type_0.py} +5 -5
  52. everyrow/generated/models/screen_operation.py +186 -0
  53. everyrow/generated/models/screen_operation_input_type_1_item.py +46 -0
  54. everyrow/generated/models/screen_operation_input_type_2.py +46 -0
  55. everyrow/generated/models/screen_operation_response_schema_type_0.py +46 -0
  56. everyrow/generated/models/{create_session_response.py → session_response.py} +7 -8
  57. everyrow/generated/models/single_agent_operation.py +304 -0
  58. everyrow/generated/models/single_agent_operation_input_type_1_item.py +46 -0
  59. everyrow/generated/models/single_agent_operation_input_type_2.py +46 -0
  60. everyrow/generated/models/single_agent_operation_response_schema_type_0.py +46 -0
  61. everyrow/generated/models/task_result_response.py +185 -0
  62. everyrow/generated/models/task_result_response_data_type_0_item.py +46 -0
  63. everyrow/generated/models/task_result_response_data_type_1.py +46 -0
  64. everyrow/generated/models/task_status_response.py +99 -19
  65. everyrow/ops.py +360 -434
  66. everyrow/session.py +5 -7
  67. everyrow/task.py +68 -115
  68. {everyrow-0.1.10.dist-info → everyrow-0.2.0.dist-info}/METADATA +22 -8
  69. everyrow-0.2.0.dist-info/RECORD +81 -0
  70. everyrow/citations.py +0 -50
  71. everyrow/generated/api/default/continue_task_endpoint_tasks_continue_post.py +0 -208
  72. everyrow/generated/api/default/create_api_key_endpoint_api_keys_create_post.py +0 -186
  73. everyrow/generated/api/default/create_session_endpoint_sessions_create_post.py +0 -198
  74. everyrow/generated/api/default/generate_feedback_endpoint_tasks_generate_feedback_post.py +0 -186
  75. everyrow/generated/api/default/get_artifacts_artifacts_get.py +0 -260
  76. everyrow/generated/api/default/get_default_timeout_seconds_models_default_timeout_seconds_get.py +0 -165
  77. everyrow/generated/api/default/get_metrics_metrics_get.py +0 -80
  78. everyrow/generated/api/default/get_user_usage_usage_get.py +0 -123
  79. everyrow/generated/api/default/healthz_healthz_get.py +0 -127
  80. everyrow/generated/api/default/import_from_google_sheets_import_post.py +0 -170
  81. everyrow/generated/api/default/list_api_keys_endpoint_api_keys_get.py +0 -186
  82. everyrow/generated/api/default/revoke_api_key_endpoint_api_keys_key_id_revoke_post.py +0 -181
  83. everyrow/generated/api/default/revoke_jobs_for_task_jobs_revoke_post.py +0 -164
  84. everyrow/generated/api/default/rollback_to_message_endpoint_tasks_chat_rollback_post.py +0 -186
  85. everyrow/generated/api/default/submit_chat_task_tasks_chat_post.py +0 -164
  86. everyrow/generated/api/default/task_resource_estimation_task_resource_estimation_post.py +0 -319
  87. everyrow/generated/api/default/trigger_workflow_execution_endpoint_workflows_trigger_post.py +0 -166
  88. everyrow/generated/api/default/whoami_whoami_get.py +0 -127
  89. everyrow/generated/models/agent_improvement_instruction.py +0 -69
  90. everyrow/generated/models/agent_query_params.py +0 -383
  91. everyrow/generated/models/agent_query_params_system_prompt_kind_type_0.py +0 -10
  92. everyrow/generated/models/agent_task_args.py +0 -163
  93. everyrow/generated/models/agent_task_args_processing_mode.py +0 -9
  94. everyrow/generated/models/allowed_suggestions.py +0 -9
  95. everyrow/generated/models/api_key_info.py +0 -163
  96. everyrow/generated/models/artifact_changed_payload.py +0 -89
  97. everyrow/generated/models/artifact_group_record.py +0 -363
  98. everyrow/generated/models/artifact_group_record_trace_mapping_type_0.py +0 -46
  99. everyrow/generated/models/artifact_status.py +0 -14
  100. everyrow/generated/models/auto_cohort_conversation_message.py +0 -533
  101. everyrow/generated/models/aux_data.py +0 -128
  102. everyrow/generated/models/aux_data_source_bank.py +0 -59
  103. everyrow/generated/models/chat_message_metadata.py +0 -193
  104. everyrow/generated/models/concatenate_query_params.py +0 -46
  105. everyrow/generated/models/concatenate_request.py +0 -306
  106. everyrow/generated/models/continue_reason.py +0 -9
  107. everyrow/generated/models/controller_improvement_round.py +0 -79
  108. everyrow/generated/models/conversation_changed_payload.py +0 -89
  109. everyrow/generated/models/copy_artifacts_request.py +0 -70
  110. everyrow/generated/models/copy_workflow_request.py +0 -62
  111. everyrow/generated/models/copy_workflow_response.py +0 -70
  112. everyrow/generated/models/create_api_key_request.py +0 -95
  113. everyrow/generated/models/create_api_key_response.py +0 -96
  114. everyrow/generated/models/create_group_query_params.py +0 -61
  115. everyrow/generated/models/create_group_request.py +0 -305
  116. everyrow/generated/models/create_request.py +0 -305
  117. everyrow/generated/models/create_workflow_from_artifact_request.py +0 -92
  118. everyrow/generated/models/create_workflow_from_artifact_response.py +0 -70
  119. everyrow/generated/models/data_frame_method.py +0 -18
  120. everyrow/generated/models/date_cutoffs.py +0 -145
  121. everyrow/generated/models/dedupe_public_params.py +0 -64
  122. everyrow/generated/models/dedupe_request_params.py +0 -311
  123. everyrow/generated/models/deep_merge_public_params.py +0 -143
  124. everyrow/generated/models/deep_merge_request.py +0 -313
  125. everyrow/generated/models/deep_rank_public_params.py +0 -109
  126. everyrow/generated/models/deep_rank_request.py +0 -313
  127. everyrow/generated/models/deep_screen_public_params.py +0 -132
  128. everyrow/generated/models/deep_screen_request.py +0 -313
  129. everyrow/generated/models/derive_expression.py +0 -69
  130. everyrow/generated/models/derive_query_params.py +0 -75
  131. everyrow/generated/models/derive_request.py +0 -307
  132. everyrow/generated/models/document_query_tool.py +0 -12
  133. everyrow/generated/models/drop_columns_query_params.py +0 -61
  134. everyrow/generated/models/drop_columns_request.py +0 -305
  135. everyrow/generated/models/event_type.py +0 -14
  136. everyrow/generated/models/execution_metadata.py +0 -146
  137. everyrow/generated/models/export_request.py +0 -75
  138. everyrow/generated/models/export_to_google_sheets_export_post_response_export_to_google_sheets_export_post.py +0 -46
  139. everyrow/generated/models/filter_query_params.py +0 -91
  140. everyrow/generated/models/filter_request.py +0 -305
  141. everyrow/generated/models/flatten_query_params.py +0 -46
  142. everyrow/generated/models/flatten_request.py +0 -305
  143. everyrow/generated/models/generate_feedback_request.py +0 -62
  144. everyrow/generated/models/group_by_query_params.py +0 -62
  145. everyrow/generated/models/group_by_request.py +0 -305
  146. everyrow/generated/models/healthz_healthz_get_response_healthz_healthz_get.py +0 -46
  147. everyrow/generated/models/image_chat_content_part.py +0 -80
  148. everyrow/generated/models/image_chat_content_part_image_url.py +0 -46
  149. everyrow/generated/models/import_from_google_sheets_import_post_response_import_from_google_sheets_import_post.py +0 -46
  150. everyrow/generated/models/import_request.py +0 -83
  151. everyrow/generated/models/join_query_params.py +0 -73
  152. everyrow/generated/models/join_request.py +0 -305
  153. everyrow/generated/models/map_agent_request_params.py +0 -313
  154. everyrow/generated/models/map_multi_agent_request_params.py +0 -313
  155. everyrow/generated/models/message_created_payload.py +0 -98
  156. everyrow/generated/models/multi_agent_query_params.py +0 -264
  157. everyrow/generated/models/multi_modal_chat_message.py +0 -160
  158. everyrow/generated/models/multi_modal_chat_message_role.py +0 -10
  159. everyrow/generated/models/preview_metadata.py +0 -144
  160. everyrow/generated/models/processing_mode.py +0 -10
  161. everyrow/generated/models/progress_status.py +0 -83
  162. everyrow/generated/models/queue_stats.py +0 -77
  163. everyrow/generated/models/reduce_agent_request_params.py +0 -305
  164. everyrow/generated/models/reduce_multi_agent_request_params.py +0 -305
  165. everyrow/generated/models/resource_estimation_response.py +0 -85
  166. everyrow/generated/models/response_schema_type.py +0 -9
  167. everyrow/generated/models/revoke_api_key_response.py +0 -61
  168. everyrow/generated/models/rollback_to_message_request.py +0 -62
  169. everyrow/generated/models/rollback_to_message_response.py +0 -77
  170. everyrow/generated/models/session_changed_payload.py +0 -69
  171. everyrow/generated/models/simple_chat_message.py +0 -121
  172. everyrow/generated/models/simple_chat_message_role.py +0 -10
  173. everyrow/generated/models/simple_chat_message_with_tool_calls.py +0 -156
  174. everyrow/generated/models/source_database_entry.py +0 -92
  175. everyrow/generated/models/standalone_artifact_record.py +0 -311
  176. everyrow/generated/models/standalone_artifact_record_metadata_type_0.py +0 -46
  177. everyrow/generated/models/standalone_artifact_record_trace_mapping_type_0.py +0 -46
  178. everyrow/generated/models/status_count.py +0 -71
  179. everyrow/generated/models/status_count_status.py +0 -13
  180. everyrow/generated/models/submit_chat_task_body.py +0 -497
  181. everyrow/generated/models/submit_chat_task_body_selected_task_type_type_0.py +0 -11
  182. everyrow/generated/models/submit_task_body.py +0 -745
  183. everyrow/generated/models/task_changed_payload.py +0 -105
  184. everyrow/generated/models/task_effort.py +0 -10
  185. everyrow/generated/models/task_id_request.py +0 -62
  186. everyrow/generated/models/task_insert.py +0 -725
  187. everyrow/generated/models/task_metadata.py +0 -323
  188. everyrow/generated/models/task_response.py +0 -62
  189. everyrow/generated/models/task_type.py +0 -31
  190. everyrow/generated/models/text_chat_content_part.py +0 -74
  191. everyrow/generated/models/tool_response_message.py +0 -127
  192. everyrow/generated/models/toolkit_constants.py +0 -80
  193. everyrow/generated/models/trace_changed_payload.py +0 -94
  194. everyrow/generated/models/trace_info.py +0 -78
  195. everyrow/generated/models/trigger_workflow_execution_request.py +0 -112
  196. everyrow/generated/models/trigger_workflow_execution_request_task_params.py +0 -65
  197. everyrow/generated/models/trigger_workflow_execution_request_task_params_additional_property.py +0 -46
  198. everyrow/generated/models/trigger_workflow_execution_response.py +0 -69
  199. everyrow/generated/models/upload_csv_payload.py +0 -310
  200. everyrow/generated/models/upload_csv_query_params.py +0 -114
  201. everyrow/generated/models/usage_response.py +0 -77
  202. everyrow/generated/models/whoami_whoami_get_response_whoami_whoami_get.py +0 -46
  203. everyrow/generated/models/workflow_leaf_node_input.py +0 -70
  204. everyrow-0.1.10.dist-info/RECORD +0 -183
  205. /everyrow/generated/api/{default → artifacts}/__init__.py +0 -0
  206. {everyrow-0.1.10.dist-info → everyrow-0.2.0.dist-info}/WHEEL +0 -0
  207. {everyrow-0.1.10.dist-info → everyrow-0.2.0.dist-info}/licenses/LICENSE.txt +0 -0
everyrow/ops.py CHANGED
@@ -5,43 +5,49 @@ from uuid import UUID
5
5
  from pandas import DataFrame
6
6
  from pydantic import BaseModel
7
7
 
8
+ from everyrow.api_utils import handle_response
8
9
  from everyrow.constants import EveryrowError
10
+ from everyrow.generated.api.artifacts import create_artifact_artifacts_post
11
+ from everyrow.generated.api.operations import (
12
+ agent_map_operations_agent_map_post,
13
+ dedupe_operations_dedupe_post,
14
+ merge_operations_merge_post,
15
+ rank_operations_rank_post,
16
+ screen_operations_screen_post,
17
+ single_agent_operations_single_agent_post,
18
+ )
9
19
  from everyrow.generated.models import (
10
- AgentQueryParams,
11
- CreateGroupQueryParams,
12
- CreateGroupRequest,
13
- CreateQueryParams,
14
- CreateRequest,
15
- DedupePublicParams,
16
- DedupeRequestParams,
17
- DeepMergePublicParams,
18
- DeepMergeRequest,
19
- DeepRankPublicParams,
20
- DeepRankRequest,
21
- DeepScreenPublicParams,
22
- DeepScreenRequest,
23
- DeriveExpression,
24
- DeriveQueryParams,
25
- DeriveRequest,
26
- MapAgentRequestParams,
27
- ProcessingMode,
28
- ReduceAgentRequestParams,
29
- ResponseSchemaType,
20
+ AgentMapOperation,
21
+ AgentMapOperationInputType1Item,
22
+ AgentMapOperationResponseSchemaType0,
23
+ CreateArtifactRequest,
24
+ CreateArtifactRequestDataType0Item,
25
+ CreateArtifactRequestDataType1,
26
+ DedupeOperation,
27
+ DedupeOperationInputType1Item,
28
+ LLMEnumPublic,
29
+ MergeOperation,
30
+ MergeOperationLeftInputType1Item,
31
+ MergeOperationRightInputType1Item,
32
+ PublicEffortLevel,
33
+ RankOperation,
34
+ RankOperationInputType1Item,
35
+ RankOperationResponseSchemaType0,
36
+ ScreenOperation,
37
+ ScreenOperationInputType1Item,
38
+ ScreenOperationResponseSchemaType0,
39
+ SingleAgentOperation,
40
+ SingleAgentOperationInputType1Item,
41
+ SingleAgentOperationInputType2,
42
+ SingleAgentOperationResponseSchemaType0,
30
43
  )
31
- from everyrow.generated.models.submit_task_body import SubmitTaskBody
32
44
  from everyrow.generated.types import UNSET
33
45
  from everyrow.result import Result, ScalarResult, TableResult
34
46
  from everyrow.session import Session, create_session
35
- from everyrow.task import (
36
- LLM,
37
- EffortLevel,
38
- EveryrowTask,
39
- await_task_completion,
40
- read_table_result,
41
- submit_task,
42
- )
47
+ from everyrow.task import LLM, EffortLevel, EveryrowTask
43
48
 
44
49
  T = TypeVar("T", bound=BaseModel)
50
+ InputData = UUID | list[dict[str, Any]] | dict[str, Any]
45
51
 
46
52
 
47
53
  class DefaultAgentResponse(BaseModel):
@@ -52,13 +58,88 @@ class DefaultScreenResult(BaseModel):
52
58
  passes: bool
53
59
 
54
60
 
61
+ def _df_to_records(df: DataFrame) -> list[dict[str, Any]]:
62
+ """Convert a DataFrame to a list of records, handling NaN/NaT."""
63
+ json_str = df.to_json(orient="records")
64
+ assert json_str is not None
65
+ return json.loads(json_str)
66
+
67
+
68
+ def _prepare_table_input[T](
69
+ input: DataFrame | UUID | TableResult | None,
70
+ item_class: type[T],
71
+ ) -> UUID | list[T]:
72
+ """Convert table input to UUID or list of generated model items."""
73
+ if input is None:
74
+ return []
75
+ if isinstance(input, UUID):
76
+ return input
77
+ if isinstance(input, TableResult):
78
+ return input.artifact_id
79
+ if isinstance(input, DataFrame):
80
+ records = _df_to_records(input)
81
+ return [item_class.from_dict(r) for r in records] # type: ignore[attr-defined]
82
+ raise TypeError(f"Unsupported input type: {type(input)}")
83
+
84
+
85
+ def _prepare_single_input[TItem, TObj](
86
+ input: BaseModel | DataFrame | UUID | Result | None,
87
+ item_class: type[TItem],
88
+ object_class: type[TObj],
89
+ ) -> UUID | list[TItem] | TObj:
90
+ """Convert single-agent input to the appropriate generated model type."""
91
+ if input is None:
92
+ return object_class.from_dict({}) # type: ignore[attr-defined]
93
+ if isinstance(input, UUID):
94
+ return input
95
+ if isinstance(input, Result):
96
+ return input.artifact_id
97
+ if isinstance(input, DataFrame):
98
+ records = _df_to_records(input)
99
+ return [item_class.from_dict(r) for r in records] # type: ignore[attr-defined]
100
+ if isinstance(input, BaseModel):
101
+ return object_class.from_dict(input.model_dump()) # type: ignore[attr-defined]
102
+ raise TypeError(f"Unsupported input type: {type(input)}")
103
+
104
+
105
+ # --- Artifact creation ---
106
+
107
+
108
+ async def create_scalar_artifact(input: BaseModel, session: Session) -> UUID:
109
+ """Create a scalar artifact by uploading a single record."""
110
+ body = CreateArtifactRequest(
111
+ data=CreateArtifactRequestDataType1.from_dict(input.model_dump()),
112
+ session_id=session.session_id,
113
+ )
114
+ response = await create_artifact_artifacts_post.asyncio(client=session.client, body=body)
115
+ response = handle_response(response)
116
+ return response.artifact_id
117
+
118
+
119
+ async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
120
+ """Create a table artifact by uploading a list of records."""
121
+ records = _df_to_records(input)
122
+ body = CreateArtifactRequest(
123
+ data=[CreateArtifactRequestDataType0Item.from_dict(r) for r in records],
124
+ session_id=session.session_id,
125
+ )
126
+ response = await create_artifact_artifacts_post.asyncio(client=session.client, body=body)
127
+ response = handle_response(response)
128
+ return response.artifact_id
129
+
130
+
131
+ # --- Single Agent ---
132
+
133
+
55
134
  @overload
56
135
  async def single_agent[T: BaseModel](
57
136
  task: str,
58
137
  session: Session | None = None,
59
138
  input: BaseModel | UUID | Result | None = None,
60
- effort_level: EffortLevel = EffortLevel.LOW,
139
+ effort_level: EffortLevel | None = EffortLevel.LOW,
61
140
  llm: LLM | None = None,
141
+ iteration_budget: int | None = None,
142
+ include_research: bool | None = None,
62
143
  response_model: type[T] = DefaultAgentResponse,
63
144
  return_table: Literal[False] = False,
64
145
  ) -> ScalarResult[T]: ...
@@ -69,8 +150,10 @@ async def single_agent(
69
150
  task: str,
70
151
  session: Session | None = None,
71
152
  input: BaseModel | UUID | Result | None = None,
72
- effort_level: EffortLevel = EffortLevel.LOW,
153
+ effort_level: EffortLevel | None = EffortLevel.LOW,
73
154
  llm: LLM | None = None,
155
+ iteration_budget: int | None = None,
156
+ include_research: bool | None = None,
74
157
  response_model: type[BaseModel] = DefaultAgentResponse,
75
158
  return_table: Literal[True] = True,
76
159
  ) -> TableResult: ...
@@ -80,11 +163,30 @@ async def single_agent[T: BaseModel](
80
163
  task: str,
81
164
  session: Session | None = None,
82
165
  input: BaseModel | DataFrame | UUID | Result | None = None,
83
- effort_level: EffortLevel = EffortLevel.LOW,
166
+ effort_level: EffortLevel | None = EffortLevel.LOW,
84
167
  llm: LLM | None = None,
168
+ iteration_budget: int | None = None,
169
+ include_research: bool | None = None,
85
170
  response_model: type[T] = DefaultAgentResponse,
86
171
  return_table: bool = False,
87
172
  ) -> ScalarResult[T] | TableResult:
173
+ """Execute an AI agent task on the provided input.
174
+
175
+ Args:
176
+ task: Instructions for the AI agent to execute.
177
+ session: Optional session. If not provided, one will be created automatically.
178
+ input: Input data (BaseModel, DataFrame, UUID, or Result).
179
+ effort_level: Effort level preset (low/medium/high). Mutually exclusive with
180
+ custom params (llm, iteration_budget, include_research). Default: low.
181
+ llm: LLM to use. Required when effort_level is None.
182
+ iteration_budget: Number of agent iterations (0-20). Required when effort_level is None.
183
+ include_research: Include research notes. Required when effort_level is None.
184
+ response_model: Pydantic model for the response schema.
185
+ return_table: If True, return a TableResult instead of ScalarResult.
186
+
187
+ Returns:
188
+ ScalarResult or TableResult depending on return_table parameter.
189
+ """
88
190
  if session is None:
89
191
  async with create_session() as internal_session:
90
192
  cohort_task = await single_agent_async(
@@ -93,6 +195,8 @@ async def single_agent[T: BaseModel](
93
195
  input=input,
94
196
  effort_level=effort_level,
95
197
  llm=llm,
198
+ iteration_budget=iteration_budget,
199
+ include_research=include_research,
96
200
  response_model=response_model,
97
201
  return_table=return_table,
98
202
  )
@@ -103,6 +207,8 @@ async def single_agent[T: BaseModel](
103
207
  input=input,
104
208
  effort_level=effort_level,
105
209
  llm=llm,
210
+ iteration_budget=iteration_budget,
211
+ include_research=include_research,
106
212
  response_model=response_model,
107
213
  return_table=return_table,
108
214
  )
@@ -113,311 +219,212 @@ async def single_agent_async[T: BaseModel](
113
219
  task: str,
114
220
  session: Session,
115
221
  input: BaseModel | DataFrame | UUID | Result | None = None,
116
- effort_level: EffortLevel = EffortLevel.LOW,
222
+ effort_level: EffortLevel | None = EffortLevel.LOW,
117
223
  llm: LLM | None = None,
224
+ iteration_budget: int | None = None,
225
+ include_research: bool | None = None,
118
226
  response_model: type[T] = DefaultAgentResponse,
119
227
  return_table: bool = False,
120
228
  ) -> EveryrowTask[T]:
121
- if input is not None:
122
- input_artifact_ids = [await _process_single_agent_input(input, session)]
123
- else:
124
- input_artifact_ids = []
229
+ """Submit a single_agent task asynchronously."""
230
+ input_data = _prepare_single_input(input, SingleAgentOperationInputType1Item, SingleAgentOperationInputType2)
125
231
 
126
- query = AgentQueryParams(
232
+ # Build the operation body with either preset or custom params
233
+ body = SingleAgentOperation(
234
+ input_=input_data, # type: ignore
127
235
  task=task,
128
- llm=llm or UNSET,
129
- effort_level=effort_level,
130
- response_schema=response_model.model_json_schema(),
131
- response_schema_type=ResponseSchemaType.JSON,
132
- is_expand=return_table,
133
- include_provenance_and_notes=False,
134
- )
135
- request = ReduceAgentRequestParams(
136
- query=query,
137
- input_artifacts=input_artifact_ids,
138
- )
139
- body = SubmitTaskBody(
140
- payload=request,
141
236
  session_id=session.session_id,
237
+ response_schema=SingleAgentOperationResponseSchemaType0.from_dict(response_model.model_json_schema()),
238
+ effort_level=PublicEffortLevel(effort_level.value) if effort_level is not None else UNSET,
239
+ llm=LLMEnumPublic(llm.value) if llm is not None else UNSET,
240
+ iteration_budget=iteration_budget if iteration_budget is not None else UNSET,
241
+ include_research=include_research if include_research is not None else UNSET,
242
+ return_list=return_table,
142
243
  )
143
244
 
144
- cohort_task = EveryrowTask(
145
- response_model=response_model, is_map=False, is_expand=return_table
146
- )
147
- await cohort_task.submit(body, session.client)
245
+ response = await single_agent_operations_single_agent_post.asyncio(client=session.client, body=body)
246
+ response = handle_response(response)
247
+
248
+ cohort_task: EveryrowTask[T] = EveryrowTask(response_model=response_model, is_map=False, is_expand=return_table)
249
+ cohort_task.set_submitted(response.task_id, response.session_id, session.client)
148
250
  return cohort_task
149
251
 
150
252
 
253
+ # --- Agent Map ---
254
+
255
+
151
256
  async def agent_map(
152
257
  task: str,
153
258
  session: Session | None = None,
154
259
  input: DataFrame | UUID | TableResult | None = None,
155
- effort_level: EffortLevel = EffortLevel.LOW,
260
+ effort_level: EffortLevel | None = EffortLevel.LOW,
156
261
  llm: LLM | None = None,
262
+ iteration_budget: int | None = None,
263
+ include_research: bool | None = None,
264
+ enforce_row_independence: bool = False,
157
265
  response_model: type[BaseModel] = DefaultAgentResponse,
158
266
  ) -> TableResult:
267
+ """Execute an AI agent task on each row of the input table.
268
+
269
+ Args:
270
+ task: Instructions for the AI agent to execute per row.
271
+ session: Optional session. If not provided, one will be created automatically.
272
+ input: The input table (DataFrame, UUID, or TableResult).
273
+ effort_level: Effort level preset (low/medium/high). Mutually exclusive with
274
+ custom params (llm, iteration_budget, include_research). Default: low.
275
+ llm: LLM to use for each agent. Required when effort_level is None.
276
+ iteration_budget: Number of agent iterations per row (0-20). Required when effort_level is None.
277
+ include_research: Include research notes. Required when effort_level is None.
278
+ response_model: Pydantic model for the response schema.
279
+
280
+ Returns:
281
+ TableResult containing the agent results merged with input rows.
282
+ """
159
283
  if input is None:
160
284
  raise EveryrowError("input is required for agent_map")
161
285
  if session is None:
162
286
  async with create_session() as internal_session:
163
287
  cohort_task = await agent_map_async(
164
- task,
165
- internal_session,
166
- input,
167
- effort_level,
168
- llm,
169
- response_model,
288
+ task=task,
289
+ session=internal_session,
290
+ input=input,
291
+ effort_level=effort_level,
292
+ llm=llm,
293
+ iteration_budget=iteration_budget,
294
+ include_research=include_research,
295
+ enforce_row_independence=enforce_row_independence,
296
+ response_model=response_model,
170
297
  )
171
298
  result = await cohort_task.await_result()
172
299
  if isinstance(result, TableResult):
173
300
  return result
174
- else:
175
- raise EveryrowError("Agent map task did not return a table result")
301
+ raise EveryrowError("Agent map task did not return a table result")
176
302
  cohort_task = await agent_map_async(
177
- task, session, input, effort_level, llm, response_model
303
+ task=task,
304
+ session=session,
305
+ input=input,
306
+ effort_level=effort_level,
307
+ llm=llm,
308
+ iteration_budget=iteration_budget,
309
+ include_research=include_research,
310
+ enforce_row_independence=enforce_row_independence,
311
+ response_model=response_model,
178
312
  )
179
313
  result = await cohort_task.await_result()
180
314
  if isinstance(result, TableResult):
181
315
  return result
182
- else:
183
- raise EveryrowError("Agent map task did not return a table result")
184
-
185
-
186
- def _convert_pydantic_to_custom_schema(model: type[BaseModel]) -> dict[str, Any]:
187
- """Convert a Pydantic model to the custom response schema format expected by rank.
188
-
189
- The custom format uses _model_name instead of type: object, and uses optional: bool
190
- instead of required arrays.
191
-
192
- Example:
193
- class ScreeningResult(BaseModel):
194
- screening_result: str = Field(..., description="...")
195
-
196
- Converts to:
197
- {
198
- "_model_name": "ScreeningResult",
199
- "screening_result": {
200
- "type": "str",
201
- "optional": False,
202
- "description": "..."
203
- }
204
- }
205
- """
206
- json_schema = model.model_json_schema()
207
-
208
- # Extract model name from title or use the class name
209
- model_name = json_schema.get("title", model.__name__)
210
-
211
- # Build the custom schema format
212
- custom_schema: dict[str, Any] = {"_model_name": model_name}
213
-
214
- # Convert properties
215
- properties = json_schema.get("properties", {})
216
- required = set(json_schema.get("required", []))
217
-
218
- # Map JSON schema types to custom format types
219
- type_mapping = {
220
- "string": "str",
221
- "integer": "int",
222
- "number": "float",
223
- "boolean": "bool",
224
- }
225
-
226
- for field_name, field_schema in properties.items():
227
- # Copy the field schema
228
- custom_field: dict[str, Any] = {}
229
-
230
- # Map type from JSON schema format to custom format
231
- field_type = field_schema.get("type")
232
- if field_type:
233
- # Convert JSON schema type to custom format type
234
- custom_field["type"] = type_mapping.get(field_type, field_type)
235
-
236
- # Add description if present
237
- if "description" in field_schema:
238
- custom_field["description"] = field_schema["description"]
239
-
240
- # Set optional flag (opposite of required)
241
- custom_field["optional"] = field_name not in required
242
-
243
- custom_schema[field_name] = custom_field
244
-
245
- return custom_schema
316
+ raise EveryrowError("Agent map task did not return a table result")
246
317
 
247
318
 
248
319
  async def agent_map_async(
249
320
  task: str,
250
321
  session: Session,
251
322
  input: DataFrame | UUID | TableResult,
252
- effort_level: EffortLevel = EffortLevel.LOW,
323
+ effort_level: EffortLevel | None = EffortLevel.LOW,
253
324
  llm: LLM | None = None,
325
+ iteration_budget: int | None = None,
326
+ include_research: bool | None = None,
327
+ enforce_row_independence: bool = False,
254
328
  response_model: type[BaseModel] = DefaultAgentResponse,
255
329
  ) -> EveryrowTask[BaseModel]:
256
- input_artifact_ids = [await _process_agent_map_input(input, session)]
257
- query = AgentQueryParams(
330
+ """Submit an agent_map task asynchronously."""
331
+ input_data = _prepare_table_input(input, AgentMapOperationInputType1Item)
332
+
333
+ # Build the operation body with either preset or custom params
334
+ body = AgentMapOperation(
335
+ input_=input_data, # type: ignore
258
336
  task=task,
259
- effort_level=effort_level,
260
- llm=llm or UNSET,
261
- response_schema=_convert_pydantic_to_custom_schema(response_model),
262
- response_schema_type=ResponseSchemaType.CUSTOM,
263
- is_expand=False,
264
- include_provenance_and_notes=False,
265
- )
266
- request = MapAgentRequestParams(
267
- query=query,
268
- input_artifacts=input_artifact_ids,
269
- context_artifacts=[],
270
- join_with_input=True,
271
- )
272
- body = SubmitTaskBody(
273
- payload=request,
274
337
  session_id=session.session_id,
338
+ response_schema=AgentMapOperationResponseSchemaType0.from_dict(response_model.model_json_schema()),
339
+ effort_level=PublicEffortLevel(effort_level.value) if effort_level is not None else UNSET,
340
+ llm=LLMEnumPublic(llm.value) if llm is not None else UNSET,
341
+ iteration_budget=iteration_budget if iteration_budget is not None else UNSET,
342
+ include_research=include_research if include_research is not None else UNSET,
343
+ join_with_input=True,
344
+ enforce_row_independence=enforce_row_independence,
275
345
  )
276
346
 
277
- cohort_task = EveryrowTask(
278
- response_model=response_model, is_map=True, is_expand=False
279
- )
280
- await cohort_task.submit(body, session.client)
281
- return cohort_task
282
-
347
+ response = await agent_map_operations_agent_map_post.asyncio(client=session.client, body=body)
348
+ response = handle_response(response)
283
349
 
284
- async def _process_agent_map_input(
285
- input: DataFrame | UUID | TableResult,
286
- session: Session,
287
- ) -> UUID:
288
- if isinstance(input, TableResult):
289
- return input.artifact_id
290
- elif isinstance(input, DataFrame):
291
- return await create_table_artifact(input, session)
292
- else:
293
- return input
294
-
295
-
296
- async def _process_single_agent_input(
297
- input: BaseModel | DataFrame | UUID | Result,
298
- session: Session,
299
- ) -> UUID:
300
- if isinstance(input, Result):
301
- return input.artifact_id
302
- elif isinstance(input, DataFrame):
303
- return await create_table_artifact(input, session)
304
- elif isinstance(input, BaseModel):
305
- return await create_scalar_artifact(input, session)
306
- else:
307
- return input
308
-
309
-
310
- async def create_scalar_artifact(input: BaseModel, session: Session) -> UUID:
311
- payload = CreateRequest(query=CreateQueryParams(data_to_create=input.model_dump()))
312
- body = SubmitTaskBody(
313
- payload=payload,
314
- session_id=session.session_id,
315
- )
316
- task_id = await submit_task(body, session.client)
317
- finished_create_artifact_task = await await_task_completion(task_id, session.client)
318
- return finished_create_artifact_task.artifact_id # type: ignore (we check artifact_id in await_task_completion)
350
+ cohort_task = EveryrowTask(response_model=response_model, is_map=True, is_expand=False)
351
+ cohort_task.set_submitted(response.task_id, response.session_id, session.client)
352
+ return cohort_task
319
353
 
320
354
 
321
- async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
322
- # Use to_json to handle NaN/NaT serialization, then parse back to Python objects
323
- json_str = input.to_json(orient="records")
324
- assert json_str is not None # to_json returns str when no path_or_buf provided
325
- records = json.loads(json_str)
326
- payload = CreateGroupRequest(query=CreateGroupQueryParams(data_to_create=records))
327
- body = SubmitTaskBody(
328
- payload=payload,
329
- session_id=session.session_id,
330
- )
331
- task_id = await submit_task(body, session.client)
332
- finished_create_artifact_task = await await_task_completion(task_id, session.client)
333
- return finished_create_artifact_task.artifact_id # type: ignore (we check artifact_id in await_task_completion)
355
+ # --- Screen ---
334
356
 
335
357
 
336
- async def merge(
358
+ async def screen[T: BaseModel](
337
359
  task: str,
338
360
  session: Session | None = None,
339
- left_table: DataFrame | UUID | TableResult | None = None,
340
- right_table: DataFrame | UUID | TableResult | None = None,
341
- merge_on_left: str | None = None,
342
- merge_on_right: str | None = None,
361
+ input: DataFrame | UUID | TableResult | None = None,
362
+ response_model: type[T] | None = None,
343
363
  ) -> TableResult:
344
- """Merge two tables using merge operation.
364
+ """Screen rows in a table using AI.
345
365
 
346
366
  Args:
347
- task: The task description for the merge operation
367
+ task: The task description for screening
348
368
  session: Optional session. If not provided, one will be created automatically.
349
- left_table: The left table to merge (DataFrame, UUID, or TableResult)
350
- right_table: The right table to merge (DataFrame, UUID, or TableResult)
351
- merge_on_left: Optional column name in left table to merge on
352
- merge_on_right: Optional column name in right table to merge on
369
+ input: The input table (DataFrame, UUID, or TableResult)
370
+ response_model: Optional Pydantic model for the response schema.
353
371
 
354
372
  Returns:
355
- TableResult containing the merged table
373
+ TableResult containing the screened table
356
374
  """
357
- if left_table is None or right_table is None:
358
- raise EveryrowError("left_table and right_table are required for merge")
375
+ if input is None:
376
+ raise EveryrowError("input is required for screen")
359
377
  if session is None:
360
378
  async with create_session() as internal_session:
361
- cohort_task = await merge_async(
379
+ cohort_task = await screen_async(
362
380
  task=task,
363
381
  session=internal_session,
364
- left_table=left_table,
365
- right_table=right_table,
366
- merge_on_left=merge_on_left,
367
- merge_on_right=merge_on_right,
382
+ input=input,
383
+ response_model=response_model,
368
384
  )
369
385
  result = await cohort_task.await_result()
370
386
  if isinstance(result, TableResult):
371
387
  return result
372
- else:
373
- raise EveryrowError("Merge task did not return a table result")
374
- cohort_task = await merge_async(
375
- task=task,
376
- session=session,
377
- left_table=left_table,
378
- right_table=right_table,
379
- merge_on_left=merge_on_left,
380
- merge_on_right=merge_on_right,
381
- )
388
+ raise EveryrowError("Screen task did not return a table result")
389
+ cohort_task = await screen_async(task=task, session=session, input=input, response_model=response_model)
382
390
  result = await cohort_task.await_result()
383
391
  if isinstance(result, TableResult):
384
392
  return result
385
- else:
386
- raise EveryrowError("Merge task did not return a table result")
393
+ raise EveryrowError("Screen task did not return a table result")
387
394
 
388
395
 
389
- async def merge_async(
396
+ async def screen_async[T: BaseModel](
390
397
  task: str,
391
398
  session: Session,
392
- left_table: DataFrame | UUID | TableResult,
393
- right_table: DataFrame | UUID | TableResult,
394
- merge_on_left: str | None = None,
395
- merge_on_right: str | None = None,
396
- ) -> EveryrowTask[BaseModel]:
397
- """Submit a merge task asynchronously."""
398
- left_artifact_id = await _process_agent_map_input(left_table, session)
399
- right_artifact_id = await _process_agent_map_input(right_table, session)
399
+ input: DataFrame | UUID | TableResult,
400
+ response_model: type[T] | None = None,
401
+ ) -> EveryrowTask[T]:
402
+ """Submit a screen task asynchronously."""
403
+ input_data = _prepare_table_input(input, ScreenOperationInputType1Item)
404
+ actual_response_model = response_model or DefaultScreenResult
400
405
 
401
- query = DeepMergePublicParams(
406
+ body = ScreenOperation(
407
+ input_=input_data, # type: ignore
402
408
  task=task,
403
- merge_on_left=merge_on_left or UNSET,
404
- merge_on_right=merge_on_right or UNSET,
405
- )
406
- request = DeepMergeRequest(
407
- query=query,
408
- input_artifacts=[left_artifact_id],
409
- context_artifacts=[right_artifact_id],
410
- )
411
- body = SubmitTaskBody(
412
- payload=request,
413
409
  session_id=session.session_id,
410
+ response_schema=ScreenOperationResponseSchemaType0.from_dict(actual_response_model.model_json_schema()),
414
411
  )
415
412
 
416
- cohort_task = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
417
- await cohort_task.submit(body, session.client)
413
+ response = await screen_operations_screen_post.asyncio(client=session.client, body=body)
414
+ response = handle_response(response)
415
+
416
+ cohort_task: EveryrowTask[T] = EveryrowTask(
417
+ response_model=actual_response_model, # type: ignore[arg-type]
418
+ is_map=True,
419
+ is_expand=False,
420
+ )
421
+ cohort_task.set_submitted(response.task_id, response.session_id, session.client)
418
422
  return cohort_task
419
423
 
420
424
 
425
+ # --- Rank ---
426
+
427
+
421
428
  async def rank[T: BaseModel](
422
429
  task: str,
423
430
  session: Session | None = None,
@@ -427,13 +434,13 @@ async def rank[T: BaseModel](
427
434
  response_model: type[T] | None = None,
428
435
  ascending_order: bool = True,
429
436
  ) -> TableResult:
430
- """Rank rows in a table using rank operation.
437
+ """Rank rows in a table using AI.
431
438
 
432
439
  Args:
433
440
  task: The task description for ranking
434
441
  session: Optional session. If not provided, one will be created automatically.
435
442
  input: The input table (DataFrame, UUID, or TableResult)
436
- field_name: The name of the field to extract and sort by
443
+ field_name: The name of the field to sort by
437
444
  field_type: The type of the field (default: "float", ignored if response_model is provided)
438
445
  response_model: Optional Pydantic model for the response schema
439
446
  ascending_order: If True, sort in ascending order
@@ -457,8 +464,7 @@ async def rank[T: BaseModel](
457
464
  result = await cohort_task.await_result()
458
465
  if isinstance(result, TableResult):
459
466
  return result
460
- else:
461
- raise EveryrowError("Rank task did not return a table result")
467
+ raise EveryrowError("Rank task did not return a table result")
462
468
  cohort_task = await rank_async(
463
469
  task=task,
464
470
  session=session,
@@ -471,8 +477,7 @@ async def rank[T: BaseModel](
471
477
  result = await cohort_task.await_result()
472
478
  if isinstance(result, TableResult):
473
479
  return result
474
- else:
475
- raise EveryrowError("Rank task did not return a table result")
480
+ raise EveryrowError("Rank task did not return a table result")
476
481
 
477
482
 
478
483
  async def rank_async[T: BaseModel](
@@ -485,276 +490,197 @@ async def rank_async[T: BaseModel](
485
490
  ascending_order: bool = True,
486
491
  ) -> EveryrowTask[T]:
487
492
  """Submit a rank task asynchronously."""
488
- input_artifact_id = await _process_agent_map_input(input, session)
493
+ input_data = _prepare_table_input(input, RankOperationInputType1Item)
489
494
 
490
495
  if response_model is not None:
491
- response_schema = _convert_pydantic_to_custom_schema(response_model)
492
- if field_name not in response_schema:
493
- raise ValueError(
494
- f"Field {field_name} not in response model {response_model.__name__}"
495
- )
496
+ response_schema = response_model.model_json_schema()
497
+ # Validate that field_name exists in the model
498
+ properties = response_schema.get("properties", {})
499
+ if field_name not in properties:
500
+ raise ValueError(f"Field {field_name} not in response model {response_model.__name__}")
496
501
  else:
502
+ # Build a minimal JSON schema with just the sort field
503
+ json_type_map = {
504
+ "float": "number",
505
+ "int": "integer",
506
+ "str": "string",
507
+ "bool": "boolean",
508
+ }
497
509
  response_schema = {
498
- "_model_name": "RankResponse",
499
- field_name: {
500
- "type": field_type,
501
- "optional": False,
502
- },
510
+ "type": "object",
511
+ "properties": {field_name: {"type": json_type_map.get(field_type, field_type)}},
512
+ "required": [field_name],
503
513
  }
504
514
 
505
- query = DeepRankPublicParams(
515
+ body = RankOperation(
516
+ input_=input_data, # type: ignore
506
517
  task=task,
507
- response_schema=response_schema,
508
- field_to_sort_by=field_name,
509
- ascending_order=ascending_order,
510
- )
511
- request = DeepRankRequest(
512
- query=query,
513
- input_artifacts=[input_artifact_id],
514
- context_artifacts=[],
515
- )
516
- body = SubmitTaskBody(
517
- payload=request,
518
+ sort_by=field_name,
518
519
  session_id=session.session_id,
520
+ response_schema=RankOperationResponseSchemaType0.from_dict(response_schema),
521
+ ascending=ascending_order,
519
522
  )
520
523
 
524
+ response = await rank_operations_rank_post.asyncio(client=session.client, body=body)
525
+ response = handle_response(response)
526
+
521
527
  cohort_task: EveryrowTask[T] = EveryrowTask(
522
528
  response_model=response_model or BaseModel, # type: ignore[arg-type]
523
529
  is_map=True,
524
530
  is_expand=False,
525
531
  )
526
- await cohort_task.submit(body, session.client)
532
+ cohort_task.set_submitted(response.task_id, response.session_id, session.client)
527
533
  return cohort_task
528
534
 
529
535
 
530
- async def screen[T: BaseModel](
536
+ # --- Merge ---
537
+
538
+
539
async def merge(
    task: str,
    session: Session | None = None,
    left_table: DataFrame | UUID | TableResult | None = None,
    right_table: DataFrame | UUID | TableResult | None = None,
    merge_on_left: str | None = None,
    merge_on_right: str | None = None,
    use_web_search: Literal["auto", "yes", "no"] | None = None,
) -> TableResult:
    """Merge two tables using AI and wait for the result.

    Args:
        task: The task description for the merge operation
        session: Optional session. If not provided, one will be created
            automatically and kept open until the result has been received.
        left_table: The left table to merge (DataFrame, UUID, or TableResult)
        right_table: The right table to merge (DataFrame, UUID, or TableResult)
        merge_on_left: Optional column name in left table to merge on
        merge_on_right: Optional column name in right table to merge on
        use_web_search: Optional. Control web search behavior:
            "auto" tries LLM merge first then conditionally searches,
            "no" skips web search entirely,
            "yes" forces web search on every row.
            Defaults to "auto" if not provided.

    Returns:
        TableResult containing the merged table

    Raises:
        EveryrowError: If either table is None, or if the finished task does
            not yield a ``TableResult``.
    """
    if left_table is None or right_table is None:
        raise EveryrowError("left_table and right_table are required for merge")

    async def _run(active_session: Session) -> TableResult:
        # Submit, then block on the result within the given session.
        pending = await merge_async(
            task=task,
            session=active_session,
            left_table=left_table,
            right_table=right_table,
            merge_on_left=merge_on_left,
            merge_on_right=merge_on_right,
            use_web_search=use_web_search,
        )
        outcome = await pending.await_result()
        if not isinstance(outcome, TableResult):
            raise EveryrowError("Merge task did not return a table result")
        return outcome

    if session is not None:
        return await _run(session)

    # No session supplied: the result is awaited inside the temporary session.
    async with create_session() as internal_session:
        return await _run(internal_session)
574
592
 
575
593
 
576
- async def screen_async[T: BaseModel](
594
async def merge_async(
    task: str,
    session: Session,
    left_table: DataFrame | UUID | TableResult,
    right_table: DataFrame | UUID | TableResult,
    merge_on_left: str | None = None,
    merge_on_right: str | None = None,
    use_web_search: Literal["auto", "yes", "no"] | None = None,
) -> EveryrowTask[BaseModel]:
    """Submit a merge operation and return its task handle.

    The call posts the operation and returns once the submission response has
    been processed; it does not wait for the task result.

    Args:
        task: The task description for the merge operation.
        session: Session supplying the ``session_id`` and API client.
        left_table: The left table (DataFrame, UUID, or TableResult); passed
            through ``_prepare_table_input`` before being sent.
        right_table: The right table, prepared the same way.
        merge_on_left: Sent as ``left_key``; any falsy value (``None`` or an
            empty string) is sent as ``UNSET``.
        merge_on_right: Sent as ``right_key`` under the same rule.
        use_web_search: Sent as-is; a falsy value is sent as ``UNSET``.

    Returns:
        An ``EveryrowTask`` (``response_model=BaseModel``, ``is_map=True``,
        ``is_expand=False``) marked as submitted with the task and session
        ids from the response.
    """
    left_payload = _prepare_table_input(left_table, MergeOperationLeftInputType1Item)
    right_payload = _prepare_table_input(right_table, MergeOperationRightInputType1Item)

    # Truthiness (not an ``is None`` test) decides whether a field is sent.
    left_key = merge_on_left if merge_on_left else UNSET
    right_key = merge_on_right if merge_on_right else UNSET
    web_search = use_web_search if use_web_search else UNSET

    operation = MergeOperation(
        left_input=left_payload,  # type: ignore
        right_input=right_payload,  # type: ignore
        task=task,
        left_key=left_key,
        right_key=right_key,
        use_web_search=web_search,  # type: ignore
        session_id=session.session_id,
    )

    submitted = handle_response(
        await merge_operations_merge_post.asyncio(client=session.client, body=operation)
    )

    handle = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
    handle.set_submitted(submitted.task_id, submitted.session_id, session.client)
    return handle
609
623
 
610
624
 
625
+ # --- Dedupe ---
626
+
627
+
611
628
  async def dedupe(
612
629
  equivalence_relation: str,
613
630
  session: Session | None = None,
614
631
  input: DataFrame | UUID | TableResult | None = None,
615
- select_representative: bool = True,
616
632
  ) -> TableResult:
617
- """Dedupe a table by removing duplicates using dedupe operation.
633
+ """Dedupe a table by removing duplicates using AI.
618
634
 
619
635
  Args:
620
636
  equivalence_relation: Description of what makes items equivalent
621
637
  session: Optional session. If not provided, one will be created automatically.
622
638
  input: The input table (DataFrame, UUID, or TableResult)
623
- select_representative: If True, select a representative for each group of duplicates
624
639
 
625
640
  Returns:
626
- TableResult containing the deduped table with duplicates removed
641
+ TableResult containing the deduped table
627
642
  """
628
- if input is None or equivalence_relation is None:
629
- raise EveryrowError("input and equivalence_relation are required for dedupe")
643
+ if input is None:
644
+ raise EveryrowError("input is required for dedupe")
630
645
  if session is None:
631
646
  async with create_session() as internal_session:
632
647
  cohort_task = await dedupe_async(
633
648
  session=internal_session,
634
649
  input=input,
635
650
  equivalence_relation=equivalence_relation,
636
- select_representative=select_representative,
637
651
  )
638
652
  result = await cohort_task.await_result()
639
653
  if isinstance(result, TableResult):
640
654
  return result
641
- else:
642
- raise EveryrowError("Dedupe task did not return a table result")
655
+ raise EveryrowError("Dedupe task did not return a table result")
643
656
  cohort_task = await dedupe_async(
644
657
  session=session,
645
658
  input=input,
646
659
  equivalence_relation=equivalence_relation,
647
- select_representative=select_representative,
648
660
  )
649
661
  result = await cohort_task.await_result()
650
662
  if isinstance(result, TableResult):
651
663
  return result
652
- else:
653
- raise EveryrowError("Dedupe task did not return a table result")
664
+ raise EveryrowError("Dedupe task did not return a table result")
654
665
 
655
666
 
656
667
  async def dedupe_async(
657
668
  session: Session,
658
669
  input: DataFrame | UUID | TableResult,
659
670
  equivalence_relation: str,
660
- select_representative: bool = True,
661
671
  ) -> EveryrowTask[BaseModel]:
662
672
  """Submit a dedupe task asynchronously."""
663
- input_artifact_id = await _process_agent_map_input(input, session)
673
+ input_data = _prepare_table_input(input, DedupeOperationInputType1Item)
664
674
 
665
- query = DedupePublicParams(
675
+ body = DedupeOperation(
676
+ input_=input_data, # type: ignore
666
677
  equivalence_relation=equivalence_relation,
667
- select_representative=select_representative,
668
- )
669
- request = DedupeRequestParams(
670
- query=query,
671
- input_artifacts=[input_artifact_id],
672
- processing_mode=ProcessingMode.MAP,
673
- )
674
- body = SubmitTaskBody(
675
- payload=request,
676
678
  session_id=session.session_id,
677
679
  )
678
680
 
681
+ response = await dedupe_operations_dedupe_post.asyncio(client=session.client, body=body)
682
+ response = handle_response(response)
683
+
679
684
  cohort_task = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
680
- await cohort_task.submit(body, session.client)
685
+ cohort_task.set_submitted(response.task_id, response.session_id, session.client)
681
686
  return cohort_task
682
-
683
-
684
- async def derive(
685
- session: Session | None = None,
686
- input: DataFrame | UUID | TableResult | None = None,
687
- expressions: dict[str, str] | None = None,
688
- ) -> TableResult:
689
- """Derive new columns using pandas eval expressions.
690
-
691
- Args:
692
- session: Optional session. If not provided, one will be created automatically.
693
- input: The input table (DataFrame, UUID, or TableResult)
694
- expressions: A dictionary mapping column names to pandas expressions.
695
- Example: {"approved": "True", "score": "price * quantity"}
696
-
697
- Returns:
698
- TableResult containing the table with new derived columns
699
- """
700
- if input is None or expressions is None:
701
- raise EveryrowError("input and expressions are required for derive")
702
- if session is None:
703
- async with create_session() as internal_session:
704
- input_artifact_id = await _process_agent_map_input(input, internal_session)
705
-
706
- derive_expressions = [
707
- DeriveExpression(column_name=col_name, expression=expr)
708
- for col_name, expr in expressions.items()
709
- ]
710
-
711
- query = DeriveQueryParams(expressions=derive_expressions)
712
- request = DeriveRequest(
713
- query=query,
714
- input_artifacts=[input_artifact_id],
715
- )
716
- body = SubmitTaskBody(
717
- payload=request,
718
- session_id=internal_session.session_id,
719
- )
720
-
721
- task_id = await submit_task(body, internal_session.client)
722
- finished_task = await await_task_completion(
723
- task_id, internal_session.client
724
- )
725
-
726
- data = await read_table_result(
727
- finished_task.artifact_id, # type: ignore[arg-type]
728
- internal_session.client,
729
- )
730
- return TableResult(
731
- artifact_id=finished_task.artifact_id, # type: ignore
732
- data=data,
733
- error=finished_task.error,
734
- )
735
- input_artifact_id = await _process_agent_map_input(input, session)
736
-
737
- derive_expressions = [
738
- DeriveExpression(column_name=col_name, expression=expr)
739
- for col_name, expr in expressions.items()
740
- ]
741
-
742
- query = DeriveQueryParams(expressions=derive_expressions)
743
- request = DeriveRequest(
744
- query=query,
745
- input_artifacts=[input_artifact_id],
746
- )
747
- body = SubmitTaskBody(
748
- payload=request,
749
- session_id=session.session_id,
750
- )
751
-
752
- task_id = await submit_task(body, session.client)
753
- finished_task = await await_task_completion(task_id, session.client)
754
-
755
- data = await read_table_result(finished_task.artifact_id, session.client) # type: ignore
756
- return TableResult(
757
- artifact_id=finished_task.artifact_id, # type: ignore
758
- data=data,
759
- error=finished_task.error,
760
- )