everyrow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. everyrow/__init__.py +4 -0
  2. everyrow/api_utils.py +45 -0
  3. everyrow/citations.py +46 -0
  4. everyrow/constants.py +4 -0
  5. everyrow/generated/__init__.py +8 -0
  6. everyrow/generated/api/__init__.py +1 -0
  7. everyrow/generated/api/default/__init__.py +1 -0
  8. everyrow/generated/api/default/continue_task_endpoint_tasks_continue_post.py +208 -0
  9. everyrow/generated/api/default/copy_artifacts_artifacts_copy_post.py +166 -0
  10. everyrow/generated/api/default/copy_workflow_endpoint_workflows_copy_post.py +166 -0
  11. everyrow/generated/api/default/create_api_key_endpoint_api_keys_create_post.py +186 -0
  12. everyrow/generated/api/default/create_session_endpoint_sessions_create_post.py +198 -0
  13. everyrow/generated/api/default/create_workflow_from_artifact_workflows_from_artifact_post.py +174 -0
  14. everyrow/generated/api/default/export_to_google_sheets_export_post.py +168 -0
  15. everyrow/generated/api/default/generate_feedback_endpoint_tasks_generate_feedback_post.py +186 -0
  16. everyrow/generated/api/default/get_artifacts_artifacts_get.py +260 -0
  17. everyrow/generated/api/default/get_default_timeout_seconds_models_default_timeout_seconds_get.py +165 -0
  18. everyrow/generated/api/default/get_job_progress_for_task_jobs_progress_get.py +167 -0
  19. everyrow/generated/api/default/get_metrics_metrics_get.py +80 -0
  20. everyrow/generated/api/default/get_queues_stats_jobs_queues_get.py +123 -0
  21. everyrow/generated/api/default/get_task_status_endpoint_tasks_task_id_status_get.py +193 -0
  22. everyrow/generated/api/default/get_user_usage_usage_get.py +123 -0
  23. everyrow/generated/api/default/healthz_healthz_get.py +127 -0
  24. everyrow/generated/api/default/import_from_google_sheets_import_post.py +170 -0
  25. everyrow/generated/api/default/interrupt_chat_task_tasks_chat_interrupt_post.py +172 -0
  26. everyrow/generated/api/default/list_api_keys_endpoint_api_keys_get.py +186 -0
  27. everyrow/generated/api/default/re_execute_task_endpoint_tasks_re_execute_post.py +192 -0
  28. everyrow/generated/api/default/revoke_api_key_endpoint_api_keys_key_id_revoke_post.py +181 -0
  29. everyrow/generated/api/default/revoke_jobs_for_task_jobs_revoke_post.py +164 -0
  30. everyrow/generated/api/default/rollback_to_message_endpoint_tasks_chat_rollback_post.py +186 -0
  31. everyrow/generated/api/default/submit_chat_task_tasks_chat_post.py +164 -0
  32. everyrow/generated/api/default/submit_task_tasks_post.py +172 -0
  33. everyrow/generated/api/default/task_resource_estimation_task_resource_estimation_post.py +319 -0
  34. everyrow/generated/api/default/trigger_workflow_execution_endpoint_workflows_trigger_post.py +166 -0
  35. everyrow/generated/api/default/whoami_whoami_get.py +127 -0
  36. everyrow/generated/client.py +268 -0
  37. everyrow/generated/errors.py +16 -0
  38. everyrow/generated/models/__init__.py +281 -0
  39. everyrow/generated/models/agent_improvement_instruction.py +69 -0
  40. everyrow/generated/models/agent_query_params.py +362 -0
  41. everyrow/generated/models/agent_query_params_system_prompt_kind_type_0.py +10 -0
  42. everyrow/generated/models/agent_task_args.py +163 -0
  43. everyrow/generated/models/agent_task_args_processing_mode.py +9 -0
  44. everyrow/generated/models/allowed_suggestions.py +8 -0
  45. everyrow/generated/models/api_key_info.py +163 -0
  46. everyrow/generated/models/artifact_changed_payload.py +89 -0
  47. everyrow/generated/models/artifact_group_record.py +330 -0
  48. everyrow/generated/models/artifact_group_record_metadata_type_0.py +46 -0
  49. everyrow/generated/models/artifact_group_record_trace_mapping_type_0.py +46 -0
  50. everyrow/generated/models/artifact_status.py +14 -0
  51. everyrow/generated/models/auto_cohort_conversation_message.py +533 -0
  52. everyrow/generated/models/aux_data.py +128 -0
  53. everyrow/generated/models/aux_data_source_bank.py +59 -0
  54. everyrow/generated/models/chat_completion_message_tool_call.py +46 -0
  55. everyrow/generated/models/chat_message_metadata.py +193 -0
  56. everyrow/generated/models/concatenate_query_params.py +46 -0
  57. everyrow/generated/models/concatenate_request.py +306 -0
  58. everyrow/generated/models/continue_reason.py +9 -0
  59. everyrow/generated/models/continue_task_request.py +133 -0
  60. everyrow/generated/models/controller_improvement_round.py +79 -0
  61. everyrow/generated/models/conversation_changed_payload.py +89 -0
  62. everyrow/generated/models/copy_artifacts_request.py +70 -0
  63. everyrow/generated/models/copy_artifacts_response.py +70 -0
  64. everyrow/generated/models/copy_workflow_request.py +62 -0
  65. everyrow/generated/models/copy_workflow_response.py +70 -0
  66. everyrow/generated/models/create_api_key_request.py +95 -0
  67. everyrow/generated/models/create_api_key_response.py +96 -0
  68. everyrow/generated/models/create_group_query_params.py +61 -0
  69. everyrow/generated/models/create_group_request.py +305 -0
  70. everyrow/generated/models/create_query_params.py +61 -0
  71. everyrow/generated/models/create_request.py +305 -0
  72. everyrow/generated/models/create_session_request.py +62 -0
  73. everyrow/generated/models/create_session_response.py +63 -0
  74. everyrow/generated/models/create_workflow_from_artifact_request.py +92 -0
  75. everyrow/generated/models/create_workflow_from_artifact_response.py +70 -0
  76. everyrow/generated/models/data_frame_method.py +18 -0
  77. everyrow/generated/models/date_cutoffs.py +145 -0
  78. everyrow/generated/models/dedupe_mode.py +9 -0
  79. everyrow/generated/models/dedupe_query_params.py +174 -0
  80. everyrow/generated/models/dedupe_request_params.py +311 -0
  81. everyrow/generated/models/deep_merge_public_params.py +143 -0
  82. everyrow/generated/models/deep_merge_request.py +313 -0
  83. everyrow/generated/models/deep_rank_public_params.py +99 -0
  84. everyrow/generated/models/deep_rank_request.py +313 -0
  85. everyrow/generated/models/deep_screen_public_params.py +122 -0
  86. everyrow/generated/models/deep_screen_request.py +313 -0
  87. everyrow/generated/models/derive_expression.py +69 -0
  88. everyrow/generated/models/derive_query_params.py +75 -0
  89. everyrow/generated/models/derive_request.py +307 -0
  90. everyrow/generated/models/document_query_tool.py +12 -0
  91. everyrow/generated/models/drop_columns_query_params.py +61 -0
  92. everyrow/generated/models/drop_columns_request.py +305 -0
  93. everyrow/generated/models/embedding_models.py +9 -0
  94. everyrow/generated/models/event_type.py +14 -0
  95. everyrow/generated/models/execution_metadata.py +146 -0
  96. everyrow/generated/models/export_request.py +75 -0
  97. everyrow/generated/models/export_request_token_data.py +46 -0
  98. everyrow/generated/models/export_to_google_sheets_export_post_response_export_to_google_sheets_export_post.py +46 -0
  99. everyrow/generated/models/filter_query_params.py +91 -0
  100. everyrow/generated/models/filter_request.py +305 -0
  101. everyrow/generated/models/flatten_query_params.py +46 -0
  102. everyrow/generated/models/flatten_request.py +305 -0
  103. everyrow/generated/models/generate_feedback_request.py +62 -0
  104. everyrow/generated/models/group_by_query_params.py +62 -0
  105. everyrow/generated/models/group_by_request.py +305 -0
  106. everyrow/generated/models/healthz_healthz_get_response_healthz_healthz_get.py +46 -0
  107. everyrow/generated/models/http_validation_error.py +79 -0
  108. everyrow/generated/models/image_chat_content_part.py +80 -0
  109. everyrow/generated/models/image_chat_content_part_image_url.py +46 -0
  110. everyrow/generated/models/import_from_google_sheets_import_post_response_import_from_google_sheets_import_post.py +46 -0
  111. everyrow/generated/models/import_request.py +83 -0
  112. everyrow/generated/models/import_request_token_data.py +46 -0
  113. everyrow/generated/models/insufficient_balance_error.py +81 -0
  114. everyrow/generated/models/join_query_params.py +73 -0
  115. everyrow/generated/models/join_request.py +305 -0
  116. everyrow/generated/models/llm_enum.py +54 -0
  117. everyrow/generated/models/map_agent_request_params.py +313 -0
  118. everyrow/generated/models/map_multi_agent_request_params.py +313 -0
  119. everyrow/generated/models/message_created_payload.py +98 -0
  120. everyrow/generated/models/multi_agent_effort_level.py +10 -0
  121. everyrow/generated/models/multi_agent_query_params.py +264 -0
  122. everyrow/generated/models/multi_modal_chat_message.py +160 -0
  123. everyrow/generated/models/multi_modal_chat_message_role.py +10 -0
  124. everyrow/generated/models/preview_metadata.py +144 -0
  125. everyrow/generated/models/processing_mode.py +10 -0
  126. everyrow/generated/models/progress_status.py +83 -0
  127. everyrow/generated/models/queue_stats.py +77 -0
  128. everyrow/generated/models/reduce_agent_request_params.py +305 -0
  129. everyrow/generated/models/reduce_multi_agent_request_params.py +305 -0
  130. everyrow/generated/models/resource_estimation_response.py +85 -0
  131. everyrow/generated/models/response_schema_type.py +9 -0
  132. everyrow/generated/models/revoke_api_key_response.py +61 -0
  133. everyrow/generated/models/rollback_to_message_request.py +62 -0
  134. everyrow/generated/models/rollback_to_message_response.py +77 -0
  135. everyrow/generated/models/session_changed_payload.py +69 -0
  136. everyrow/generated/models/simple_chat_message.py +121 -0
  137. everyrow/generated/models/simple_chat_message_role.py +10 -0
  138. everyrow/generated/models/simple_chat_message_with_tool_calls.py +156 -0
  139. everyrow/generated/models/source_database_entry.py +92 -0
  140. everyrow/generated/models/standalone_artifact_record.py +278 -0
  141. everyrow/generated/models/standalone_artifact_record_metadata_type_0.py +46 -0
  142. everyrow/generated/models/standalone_artifact_record_trace_mapping_type_0.py +46 -0
  143. everyrow/generated/models/status_count.py +71 -0
  144. everyrow/generated/models/status_count_status.py +13 -0
  145. everyrow/generated/models/submit_chat_task_body.py +497 -0
  146. everyrow/generated/models/submit_chat_task_body_selected_task_type_type_0.py +11 -0
  147. everyrow/generated/models/submit_task_body.py +745 -0
  148. everyrow/generated/models/task_changed_payload.py +105 -0
  149. everyrow/generated/models/task_effort.py +10 -0
  150. everyrow/generated/models/task_id_request.py +62 -0
  151. everyrow/generated/models/task_insert.py +725 -0
  152. everyrow/generated/models/task_insert_query_params.py +46 -0
  153. everyrow/generated/models/task_metadata.py +323 -0
  154. everyrow/generated/models/task_metadata_cols_to_rename_type_0.py +46 -0
  155. everyrow/generated/models/task_response.py +62 -0
  156. everyrow/generated/models/task_status.py +12 -0
  157. everyrow/generated/models/task_status_response.py +112 -0
  158. everyrow/generated/models/task_type.py +31 -0
  159. everyrow/generated/models/text_chat_content_part.py +74 -0
  160. everyrow/generated/models/tool_response_message.py +127 -0
  161. everyrow/generated/models/toolkit_constants.py +80 -0
  162. everyrow/generated/models/trace_changed_payload.py +94 -0
  163. everyrow/generated/models/trace_info.py +78 -0
  164. everyrow/generated/models/trigger_workflow_execution_request.py +112 -0
  165. everyrow/generated/models/trigger_workflow_execution_request_task_params.py +65 -0
  166. everyrow/generated/models/trigger_workflow_execution_request_task_params_additional_property.py +46 -0
  167. everyrow/generated/models/trigger_workflow_execution_response.py +69 -0
  168. everyrow/generated/models/upload_csv_payload.py +310 -0
  169. everyrow/generated/models/upload_csv_query_params.py +114 -0
  170. everyrow/generated/models/usage_response.py +77 -0
  171. everyrow/generated/models/validation_error.py +90 -0
  172. everyrow/generated/models/whoami_whoami_get_response_whoami_whoami_get.py +46 -0
  173. everyrow/generated/models/workflow_leaf_node_input.py +70 -0
  174. everyrow/generated/py.typed +1 -0
  175. everyrow/generated/types.py +54 -0
  176. everyrow/ops.py +672 -0
  177. everyrow/result.py +25 -0
  178. everyrow/session.py +53 -0
  179. everyrow/task.py +143 -0
  180. everyrow-0.1.0.dist-info/METADATA +238 -0
  181. everyrow-0.1.0.dist-info/RECORD +183 -0
  182. everyrow-0.1.0.dist-info/WHEEL +4 -0
  183. everyrow-0.1.0.dist-info/licenses/LICENSE.txt +21 -0
everyrow/ops.py ADDED
@@ -0,0 +1,672 @@
1
+ from typing import Any, Literal, TypeVar, overload
2
+ from uuid import UUID
3
+
4
+ from pandas import DataFrame
5
+ from pydantic import BaseModel
6
+
7
+ from everyrow.constants import EveryrowError
8
+ from everyrow.generated.models import (
9
+ AgentQueryParams,
10
+ CreateGroupQueryParams,
11
+ CreateGroupRequest,
12
+ CreateQueryParams,
13
+ CreateRequest,
14
+ DedupeMode,
15
+ DedupeQueryParams,
16
+ DedupeRequestParams,
17
+ DeepMergePublicParams,
18
+ DeepMergeRequest,
19
+ DeepRankPublicParams,
20
+ DeepRankRequest,
21
+ DeepScreenPublicParams,
22
+ DeepScreenRequest,
23
+ DeriveExpression,
24
+ DeriveQueryParams,
25
+ DeriveRequest,
26
+ EmbeddingModels,
27
+ MapAgentRequestParams,
28
+ ProcessingMode,
29
+ ReduceAgentRequestParams,
30
+ ResponseSchemaType,
31
+ )
32
+ from everyrow.generated.models.submit_task_body import SubmitTaskBody
33
+ from everyrow.generated.types import UNSET
34
+ from everyrow.result import Result, ScalarResult, TableResult
35
+ from everyrow.session import Session
36
+ from everyrow.task import (
37
+ LLM,
38
+ EffortLevel,
39
+ EveryrowTask,
40
+ await_task_completion,
41
+ read_table_result,
42
+ submit_task,
43
+ )
44
+
45
+ T = TypeVar("T", bound=BaseModel)
46
+
47
+
48
class DefaultAgentResponse(BaseModel):
    """Fallback response schema used when a caller does not supply a response_model."""

    answer: str  # free-text answer produced by the agent
50
+
51
+
52
+ @overload
53
+ async def single_agent[T: BaseModel](
54
+ task: str,
55
+ session: Session,
56
+ input: BaseModel | UUID | Result | None = None,
57
+ effort_level: EffortLevel = EffortLevel.LOW,
58
+ llm: LLM | None = None,
59
+ response_model: type[T] = DefaultAgentResponse,
60
+ return_table: Literal[False] = False,
61
+ ) -> ScalarResult[T]: ...
62
+
63
+
64
+ @overload
65
+ async def single_agent(
66
+ task: str,
67
+ session: Session,
68
+ input: BaseModel | UUID | Result | None = None,
69
+ effort_level: EffortLevel = EffortLevel.LOW,
70
+ llm: LLM | None = None,
71
+ response_model: type[BaseModel] = DefaultAgentResponse,
72
+ return_table: Literal[True] = True,
73
+ ) -> TableResult: ...
74
+
75
+
76
+ async def single_agent[T: BaseModel](
77
+ task: str,
78
+ session: Session,
79
+ input: BaseModel | DataFrame | UUID | Result | None = None,
80
+ effort_level: EffortLevel = EffortLevel.LOW,
81
+ llm: LLM | None = None,
82
+ response_model: type[T] = DefaultAgentResponse,
83
+ return_table: bool = False,
84
+ ) -> ScalarResult[T] | TableResult:
85
+ cohort_task = await single_agent_async(
86
+ task=task,
87
+ session=session,
88
+ input=input,
89
+ effort_level=effort_level,
90
+ llm=llm,
91
+ response_model=response_model,
92
+ return_table=return_table,
93
+ )
94
+ return await cohort_task.await_result(session.client)
95
+
96
+
97
async def single_agent_async[T: BaseModel](
    task: str,
    session: Session,
    input: BaseModel | DataFrame | UUID | Result | None = None,
    effort_level: EffortLevel = EffortLevel.LOW,
    llm: LLM | None = None,
    response_model: type[T] = DefaultAgentResponse,
    return_table: bool = False,
) -> EveryrowTask[T]:
    """Submit a single-agent (reduce) task and return the pending task handle.

    The caller is responsible for awaiting the result via
    ``EveryrowTask.await_result``; see ``single_agent`` for the blocking wrapper.
    """
    # An input of None means the agent runs with no input artifacts at all.
    if input is not None:
        input_artifact_ids = [await _process_single_agent_input(input, session)]
    else:
        input_artifact_ids = []

    query = AgentQueryParams(
        task=task,
        # UNSET lets the server pick its default model when no llm is given.
        llm=llm or UNSET,
        effort_level=effort_level,
        # Plain JSON schema here (contrast with agent_map_async, which uses
        # the CUSTOM schema format).
        response_schema=response_model.model_json_schema(),
        response_schema_type=ResponseSchemaType.JSON,
        is_expand=return_table,
        include_provenance_and_notes=False,
    )
    request = ReduceAgentRequestParams(
        query=query,
        input_artifacts=input_artifact_ids,
    )
    body = SubmitTaskBody(
        payload=request,
        session_id=session.session_id,
    )

    cohort_task = EveryrowTask(
        response_model=response_model, is_map=False, is_expand=return_table
    )
    await cohort_task.submit(body, session.client)
    return cohort_task
134
+
135
+
136
async def agent_map(
    task: str,
    session: Session,
    input: DataFrame | UUID | TableResult,
    effort_level: EffortLevel = EffortLevel.LOW,
    llm: LLM | None = None,
    response_model: type[BaseModel] = DefaultAgentResponse,
    return_table_per_row: bool = False,
) -> TableResult:
    """Run an agent over every row of a table and wait for the combined result.

    Blocking wrapper around ``agent_map_async``; raises EveryrowError when the
    completed task does not yield a table.
    """
    submitted = await agent_map_async(
        task=task,
        session=session,
        input=input,
        effort_level=effort_level,
        llm=llm,
        response_model=response_model,
        return_table_per_row=return_table_per_row,
    )
    outcome = await submitted.await_result(session.client)
    if not isinstance(outcome, TableResult):
        raise EveryrowError("Agent map task did not return a table result")
    return outcome
153
+
154
+
155
+ def _convert_pydantic_to_custom_schema(model: type[BaseModel]) -> dict[str, Any]:
156
+ """Convert a Pydantic model to the custom response schema format expected by rank.
157
+
158
+ The custom format uses _model_name instead of type: object, and uses optional: bool
159
+ instead of required arrays.
160
+
161
+ Example:
162
+ class ScreeningResult(BaseModel):
163
+ screening_result: str = Field(..., description="...")
164
+
165
+ Converts to:
166
+ {
167
+ "_model_name": "ScreeningResult",
168
+ "screening_result": {
169
+ "type": "str",
170
+ "optional": False,
171
+ "description": "..."
172
+ }
173
+ }
174
+ """
175
+ json_schema = model.model_json_schema()
176
+
177
+ # Extract model name from title or use the class name
178
+ model_name = json_schema.get("title", model.__name__)
179
+
180
+ # Build the custom schema format
181
+ custom_schema: dict[str, Any] = {"_model_name": model_name}
182
+
183
+ # Convert properties
184
+ properties = json_schema.get("properties", {})
185
+ required = set(json_schema.get("required", []))
186
+
187
+ # Map JSON schema types to custom format types
188
+ type_mapping = {
189
+ "string": "str",
190
+ "integer": "int",
191
+ "number": "float",
192
+ "boolean": "bool",
193
+ }
194
+
195
+ for field_name, field_schema in properties.items():
196
+ # Copy the field schema
197
+ custom_field: dict[str, Any] = {}
198
+
199
+ # Map type from JSON schema format to custom format
200
+ field_type = field_schema.get("type")
201
+ if field_type:
202
+ # Convert JSON schema type to custom format type
203
+ custom_field["type"] = type_mapping.get(field_type, field_type)
204
+
205
+ # Add description if present
206
+ if "description" in field_schema:
207
+ custom_field["description"] = field_schema["description"]
208
+
209
+ # Set optional flag (opposite of required)
210
+ custom_field["optional"] = field_name not in required
211
+
212
+ custom_schema[field_name] = custom_field
213
+
214
+ return custom_schema
215
+
216
+
217
async def agent_map_async(
    task: str,
    session: Session,
    input: DataFrame | UUID | TableResult,
    effort_level: EffortLevel = EffortLevel.LOW,
    llm: LLM | None = None,
    response_model: type[BaseModel] = DefaultAgentResponse,
    return_table_per_row: bool = False,
) -> EveryrowTask[BaseModel]:
    """Submit a per-row agent (map) task and return the pending task handle.

    See ``agent_map`` for the blocking wrapper that awaits the TableResult.
    """
    input_artifact_ids = [await _process_agent_map_input(input, session)]
    query = AgentQueryParams(
        task=task,
        effort_level=effort_level,
        # UNSET lets the server pick its default model when no llm is given.
        llm=llm or UNSET,
        # Map tasks use the CUSTOM schema format (contrast with
        # single_agent_async, which sends a plain JSON schema).
        response_schema=_convert_pydantic_to_custom_schema(response_model),
        response_schema_type=ResponseSchemaType.CUSTOM,
        is_expand=return_table_per_row,
        include_provenance_and_notes=False,
    )
    request = MapAgentRequestParams(
        query=query,
        input_artifacts=input_artifact_ids,
        context_artifacts=[],
        # Output rows are joined back onto the input table.
        join_with_input=True,
    )
    body = SubmitTaskBody(
        payload=request,
        session_id=session.session_id,
    )

    cohort_task = EveryrowTask(
        response_model=response_model, is_map=True, is_expand=return_table_per_row
    )
    await cohort_task.submit(body, session.client)
    return cohort_task
252
+
253
+
254
async def _process_agent_map_input(
    input: DataFrame | UUID | TableResult,
    session: Session,
) -> UUID:
    """Resolve a table-like input to an artifact UUID, uploading a DataFrame if needed."""
    if isinstance(input, TableResult):
        return input.artifact_id
    if isinstance(input, DataFrame):
        return await create_table_artifact(input, session)
    # Already a UUID.
    return input
264
+
265
+
266
async def _process_single_agent_input(
    input: BaseModel | DataFrame | UUID | Result,
    session: Session,
) -> UUID:
    """Resolve any accepted single-agent input to an artifact UUID, uploading if needed."""
    if isinstance(input, Result):
        return input.artifact_id
    if isinstance(input, DataFrame):
        return await create_table_artifact(input, session)
    if isinstance(input, BaseModel):
        return await create_scalar_artifact(input, session)
    # Already a UUID.
    return input
278
+
279
+
280
async def create_scalar_artifact(input: BaseModel, session: Session) -> UUID:
    """Upload a single Pydantic object as a server-side artifact and return its UUID.

    Submits a Create task and blocks until it completes.
    """
    payload = CreateRequest(query=CreateQueryParams(data_to_create=input.model_dump()))
    body = SubmitTaskBody(
        payload=payload,
        session_id=session.session_id,
    )
    task_id = await submit_task(body, session.client)
    finished_create_artifact_task = await await_task_completion(task_id, session.client)
    return finished_create_artifact_task.artifact_id  # type: ignore (we check artifact_id in await_task_completion)
289
+
290
+
291
async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
    """Upload a DataFrame as a server-side group artifact and return its UUID.

    Rows are serialized as a list of records; submits a CreateGroup task and
    blocks until it completes.
    """
    payload = CreateGroupRequest(
        query=CreateGroupQueryParams(data_to_create=input.to_dict(orient="records"))
    )
    body = SubmitTaskBody(
        payload=payload,
        session_id=session.session_id,
    )
    task_id = await submit_task(body, session.client)
    finished_create_artifact_task = await await_task_completion(task_id, session.client)
    return finished_create_artifact_task.artifact_id  # type: ignore (we check artifact_id in await_task_completion)
302
+
303
+
304
async def merge(
    task: str,
    session: Session,
    left_table: DataFrame | UUID | TableResult,
    right_table: DataFrame | UUID | TableResult,
    merge_on_left: str | None = None,
    merge_on_right: str | None = None,
    merge_model: LLM | None = None,
    preview: bool = False,
) -> TableResult:
    """Merge two tables with the merge operation and wait for the result.

    Args:
        task: Task description guiding the merge.
        session: The session to use.
        left_table: Left table (DataFrame, UUID, or TableResult).
        right_table: Right table (DataFrame, UUID, or TableResult).
        merge_on_left: Optional left-table column to merge on.
        merge_on_right: Optional right-table column to merge on.
        merge_model: Optional LLM to drive the merge.
        preview: If True, process only the first few inputs.

    Returns:
        TableResult containing the merged table.
    """
    submitted = await merge_async(
        task=task,
        session=session,
        left_table=left_table,
        right_table=right_table,
        merge_on_left=merge_on_left,
        merge_on_right=merge_on_right,
        merge_model=merge_model,
        preview=preview,
    )
    outcome = await submitted.await_result(session.client)
    if not isinstance(outcome, TableResult):
        raise EveryrowError("Merge task did not return a table result")
    return outcome
344
+
345
+
346
async def merge_async(
    task: str,
    session: Session,
    left_table: DataFrame | UUID | TableResult,
    right_table: DataFrame | UUID | TableResult,
    merge_on_left: str | None = None,
    merge_on_right: str | None = None,
    merge_model: LLM | None = None,
    preview: bool = False,
) -> EveryrowTask[BaseModel]:
    """Submit a merge task asynchronously.

    See ``merge`` for the blocking wrapper that awaits the TableResult.
    """
    left_artifact_id = await _process_agent_map_input(left_table, session)
    right_artifact_id = await _process_agent_map_input(right_table, session)

    query = DeepMergePublicParams(
        task=task,
        # UNSET lets the server apply its defaults for omitted options.
        merge_on_left=merge_on_left or UNSET,
        merge_on_right=merge_on_right or UNSET,
        merge_model=merge_model or UNSET,
        preview=preview,
    )
    # The left table is the primary input; the right table is passed as
    # context for the merge operation.
    request = DeepMergeRequest(
        query=query,
        input_artifacts=[left_artifact_id],
        context_artifacts=[right_artifact_id],
    )
    body = SubmitTaskBody(
        payload=request,
        session_id=session.session_id,
    )

    cohort_task = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
    await cohort_task.submit(body, session.client)
    return cohort_task
380
+
381
+
382
+ async def rank[T: BaseModel](
383
+ task: str,
384
+ session: Session,
385
+ input: DataFrame | UUID | TableResult,
386
+ field_name: str,
387
+ field_type: Literal["float", "int", "str", "bool"] = "float",
388
+ response_model: type[T] | None = None,
389
+ ascending_order: bool = True,
390
+ preview: bool = False,
391
+ ) -> TableResult:
392
+ """Rank rows in a table using rank operation.
393
+
394
+ Args:
395
+ task: The task description for ranking
396
+ session: The session to use
397
+ input: The input table (DataFrame, UUID, or TableResult)
398
+ field_name: The name of the field to extract and sort by
399
+ field_type: The type of the field (default: "float", ignored if response_model is provided)
400
+ response_model: Optional Pydantic model for the response schema
401
+ ascending_order: If True, sort in ascending order
402
+ preview: If True, process only the first few inputs
403
+
404
+ Returns:
405
+ TableResult containing the ranked table
406
+ """
407
+ cohort_task = await rank_async(
408
+ task=task,
409
+ session=session,
410
+ input=input,
411
+ field_name=field_name,
412
+ field_type=field_type,
413
+ response_model=response_model,
414
+ ascending_order=ascending_order,
415
+ preview=preview,
416
+ )
417
+ result = await cohort_task.await_result(session.client)
418
+ if isinstance(result, TableResult):
419
+ return result
420
+ else:
421
+ raise EveryrowError("Rank task did not return a table result")
422
+
423
+
424
async def rank_async[T: BaseModel](
    task: str,
    session: Session,
    input: DataFrame | UUID | TableResult,
    field_name: str,
    field_type: Literal["float", "int", "str", "bool"] = "float",
    response_model: type[T] | None = None,
    ascending_order: bool = True,
    preview: bool = False,
) -> EveryrowTask[T]:
    """Submit a rank task asynchronously.

    See ``rank`` for the blocking wrapper. When ``response_model`` is given it
    must contain ``field_name``; otherwise a minimal single-field schema of
    ``field_type`` is synthesized.
    """
    input_artifact_id = await _process_agent_map_input(input, session)

    if response_model is not None:
        response_schema = _convert_pydantic_to_custom_schema(response_model)
        # The sort field must exist in the caller-provided schema.
        if field_name not in response_schema:
            raise ValueError(
                f"Field {field_name} not in response model {response_model.__name__}"
            )
    else:
        # Minimal custom-format schema with just the sort field.
        response_schema = {
            "_model_name": "RankResponse",
            field_name: {
                "type": field_type,
                "optional": False,
            },
        }

    query = DeepRankPublicParams(
        task=task,
        response_schema=response_schema,
        field_to_sort_by=field_name,
        ascending_order=ascending_order,
        preview=preview,
    )
    request = DeepRankRequest(
        query=query,
        input_artifacts=[input_artifact_id],
        context_artifacts=[],
    )
    body = SubmitTaskBody(
        payload=request,
        session_id=session.session_id,
    )

    cohort_task: EveryrowTask[T] = EveryrowTask(
        response_model=response_model or BaseModel,  # type: ignore[arg-type]
        is_map=True,
        is_expand=False,
    )
    await cohort_task.submit(body, session.client)
    return cohort_task
476
+
477
+
478
+ async def screen[T: BaseModel](
479
+ task: str,
480
+ session: Session,
481
+ input: DataFrame | UUID | TableResult,
482
+ response_model: type[T] | None = None,
483
+ batch_size: int | None = None,
484
+ preview: bool = False,
485
+ ) -> TableResult:
486
+ """Screen rows in a table using screen operation.
487
+
488
+ Args:
489
+ task: The task description for screening
490
+ session: The session to use
491
+ input: The input table (DataFrame, UUID, or TableResult)
492
+ response_model: Optional Pydantic model for the response schema
493
+ batch_size: Optional batch size for processing (default: 10)
494
+ preview: If True, process only the first few inputs
495
+
496
+ Returns:
497
+ TableResult containing the screened table
498
+ """
499
+ cohort_task = await screen_async(
500
+ task=task,
501
+ session=session,
502
+ input=input,
503
+ response_model=response_model,
504
+ batch_size=batch_size,
505
+ preview=preview,
506
+ )
507
+ result = await cohort_task.await_result(session.client)
508
+ if isinstance(result, TableResult):
509
+ return result
510
+ else:
511
+ raise EveryrowError("Screen task did not return a table result")
512
+
513
+
514
async def screen_async[T: BaseModel](
    task: str,
    session: Session,
    input: DataFrame | UUID | TableResult,
    response_model: type[T] | None = None,
    batch_size: int | None = None,
    preview: bool = False,
) -> EveryrowTask[T]:
    """Submit a screen task asynchronously.

    See ``screen`` for the blocking wrapper that awaits the TableResult.
    """
    input_artifact_id = await _process_agent_map_input(input, session)

    # Without a response model both schema fields are left UNSET so the
    # server applies its own default schema.
    if response_model is not None:
        response_schema = response_model.model_json_schema()
        response_schema_type = ResponseSchemaType.JSON
    else:
        response_schema = UNSET
        response_schema_type = UNSET

    query = DeepScreenPublicParams(
        task=task,
        batch_size=batch_size or UNSET,
        response_schema=response_schema,
        response_schema_type=response_schema_type,
        preview=preview,
    )
    request = DeepScreenRequest(
        query=query,
        input_artifacts=[input_artifact_id],
    )
    body = SubmitTaskBody(
        payload=request,
        session_id=session.session_id,
    )

    cohort_task: EveryrowTask[T] = EveryrowTask(
        response_model=response_model or DefaultAgentResponse,  # type: ignore[arg-type]
        is_map=True,
        is_expand=False,
    )
    await cohort_task.submit(body, session.client)
    return cohort_task
555
+
556
+
557
async def dedupe(
    session: Session,
    input: DataFrame | UUID | TableResult,
    equivalence_relation: str,
    llm: LLM | None = None,
    chunk_size: int | None = None,
    mode: DedupeMode | None = None,
    embedding_model: EmbeddingModels | None = None,
) -> TableResult:
    """Dedupe a table by removing duplicates using dedupe operation.

    Args:
        session: The session to use
        input: The input table (DataFrame, UUID, or TableResult)
        equivalence_relation: Description of what makes items equivalent
        llm: Optional LLM model to use for deduplication
        chunk_size: Optional maximum number of items to process in a single LLM call (default: 40)
        mode: Optional dedupe mode (AGENTIC or DIRECT)
        embedding_model: Optional embedding model to use

    Returns:
        TableResult containing the deduped table with duplicates removed
    """
    cohort_task = await dedupe_async(
        session=session,
        input=input,
        equivalence_relation=equivalence_relation,
        llm=llm,
        chunk_size=chunk_size,
        mode=mode,
        embedding_model=embedding_model,
    )
    result = await cohort_task.await_result(session.client)
    if isinstance(result, TableResult):
        return result
    else:
        raise EveryrowError("Dedupe task did not return a table result")
595
+
596
+
597
async def dedupe_async(
    session: Session,
    input: DataFrame | UUID | TableResult,
    equivalence_relation: str,
    llm: LLM | None = None,
    chunk_size: int | None = None,
    mode: DedupeMode | None = None,
    embedding_model: EmbeddingModels | None = None,
) -> EveryrowTask[BaseModel]:
    """Submit a dedupe task asynchronously.

    See ``dedupe`` for the blocking wrapper that awaits the TableResult.
    """
    input_artifact_id = await _process_agent_map_input(input, session)

    query = DedupeQueryParams(
        equivalence_relation=equivalence_relation,
        # UNSET lets the server apply its defaults for omitted options.
        llm=llm or UNSET,
        chunk_size=chunk_size or UNSET,
        mode=mode or UNSET,
        embedding_model=embedding_model or UNSET,
    )
    request = DedupeRequestParams(
        query=query,
        input_artifacts=[input_artifact_id],
        processing_mode=ProcessingMode.MAP,
    )
    body = SubmitTaskBody(
        payload=request,
        session_id=session.session_id,
    )

    cohort_task = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
    await cohort_task.submit(body, session.client)
    return cohort_task
629
+
630
+
631
async def derive(
    session: Session,
    input: DataFrame | UUID | TableResult,
    expressions: dict[str, str],
) -> TableResult:
    """Derive new columns using pandas eval expressions.

    Unlike the agent operations above, this submits the task and waits for
    completion inline; there is no separate ``derive_async`` variant.

    Args:
        session: The session to use
        input: The input table (DataFrame, UUID, or TableResult)
        expressions: A dictionary mapping column names to pandas expressions.
            Example: {"approved": "True", "score": "price * quantity"}

    Returns:
        TableResult containing the table with new derived columns
    """
    input_artifact_id = await _process_agent_map_input(input, session)

    # One DeriveExpression per requested output column.
    derive_expressions = [
        DeriveExpression(column_name=col_name, expression=expr)
        for col_name, expr in expressions.items()
    ]

    query = DeriveQueryParams(expressions=derive_expressions)
    request = DeriveRequest(
        query=query,
        input_artifacts=[input_artifact_id],
    )
    body = SubmitTaskBody(
        payload=request,
        session_id=session.session_id,
    )

    task_id = await submit_task(body, session.client)
    finished_task = await await_task_completion(task_id, session.client)

    # await_task_completion verifies artifact_id is set, hence the ignores.
    data = await read_table_result(finished_task.artifact_id, session.client)  # type: ignore
    return TableResult(
        artifact_id=finished_task.artifact_id,  # type: ignore
        data=data,
        error=finished_task.error,
    )
everyrow/result.py ADDED
@@ -0,0 +1,25 @@
1
+ from typing import TypeVar
2
+ from uuid import UUID
3
+
4
+ import attrs
5
+ from pandas import DataFrame
6
+ from pydantic import BaseModel
7
+
8
+ T = TypeVar("T", bound=str | BaseModel)
9
+
10
+
11
@attrs.define
class ScalarResult[T: str | BaseModel]:
    """Single-value task result backed by a server-side artifact."""

    artifact_id: UUID  # id of the server-side artifact holding this result
    data: T  # the result payload (raw string or parsed model instance)
    error: str | None  # error message if the task failed, else None
16
+
17
+
18
@attrs.define
class TableResult:
    """Tabular task result backed by a server-side artifact."""

    artifact_id: UUID  # id of the server-side artifact holding this result
    data: DataFrame  # the result rows as a pandas DataFrame
    error: str | None  # error message if the task failed, else None
23
+
24
+
25
# Any task result: a single value or a table.
Result = ScalarResult | TableResult