PyPI - everyrow - Versions diffs - 0.1.10__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

everyrow 0.1.10__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (207)

everyrow/ops.py CHANGED Viewed

@@ -5,43 +5,49 @@ from uuid import UUID
 from pandas import DataFrame
 from pydantic import BaseModel
+from everyrow.api_utils import handle_response
 from everyrow.constants import EveryrowError
+from everyrow.generated.api.artifacts import create_artifact_artifacts_post
+from everyrow.generated.api.operations import (
+    agent_map_operations_agent_map_post,
+    dedupe_operations_dedupe_post,
+    merge_operations_merge_post,
+    rank_operations_rank_post,
+    screen_operations_screen_post,
+    single_agent_operations_single_agent_post,
+)
 from everyrow.generated.models import (
-    AgentQueryParams,
-    CreateGroupQueryParams,
-    CreateGroupRequest,
-    CreateQueryParams,
-    CreateRequest,
-    DedupePublicParams,
-    DedupeRequestParams,
-    DeepMergePublicParams,
-    DeepMergeRequest,
-    DeepRankPublicParams,
-    DeepRankRequest,
-    DeepScreenPublicParams,
-    DeepScreenRequest,
-    DeriveExpression,
-    DeriveQueryParams,
-    DeriveRequest,
-    MapAgentRequestParams,
-    ProcessingMode,
-    ReduceAgentRequestParams,
-    ResponseSchemaType,
+    AgentMapOperation,
+    AgentMapOperationInputType1Item,
+    AgentMapOperationResponseSchemaType0,
+    CreateArtifactRequest,
+    CreateArtifactRequestDataType0Item,
+    CreateArtifactRequestDataType1,
+    DedupeOperation,
+    DedupeOperationInputType1Item,
+    LLMEnumPublic,
+    MergeOperation,
+    MergeOperationLeftInputType1Item,
+    MergeOperationRightInputType1Item,
+    PublicEffortLevel,
+    RankOperation,
+    RankOperationInputType1Item,
+    RankOperationResponseSchemaType0,
+    ScreenOperation,
+    ScreenOperationInputType1Item,
+    ScreenOperationResponseSchemaType0,
+    SingleAgentOperation,
+    SingleAgentOperationInputType1Item,
+    SingleAgentOperationInputType2,
+    SingleAgentOperationResponseSchemaType0,
 )
-from everyrow.generated.models.submit_task_body import SubmitTaskBody
 from everyrow.generated.types import UNSET
 from everyrow.result import Result, ScalarResult, TableResult
 from everyrow.session import Session, create_session
-from everyrow.task import (
-    LLM,
-    EffortLevel,
-    EveryrowTask,
-    await_task_completion,
-    read_table_result,
-    submit_task,
-)
+from everyrow.task import LLM, EffortLevel, EveryrowTask
 T = TypeVar("T", bound=BaseModel)
+InputData = UUID | list[dict[str, Any]] | dict[str, Any]
 class DefaultAgentResponse(BaseModel):
@@ -52,13 +58,88 @@ class DefaultScreenResult(BaseModel):
     passes: bool
+def _df_to_records(df: DataFrame) -> list[dict[str, Any]]:
+    """Convert a DataFrame to a list of records, handling NaN/NaT."""
+    json_str = df.to_json(orient="records")
+    assert json_str is not None
+    return json.loads(json_str)
+def _prepare_table_input[T](
+    input: DataFrame | UUID | TableResult | None,
+    item_class: type[T],
+) -> UUID | list[T]:
+    """Convert table input to UUID or list of generated model items."""
+    if input is None:
+        return []
+    if isinstance(input, UUID):
+        return input
+    if isinstance(input, TableResult):
+        return input.artifact_id
+    if isinstance(input, DataFrame):
+        records = _df_to_records(input)
+        return [item_class.from_dict(r) for r in records]  # type: ignore[attr-defined]
+    raise TypeError(f"Unsupported input type: {type(input)}")
+def _prepare_single_input[TItem, TObj](
+    input: BaseModel | DataFrame | UUID | Result | None,
+    item_class: type[TItem],
+    object_class: type[TObj],
+) -> UUID | list[TItem] | TObj:
+    """Convert single-agent input to the appropriate generated model type."""
+    if input is None:
+        return object_class.from_dict({})  # type: ignore[attr-defined]
+    if isinstance(input, UUID):
+        return input
+    if isinstance(input, Result):
+        return input.artifact_id
+    if isinstance(input, DataFrame):
+        records = _df_to_records(input)
+        return [item_class.from_dict(r) for r in records]  # type: ignore[attr-defined]
+    if isinstance(input, BaseModel):
+        return object_class.from_dict(input.model_dump())  # type: ignore[attr-defined]
+    raise TypeError(f"Unsupported input type: {type(input)}")
+# --- Artifact creation ---
+async def create_scalar_artifact(input: BaseModel, session: Session) -> UUID:
+    """Create a scalar artifact by uploading a single record."""
+    body = CreateArtifactRequest(
+        data=CreateArtifactRequestDataType1.from_dict(input.model_dump()),
+        session_id=session.session_id,
+    )
+    response = await create_artifact_artifacts_post.asyncio(client=session.client, body=body)
+    response = handle_response(response)
+    return response.artifact_id
+async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
+    """Create a table artifact by uploading a list of records."""
+    records = _df_to_records(input)
+    body = CreateArtifactRequest(
+        data=[CreateArtifactRequestDataType0Item.from_dict(r) for r in records],
+        session_id=session.session_id,
+    )
+    response = await create_artifact_artifacts_post.asyncio(client=session.client, body=body)
+    response = handle_response(response)
+    return response.artifact_id
+# --- Single Agent ---
 @overload
 async def single_agent[T: BaseModel](
     task: str,
     session: Session | None = None,
     input: BaseModel | UUID | Result | None = None,
-    effort_level: EffortLevel = EffortLevel.LOW,
+    effort_level: EffortLevel | None = EffortLevel.LOW,
     llm: LLM | None = None,
+    iteration_budget: int | None = None,
+    include_research: bool | None = None,
     response_model: type[T] = DefaultAgentResponse,
     return_table: Literal[False] = False,
 ) -> ScalarResult[T]: ...
@@ -69,8 +150,10 @@ async def single_agent(
     task: str,
     session: Session | None = None,
     input: BaseModel | UUID | Result | None = None,
-    effort_level: EffortLevel = EffortLevel.LOW,
+    effort_level: EffortLevel | None = EffortLevel.LOW,
     llm: LLM | None = None,
+    iteration_budget: int | None = None,
+    include_research: bool | None = None,
     response_model: type[BaseModel] = DefaultAgentResponse,
     return_table: Literal[True] = True,
 ) -> TableResult: ...
@@ -80,11 +163,30 @@ async def single_agent[T: BaseModel](
     task: str,
     session: Session | None = None,
     input: BaseModel | DataFrame | UUID | Result | None = None,
-    effort_level: EffortLevel = EffortLevel.LOW,
+    effort_level: EffortLevel | None = EffortLevel.LOW,
     llm: LLM | None = None,
+    iteration_budget: int | None = None,
+    include_research: bool | None = None,
     response_model: type[T] = DefaultAgentResponse,
     return_table: bool = False,
 ) -> ScalarResult[T] | TableResult:
+    """Execute an AI agent task on the provided input.
+    Args:
+        task: Instructions for the AI agent to execute.
+        session: Optional session. If not provided, one will be created automatically.
+        input: Input data (BaseModel, DataFrame, UUID, or Result).
+        effort_level: Effort level preset (low/medium/high). Mutually exclusive with
+            custom params (llm, iteration_budget, include_research). Default: low.
+        llm: LLM to use. Required when effort_level is None.
+        iteration_budget: Number of agent iterations (0-20). Required when effort_level is None.
+        include_research: Include research notes. Required when effort_level is None.
+        response_model: Pydantic model for the response schema.
+        return_table: If True, return a TableResult instead of ScalarResult.
+    Returns:
+        ScalarResult or TableResult depending on return_table parameter.
+    """
     if session is None:
         async with create_session() as internal_session:
             cohort_task = await single_agent_async(
@@ -93,6 +195,8 @@ async def single_agent[T: BaseModel](
                 input=input,
                 effort_level=effort_level,
                 llm=llm,
+                iteration_budget=iteration_budget,
+                include_research=include_research,
                 response_model=response_model,
                 return_table=return_table,
             )
@@ -103,6 +207,8 @@ async def single_agent[T: BaseModel](
         input=input,
         effort_level=effort_level,
         llm=llm,
+        iteration_budget=iteration_budget,
+        include_research=include_research,
         response_model=response_model,
         return_table=return_table,
     )
@@ -113,311 +219,212 @@ async def single_agent_async[T: BaseModel](
     task: str,
     session: Session,
     input: BaseModel | DataFrame | UUID | Result | None = None,
-    effort_level: EffortLevel = EffortLevel.LOW,
+    effort_level: EffortLevel | None = EffortLevel.LOW,
     llm: LLM | None = None,
+    iteration_budget: int | None = None,
+    include_research: bool | None = None,
     response_model: type[T] = DefaultAgentResponse,
     return_table: bool = False,
 ) -> EveryrowTask[T]:
-    if input is not None:
-        input_artifact_ids = [await _process_single_agent_input(input, session)]
-    else:
-        input_artifact_ids = []
+    """Submit a single_agent task asynchronously."""
+    input_data = _prepare_single_input(input, SingleAgentOperationInputType1Item, SingleAgentOperationInputType2)
-    query = AgentQueryParams(
+    # Build the operation body with either preset or custom params
+    body = SingleAgentOperation(
+        input_=input_data,  # type: ignore
         task=task,
-        llm=llm or UNSET,
-        effort_level=effort_level,
-        response_schema=response_model.model_json_schema(),
-        response_schema_type=ResponseSchemaType.JSON,
-        is_expand=return_table,
-        include_provenance_and_notes=False,
-    )
-    request = ReduceAgentRequestParams(
-        query=query,
-        input_artifacts=input_artifact_ids,
-    )
-    body = SubmitTaskBody(
-        payload=request,
         session_id=session.session_id,
+        response_schema=SingleAgentOperationResponseSchemaType0.from_dict(response_model.model_json_schema()),
+        effort_level=PublicEffortLevel(effort_level.value) if effort_level is not None else UNSET,
+        llm=LLMEnumPublic(llm.value) if llm is not None else UNSET,
+        iteration_budget=iteration_budget if iteration_budget is not None else UNSET,
+        include_research=include_research if include_research is not None else UNSET,
+        return_list=return_table,
     )
-    cohort_task = EveryrowTask(
-        response_model=response_model, is_map=False, is_expand=return_table
-    )
-    await cohort_task.submit(body, session.client)
+    response = await single_agent_operations_single_agent_post.asyncio(client=session.client, body=body)
+    response = handle_response(response)
+    cohort_task: EveryrowTask[T] = EveryrowTask(response_model=response_model, is_map=False, is_expand=return_table)
+    cohort_task.set_submitted(response.task_id, response.session_id, session.client)
     return cohort_task
+# --- Agent Map ---
 async def agent_map(
     task: str,
     session: Session | None = None,
     input: DataFrame | UUID | TableResult | None = None,
-    effort_level: EffortLevel = EffortLevel.LOW,
+    effort_level: EffortLevel | None = EffortLevel.LOW,
     llm: LLM | None = None,
+    iteration_budget: int | None = None,
+    include_research: bool | None = None,
+    enforce_row_independence: bool = False,
     response_model: type[BaseModel] = DefaultAgentResponse,
 ) -> TableResult:
+    """Execute an AI agent task on each row of the input table.
+    Args:
+        task: Instructions for the AI agent to execute per row.
+        session: Optional session. If not provided, one will be created automatically.
+        input: The input table (DataFrame, UUID, or TableResult).
+        effort_level: Effort level preset (low/medium/high). Mutually exclusive with
+            custom params (llm, iteration_budget, include_research). Default: low.
+        llm: LLM to use for each agent. Required when effort_level is None.
+        iteration_budget: Number of agent iterations per row (0-20). Required when effort_level is None.
+        include_research: Include research notes. Required when effort_level is None.
+        response_model: Pydantic model for the response schema.
+    Returns:
+        TableResult containing the agent results merged with input rows.
+    """
     if input is None:
         raise EveryrowError("input is required for agent_map")
     if session is None:
         async with create_session() as internal_session:
             cohort_task = await agent_map_async(
-                task,
-                internal_session,
-                input,
-                effort_level,
-                llm,
-                response_model,
+                task=task,
+                session=internal_session,
+                input=input,
+                effort_level=effort_level,
+                llm=llm,
+                iteration_budget=iteration_budget,
+                include_research=include_research,
+                enforce_row_independence=enforce_row_independence,
+                response_model=response_model,
             )
             result = await cohort_task.await_result()
             if isinstance(result, TableResult):
                 return result
-            else:
-                raise EveryrowError("Agent map task did not return a table result")
+            raise EveryrowError("Agent map task did not return a table result")
     cohort_task = await agent_map_async(
-        task, session, input, effort_level, llm, response_model
+        task=task,
+        session=session,
+        input=input,
+        effort_level=effort_level,
+        llm=llm,
+        iteration_budget=iteration_budget,
+        include_research=include_research,
+        enforce_row_independence=enforce_row_independence,
+        response_model=response_model,
     )
     result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
-    else:
-        raise EveryrowError("Agent map task did not return a table result")
-def _convert_pydantic_to_custom_schema(model: type[BaseModel]) -> dict[str, Any]:
-    """Convert a Pydantic model to the custom response schema format expected by rank.
-    The custom format uses _model_name instead of type: object, and uses optional: bool
-    instead of required arrays.
-    Example:
-        class ScreeningResult(BaseModel):
-            screening_result: str = Field(..., description="...")
-        Converts to:
-        {
-            "_model_name": "ScreeningResult",
-            "screening_result": {
-                "type": "str",
-                "optional": False,
-                "description": "..."
-            }
-        }
-    """
-    json_schema = model.model_json_schema()
-    # Extract model name from title or use the class name
-    model_name = json_schema.get("title", model.__name__)
-    # Build the custom schema format
-    custom_schema: dict[str, Any] = {"_model_name": model_name}
-    # Convert properties
-    properties = json_schema.get("properties", {})
-    required = set(json_schema.get("required", []))
-    # Map JSON schema types to custom format types
-    type_mapping = {
-        "string": "str",
-        "integer": "int",
-        "number": "float",
-        "boolean": "bool",
-    }
-    for field_name, field_schema in properties.items():
-        # Copy the field schema
-        custom_field: dict[str, Any] = {}
-        # Map type from JSON schema format to custom format
-        field_type = field_schema.get("type")
-        if field_type:
-            # Convert JSON schema type to custom format type
-            custom_field["type"] = type_mapping.get(field_type, field_type)
-        # Add description if present
-        if "description" in field_schema:
-            custom_field["description"] = field_schema["description"]
-        # Set optional flag (opposite of required)
-        custom_field["optional"] = field_name not in required
-        custom_schema[field_name] = custom_field
-    return custom_schema
+    raise EveryrowError("Agent map task did not return a table result")
 async def agent_map_async(
     task: str,
     session: Session,
     input: DataFrame | UUID | TableResult,
-    effort_level: EffortLevel = EffortLevel.LOW,
+    effort_level: EffortLevel | None = EffortLevel.LOW,
     llm: LLM | None = None,
+    iteration_budget: int | None = None,
+    include_research: bool | None = None,
+    enforce_row_independence: bool = False,
     response_model: type[BaseModel] = DefaultAgentResponse,
 ) -> EveryrowTask[BaseModel]:
-    input_artifact_ids = [await _process_agent_map_input(input, session)]
-    query = AgentQueryParams(
+    """Submit an agent_map task asynchronously."""
+    input_data = _prepare_table_input(input, AgentMapOperationInputType1Item)
+    # Build the operation body with either preset or custom params
+    body = AgentMapOperation(
+        input_=input_data,  # type: ignore
         task=task,
-        effort_level=effort_level,
-        llm=llm or UNSET,
-        response_schema=_convert_pydantic_to_custom_schema(response_model),
-        response_schema_type=ResponseSchemaType.CUSTOM,
-        is_expand=False,
-        include_provenance_and_notes=False,
-    )
-    request = MapAgentRequestParams(
-        query=query,
-        input_artifacts=input_artifact_ids,
-        context_artifacts=[],
-        join_with_input=True,
-    )
-    body = SubmitTaskBody(
-        payload=request,
         session_id=session.session_id,
+        response_schema=AgentMapOperationResponseSchemaType0.from_dict(response_model.model_json_schema()),
+        effort_level=PublicEffortLevel(effort_level.value) if effort_level is not None else UNSET,
+        llm=LLMEnumPublic(llm.value) if llm is not None else UNSET,
+        iteration_budget=iteration_budget if iteration_budget is not None else UNSET,
+        include_research=include_research if include_research is not None else UNSET,
+        join_with_input=True,
+        enforce_row_independence=enforce_row_independence,
     )
-    cohort_task = EveryrowTask(
-        response_model=response_model, is_map=True, is_expand=False
-    )
-    await cohort_task.submit(body, session.client)
-    return cohort_task
+    response = await agent_map_operations_agent_map_post.asyncio(client=session.client, body=body)
+    response = handle_response(response)
-async def _process_agent_map_input(
-    input: DataFrame | UUID | TableResult,
-    session: Session,
-) -> UUID:
-    if isinstance(input, TableResult):
-        return input.artifact_id
-    elif isinstance(input, DataFrame):
-        return await create_table_artifact(input, session)
-    else:
-        return input
-async def _process_single_agent_input(
-    input: BaseModel | DataFrame | UUID | Result,
-    session: Session,
-) -> UUID:
-    if isinstance(input, Result):
-        return input.artifact_id
-    elif isinstance(input, DataFrame):
-        return await create_table_artifact(input, session)
-    elif isinstance(input, BaseModel):
-        return await create_scalar_artifact(input, session)
-    else:
-        return input
-async def create_scalar_artifact(input: BaseModel, session: Session) -> UUID:
-    payload = CreateRequest(query=CreateQueryParams(data_to_create=input.model_dump()))
-    body = SubmitTaskBody(
-        payload=payload,
-        session_id=session.session_id,
-    )
-    task_id = await submit_task(body, session.client)
-    finished_create_artifact_task = await await_task_completion(task_id, session.client)
-    return finished_create_artifact_task.artifact_id  # type: ignore (we check artifact_id in await_task_completion)
+    cohort_task = EveryrowTask(response_model=response_model, is_map=True, is_expand=False)
+    cohort_task.set_submitted(response.task_id, response.session_id, session.client)
+    return cohort_task
-async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
-    # Use to_json to handle NaN/NaT serialization, then parse back to Python objects
-    json_str = input.to_json(orient="records")
-    assert json_str is not None  # to_json returns str when no path_or_buf provided
-    records = json.loads(json_str)
-    payload = CreateGroupRequest(query=CreateGroupQueryParams(data_to_create=records))
-    body = SubmitTaskBody(
-        payload=payload,
-        session_id=session.session_id,
-    )
-    task_id = await submit_task(body, session.client)
-    finished_create_artifact_task = await await_task_completion(task_id, session.client)
-    return finished_create_artifact_task.artifact_id  # type: ignore (we check artifact_id in await_task_completion)
+# --- Screen ---
-async def merge(
+async def screen[T: BaseModel](
     task: str,
     session: Session | None = None,
-    left_table: DataFrame | UUID | TableResult | None = None,
-    right_table: DataFrame | UUID | TableResult | None = None,
-    merge_on_left: str | None = None,
-    merge_on_right: str | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    response_model: type[T] | None = None,
 ) -> TableResult:
-    """Merge two tables using merge operation.
+    """Screen rows in a table using AI.
     Args:
-        task: The task description for the merge operation
+        task: The task description for screening
         session: Optional session. If not provided, one will be created automatically.
-        left_table: The left table to merge (DataFrame, UUID, or TableResult)
-        right_table: The right table to merge (DataFrame, UUID, or TableResult)
-        merge_on_left: Optional column name in left table to merge on
-        merge_on_right: Optional column name in right table to merge on
+        input: The input table (DataFrame, UUID, or TableResult)
+        response_model: Optional Pydantic model for the response schema.
     Returns:
-        TableResult containing the merged table
+        TableResult containing the screened table
     """
-    if left_table is None or right_table is None:
-        raise EveryrowError("left_table and right_table are required for merge")
+    if input is None:
+        raise EveryrowError("input is required for screen")
     if session is None:
         async with create_session() as internal_session:
-            cohort_task = await merge_async(
+            cohort_task = await screen_async(
                 task=task,
                 session=internal_session,
-                left_table=left_table,
-                right_table=right_table,
-                merge_on_left=merge_on_left,
-                merge_on_right=merge_on_right,
+                input=input,
+                response_model=response_model,
             )
             result = await cohort_task.await_result()
             if isinstance(result, TableResult):
                 return result
-            else:
-                raise EveryrowError("Merge task did not return a table result")
-    cohort_task = await merge_async(
-        task=task,
-        session=session,
-        left_table=left_table,
-        right_table=right_table,
-        merge_on_left=merge_on_left,
-        merge_on_right=merge_on_right,
-    )
+            raise EveryrowError("Screen task did not return a table result")
+    cohort_task = await screen_async(task=task, session=session, input=input, response_model=response_model)
     result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
-    else:
-        raise EveryrowError("Merge task did not return a table result")
+    raise EveryrowError("Screen task did not return a table result")
-async def merge_async(
+async def screen_async[T: BaseModel](
     task: str,
     session: Session,
-    left_table: DataFrame | UUID | TableResult,
-    right_table: DataFrame | UUID | TableResult,
-    merge_on_left: str | None = None,
-    merge_on_right: str | None = None,
-) -> EveryrowTask[BaseModel]:
-    """Submit a merge task asynchronously."""
-    left_artifact_id = await _process_agent_map_input(left_table, session)
-    right_artifact_id = await _process_agent_map_input(right_table, session)
+    input: DataFrame | UUID | TableResult,
+    response_model: type[T] | None = None,
+) -> EveryrowTask[T]:
+    """Submit a screen task asynchronously."""
+    input_data = _prepare_table_input(input, ScreenOperationInputType1Item)
+    actual_response_model = response_model or DefaultScreenResult
-    query = DeepMergePublicParams(
+    body = ScreenOperation(
+        input_=input_data,  # type: ignore
         task=task,
-        merge_on_left=merge_on_left or UNSET,
-        merge_on_right=merge_on_right or UNSET,
-    )
-    request = DeepMergeRequest(
-        query=query,
-        input_artifacts=[left_artifact_id],
-        context_artifacts=[right_artifact_id],
-    )
-    body = SubmitTaskBody(
-        payload=request,
         session_id=session.session_id,
+        response_schema=ScreenOperationResponseSchemaType0.from_dict(actual_response_model.model_json_schema()),
     )
-    cohort_task = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
-    await cohort_task.submit(body, session.client)
+    response = await screen_operations_screen_post.asyncio(client=session.client, body=body)
+    response = handle_response(response)
+    cohort_task: EveryrowTask[T] = EveryrowTask(
+        response_model=actual_response_model,  # type: ignore[arg-type]
+        is_map=True,
+        is_expand=False,
+    )
+    cohort_task.set_submitted(response.task_id, response.session_id, session.client)
     return cohort_task
+# --- Rank ---
 async def rank[T: BaseModel](
     task: str,
     session: Session | None = None,
@@ -427,13 +434,13 @@ async def rank[T: BaseModel](
     response_model: type[T] | None = None,
     ascending_order: bool = True,
 ) -> TableResult:
-    """Rank rows in a table using rank operation.
+    """Rank rows in a table using AI.
     Args:
         task: The task description for ranking
         session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
-        field_name: The name of the field to extract and sort by
+        field_name: The name of the field to sort by
         field_type: The type of the field (default: "float", ignored if response_model is provided)
         response_model: Optional Pydantic model for the response schema
         ascending_order: If True, sort in ascending order
@@ -457,8 +464,7 @@ async def rank[T: BaseModel](
             result = await cohort_task.await_result()
             if isinstance(result, TableResult):
                 return result
-            else:
-                raise EveryrowError("Rank task did not return a table result")
+            raise EveryrowError("Rank task did not return a table result")
     cohort_task = await rank_async(
         task=task,
         session=session,
@@ -471,8 +477,7 @@ async def rank[T: BaseModel](
     result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
-    else:
-        raise EveryrowError("Rank task did not return a table result")
+    raise EveryrowError("Rank task did not return a table result")
 async def rank_async[T: BaseModel](
@@ -485,276 +490,197 @@ async def rank_async[T: BaseModel](
     ascending_order: bool = True,
 ) -> EveryrowTask[T]:
     """Submit a rank task asynchronously."""
-    input_artifact_id = await _process_agent_map_input(input, session)
+    input_data = _prepare_table_input(input, RankOperationInputType1Item)
     if response_model is not None:
-        response_schema = _convert_pydantic_to_custom_schema(response_model)
-        if field_name not in response_schema:
-            raise ValueError(
-                f"Field {field_name} not in response model {response_model.__name__}"
-            )
+        response_schema = response_model.model_json_schema()
+        # Validate that field_name exists in the model
+        properties = response_schema.get("properties", {})
+        if field_name not in properties:
+            raise ValueError(f"Field {field_name} not in response model {response_model.__name__}")
     else:
+        # Build a minimal JSON schema with just the sort field
+        json_type_map = {
+            "float": "number",
+            "int": "integer",
+            "str": "string",
+            "bool": "boolean",
+        }
         response_schema = {
-            "_model_name": "RankResponse",
-            field_name: {
-                "type": field_type,
-                "optional": False,
-            },
+            "type": "object",
+            "properties": {field_name: {"type": json_type_map.get(field_type, field_type)}},
+            "required": [field_name],
         }
-    query = DeepRankPublicParams(
+    body = RankOperation(
+        input_=input_data,  # type: ignore
         task=task,
-        response_schema=response_schema,
-        field_to_sort_by=field_name,
-        ascending_order=ascending_order,
-    )
-    request = DeepRankRequest(
-        query=query,
-        input_artifacts=[input_artifact_id],
-        context_artifacts=[],
-    )
-    body = SubmitTaskBody(
-        payload=request,
+        sort_by=field_name,
         session_id=session.session_id,
+        response_schema=RankOperationResponseSchemaType0.from_dict(response_schema),
+        ascending=ascending_order,
     )
+    response = await rank_operations_rank_post.asyncio(client=session.client, body=body)
+    response = handle_response(response)
     cohort_task: EveryrowTask[T] = EveryrowTask(
         response_model=response_model or BaseModel,  # type: ignore[arg-type]
         is_map=True,
         is_expand=False,
     )
-    await cohort_task.submit(body, session.client)
+    cohort_task.set_submitted(response.task_id, response.session_id, session.client)
     return cohort_task
-async def screen[T: BaseModel](
+# --- Merge ---
+async def merge(
     task: str,
     session: Session | None = None,
-    input: DataFrame | UUID | TableResult | None = None,
-    response_model: type[T] | None = None,
+    left_table: DataFrame | UUID | TableResult | None = None,
+    right_table: DataFrame | UUID | TableResult | None = None,
+    merge_on_left: str | None = None,
+    merge_on_right: str | None = None,
+    use_web_search: Literal["auto", "yes", "no"] | None = None,
 ) -> TableResult:
-    """Screen rows in a table using screen operation.
+    """Merge two tables using AI.
     Args:
-        task: The task description for screening
+        task: The task description for the merge operation
         session: Optional session. If not provided, one will be created automatically.
-        input: The input table (DataFrame, UUID, or TableResult)
-        response_model: Optional Pydantic model for the response schema.
-            If not provided, defaults to a result with just a "passes" boolean.
+        left_table: The left table to merge (DataFrame, UUID, or TableResult)
+        right_table: The right table to merge (DataFrame, UUID, or TableResult)
+        merge_on_left: Optional column name in left table to merge on
+        merge_on_right: Optional column name in right table to merge on
+        use_web_search: Optional. Control web search behavior: "auto" tries LLM merge first then conditionally searches, "no" skips web search entirely, "yes" forces web search on every row. Defaults to "auto" if not provided.
     Returns:
-        TableResult containing the screened table
+        TableResult containing the merged table
     """
-    if input is None:
-        raise EveryrowError("input is required for screen")
+    if left_table is None or right_table is None:
+        raise EveryrowError("left_table and right_table are required for merge")
     if session is None:
         async with create_session() as internal_session:
-            cohort_task = await screen_async(
+            cohort_task = await merge_async(
                 task=task,
                 session=internal_session,
-                input=input,
-                response_model=response_model,
+                left_table=left_table,
+                right_table=right_table,
+                merge_on_left=merge_on_left,
+                merge_on_right=merge_on_right,
+                use_web_search=use_web_search,
             )
             result = await cohort_task.await_result()
             if isinstance(result, TableResult):
                 return result
-            else:
-                raise EveryrowError("Screen task did not return a table result")
-    cohort_task = await screen_async(
+            raise EveryrowError("Merge task did not return a table result")
+    cohort_task = await merge_async(
         task=task,
         session=session,
-        input=input,
-        response_model=response_model,
+        left_table=left_table,
+        right_table=right_table,
+        merge_on_left=merge_on_left,
+        merge_on_right=merge_on_right,
+        use_web_search=use_web_search,
     )
     result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
-    else:
-        raise EveryrowError("Screen task did not return a table result")
+    raise EveryrowError("Merge task did not return a table result")
-async def screen_async[T: BaseModel](
+async def merge_async(
     task: str,
     session: Session,
-    input: DataFrame | UUID | TableResult,
-    response_model: type[T] | None = None,
-) -> EveryrowTask[T]:
-    """Submit a screen task asynchronously."""
-    input_artifact_id = await _process_agent_map_input(input, session)
-    actual_response_model = response_model or DefaultScreenResult
-    response_schema = actual_response_model.model_json_schema()
+    left_table: DataFrame | UUID | TableResult,
+    right_table: DataFrame | UUID | TableResult,
+    merge_on_left: str | None = None,
+    merge_on_right: str | None = None,
+    use_web_search: Literal["auto", "yes", "no"] | None = None,
+) -> EveryrowTask[BaseModel]:
+    """Submit a merge task asynchronously."""
+    left_data = _prepare_table_input(left_table, MergeOperationLeftInputType1Item)
+    right_data = _prepare_table_input(right_table, MergeOperationRightInputType1Item)
-    query = DeepScreenPublicParams(
+    body = MergeOperation(
+        left_input=left_data,  # type: ignore
+        right_input=right_data,  # type: ignore
         task=task,
-        response_schema=response_schema,
-        response_schema_type=ResponseSchemaType.JSON,
-    )
-    request = DeepScreenRequest(
-        query=query,
-        input_artifacts=[input_artifact_id],
-    )
-    body = SubmitTaskBody(
-        payload=request,
+        left_key=merge_on_left or UNSET,
+        right_key=merge_on_right or UNSET,
+        use_web_search=use_web_search or UNSET,  # type: ignore
         session_id=session.session_id,
     )
-    cohort_task: EveryrowTask[T] = EveryrowTask(
-        response_model=actual_response_model,  # type: ignore[arg-type]
-        is_map=True,
-        is_expand=False,
-    )
-    await cohort_task.submit(body, session.client)
+    response = await merge_operations_merge_post.asyncio(client=session.client, body=body)
+    response = handle_response(response)
+    cohort_task = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
+    cohort_task.set_submitted(response.task_id, response.session_id, session.client)
     return cohort_task
+# --- Dedupe ---
 async def dedupe(
     equivalence_relation: str,
     session: Session | None = None,
     input: DataFrame | UUID | TableResult | None = None,
-    select_representative: bool = True,
 ) -> TableResult:
-    """Dedupe a table by removing duplicates using dedupe operation.
+    """Dedupe a table by removing duplicates using AI.
     Args:
         equivalence_relation: Description of what makes items equivalent
         session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
-        select_representative: If True, select a representative for each group of duplicates
     Returns:
-        TableResult containing the deduped table with duplicates removed
+        TableResult containing the deduped table
     """
-    if input is None or equivalence_relation is None:
-        raise EveryrowError("input and equivalence_relation are required for dedupe")
+    if input is None:
+        raise EveryrowError("input is required for dedupe")
     if session is None:
         async with create_session() as internal_session:
             cohort_task = await dedupe_async(
                 session=internal_session,
                 input=input,
                 equivalence_relation=equivalence_relation,
-                select_representative=select_representative,
             )
             result = await cohort_task.await_result()
             if isinstance(result, TableResult):
                 return result
-            else:
-                raise EveryrowError("Dedupe task did not return a table result")
+            raise EveryrowError("Dedupe task did not return a table result")
     cohort_task = await dedupe_async(
         session=session,
         input=input,
         equivalence_relation=equivalence_relation,
-        select_representative=select_representative,
     )
     result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
-    else:
-        raise EveryrowError("Dedupe task did not return a table result")
+    raise EveryrowError("Dedupe task did not return a table result")
 async def dedupe_async(
     session: Session,
     input: DataFrame | UUID | TableResult,
     equivalence_relation: str,
-    select_representative: bool = True,
 ) -> EveryrowTask[BaseModel]:
     """Submit a dedupe task asynchronously."""
-    input_artifact_id = await _process_agent_map_input(input, session)
+    input_data = _prepare_table_input(input, DedupeOperationInputType1Item)
-    query = DedupePublicParams(
+    body = DedupeOperation(
+        input_=input_data,  # type: ignore
         equivalence_relation=equivalence_relation,
-        select_representative=select_representative,
-    )
-    request = DedupeRequestParams(
-        query=query,
-        input_artifacts=[input_artifact_id],
-        processing_mode=ProcessingMode.MAP,
-    )
-    body = SubmitTaskBody(
-        payload=request,
         session_id=session.session_id,
     )
+    response = await dedupe_operations_dedupe_post.asyncio(client=session.client, body=body)
+    response = handle_response(response)
     cohort_task = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
-    await cohort_task.submit(body, session.client)
+    cohort_task.set_submitted(response.task_id, response.session_id, session.client)
     return cohort_task
-async def derive(
-    session: Session | None = None,
-    input: DataFrame | UUID | TableResult | None = None,
-    expressions: dict[str, str] | None = None,
-) -> TableResult:
-    """Derive new columns using pandas eval expressions.
-    Args:
-        session: Optional session. If not provided, one will be created automatically.
-        input: The input table (DataFrame, UUID, or TableResult)
-        expressions: A dictionary mapping column names to pandas expressions.
-            Example: {"approved": "True", "score": "price * quantity"}
-    Returns:
-        TableResult containing the table with new derived columns
-    """
-    if input is None or expressions is None:
-        raise EveryrowError("input and expressions are required for derive")
-    if session is None:
-        async with create_session() as internal_session:
-            input_artifact_id = await _process_agent_map_input(input, internal_session)
-            derive_expressions = [
-                DeriveExpression(column_name=col_name, expression=expr)
-                for col_name, expr in expressions.items()
-            ]
-            query = DeriveQueryParams(expressions=derive_expressions)
-            request = DeriveRequest(
-                query=query,
-                input_artifacts=[input_artifact_id],
-            )
-            body = SubmitTaskBody(
-                payload=request,
-                session_id=internal_session.session_id,
-            )
-            task_id = await submit_task(body, internal_session.client)
-            finished_task = await await_task_completion(
-                task_id, internal_session.client
-            )
-            data = await read_table_result(
-                finished_task.artifact_id,  # type: ignore[arg-type]
-                internal_session.client,
-            )
-            return TableResult(
-                artifact_id=finished_task.artifact_id,  # type: ignore
-                data=data,
-                error=finished_task.error,
-            )
-    input_artifact_id = await _process_agent_map_input(input, session)
-    derive_expressions = [
-        DeriveExpression(column_name=col_name, expression=expr)
-        for col_name, expr in expressions.items()
-    ]
-    query = DeriveQueryParams(expressions=derive_expressions)
-    request = DeriveRequest(
-        query=query,
-        input_artifacts=[input_artifact_id],
-    )
-    body = SubmitTaskBody(
-        payload=request,
-        session_id=session.session_id,
-    )
-    task_id = await submit_task(body, session.client)
-    finished_task = await await_task_completion(task_id, session.client)
-    data = await read_table_result(finished_task.artifact_id, session.client)  # type: ignore
-    return TableResult(
-        artifact_id=finished_task.artifact_id,  # type: ignore
-        data=data,
-        error=finished_task.error,
-    )

everyrow 0.1.10__py3-none-any.whl → 0.2.0__py3-none-any.whl

everyrow 0.1.10__py3-none-any.whl → 0.2.0__py3-none-any.whl