everyrow 0.1.0-py3-none-any.whl → 0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- everyrow/__init__.py +2 -1
- everyrow/citations.py +6 -2
- everyrow/ops.py +154 -54
- everyrow/session.py +33 -11
- everyrow/task.py +99 -15
- everyrow-0.1.1.dist-info/METADATA +275 -0
- {everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/RECORD +9 -9
- everyrow-0.1.0.dist-info/METADATA +0 -238
- {everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/WHEEL +0 -0
- {everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/licenses/LICENSE.txt +0 -0
everyrow/__init__.py
CHANGED
everyrow/citations.py
CHANGED
```diff
@@ -9,7 +9,9 @@ from everyrow.generated.models import (
 from everyrow.generated.types import Unset
 
 
-def _render_citations(data: dict[str, Any], source_bank: AuxDataSourceBank) -> dict[str, Any]:
+def _render_citations(
+    data: dict[str, Any], source_bank: AuxDataSourceBank
+) -> dict[str, Any]:
     result = deepcopy(data)
     for source_id, source_data in source_bank.to_dict().items():
         for key, value in result.items():
@@ -21,7 +23,9 @@ def _render_citations(data: dict[str, Any], source_bank: AuxDataSourceBank) -> dict[str, Any]:
 
 
 def render_citations_standalone(artifact: StandaloneArtifactRecord):
-    if isinstance(artifact.aux_data, Unset) or isinstance(artifact.aux_data.source_bank, Unset):
+    if isinstance(artifact.aux_data, Unset) or isinstance(
+        artifact.aux_data.source_bank, Unset
+    ):
         return artifact
     source_bank = (
         artifact.aux_data.source_bank
```
everyrow/ops.py
CHANGED
```diff
@@ -11,7 +11,6 @@ from everyrow.generated.models import (
     CreateGroupRequest,
     CreateQueryParams,
     CreateRequest,
-    DedupeMode,
     DedupeQueryParams,
     DedupeRequestParams,
     DeepMergePublicParams,
@@ -23,7 +22,6 @@ from everyrow.generated.models import (
     DeriveExpression,
     DeriveQueryParams,
     DeriveRequest,
-    EmbeddingModels,
     MapAgentRequestParams,
     ProcessingMode,
     ReduceAgentRequestParams,
@@ -32,7 +30,7 @@ from everyrow.generated.models import (
 from everyrow.generated.models.submit_task_body import SubmitTaskBody
 from everyrow.generated.types import UNSET
 from everyrow.result import Result, ScalarResult, TableResult
-from everyrow.session import Session
+from everyrow.session import Session, create_session
 from everyrow.task import (
     LLM,
     EffortLevel,
@@ -52,7 +50,7 @@ class DefaultAgentResponse(BaseModel):
 @overload
 async def single_agent[T: BaseModel](
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
@@ -64,7 +62,7 @@ async def single_agent[T: BaseModel](
 @overload
 async def single_agent(
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
@@ -75,13 +73,25 @@ async def single_agent(
 
 async def single_agent[T: BaseModel](
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | DataFrame | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
     response_model: type[T] = DefaultAgentResponse,
     return_table: bool = False,
 ) -> ScalarResult[T] | TableResult:
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await single_agent_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                effort_level=effort_level,
+                llm=llm,
+                response_model=response_model,
+                return_table=return_table,
+            )
+            return await cohort_task.await_result()
     cohort_task = await single_agent_async(
         task=task,
         session=session,
@@ -91,7 +101,7 @@ async def single_agent[T: BaseModel](
         response_model=response_model,
         return_table=return_table,
     )
-    return await cohort_task.await_result(session.client)
+    return await cohort_task.await_result()
 
 
 async def single_agent_async[T: BaseModel](
@@ -135,17 +145,29 @@ async def single_agent_async[T: BaseModel](
 
 async def agent_map(
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
     response_model: type[BaseModel] = DefaultAgentResponse,
     return_table_per_row: bool = False,
 ) -> TableResult:
+    if input is None:
+        raise EveryrowError("input is required for agent_map")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await agent_map_async(
+                task, internal_session, input, effort_level, llm, response_model, return_table_per_row
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Agent map task did not return a table result")
     cohort_task = await agent_map_async(
         task, session, input, effort_level, llm, response_model, return_table_per_row
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -303,9 +325,9 @@ async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
 
 async def merge(
     task: str,
-    session: Session,
-    left_table: DataFrame | UUID | TableResult,
-    right_table: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    left_table: DataFrame | UUID | TableResult | None = None,
+    right_table: DataFrame | UUID | TableResult | None = None,
     merge_on_left: str | None = None,
     merge_on_right: str | None = None,
     merge_model: LLM | None = None,
@@ -315,7 +337,7 @@ async def merge(
 
     Args:
         task: The task description for the merge operation
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         left_table: The left table to merge (DataFrame, UUID, or TableResult)
         right_table: The right table to merge (DataFrame, UUID, or TableResult)
         merge_on_left: Optional column name in left table to merge on
@@ -326,6 +348,25 @@ async def merge(
     Returns:
         TableResult containing the merged table
     """
+    if left_table is None or right_table is None:
+        raise EveryrowError("left_table and right_table are required for merge")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await merge_async(
+                task=task,
+                session=internal_session,
+                left_table=left_table,
+                right_table=right_table,
+                merge_on_left=merge_on_left,
+                merge_on_right=merge_on_right,
+                merge_model=merge_model,
+                preview=preview,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Merge task did not return a table result")
     cohort_task = await merge_async(
         task=task,
         session=session,
@@ -336,7 +377,7 @@ async def merge(
         merge_model=merge_model,
         preview=preview,
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -381,9 +422,9 @@ async def merge_async(
 
 async def rank[T: BaseModel](
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
-    field_name: str,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    field_name: str | None = None,
     field_type: Literal["float", "int", "str", "bool"] = "float",
     response_model: type[T] | None = None,
     ascending_order: bool = True,
@@ -393,7 +434,7 @@ async def rank[T: BaseModel](
 
     Args:
         task: The task description for ranking
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         field_name: The name of the field to extract and sort by
         field_type: The type of the field (default: "float", ignored if response_model is provided)
@@ -404,6 +445,25 @@ async def rank[T: BaseModel](
     Returns:
         TableResult containing the ranked table
     """
+    if input is None or field_name is None:
+        raise EveryrowError("input and field_name are required for rank")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await rank_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                field_name=field_name,
+                field_type=field_type,
+                response_model=response_model,
+                ascending_order=ascending_order,
+                preview=preview,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Rank task did not return a table result")
     cohort_task = await rank_async(
         task=task,
         session=session,
@@ -414,7 +474,7 @@ async def rank[T: BaseModel](
         ascending_order=ascending_order,
         preview=preview,
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -477,8 +537,8 @@ async def rank_async[T: BaseModel](
 
 async def screen[T: BaseModel](
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
     response_model: type[T] | None = None,
     batch_size: int | None = None,
     preview: bool = False,
@@ -487,7 +547,7 @@ async def screen[T: BaseModel](
 
     Args:
         task: The task description for screening
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         response_model: Optional Pydantic model for the response schema
        batch_size: Optional batch size for processing (default: 10)
@@ -496,6 +556,23 @@ async def screen[T: BaseModel](
     Returns:
         TableResult containing the screened table
     """
+    if input is None:
+        raise EveryrowError("input is required for screen")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await screen_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                response_model=response_model,
+                batch_size=batch_size,
+                preview=preview,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Screen task did not return a table result")
     cohort_task = await screen_async(
         task=task,
         session=session,
@@ -504,7 +581,7 @@ async def screen[T: BaseModel](
         batch_size=batch_size,
         preview=preview,
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -555,39 +632,40 @@ async def screen_async[T: BaseModel](
 
 
 async def dedupe(
-    session: Session,
-    input: DataFrame | UUID | TableResult,
-    equivalence_relation: str,
-    llm: LLM | None = None,
-    chunk_size: int | None = None,
-    mode: DedupeMode | None = None,
-    embedding_model: EmbeddingModels | None = None,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    equivalence_relation: str | None = None,
 ) -> TableResult:
     """Dedupe a table by removing duplicates using dedupe operation.
 
     Args:
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         equivalence_relation: Description of what makes items equivalent
-        llm: Optional LLM model to use for deduplication
-        chunk_size: Optional maximum number of items to process in a single LLM call (default: 40)
-        mode: Optional dedupe mode (AGENTIC or DIRECT)
-        max_consecutive_empty: Optional stop processing a row after this many consecutive comparisons with no matches
-        embedding_model: Optional embedding model to use when reorder_by_embedding is True
 
     Returns:
         TableResult containing the deduped table with duplicates removed
     """
+    if input is None or equivalence_relation is None:
+        raise EveryrowError("input and equivalence_relation are required for dedupe")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await dedupe_async(
+                session=internal_session,
+                input=input,
+                equivalence_relation=equivalence_relation,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Dedupe task did not return a table result")
     cohort_task = await dedupe_async(
         session=session,
         input=input,
         equivalence_relation=equivalence_relation,
-        llm=llm,
-        chunk_size=chunk_size,
-        mode=mode,
-        embedding_model=embedding_model,
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -598,20 +676,12 @@ async def dedupe_async(
     session: Session,
     input: DataFrame | UUID | TableResult,
     equivalence_relation: str,
-    llm: LLM | None = None,
-    chunk_size: int | None = None,
-    mode: DedupeMode | None = None,
-    embedding_model: EmbeddingModels | None = None,
 ) -> EveryrowTask[BaseModel]:
     """Submit a dedupe task asynchronously."""
     input_artifact_id = await _process_agent_map_input(input, session)
 
     query = DedupeQueryParams(
         equivalence_relation=equivalence_relation,
-        llm=llm or UNSET,
-        chunk_size=chunk_size or UNSET,
-        mode=mode or UNSET,
-        embedding_model=embedding_model or UNSET,
     )
     request = DedupeRequestParams(
         query=query,
@@ -629,14 +699,14 @@ async def dedupe_async(
 
 
 async def derive(
-    session: Session,
-    input: DataFrame | UUID | TableResult,
-    expressions: dict[str, str],
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    expressions: dict[str, str] | None = None,
 ) -> TableResult:
     """Derive new columns using pandas eval expressions.
 
     Args:
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         expressions: A dictionary mapping column names to pandas expressions.
             Example: {"approved": "True", "score": "price * quantity"}
@@ -644,6 +714,36 @@ async def derive(
     Returns:
         TableResult containing the table with new derived columns
     """
+    if input is None or expressions is None:
+        raise EveryrowError("input and expressions are required for derive")
+    if session is None:
+        async with create_session() as internal_session:
+            input_artifact_id = await _process_agent_map_input(input, internal_session)
+
+            derive_expressions = [
+                DeriveExpression(column_name=col_name, expression=expr)
+                for col_name, expr in expressions.items()
+            ]
+
+            query = DeriveQueryParams(expressions=derive_expressions)
+            request = DeriveRequest(
+                query=query,
+                input_artifacts=[input_artifact_id],
+            )
+            body = SubmitTaskBody(
+                payload=request,
+                session_id=internal_session.session_id,
+            )
+
+            task_id = await submit_task(body, internal_session.client)
+            finished_task = await await_task_completion(task_id, internal_session.client)
+
+            data = await read_table_result(finished_task.artifact_id, internal_session.client)  # type: ignore
+            return TableResult(
+                artifact_id=finished_task.artifact_id,  # type: ignore
+                data=data,
+                error=finished_task.error,
+            )
     input_artifact_id = await _process_agent_map_input(input, session)
 
     derive_expressions = [
```
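
The net effect of these ops.py changes is that `session` is now optional on every public op (`single_agent`, `agent_map`, `merge`, `rank`, `screen`, `dedupe`, `derive`): when it is omitted, the wrapper opens a session via `create_session()`, runs the task, and awaits the result before the session closes. Below is a minimal sketch of the resulting 0.1.1 call pattern; the sample rows are illustrative, and `EVERYROW_API_KEY` is assumed to be set as described in the new METADATA.

```python
# Sketch of the 0.1.1 sessionless call path (illustrative data).
import asyncio

from pandas import DataFrame

from everyrow.ops import dedupe


async def main() -> None:
    crm = DataFrame(
        [
            {"company": "AbbVie Inc"},
            {"company": "Abbvie"},  # same legal entity, different spelling
        ]
    )
    # No session argument: per the diff above, dedupe() opens an internal
    # session, submits the task, and awaits the TableResult inside it.
    result = await dedupe(
        input=crm,
        equivalence_relation="Two entries are duplicates if they represent the same legal entity",
    )
    print(result.data)


asyncio.run(main())
```

One side effect of keeping `session` ahead of the remaining parameters: arguments that used to be required (`input`, `equivalence_relation`, `left_table`, and so on) now need `None` defaults, so each wrapper re-validates them at runtime and raises `EveryrowError` instead of failing at the type level.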
everyrow/session.py
CHANGED
```diff
@@ -4,7 +4,7 @@ from contextlib import asynccontextmanager
 from datetime import datetime
 from uuid import UUID
 
-from everyrow.api_utils import handle_response
+from everyrow.api_utils import create_client, handle_response
 from everyrow.generated.api.default import (
     create_session_endpoint_sessions_create_post,
 )
@@ -33,21 +33,43 @@ class Session:
 
 @asynccontextmanager
 async def create_session(
-    client: AuthenticatedClient,
+    client: AuthenticatedClient | None = None,
     name: str | None = None,
 ) -> AsyncGenerator[Session, None]:
     """Create a new session and yield it as an async context manager.
 
     Args:
-        client:
-
+        client: Optional authenticated client. If not provided, one will be created
+            automatically using the EVERYROW_API_KEY environment variable and
+            managed within this context manager.
         name: Name for the session. If not provided, defaults to
             "everyrow-sdk-session-{timestamp}".
+
+    Example:
+        # With explicit client (client lifecycle managed externally)
+        async with create_client() as client:
+            async with create_session(client=client, name="My Session") as session:
+                ...
+
+        # Without client (client created and managed internally)
+        async with create_session(name="My Session") as session:
+            ...
     """
-    response = await create_session_endpoint_sessions_create_post.asyncio(
-        client=client,
-        body=CreateSessionRequest(name=name or f"everyrow-sdk-session-{datetime.now().isoformat()}"),
-    )
-    response = handle_response(response)
-    session = Session(client=client, session_id=response.session_id)
-    yield session
+    owns_client = client is None
+    if owns_client:
+        client = create_client()
+        await client.__aenter__()
+
+    try:
+        response = await create_session_endpoint_sessions_create_post.asyncio(
+            client=client,
+            body=CreateSessionRequest(
+                name=name or f"everyrow-sdk-session-{datetime.now().isoformat()}"
+            ),
+        )
+        response = handle_response(response)
+        session = Session(client=client, session_id=response.session_id)
+        yield session
+    finally:
+        if owns_client:
+            await client.__aexit__()
```
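
The rewritten body is an instance of the owns-the-resource pattern: `create_session` enters and exits the client only when it created the client itself, so a caller-supplied client is never closed out from under the caller. Here is a standalone sketch of the same idea; `FakeClient` and `with_client` are hypothetical names for illustration, not SDK APIs.

```python
# Illustration of the owns_client pattern used by create_session() above.
# FakeClient and with_client are hypothetical stand-ins, not SDK names.
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager


class FakeClient:
    async def __aenter__(self) -> "FakeClient":
        print("client opened")
        return self

    async def __aexit__(self, *exc: object) -> None:
        print("client closed")


@asynccontextmanager
async def with_client(
    client: FakeClient | None = None,
) -> AsyncGenerator[FakeClient, None]:
    owns_client = client is None
    if client is None:
        client = FakeClient()
        await client.__aenter__()
    try:
        yield client
    finally:
        # Tear down only what we created; a borrowed client stays open.
        if owns_client:
            await client.__aexit__(None, None, None)
```

With this shape, `async with with_client() as c:` opens and closes its own client, while `async with with_client(existing) as c:` borrows `existing` and leaves its lifecycle to the caller, which is exactly the contract the new `create_session` docstring describes.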
everyrow/task.py
CHANGED
```diff
@@ -5,7 +5,7 @@ from uuid import UUID
 from pandas import DataFrame
 from pydantic.main import BaseModel
 
-from everyrow.api_utils import handle_response
+from everyrow.api_utils import create_client, handle_response
 from everyrow.citations import render_citations_group, render_citations_standalone
 from everyrow.constants import EveryrowError
 from everyrow.generated.api.default import (
@@ -34,26 +34,46 @@ T = TypeVar("T", bound=BaseModel)
 
 class EveryrowTask[T: BaseModel]:
     def __init__(self, response_model: type[T], is_map: bool, is_expand: bool):
-        self.task_id = None
+        self.task_id: UUID | None = None
+        self.session_id: UUID | None = None
+        self._client: AuthenticatedClient | None = None
         self._is_map = is_map
         self._is_expand = is_expand
         self._response_model = response_model
 
-    async def submit(self, body: SubmitTaskBody, client: AuthenticatedClient) -> UUID:
+    async def submit(
+        self,
+        body: SubmitTaskBody,
+        client: AuthenticatedClient,
+    ) -> UUID:
         task_id = await submit_task(body, client)
         self.task_id = task_id
+        self.session_id = body.session_id
+        self._client = client
         return task_id
 
-    async def get_status(self, client: AuthenticatedClient) -> TaskStatusResponse:
+    async def get_status(
+        self, client: AuthenticatedClient | None = None
+    ) -> TaskStatusResponse:
         if self.task_id is None:
             raise EveryrowError("Task must be submitted before fetching status")
+        client = client or self._client
+        if client is None:
+            raise EveryrowError("No client available. Provide a client or use the task within a session context.")
         return await get_task_status(self.task_id, client)
 
-    async def await_result(self, client: AuthenticatedClient) -> TableResult | ScalarResult[T]:
+    async def await_result(
+        self, client: AuthenticatedClient | None = None
+    ) -> TableResult | ScalarResult[T]:
         if self.task_id is None:
             raise EveryrowError("Task must be submitted before awaiting result")
+        client = client or self._client
+        if client is None:
+            raise EveryrowError("No client available. Provide a client or use the task within a session context.")
         final_status_response = await await_task_completion(self.task_id, client)
-        artifact_id = cast(UUID, final_status_response.artifact_id)  # we check artifact_id in await_task_completion
+        artifact_id = cast(
+            UUID, final_status_response.artifact_id
+        )  # we check artifact_id in await_task_completion
 
         if self._is_map or self._is_expand:
             data = await read_table_result(artifact_id, client=client)
@@ -63,7 +83,9 @@ class EveryrowTask[T: BaseModel]:
                 error=final_status_response.error,
             )
         else:
-            data = await read_scalar_result(artifact_id, self._response_model, client=client)
+            data = await read_scalar_result(
+                artifact_id, self._response_model, client=client
+            )
             return ScalarResult(
                 artifact_id=artifact_id,
                 data=data,
@@ -77,7 +99,9 @@ async def submit_task(body: SubmitTaskBody, client: AuthenticatedClient) -> UUID:
     return response.task_id
 
 
-async def await_task_completion(task_id: UUID, client: AuthenticatedClient) -> TaskStatusResponse:
+async def await_task_completion(
+    task_id: UUID, client: AuthenticatedClient
+) -> TaskStatusResponse:
     max_retries = 3
     retries = 0
     while True:
@@ -85,7 +109,9 @@ async def await_task_completion(task_id: UUID, client: AuthenticatedClient) -> TaskStatusResponse:
             status_response = await get_task_status(task_id, client)
         except Exception as e:
             if retries >= max_retries:
-                raise EveryrowError(f"Failed to get task status after {max_retries} retries") from e
+                raise EveryrowError(
+                    f"Failed to get task status after {max_retries} retries"
+                ) from e
             retries += 1
         else:
             retries = 0
@@ -96,14 +122,23 @@ async def await_task_completion(task_id: UUID, client: AuthenticatedClient) -> TaskStatusResponse:
             ):
                 break
         await asyncio.sleep(1)
-    if status_response.status == TaskStatus.FAILED or status_response.artifact_id is None:
-        raise EveryrowError(f"Failed to create input in everyrow: {status_response.error}")
+    if (
+        status_response.status == TaskStatus.FAILED
+        or status_response.artifact_id is None
+    ):
+        raise EveryrowError(
+            f"Failed to create input in everyrow: {status_response.error}"
+        )
 
     return status_response
 
 
-async def get_task_status(task_id: UUID, client: AuthenticatedClient) -> TaskStatusResponse:
-    response = await get_task_status_endpoint_tasks_task_id_status_get.asyncio(client=client, task_id=task_id)
+async def get_task_status(
+    task_id: UUID, client: AuthenticatedClient
+) -> TaskStatusResponse:
+    response = await get_task_status_endpoint_tasks_task_id_status_get.asyncio(
+        client=client, task_id=task_id
+    )
     response = handle_response(response)
     return response
@@ -112,7 +147,9 @@ async def read_table_result(
     artifact_id: UUID,
     client: AuthenticatedClient,
 ) -> DataFrame:
-    response = await get_artifacts_artifacts_get.asyncio(client=client, artifact_ids=[artifact_id])
+    response = await get_artifacts_artifacts_get.asyncio(
+        client=client, artifact_ids=[artifact_id]
+    )
     response = handle_response(response)
     if len(response) != 1:
         raise EveryrowError(f"Expected 1 artifact, got {len(response)}")
@@ -130,7 +167,9 @@ async def read_scalar_result[T: BaseModel](
     response_model: type[T],
     client: AuthenticatedClient,
 ) -> T:
-    response = await get_artifacts_artifacts_get.asyncio(client=client, artifact_ids=[artifact_id])
+    response = await get_artifacts_artifacts_get.asyncio(
+        client=client, artifact_ids=[artifact_id]
+    )
     response = handle_response(response)
     if len(response) != 1:
         raise EveryrowError(f"Expected 1 artifact, got {len(response)}")
@@ -141,3 +180,48 @@ async def read_scalar_result[T: BaseModel](
     artifact = render_citations_standalone(artifact)
 
     return response_model(**artifact.data)
+
+
+async def fetch_task_data(
+    task_id: UUID | str,
+    client: AuthenticatedClient | None = None,
+) -> DataFrame:
+    """Fetch the result data for a completed task as a pandas DataFrame.
+
+    This is a convenience helper that retrieves the table-level group artifact
+    associated with a task and returns it as a DataFrame.
+
+    Args:
+        task_id: The UUID of the task to fetch data for (can be a string or UUID).
+        client: Optional authenticated client. If not provided, one will be created
+            using the EVERYROW_API_KEY environment variable.
+
+    Returns:
+        A pandas DataFrame containing the task result data.
+
+    Raises:
+        EveryrowError: If the task has not completed, failed, or has no artifact.
+
+    Example:
+        >>> from everyrow import fetch_task_data
+        >>> df = await fetch_task_data("12345678-1234-1234-1234-123456789abc")
+        >>> print(df.head())
+    """
+    if isinstance(task_id, str):
+        task_id = UUID(task_id)
+
+    if client is None:
+        client = create_client()
+
+    status_response = await get_task_status(task_id, client)
+
+    if status_response.status not in (TaskStatus.COMPLETED,):
+        raise EveryrowError(
+            f"Task {task_id} is not completed (status: {status_response.status.value}). "
+            f"Error: {status_response.error}"
+        )
+
+    if status_response.artifact_id is None:
+        raise EveryrowError(f"Task {task_id} has no associated artifact.")
+
+    return await read_table_result(status_response.artifact_id, client)
```
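
Two task.py changes work together here: `submit()` now caches the client (and session id) on the task, so `get_status()` and `await_result()` can be called with no arguments, and the new `fetch_task_data` helper recovers a finished task's table from its id alone. The sketch below shows the resulting flow, using the import form from the helper's own docstring; the input row is illustrative and `EVERYROW_API_KEY` is assumed to be set.

```python
# Sketch of the 0.1.1 background-task flow (illustrative input row).
import asyncio

from pandas import DataFrame

from everyrow import create_session, fetch_task_data
from everyrow.ops import rank_async


async def main() -> None:
    async with create_session(name="Async Ranking") as session:
        task = await rank_async(
            session=session,
            task="Score this organization",
            input=DataFrame([{"org": "Example Corp"}]),
            field_name="score",
        )
        # 0.1.0 required await_result(session.client); 0.1.1 falls back to
        # the client captured when the task was submitted.
        result = await task.await_result()
        print(result.data)

    # Later, with only the task id on hand, re-fetch the completed table.
    df = await fetch_task_data(str(task.task_id))
    print(df.head())


asyncio.run(main())
```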
everyrow-0.1.1.dist-info/METADATA
ADDED

```diff
@@ -0,0 +1,275 @@
+Metadata-Version: 2.4
+Name: everyrow
+Version: 0.1.1
+Summary: An SDK for everyrow.io: agent ops at spreadsheet scale
+License-File: LICENSE.txt
+Requires-Python: >=3.12
+Requires-Dist: attrs>=25.4.0
+Requires-Dist: pandas>=2.3.3
+Requires-Dist: pydantic>=2.12.5
+Requires-Dist: python-dotenv>=1.2.1
+Description-Content-Type: text/markdown
+
+
+
+# <picture><img src="images/future-search-logo-128.webp" alt="FutureSearch" height="24" align="bottom"></picture> everyrow SDK
+
+Python SDK for [everyrow.io](https://everyrow.io). Rank, dedupe, merge, and screen your dataframes using natural language—or run web agents to research every row.
+
+## Table of Contents
+
+New to everyrow? Head to [Getting Started](#getting-started)
+
+Looking to use our agent-backed utilities? Check out:
+- [Rank](#rank)
+- [Dedupe](#dedupe)
+- [Merge](#merge)
+- [Screen](#screen)
+- [Agent Tasks](#agent-tasks)
+
+## Getting Started
+
+Get an API key at [everyrow.io](https://everyrow.io).
+
+```bash
+export EVERYROW_API_KEY=your_api_key_here
+```
+
+### Installation
+
+```bash
+pip install everyrow
+```
+
+For development:
+
+```bash
+uv pip install -e .
+uv sync
+```
+
+Requires Python >= 3.12
+
+### Claude Code Plugin
+
+There's a plugin for [Claude Code](https://code.claude.com/) that teaches Claude how to use the SDK:
+
+```sh
+# from Claude Code
+/plugin marketplace add futuresearch/everyrow-sdk
+/plugin install everyrow@futuresearch
+
+# from terminal
+claude plugin marketplace add futuresearch/everyrow-sdk
+claude plugin install everyrow@futuresearch
+```
+
+## Rank
+
+Score rows based on criteria you can't put in a database field. The AI researches each row and assigns scores based on qualitative factors.
+
+```python
+from everyrow.ops import rank
+
+result = await rank(
+    task="Score by likelihood to need data integration solutions",
+    input=leads_dataframe,
+    field_name="integration_need_score",
+)
+```
+
+Say you want to rank leads by "likelihood to need data integration tools"—Ultramain Systems (sells software to airlines) looks similar to Ukraine International Airlines (is an airline) by industry code, but their actual needs are completely different. Traditional scoring can't tell them apart.
+
+**Case studies:** [Lead Scoring with Data Fragmentation](https://futuresearch.ai/lead-scoring-data-fragmentation/) (1,000 leads, 7 min, $13) · [Lead Scoring Without CRM](https://futuresearch.ai/lead-scoring-without-crm/) ($28 vs $145 with Clay)
+
+[Full documentation →](docs/RANK.md)
+
+### Dedupe
+
+Deduplicate when fuzzy matching falls short. The AI understands that "AbbVie Inc", "Abbvie", and "AbbVie Pharmaceutical" are the same company, or that "Big Blue" means IBM.
+
+```python
+from everyrow.ops import dedupe
+
+result = await dedupe(
+    input=crm_data,
+    equivalence_relation="Two entries are duplicates if they represent the same legal entity",
+)
+```
+
+The `equivalence_relation` tells the AI what counts as a duplicate—natural language, not regex. Results include `equivalence_class_id` (groups duplicates), `equivalence_class_name` (human-readable cluster name), and `selected` (the canonical record in each cluster).
+
+**Case studies:** [CRM Deduplication](https://futuresearch.ai/crm-deduplication/) (500→124 rows, 2 min, $1.67) · [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/) (98% accuracy with career changes)
+
+[Full documentation →](docs/DEDUPE.md)
+
+### Merge
+
+Join two tables when the keys don't match exactly—or at all. The AI knows "Photoshop" belongs to "Adobe" and "Genentech" is a Roche subsidiary, even with zero string similarity.
+
+```python
+from everyrow.ops import merge
+
+result = await merge(
+    task="Match each software product to its parent company",
+    left_table=software_products,
+    right_table=approved_suppliers,
+    merge_on_left="software_name",
+    merge_on_right="company_name",
+)
+```
+
+Handles subsidiaries, abbreviations (MSD → Merck), regional names, typos, and pseudonyms. Fuzzy matching thresholds always fail somewhere—0.9 misses "Colfi" ↔ "Dr. Ioana Colfescu", 0.7 false-positives on "John Smith" ↔ "Jane Smith".
+
+**Case studies:** [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/) (2,000 products, 91% accuracy, $9) · [HubSpot Contact Merge](https://futuresearch.ai/merge-hubspot-contacts/) (99.9% recall) · [CRM Merge Workflow](https://futuresearch.ai/crm-merge-workflow/)
+
+[Full documentation →](docs/MERGE.md)
+
+### Screen
+
+Filter rows based on criteria that require research—things you can't express in SQL. The AI actually researches each row (10-Ks, earnings reports, news) before deciding pass/fail.
+
+```python
+from everyrow.ops import screen
+from pydantic import BaseModel, Field
+
+class ScreenResult(BaseModel):
+    passes: bool = Field(description="True if company meets the criteria")
+
+result = await screen(
+    task="""
+    Find companies with >75% recurring revenue that would benefit from
+    Taiwan tensions - CHIPS Act beneficiaries, defense contractors,
+    cybersecurity firms. Exclude companies dependent on Taiwan manufacturing.
+    """,
+    input=sp500_companies,
+    response_model=ScreenResult,
+)
+```
+
+Works for investment theses, geopolitical exposure, vendor risk assessment, job posting filtering, lead qualification—anything requiring judgment. Screening 500 S&P 500 companies takes ~12 min and $3 with >90% precision. Regex gets 68%.
+
+**Case studies:** [Thematic Stock Screen](https://futuresearch.ai/thematic-stock-screening/) (63/502 passed, $3.29) · [Job Posting Screen](https://futuresearch.ai/job-posting-screening/) (>90% vs 68% regex) · [Lead Screening Workflow](https://futuresearch.ai/screening-workflow/)
+
+[Full documentation →](docs/SCREEN.md)
+
+### Agent Tasks
+
+For single-input tasks, use `single_agent`. For batch processing, use `agent_map`.
+
+```python
+from everyrow.ops import single_agent, agent_map
+from pandas import DataFrame
+
+# Single input
+result = await single_agent(
+    task="What is the capital of the given country?",
+    input={"country": "India"},
+)
+
+# Batch processing
+result = await agent_map(
+    task="What is the capital of the given country?",
+    input=DataFrame([{"country": "India"}, {"country": "USA"}]),
+)
+```
+
+Our agents are tuned on [Deep Research Bench](https://arxiv.org/abs/2506.06287), a benchmark we built for evaluating web research on questions that require extensive searching and cross-referencing.
+
+## Advanced
+
+### Sessions
+
+For quick one-off operations, sessions are created automatically:
+
+```python
+from everyrow.ops import single_agent
+
+result = await single_agent(
+    task="What is the capital of France?",
+    input={"country": "France"},
+)
+```
+
+For multiple operations, use an explicit session:
+
+```python
+from everyrow import create_session
+
+async with create_session(name="My Session") as session:
+    print(f"View session at: {session.get_url()}")
+    # All operations here share the same session
+```
+
+If you want more explicit control over the client (for example, to reuse it across sessions or configure custom settings), you can create it directly:
+
+```python
+from everyrow import create_client, create_session
+
+async with create_client() as client:
+    async with create_session(client=client, name="My Session") as session:
+        # ...
+```
+
+Sessions are visible on the [everyrow.io](https://everyrow.io) dashboard.
+
+### Async Operations
+
+All utilities have async variants for background processing. These need an explicit session since the task persists beyond the function call:
+
+```python
+from everyrow import create_session
+from everyrow.ops import rank_async
+
+async with create_session(name="Async Ranking") as session:
+    task = await rank_async(
+        session=session,
+        task="Score this organization",
+        input=dataframe,
+        field_name="score",
+    )
+
+    # Continue with other work...
+    result = await task.await_result()
+```
+
+## Case Studies
+
+More at [futuresearch.ai/solutions](https://futuresearch.ai/solutions/).
+
+**Notebooks:**
+- [CRM Deduplication](case_studies/dedupe/case_01_crm_data.ipynb)
+- [Thematic Stock Screen](case_studies/screen/thematic_stock_screen.ipynb)
+- [Oil Price Margin Screen](case_studies/screen/oil_price_margin_screen.ipynb)
+
+**On futuresearch.ai:**
+- [Lead Scoring with Data Fragmentation](https://futuresearch.ai/lead-scoring-data-fragmentation/)
+- [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/)
+- [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/)
+
+To run notebooks:
+
+```bash
+uv sync --group case-studies
+```
+
+## Development
+
+```bash
+uv sync
+lefthook install
+```
+
+```bash
+uv run pytest          # tests
+uv run ruff check .    # lint
+uv run ruff format .   # format
+uv run basedpyright    # type check
+./generate_openapi.sh  # regenerate client
+```
+
+The `everyrow/generated/` directory is excluded from linting (auto-generated code).
+
+## License
+
+This project is licensed under the MIT License - see LICENSE.txt file for details.
```
{everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/RECORD
CHANGED

```diff
@@ -1,11 +1,11 @@
-everyrow/__init__.py,sha256=
+everyrow/__init__.py,sha256=g-I6zj2wOtb_WH6l0aYdtS83OxQJy78tJfm_H0vB5qk,197
 everyrow/api_utils.py,sha256=iU1LZYjB2iPHCRZjDNEW64gEQWQbiZxiB8XVoj5SzPM,1437
-everyrow/citations.py,sha256=
+everyrow/citations.py,sha256=J5yJQ3P3g8a7kaQBluto6yK6bnLRzs4kP301bbS_KGo,1701
 everyrow/constants.py,sha256=OKsAtaodzvmPy9LNzmYl1u_axEe208NRBuAJGqghZs0,98
-everyrow/ops.py,sha256=
+everyrow/ops.py,sha256=9utuzHSgEWviiQDv7FX4aGtGwSwPxFbT-k_XKfNmL0Q,25981
 everyrow/result.py,sha256=2vCiE17kdbgkYKAdvfkpXJsSCr10U8FdO8NpS8eiofg,413
-everyrow/session.py,sha256=
-everyrow/task.py,sha256=
+everyrow/session.py,sha256=Au13oES0MPoBlfnL3LWUb45AB0vf3YtDES1YoYiZnjI,2721
+everyrow/task.py,sha256=I374zFqYQSUKmPe9MBN5Bb93uC8XdTD_zbmRr08vhCU,7605
 everyrow/generated/__init__.py,sha256=qUheje2C4lZ8b26EUHXHRJ3dWuzKiExv_JVOdVCFAek,150
 everyrow/generated/client.py,sha256=-rT3epMc77Y7QMTy5o1oH5hkGLufY9qFrD1rb7qItFU,12384
 everyrow/generated/errors.py,sha256=gO8GBmKqmSNgAg-E5oT-oOyxztvp7V_6XG7OUTT15q0,546
@@ -177,7 +177,7 @@ everyrow/generated/models/usage_response.py,sha256=k4WU5fOfyTMpXTTZ8OJG9i-TgU6Zw
 everyrow/generated/models/validation_error.py,sha256=n8d_ZobQV26pm0KyDAKvIo93uOBhz2BH59jpJAKwoPY,2180
 everyrow/generated/models/whoami_whoami_get_response_whoami_whoami_get.py,sha256=-NkKDTygoMsXFibAuU9nTRUOrsGwqm7PZ7EXfYI0G8E,1386
 everyrow/generated/models/workflow_leaf_node_input.py,sha256=TQ-y_VHus3WmpMUiFsXlD-d6Sm2nKraVvRFSWb_SzH0,1970
-everyrow-0.1.0.dist-info/METADATA,sha256=
-everyrow-0.1.0.dist-info/WHEEL,sha256=
-everyrow-0.1.0.dist-info/licenses/LICENSE.txt,sha256=
-everyrow-0.1.0.dist-info/RECORD,,
+everyrow-0.1.1.dist-info/METADATA,sha256=BAdgeuyOgo_mL1TrPGgGn0MUVEwdo2VunPZVsxD4lnM,9069
+everyrow-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+everyrow-0.1.1.dist-info/licenses/LICENSE.txt,sha256=8gN2nA06HyReyL7Mfu9nsBIpUF-B6wL5SJenlMRN8ac,1070
+everyrow-0.1.1.dist-info/RECORD,,
```
everyrow-0.1.0.dist-info/METADATA
DELETED

```diff
@@ -1,238 +0,0 @@
-Metadata-Version: 2.4
-Name: everyrow
-Version: 0.1.0
-Summary: An SDK for everyrow.io: agent ops at spreadsheet scale
-License-File: LICENSE.txt
-Requires-Python: >=3.12
-Requires-Dist: attrs>=25.4.0
-Requires-Dist: pandas>=2.3.3
-Requires-Dist: pydantic>=2.12.5
-Requires-Dist: python-dotenv>=1.2.1
-Description-Content-Type: text/markdown
-
-# everyrow SDK
-
-The everyrow SDK provides intelligent data processing utilities powered by AI agents. Transform, dedupe, merge, rank, and screen your dataframes using natural language instructions. Whether you're deduplicating research papers, merging complex datasets, ranking organizations, or screening vendors, the SDK handles the heavy lifting by combining AI research capabilities with structured data operations.
-
-## Installation
-
-```bash
-uv pip install -e .
-```
-
-Or install dependencies:
-
-```bash
-uv sync
-```
-
-## Requirements
-
-- Python >= 3.12
-
-## Configuration
-
-Get an API key from https://everyrow.io and set it to get started:
-
-```bash
-# Set in your environment or .env file
-EVERYROW_API_KEY=your_api_key_here
-```
-
-## Usage
-
-### Quick Start
-
-```python
-from everyrow import create_session
-from everyrow.ops import dedupe
-from pandas import DataFrame
-
-async with create_session() as session:
-    data = DataFrame([...])
-    result = await dedupe(
-        session=session,
-        input=data,
-        equivalence_relation="Two items are duplicates if...",
-    )
-    print(result.data)
-```
-
-### Core Utilities
-
-#### Rank: `rank`
-
-Extract and rank rows based on AI-generated scores:
-
-```python
-from everyrow.ops import rank
-
-result = await rank(
-    session=session,
-    task="Score this organization by their contribution to AI research",
-    input=dataframe,
-    field_name="contribution_score",
-    ascending_order=False,
-)
-```
-
-#### Dedupe: `dedupe`
-
-Intelligently deduplicate your data using AI-powered equivalence detection:
-
-```python
-from everyrow.ops import dedupe
-
-result = await dedupe(
-    session=session,
-    input=dataframe,
-    equivalence_relation="Two entries are duplicates if they represent the same research work",
-)
-```
-
-#### Merge: `merge`
-
-Merge two tables using AI to match related rows:
-
-```python
-from everyrow.ops import merge
-
-result = await merge(
-    session=session,
-    task="Match clinical trial sponsors with parent companies",
-    left_table=trial_data,
-    right_table=company_data,
-    merge_on_left="sponsor",
-    merge_on_right="company",
-)
-```
-
-#### Screen: `screen`
-
-Evaluate and filter rows based on criteria that require research:
-
-```python
-from everyrow.ops import screen
-from pydantic import BaseModel
-
-class Assessment(BaseModel):
-    risk_level: str
-    recommendation: str
-
-result = await screen(
-    session=session,
-    task="Evaluate vendor security and financial stability",
-    input=vendors,
-    response_model=Assessment,
-)
-```
-
-### Viewing Sessions
-
-Every session has a web interface URL:
-
-```python
-async with create_session(name="My Session") as session:
-    print(f"View session at: {session.get_url()}")
-    # ... use session for operations
-```
-
-### Agent Tasks
-
-For single-input tasks, use `single_agent`:
-
-```python
-from everyrow.ops import single_agent
-from pydantic import BaseModel
-
-class Input(BaseModel):
-    country: str
-
-result = await single_agent(
-    session=session,
-    task="What is the capital of the given country?",
-    input=Input(country="India"),
-)
-```
-
-For batch processing, use `agent_map`:
-
-```python
-from everyrow.ops import agent_map
-
-result = await agent_map(
-    session=session,
-    task="What is the capital of the given country?",
-    input=DataFrame([{"country": "India"}, {"country": "USA"}]),
-)
-```
-
-### Async Operations
-
-All utilities have async variants for background processing:
-
-```python
-from everyrow.ops import rank_async
-
-task = await rank_async(
-    session=session,
-    task="Score this organization",
-    input=dataframe,
-    field_name="score",
-)
-
-# Continue with other work...
-
-result = await task.await_result(session.client)
-```
-
-## Case Studies
-
-The `case_studies/` directory contains example workflows demonstrating real-world usage of the SDK. To run case studies, install the optional dependencies:
-
-```bash
-uv sync --group case-studies
-```
-
-Then you can run the case study scripts or open the Jupyter notebooks in your preferred environment.
-
-## Development
-
-### Setup
-
-```bash
-uv sync
-lefthook install
-```
-
-### Running Tests
-
-```bash
-uv run pytest
-```
-
-### Linting & Formatting
-
-```bash
-uv run ruff check .
-uv run ruff check --fix .
-uv run ruff format .
-```
-
-### Type Checking
-
-```bash
-uv run basedpyright
-```
-
-### Generating OpenAPI Client
-
-```bash
-./generate_openapi.sh
-```
-
-Note: The `everyrow/generated/` directory is excluded from linting as it contains auto-generated code.
-
-## License
-
-This project is licensed under the MIT License - see LICENSE.txt file for details.
```
{everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/WHEEL
File without changes

{everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/licenses/LICENSE.txt
File without changes