everyrow-0.1.0-py3-none-any.whl → everyrow-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- everyrow/__init__.py +2 -1
- everyrow/citations.py +6 -2
- everyrow/generated/models/__init__.py +6 -6
- everyrow/generated/models/agent_query_params.py +21 -0
- everyrow/generated/models/allowed_suggestions.py +1 -0
- everyrow/generated/models/artifact_group_record.py +42 -9
- everyrow/generated/models/artifact_group_record_analysis_type_0.py +46 -0
- everyrow/generated/models/dedupe_public_params.py +64 -0
- everyrow/generated/models/dedupe_request_params.py +5 -5
- everyrow/generated/models/deep_rank_public_params.py +10 -0
- everyrow/generated/models/deep_screen_public_params.py +10 -0
- everyrow/generated/models/standalone_artifact_record.py +33 -0
- everyrow/generated/models/standalone_artifact_record_analysis_type_0.py +46 -0
- everyrow/ops.py +186 -98
- everyrow/session.py +33 -11
- everyrow/task.py +102 -15
- everyrow-0.1.2.dist-info/METADATA +332 -0
- {everyrow-0.1.0.dist-info → everyrow-0.1.2.dist-info}/RECORD +20 -20
- everyrow/generated/models/dedupe_mode.py +0 -9
- everyrow/generated/models/dedupe_query_params.py +0 -174
- everyrow/generated/models/embedding_models.py +0 -9
- everyrow-0.1.0.dist-info/METADATA +0 -238
- {everyrow-0.1.0.dist-info → everyrow-0.1.2.dist-info}/WHEEL +0 -0
- {everyrow-0.1.0.dist-info → everyrow-0.1.2.dist-info}/licenses/LICENSE.txt +0 -0
everyrow/generated/models/standalone_artifact_record_analysis_type_0.py
ADDED
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, TypeVar
+
+from attrs import define as _attrs_define
+from attrs import field as _attrs_field
+
+T = TypeVar("T", bound="StandaloneArtifactRecordAnalysisType0")
+
+
+@_attrs_define
+class StandaloneArtifactRecordAnalysisType0:
+    """ """
+
+    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        field_dict: dict[str, Any] = {}
+        field_dict.update(self.additional_properties)
+
+        return field_dict
+
+    @classmethod
+    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
+        d = dict(src_dict)
+        standalone_artifact_record_analysis_type_0 = cls()
+
+        standalone_artifact_record_analysis_type_0.additional_properties = d
+        return standalone_artifact_record_analysis_type_0
+
+    @property
+    def additional_keys(self) -> list[str]:
+        return list(self.additional_properties.keys())
+
+    def __getitem__(self, key: str) -> Any:
+        return self.additional_properties[key]
+
+    def __setitem__(self, key: str, value: Any) -> None:
+        self.additional_properties[key] = value
+
+    def __delitem__(self, key: str) -> None:
+        del self.additional_properties[key]
+
+    def __contains__(self, key: str) -> bool:
+        return key in self.additional_properties
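
The generated model above is a catch-all: every field lands in additional_properties, with dict-style access layered on top. A minimal usage sketch (mine, not from the diff; it assumes the module is importable from the installed wheel under the path shown in the file list):

from everyrow.generated.models.standalone_artifact_record_analysis_type_0 import (
    StandaloneArtifactRecordAnalysisType0,
)

# from_dict copies the whole mapping into additional_properties
analysis = StandaloneArtifactRecordAnalysisType0.from_dict({"score": 0.93, "label": "ok"})
analysis["reviewed"] = True               # __setitem__ writes through
assert analysis["score"] == 0.93          # __getitem__ reads back
assert "reviewed" in analysis             # __contains__
assert analysis.to_dict() == {"score": 0.93, "label": "ok", "reviewed": True}
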
everyrow/ops.py
CHANGED
@@ -1,3 +1,4 @@
+import json
 from typing import Any, Literal, TypeVar, overload
 from uuid import UUID
 
@@ -11,8 +12,7 @@ from everyrow.generated.models import (
     CreateGroupRequest,
     CreateQueryParams,
     CreateRequest,
-    DedupeMode,
-    DedupeQueryParams,
+    DedupePublicParams,
     DedupeRequestParams,
     DeepMergePublicParams,
     DeepMergeRequest,
@@ -23,7 +23,6 @@ from everyrow.generated.models import (
     DeriveExpression,
     DeriveQueryParams,
     DeriveRequest,
-    EmbeddingModels,
     MapAgentRequestParams,
     ProcessingMode,
     ReduceAgentRequestParams,
@@ -32,7 +31,7 @@ from everyrow.generated.models import (
 from everyrow.generated.models.submit_task_body import SubmitTaskBody
 from everyrow.generated.types import UNSET
 from everyrow.result import Result, ScalarResult, TableResult
-from everyrow.session import Session
+from everyrow.session import Session, create_session
 from everyrow.task import (
     LLM,
     EffortLevel,
@@ -49,10 +48,14 @@ class DefaultAgentResponse(BaseModel):
     answer: str
 
 
+class DefaultScreenResult(BaseModel):
+    passes: bool
+
+
 @overload
 async def single_agent[T: BaseModel](
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
@@ -64,7 +67,7 @@ async def single_agent[T: BaseModel](
 @overload
 async def single_agent(
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
@@ -75,13 +78,25 @@ async def single_agent(
 
 async def single_agent[T: BaseModel](
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | DataFrame | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
     response_model: type[T] = DefaultAgentResponse,
     return_table: bool = False,
 ) -> ScalarResult[T] | TableResult:
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await single_agent_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                effort_level=effort_level,
+                llm=llm,
+                response_model=response_model,
+                return_table=return_table,
+            )
+            return await cohort_task.await_result()
     cohort_task = await single_agent_async(
         task=task,
         session=session,
@@ -91,7 +106,7 @@ async def single_agent[T: BaseModel](
         response_model=response_model,
         return_table=return_table,
     )
-    return await cohort_task.await_result(
+    return await cohort_task.await_result()
 
 
 async def single_agent_async[T: BaseModel](
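
The net effect of the optional session: the simplest single_agent call is now a one-liner, with session setup and teardown handled inside the function. A sketch (mine; it assumes the EVERYROW_API_KEY environment variable is set, which create_session needs when no client is supplied):

import asyncio

from everyrow.ops import single_agent


async def main() -> None:
    # No session argument: the function opens and closes one internally.
    result = await single_agent("Name the largest moon of Saturn.")
    print(result)


asyncio.run(main())
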
@@ -135,17 +150,33 @@ async def single_agent_async[T: BaseModel](
 
 async def agent_map(
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
     response_model: type[BaseModel] = DefaultAgentResponse,
-    return_table_per_row: bool = False,
 ) -> TableResult:
+    if input is None:
+        raise EveryrowError("input is required for agent_map")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await agent_map_async(
+                task,
+                internal_session,
+                input,
+                effort_level,
+                llm,
+                response_model,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Agent map task did not return a table result")
     cohort_task = await agent_map_async(
-        task, session, input, effort_level, llm, response_model
+        task, session, input, effort_level, llm, response_model
     )
-    result = await cohort_task.await_result(
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
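
agent_map fans the task out over every row and accepts any Pydantic model as response_model, which agent_map_async converts to a custom response schema. A sketch (hypothetical DataFrame and model; same EVERYROW_API_KEY assumption as above):

import asyncio

import pandas as pd
from pydantic import BaseModel

from everyrow.ops import agent_map


class Headquarters(BaseModel):  # hypothetical response model
    city: str
    country: str


async def main() -> None:
    df = pd.DataFrame({"company": ["Toyota", "Nestle", "Samsung"]})
    result = await agent_map(
        "Find this company's headquarters.",
        input=df,
        response_model=Headquarters,
    )
    print(result.data)  # TableResult exposes the table as .data


asyncio.run(main())
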
@@ -221,7 +252,6 @@ async def agent_map_async(
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
     response_model: type[BaseModel] = DefaultAgentResponse,
-    return_table_per_row: bool = False,
 ) -> EveryrowTask[BaseModel]:
     input_artifact_ids = [await _process_agent_map_input(input, session)]
     query = AgentQueryParams(
@@ -230,7 +260,7 @@ async def agent_map_async(
         llm=llm or UNSET,
         response_schema=_convert_pydantic_to_custom_schema(response_model),
         response_schema_type=ResponseSchemaType.CUSTOM,
-        is_expand=
+        is_expand=False,
         include_provenance_and_notes=False,
     )
     request = MapAgentRequestParams(
@@ -245,7 +275,7 @@ async def agent_map_async(
     )
 
     cohort_task = EveryrowTask(
-        response_model=response_model, is_map=True, is_expand=
+        response_model=response_model, is_map=True, is_expand=False
     )
     await cohort_task.submit(body, session.client)
     return cohort_task
@@ -289,9 +319,11 @@ async def create_scalar_artifact(input: BaseModel, session: Session) -> UUID:
 
 
 async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
-
-
-
+    # Use to_json to handle NaN/NaT serialization, then parse back to Python objects
+    json_str = input.to_json(orient="records")
+    assert json_str is not None  # to_json returns str when no path_or_buf provided
+    records = json.loads(json_str)
+    payload = CreateGroupRequest(query=CreateGroupQueryParams(data_to_create=records))
     body = SubmitTaskBody(
         payload=payload,
         session_id=session.session_id,
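
The comment in the new create_table_artifact body is the whole story: NaN/NaT are not valid JSON, so the DataFrame is routed through to_json (which emits null for them) and parsed back before being sent to the API. A standalone illustration of the difference:

import json

import pandas as pd

df = pd.DataFrame({"price": [1.5, float("nan")]})
print(df.to_dict(orient="records"))              # [{'price': 1.5}, {'price': nan}] - nan is not JSON
print(json.loads(df.to_json(orient="records")))  # [{'price': 1.5}, {'price': None}]
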
@@ -303,29 +335,42 @@ async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
 
 async def merge(
     task: str,
-    session: Session,
-    left_table: DataFrame | UUID | TableResult,
-    right_table: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    left_table: DataFrame | UUID | TableResult | None = None,
+    right_table: DataFrame | UUID | TableResult | None = None,
     merge_on_left: str | None = None,
     merge_on_right: str | None = None,
-    merge_model: LLM | None = None,
-    preview: bool = False,
 ) -> TableResult:
     """Merge two tables using merge operation.
 
     Args:
         task: The task description for the merge operation
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         left_table: The left table to merge (DataFrame, UUID, or TableResult)
         right_table: The right table to merge (DataFrame, UUID, or TableResult)
         merge_on_left: Optional column name in left table to merge on
         merge_on_right: Optional column name in right table to merge on
-        merge_model: Optional LLM model to use for merge operation
-        preview: If True, process only the first few inputs
 
     Returns:
         TableResult containing the merged table
     """
+    if left_table is None or right_table is None:
+        raise EveryrowError("left_table and right_table are required for merge")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await merge_async(
+                task=task,
+                session=internal_session,
+                left_table=left_table,
+                right_table=right_table,
+                merge_on_left=merge_on_left,
+                merge_on_right=merge_on_right,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Merge task did not return a table result")
     cohort_task = await merge_async(
         task=task,
         session=session,
@@ -333,10 +378,8 @@ async def merge(
         right_table=right_table,
         merge_on_left=merge_on_left,
         merge_on_right=merge_on_right,
-        merge_model=merge_model,
-        preview=preview,
     )
-    result = await cohort_task.await_result(
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
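
merge now takes only the semantic knobs (task plus optional join columns); merge_model and preview are gone. A sketch of a fuzzy join under the new signature (hypothetical data; same environment assumption):

import asyncio

import pandas as pd

from everyrow.ops import merge


async def main() -> None:
    companies = pd.DataFrame({"name": ["Acme Corp", "Globex"]})
    filings = pd.DataFrame({"registrant": ["ACME Corporation", "Globex LLC"]})
    result = await merge(
        "Match each company to its filing, tolerating name variations.",
        left_table=companies,
        right_table=filings,
        merge_on_left="name",
        merge_on_right="registrant",
    )
    print(result.data)


asyncio.run(main())
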
@@ -350,8 +393,6 @@ async def merge_async(
     right_table: DataFrame | UUID | TableResult,
     merge_on_left: str | None = None,
     merge_on_right: str | None = None,
-    merge_model: LLM | None = None,
-    preview: bool = False,
 ) -> EveryrowTask[BaseModel]:
     """Submit a merge task asynchronously."""
     left_artifact_id = await _process_agent_map_input(left_table, session)
@@ -361,8 +402,6 @@ async def merge_async(
         task=task,
         merge_on_left=merge_on_left or UNSET,
         merge_on_right=merge_on_right or UNSET,
-        merge_model=merge_model or UNSET,
-        preview=preview,
     )
     request = DeepMergeRequest(
         query=query,
@@ -381,29 +420,45 @@ async def merge_async(
 
 async def rank[T: BaseModel](
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
-    field_name: str,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    field_name: str | None = None,
     field_type: Literal["float", "int", "str", "bool"] = "float",
     response_model: type[T] | None = None,
     ascending_order: bool = True,
-    preview: bool = False,
 ) -> TableResult:
     """Rank rows in a table using rank operation.
 
     Args:
         task: The task description for ranking
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         field_name: The name of the field to extract and sort by
         field_type: The type of the field (default: "float", ignored if response_model is provided)
        response_model: Optional Pydantic model for the response schema
         ascending_order: If True, sort in ascending order
-        preview: If True, process only the first few inputs
 
     Returns:
         TableResult containing the ranked table
     """
+    if input is None or field_name is None:
+        raise EveryrowError("input and field_name are required for rank")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await rank_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                field_name=field_name,
+                field_type=field_type,
+                response_model=response_model,
+                ascending_order=ascending_order,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Rank task did not return a table result")
     cohort_task = await rank_async(
         task=task,
         session=session,
@@ -412,9 +467,8 @@ async def rank[T: BaseModel](
         field_type=field_type,
         response_model=response_model,
         ascending_order=ascending_order,
-        preview=preview,
     )
-    result = await cohort_task.await_result(
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
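
rank pairs a free-form scoring instruction with a typed sort key: an agent extracts field_name for every row, then the table is sorted by it. A sketch (hypothetical rows; the field name is mine):

import asyncio

import pandas as pd

from everyrow.ops import rank


async def main() -> None:
    leads = pd.DataFrame({"lead": ["CTO at a fintech", "student hobbyist", "VP Eng at a bank"]})
    result = await rank(
        "Score each lead from 0 to 1 by likelihood to buy.",
        input=leads,
        field_name="buy_score",
        field_type="float",
        ascending_order=False,  # best leads first
    )
    print(result.data)


asyncio.run(main())
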
@@ -429,7 +483,6 @@ async def rank_async[T: BaseModel](
     field_type: Literal["float", "int", "str", "bool"] = "float",
     response_model: type[T] | None = None,
     ascending_order: bool = True,
-    preview: bool = False,
 ) -> EveryrowTask[T]:
     """Submit a rank task asynchronously."""
     input_artifact_id = await _process_agent_map_input(input, session)
@@ -454,7 +507,6 @@ async def rank_async[T: BaseModel](
         response_schema=response_schema,
         field_to_sort_by=field_name,
         ascending_order=ascending_order,
-        preview=preview,
     )
     request = DeepRankRequest(
         query=query,
@@ -477,34 +529,44 @@ async def rank_async[T: BaseModel](
 
 async def screen[T: BaseModel](
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
     response_model: type[T] | None = None,
-    batch_size: int | None = None,
-    preview: bool = False,
 ) -> TableResult:
     """Screen rows in a table using screen operation.
 
     Args:
         task: The task description for screening
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
-        response_model: Optional Pydantic model for the response schema
-
-        preview: If True, process only the first few inputs
+        response_model: Optional Pydantic model for the response schema.
+            If not provided, defaults to a result with just a "passes" boolean.
 
     Returns:
         TableResult containing the screened table
     """
+    if input is None:
+        raise EveryrowError("input is required for screen")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await screen_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                response_model=response_model,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Screen task did not return a table result")
     cohort_task = await screen_async(
         task=task,
         session=session,
         input=input,
         response_model=response_model,
-        batch_size=batch_size,
-        preview=preview,
     )
-    result = await cohort_task.await_result(
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
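
With no response_model, screen now falls back to the new DefaultScreenResult, i.e. a single passes boolean per row. A sketch (hypothetical data; same assumptions as above):

import asyncio

import pandas as pd

from everyrow.ops import screen


async def main() -> None:
    papers = pd.DataFrame({"abstract": ["We study LLM agents...", "A sourdough recipe..."]})
    result = await screen(
        "Pass rows whose abstract is about machine learning.",
        input=papers,
    )
    print(result.data)  # per DefaultScreenResult, each row carries a passes boolean


asyncio.run(main())
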
@@ -516,25 +578,17 @@ async def screen_async[T: BaseModel](
     session: Session,
     input: DataFrame | UUID | TableResult,
     response_model: type[T] | None = None,
-    batch_size: int | None = None,
-    preview: bool = False,
 ) -> EveryrowTask[T]:
     """Submit a screen task asynchronously."""
     input_artifact_id = await _process_agent_map_input(input, session)
 
-
-
-        response_schema_type = ResponseSchemaType.JSON
-    else:
-        response_schema = UNSET
-        response_schema_type = UNSET
+    actual_response_model = response_model or DefaultScreenResult
+    response_schema = actual_response_model.model_json_schema()
 
     query = DeepScreenPublicParams(
         task=task,
-        batch_size=batch_size or UNSET,
         response_schema=response_schema,
-        response_schema_type=
-        preview=preview,
+        response_schema_type=ResponseSchemaType.JSON,
     )
     request = DeepScreenRequest(
         query=query,
@@ -546,7 +600,7 @@ async def screen_async[T: BaseModel](
     )
 
     cohort_task: EveryrowTask[T] = EveryrowTask(
-        response_model=
+        response_model=actual_response_model,  # type: ignore[arg-type]
         is_map=True,
         is_expand=False,
     )
@@ -555,39 +609,44 @@ async def screen_async[T: BaseModel](
 
 
 async def dedupe(
-    session: Session,
-    input: DataFrame | UUID | TableResult,
     equivalence_relation: str,
-
-
-
-    embedding_model: EmbeddingModels | None = None,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    select_representative: bool = True,
 ) -> TableResult:
     """Dedupe a table by removing duplicates using dedupe operation.
 
     Args:
-        session: The session to use
-        input: The input table (DataFrame, UUID, or TableResult)
         equivalence_relation: Description of what makes items equivalent
-
-
-
-        max_consecutive_empty: Optional stop processing a row after this many consecutive comparisons with no matches
-        embedding_model: Optional embedding model to use when reorder_by_embedding is True
+        session: Optional session. If not provided, one will be created automatically.
+        input: The input table (DataFrame, UUID, or TableResult)
+        select_representative: If True, select a representative for each group of duplicates
 
     Returns:
         TableResult containing the deduped table with duplicates removed
     """
+    if input is None or equivalence_relation is None:
+        raise EveryrowError("input and equivalence_relation are required for dedupe")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await dedupe_async(
+                session=internal_session,
+                input=input,
+                equivalence_relation=equivalence_relation,
+                select_representative=select_representative,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Dedupe task did not return a table result")
     cohort_task = await dedupe_async(
         session=session,
         input=input,
         equivalence_relation=equivalence_relation,
-
-        chunk_size=chunk_size,
-        mode=mode,
-        embedding_model=embedding_model,
+        select_representative=select_representative,
     )
-    result = await cohort_task.await_result(
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
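
dedupe's surface shrank to one semantic knob, equivalence_relation, plus select_representative; chunking, mode, and embedding-model selection dropped out of the public API. A sketch:

import asyncio

import pandas as pd

from everyrow.ops import dedupe


async def main() -> None:
    df = pd.DataFrame({"company": ["Acme Inc", "ACME, Incorporated", "Globex"]})
    result = await dedupe(
        equivalence_relation="rows name the same real-world company",
        input=df,
        select_representative=True,  # keep one row per duplicate group
    )
    print(result.data)


asyncio.run(main())
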
@@ -598,20 +657,14 @@ async def dedupe_async(
     session: Session,
     input: DataFrame | UUID | TableResult,
     equivalence_relation: str,
-
-    chunk_size: int | None = None,
-    mode: DedupeMode | None = None,
-    embedding_model: EmbeddingModels | None = None,
+    select_representative: bool = True,
 ) -> EveryrowTask[BaseModel]:
     """Submit a dedupe task asynchronously."""
     input_artifact_id = await _process_agent_map_input(input, session)
 
-    query =
+    query = DedupePublicParams(
         equivalence_relation=equivalence_relation,
-
-        chunk_size=chunk_size or UNSET,
-        mode=mode or UNSET,
-        embedding_model=embedding_model or UNSET,
+        select_representative=select_representative,
     )
     request = DedupeRequestParams(
         query=query,
@@ -629,14 +682,14 @@ async def dedupe_async(
 
 
 async def derive(
-    session: Session,
-    input: DataFrame | UUID | TableResult,
-    expressions: dict[str, str],
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    expressions: dict[str, str] | None = None,
 ) -> TableResult:
     """Derive new columns using pandas eval expressions.
 
     Args:
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         expressions: A dictionary mapping column names to pandas expressions.
             Example: {"approved": "True", "score": "price * quantity"}
@@ -644,6 +697,41 @@ async def derive(
     Returns:
         TableResult containing the table with new derived columns
     """
+    if input is None or expressions is None:
+        raise EveryrowError("input and expressions are required for derive")
+    if session is None:
+        async with create_session() as internal_session:
+            input_artifact_id = await _process_agent_map_input(input, internal_session)
+
+            derive_expressions = [
+                DeriveExpression(column_name=col_name, expression=expr)
+                for col_name, expr in expressions.items()
+            ]
+
+            query = DeriveQueryParams(expressions=derive_expressions)
+            request = DeriveRequest(
+                query=query,
+                input_artifacts=[input_artifact_id],
+            )
+            body = SubmitTaskBody(
+                payload=request,
+                session_id=internal_session.session_id,
+            )
+
+            task_id = await submit_task(body, internal_session.client)
+            finished_task = await await_task_completion(
+                task_id, internal_session.client
+            )
+
+            data = await read_table_result(
+                finished_task.artifact_id,  # type: ignore[arg-type]
+                internal_session.client,
+            )
+            return TableResult(
+                artifact_id=finished_task.artifact_id,  # type: ignore
+                data=data,
+                error=finished_task.error,
+            )
     input_artifact_id = await _process_agent_map_input(input, session)
 
     derive_expressions = [
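
derive is the one verb that needs no agent at all: it evaluates pandas-style expressions server-side against the table. A sketch reusing the docstring's expression style:

import asyncio

import pandas as pd

from everyrow.ops import derive


async def main() -> None:
    orders = pd.DataFrame({"price": [9.99, 4.50], "quantity": [3, 10]})
    result = await derive(
        input=orders,
        expressions={"total": "price * quantity", "bulk": "quantity >= 10"},
    )
    print(result.data)


asyncio.run(main())
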
everyrow/session.py
CHANGED
@@ -4,7 +4,7 @@ from contextlib import asynccontextmanager
 from datetime import datetime
 from uuid import UUID
 
-from everyrow.api_utils import handle_response
+from everyrow.api_utils import create_client, handle_response
 from everyrow.generated.api.default import (
     create_session_endpoint_sessions_create_post,
 )
@@ -33,21 +33,43 @@ class Session:
 
 @asynccontextmanager
 async def create_session(
-    client: AuthenticatedClient,
+    client: AuthenticatedClient | None = None,
     name: str | None = None,
 ) -> AsyncGenerator[Session, None]:
     """Create a new session and yield it as an async context manager.
 
     Args:
-        client:
-
+        client: Optional authenticated client. If not provided, one will be created
+            automatically using the EVERYROW_API_KEY environment variable and
+            managed within this context manager.
         name: Name for the session. If not provided, defaults to
             "everyrow-sdk-session-{timestamp}".
+
+    Example:
+        # With explicit client (client lifecycle managed externally)
+        async with create_client() as client:
+            async with create_session(client=client, name="My Session") as session:
+                ...
+
+        # Without client (client created and managed internally)
+        async with create_session(name="My Session") as session:
+            ...
     """
-
-
-
-
-
-
-
+    owns_client = client is None
+    if owns_client:
+        client = create_client()
+        await client.__aenter__()
+
+    try:
+        response = await create_session_endpoint_sessions_create_post.asyncio(
+            client=client,
+            body=CreateSessionRequest(
+                name=name or f"everyrow-sdk-session-{datetime.now().isoformat()}"
+            ),
+        )
+        response = handle_response(response)
+        session = Session(client=client, session_id=response.session_id)
+        yield session
+    finally:
+        if owns_client:
+            await client.__aexit__()