everyrow 0.1.0-py3-none-any.whl → 0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- everyrow/__init__.py +2 -1
- everyrow/citations.py +6 -2
- everyrow/ops.py +154 -54
- everyrow/session.py +33 -11
- everyrow/task.py +99 -15
- everyrow-0.1.1.dist-info/METADATA +275 -0
- {everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/RECORD +9 -9
- everyrow-0.1.0.dist-info/METADATA +0 -238
- {everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/WHEEL +0 -0
- {everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/licenses/LICENSE.txt +0 -0
everyrow/__init__.py
CHANGED
everyrow/citations.py
CHANGED
```diff
@@ -9,7 +9,9 @@ from everyrow.generated.models import (
 from everyrow.generated.types import Unset
 
 
-def _render_citations(data: dict[str, Any], source_bank: AuxDataSourceBank) -> dict[str, Any]:
+def _render_citations(
+    data: dict[str, Any], source_bank: AuxDataSourceBank
+) -> dict[str, Any]:
     result = deepcopy(data)
     for source_id, source_data in source_bank.to_dict().items():
         for key, value in result.items():
@@ -21,7 +23,9 @@ def _render_citations(data: dict[str, Any], source_bank: AuxDataSourceBank) -> dict[str, Any]:
 
 
 def render_citations_standalone(artifact: StandaloneArtifactRecord):
-    if isinstance(artifact.aux_data, Unset) or isinstance(artifact.aux_data.source_bank, Unset):
+    if isinstance(artifact.aux_data, Unset) or isinstance(
+        artifact.aux_data.source_bank, Unset
+    ):
         return artifact
     source_bank = (
         artifact.aux_data.source_bank
```
everyrow/ops.py
CHANGED
```diff
@@ -11,7 +11,6 @@ from everyrow.generated.models import (
     CreateGroupRequest,
     CreateQueryParams,
     CreateRequest,
-    DedupeMode,
     DedupeQueryParams,
     DedupeRequestParams,
     DeepMergePublicParams,
@@ -23,7 +22,6 @@ from everyrow.generated.models import (
     DeriveExpression,
     DeriveQueryParams,
     DeriveRequest,
-    EmbeddingModels,
     MapAgentRequestParams,
     ProcessingMode,
     ReduceAgentRequestParams,
@@ -32,7 +30,7 @@ from everyrow.generated.models import (
 from everyrow.generated.models.submit_task_body import SubmitTaskBody
 from everyrow.generated.types import UNSET
 from everyrow.result import Result, ScalarResult, TableResult
-from everyrow.session import Session
+from everyrow.session import Session, create_session
 from everyrow.task import (
     LLM,
     EffortLevel,
@@ -52,7 +50,7 @@ class DefaultAgentResponse(BaseModel):
 @overload
 async def single_agent[T: BaseModel](
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
@@ -64,7 +62,7 @@ async def single_agent[T: BaseModel](
 @overload
 async def single_agent(
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
@@ -75,13 +73,25 @@ async def single_agent(
 
 async def single_agent[T: BaseModel](
     task: str,
-    session: Session,
+    session: Session | None = None,
     input: BaseModel | DataFrame | UUID | Result | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
     response_model: type[T] = DefaultAgentResponse,
     return_table: bool = False,
 ) -> ScalarResult[T] | TableResult:
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await single_agent_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                effort_level=effort_level,
+                llm=llm,
+                response_model=response_model,
+                return_table=return_table,
+            )
+            return await cohort_task.await_result()
     cohort_task = await single_agent_async(
         task=task,
         session=session,
@@ -91,7 +101,7 @@ async def single_agent[T: BaseModel](
         response_model=response_model,
         return_table=return_table,
     )
-    return await cohort_task.await_result(session.client)
+    return await cohort_task.await_result()
 
 
 async def single_agent_async[T: BaseModel](
@@ -135,17 +145,29 @@ async def single_agent_async[T: BaseModel](
 
 async def agent_map(
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
     effort_level: EffortLevel = EffortLevel.LOW,
     llm: LLM | None = None,
     response_model: type[BaseModel] = DefaultAgentResponse,
     return_table_per_row: bool = False,
 ) -> TableResult:
+    if input is None:
+        raise EveryrowError("input is required for agent_map")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await agent_map_async(
+                task, internal_session, input, effort_level, llm, response_model, return_table_per_row
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Agent map task did not return a table result")
     cohort_task = await agent_map_async(
         task, session, input, effort_level, llm, response_model, return_table_per_row
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -303,9 +325,9 @@ async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
 
 async def merge(
     task: str,
-    session: Session,
-    left_table: DataFrame | UUID | TableResult,
-    right_table: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    left_table: DataFrame | UUID | TableResult | None = None,
+    right_table: DataFrame | UUID | TableResult | None = None,
     merge_on_left: str | None = None,
     merge_on_right: str | None = None,
     merge_model: LLM | None = None,
@@ -315,7 +337,7 @@ async def merge(
 
     Args:
         task: The task description for the merge operation
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         left_table: The left table to merge (DataFrame, UUID, or TableResult)
         right_table: The right table to merge (DataFrame, UUID, or TableResult)
         merge_on_left: Optional column name in left table to merge on
@@ -326,6 +348,25 @@ async def merge(
     Returns:
         TableResult containing the merged table
     """
+    if left_table is None or right_table is None:
+        raise EveryrowError("left_table and right_table are required for merge")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await merge_async(
+                task=task,
+                session=internal_session,
+                left_table=left_table,
+                right_table=right_table,
+                merge_on_left=merge_on_left,
+                merge_on_right=merge_on_right,
+                merge_model=merge_model,
+                preview=preview,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Merge task did not return a table result")
     cohort_task = await merge_async(
         task=task,
         session=session,
@@ -336,7 +377,7 @@ async def merge(
         merge_model=merge_model,
         preview=preview,
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -381,9 +422,9 @@ async def merge_async(
 
 async def rank[T: BaseModel](
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
-    field_name: str,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    field_name: str | None = None,
     field_type: Literal["float", "int", "str", "bool"] = "float",
     response_model: type[T] | None = None,
     ascending_order: bool = True,
@@ -393,7 +434,7 @@ async def rank[T: BaseModel](
 
     Args:
         task: The task description for ranking
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         field_name: The name of the field to extract and sort by
         field_type: The type of the field (default: "float", ignored if response_model is provided)
@@ -404,6 +445,25 @@ async def rank[T: BaseModel](
     Returns:
         TableResult containing the ranked table
     """
+    if input is None or field_name is None:
+        raise EveryrowError("input and field_name are required for rank")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await rank_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                field_name=field_name,
+                field_type=field_type,
+                response_model=response_model,
+                ascending_order=ascending_order,
+                preview=preview,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Rank task did not return a table result")
     cohort_task = await rank_async(
         task=task,
         session=session,
@@ -414,7 +474,7 @@ async def rank[T: BaseModel](
         ascending_order=ascending_order,
         preview=preview,
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -477,8 +537,8 @@ async def rank_async[T: BaseModel](
 
 async def screen[T: BaseModel](
     task: str,
-    session: Session,
-    input: DataFrame | UUID | TableResult,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
     response_model: type[T] | None = None,
     batch_size: int | None = None,
     preview: bool = False,
@@ -487,7 +547,7 @@ async def screen[T: BaseModel](
 
     Args:
         task: The task description for screening
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         response_model: Optional Pydantic model for the response schema
        batch_size: Optional batch size for processing (default: 10)
@@ -496,6 +556,23 @@ async def screen[T: BaseModel](
     Returns:
         TableResult containing the screened table
     """
+    if input is None:
+        raise EveryrowError("input is required for screen")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await screen_async(
+                task=task,
+                session=internal_session,
+                input=input,
+                response_model=response_model,
+                batch_size=batch_size,
+                preview=preview,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Screen task did not return a table result")
     cohort_task = await screen_async(
         task=task,
         session=session,
@@ -504,7 +581,7 @@ async def screen[T: BaseModel](
         batch_size=batch_size,
         preview=preview,
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -555,39 +632,40 @@ async def screen_async[T: BaseModel](
 
 
 async def dedupe(
-    session: Session,
-    input: DataFrame | UUID | TableResult,
-    equivalence_relation: str,
-    llm: LLM | None = None,
-    chunk_size: int | None = None,
-    mode: DedupeMode | None = None,
-    embedding_model: EmbeddingModels | None = None,
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    equivalence_relation: str | None = None,
 ) -> TableResult:
     """Dedupe a table by removing duplicates using dedupe operation.
 
     Args:
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         equivalence_relation: Description of what makes items equivalent
-        llm: Optional LLM model to use for deduplication
-        chunk_size: Optional maximum number of items to process in a single LLM call (default: 40)
-        mode: Optional dedupe mode (AGENTIC or DIRECT)
-        max_consecutive_empty: Optional stop processing a row after this many consecutive comparisons with no matches
-        embedding_model: Optional embedding model to use when reorder_by_embedding is True
 
     Returns:
         TableResult containing the deduped table with duplicates removed
     """
+    if input is None or equivalence_relation is None:
+        raise EveryrowError("input and equivalence_relation are required for dedupe")
+    if session is None:
+        async with create_session() as internal_session:
+            cohort_task = await dedupe_async(
+                session=internal_session,
+                input=input,
+                equivalence_relation=equivalence_relation,
+            )
+            result = await cohort_task.await_result()
+            if isinstance(result, TableResult):
+                return result
+            else:
+                raise EveryrowError("Dedupe task did not return a table result")
     cohort_task = await dedupe_async(
         session=session,
         input=input,
         equivalence_relation=equivalence_relation,
-        llm=llm,
-        chunk_size=chunk_size,
-        mode=mode,
-        embedding_model=embedding_model,
     )
-    result = await cohort_task.await_result(session.client)
+    result = await cohort_task.await_result()
     if isinstance(result, TableResult):
         return result
     else:
@@ -598,20 +676,12 @@ async def dedupe_async(
     session: Session,
     input: DataFrame | UUID | TableResult,
     equivalence_relation: str,
-    llm: LLM | None = None,
-    chunk_size: int | None = None,
-    mode: DedupeMode | None = None,
-    embedding_model: EmbeddingModels | None = None,
 ) -> EveryrowTask[BaseModel]:
     """Submit a dedupe task asynchronously."""
     input_artifact_id = await _process_agent_map_input(input, session)
 
     query = DedupeQueryParams(
         equivalence_relation=equivalence_relation,
-        llm=llm or UNSET,
-        chunk_size=chunk_size or UNSET,
-        mode=mode or UNSET,
-        embedding_model=embedding_model or UNSET,
     )
     request = DedupeRequestParams(
         query=query,
@@ -629,14 +699,14 @@ async def dedupe_async(
 
 
 async def derive(
-    session: Session,
-    input: DataFrame | UUID | TableResult,
-    expressions: dict[str, str],
+    session: Session | None = None,
+    input: DataFrame | UUID | TableResult | None = None,
+    expressions: dict[str, str] | None = None,
 ) -> TableResult:
     """Derive new columns using pandas eval expressions.
 
     Args:
-        session:
+        session: Optional session. If not provided, one will be created automatically.
         input: The input table (DataFrame, UUID, or TableResult)
         expressions: A dictionary mapping column names to pandas expressions.
             Example: {"approved": "True", "score": "price * quantity"}
@@ -644,6 +714,36 @@ async def derive(
     Returns:
         TableResult containing the table with new derived columns
     """
+    if input is None or expressions is None:
+        raise EveryrowError("input and expressions are required for derive")
+    if session is None:
+        async with create_session() as internal_session:
+            input_artifact_id = await _process_agent_map_input(input, internal_session)
+
+            derive_expressions = [
+                DeriveExpression(column_name=col_name, expression=expr)
+                for col_name, expr in expressions.items()
+            ]
+
+            query = DeriveQueryParams(expressions=derive_expressions)
+            request = DeriveRequest(
+                query=query,
+                input_artifacts=[input_artifact_id],
+            )
+            body = SubmitTaskBody(
+                payload=request,
+                session_id=internal_session.session_id,
+            )
+
+            task_id = await submit_task(body, internal_session.client)
+            finished_task = await await_task_completion(task_id, internal_session.client)
+
+            data = await read_table_result(finished_task.artifact_id, internal_session.client)  # type: ignore
+            return TableResult(
+                artifact_id=finished_task.artifact_id,  # type: ignore
+                data=data,
+                error=finished_task.error,
+            )
     input_artifact_id = await _process_agent_map_input(input, session)
 
     derive_expressions = [
```
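
The net effect of these ops.py changes is that `session` is now optional on every public op (`single_agent`, `agent_map`, `merge`, `rank`, `screen`, `dedupe`, `derive`): when it is omitted, the wrapper opens a session via `create_session()`, runs the task, and awaits the result before the session closes. Below is a minimal sketch of the resulting 0.1.1 call pattern; the sample rows are illustrative, and `EVERYROW_API_KEY` is assumed to be set as described in the new METADATA.

```python
# Sketch of the 0.1.1 sessionless call path (illustrative data).
import asyncio

from pandas import DataFrame

from everyrow.ops import dedupe


async def main() -> None:
    crm = DataFrame(
        [
            {"company": "AbbVie Inc"},
            {"company": "Abbvie"},  # same legal entity, different spelling
        ]
    )
    # No session argument: per the diff above, dedupe() opens an internal
    # session, submits the task, and awaits the TableResult inside it.
    result = await dedupe(
        input=crm,
        equivalence_relation="Two entries are duplicates if they represent the same legal entity",
    )
    print(result.data)


asyncio.run(main())
```

One side effect of keeping `session` ahead of the remaining parameters: arguments that used to be required (`input`, `equivalence_relation`, `left_table`, and so on) now need `None` defaults, so each wrapper re-validates them at runtime and raises `EveryrowError` instead of failing at the type level.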
everyrow/session.py
CHANGED
```diff
@@ -4,7 +4,7 @@ from contextlib import asynccontextmanager
 from datetime import datetime
 from uuid import UUID
 
-from everyrow.api_utils import handle_response
+from everyrow.api_utils import create_client, handle_response
 from everyrow.generated.api.default import (
     create_session_endpoint_sessions_create_post,
 )
@@ -33,21 +33,43 @@ class Session:
 
 @asynccontextmanager
 async def create_session(
-    client: AuthenticatedClient,
+    client: AuthenticatedClient | None = None,
     name: str | None = None,
 ) -> AsyncGenerator[Session, None]:
     """Create a new session and yield it as an async context manager.
 
     Args:
-        client:
-
+        client: Optional authenticated client. If not provided, one will be created
+            automatically using the EVERYROW_API_KEY environment variable and
+            managed within this context manager.
         name: Name for the session. If not provided, defaults to
             "everyrow-sdk-session-{timestamp}".
+
+    Example:
+        # With explicit client (client lifecycle managed externally)
+        async with create_client() as client:
+            async with create_session(client=client, name="My Session") as session:
+                ...
+
+        # Without client (client created and managed internally)
+        async with create_session(name="My Session") as session:
+            ...
     """
-    response = await create_session_endpoint_sessions_create_post.asyncio(
-        client=client,
-        body=CreateSessionRequest(name=name or f"everyrow-sdk-session-{datetime.now().isoformat()}"),
-    )
-    response = handle_response(response)
-    session = Session(client=client, session_id=response.session_id)
-    yield session
+    owns_client = client is None
+    if owns_client:
+        client = create_client()
+        await client.__aenter__()
+
+    try:
+        response = await create_session_endpoint_sessions_create_post.asyncio(
+            client=client,
+            body=CreateSessionRequest(
+                name=name or f"everyrow-sdk-session-{datetime.now().isoformat()}"
+            ),
+        )
+        response = handle_response(response)
+        session = Session(client=client, session_id=response.session_id)
+        yield session
+    finally:
+        if owns_client:
+            await client.__aexit__()
```
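
The rewritten body is an instance of the owns-the-resource pattern: `create_session` enters and exits the client only when it created the client itself, so a caller-supplied client is never closed out from under the caller. Here is a standalone sketch of the same idea; `FakeClient` and `with_client` are hypothetical names for illustration, not SDK APIs.

```python
# Illustration of the owns_client pattern used by create_session() above.
# FakeClient and with_client are hypothetical stand-ins, not SDK names.
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager


class FakeClient:
    async def __aenter__(self) -> "FakeClient":
        print("client opened")
        return self

    async def __aexit__(self, *exc: object) -> None:
        print("client closed")


@asynccontextmanager
async def with_client(
    client: FakeClient | None = None,
) -> AsyncGenerator[FakeClient, None]:
    owns_client = client is None
    if client is None:
        client = FakeClient()
        await client.__aenter__()
    try:
        yield client
    finally:
        # Tear down only what we created; a borrowed client stays open.
        if owns_client:
            await client.__aexit__(None, None, None)
```

With this shape, `async with with_client() as c:` opens and closes its own client, while `async with with_client(existing) as c:` borrows `existing` and leaves its lifecycle to the caller, which is exactly the contract the new `create_session` docstring describes.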
everyrow/task.py
CHANGED
```diff
@@ -5,7 +5,7 @@ from uuid import UUID
 from pandas import DataFrame
 from pydantic.main import BaseModel
 
-from everyrow.api_utils import handle_response
+from everyrow.api_utils import create_client, handle_response
 from everyrow.citations import render_citations_group, render_citations_standalone
 from everyrow.constants import EveryrowError
 from everyrow.generated.api.default import (
@@ -34,26 +34,46 @@ T = TypeVar("T", bound=BaseModel)
 
 class EveryrowTask[T: BaseModel]:
     def __init__(self, response_model: type[T], is_map: bool, is_expand: bool):
-        self.task_id = None
+        self.task_id: UUID | None = None
+        self.session_id: UUID | None = None
+        self._client: AuthenticatedClient | None = None
         self._is_map = is_map
         self._is_expand = is_expand
         self._response_model = response_model
 
-    async def submit(self, body: SubmitTaskBody, client: AuthenticatedClient) -> UUID:
+    async def submit(
+        self,
+        body: SubmitTaskBody,
+        client: AuthenticatedClient,
+    ) -> UUID:
         task_id = await submit_task(body, client)
         self.task_id = task_id
+        self.session_id = body.session_id
+        self._client = client
         return task_id
 
-    async def get_status(self, client: AuthenticatedClient) -> TaskStatusResponse:
+    async def get_status(
+        self, client: AuthenticatedClient | None = None
+    ) -> TaskStatusResponse:
         if self.task_id is None:
             raise EveryrowError("Task must be submitted before fetching status")
+        client = client or self._client
+        if client is None:
+            raise EveryrowError("No client available. Provide a client or use the task within a session context.")
         return await get_task_status(self.task_id, client)
 
-    async def await_result(self, client: AuthenticatedClient) -> TableResult | ScalarResult[T]:
+    async def await_result(
+        self, client: AuthenticatedClient | None = None
+    ) -> TableResult | ScalarResult[T]:
         if self.task_id is None:
             raise EveryrowError("Task must be submitted before awaiting result")
+        client = client or self._client
+        if client is None:
+            raise EveryrowError("No client available. Provide a client or use the task within a session context.")
         final_status_response = await await_task_completion(self.task_id, client)
-        artifact_id = cast(UUID, final_status_response.artifact_id)  # we check artifact_id in await_task_completion
+        artifact_id = cast(
+            UUID, final_status_response.artifact_id
+        )  # we check artifact_id in await_task_completion
 
         if self._is_map or self._is_expand:
             data = await read_table_result(artifact_id, client=client)
@@ -63,7 +83,9 @@ class EveryrowTask[T: BaseModel]:
                 error=final_status_response.error,
             )
         else:
-            data = await read_scalar_result(artifact_id, self._response_model, client=client)
+            data = await read_scalar_result(
+                artifact_id, self._response_model, client=client
+            )
             return ScalarResult(
                 artifact_id=artifact_id,
                 data=data,
@@ -77,7 +99,9 @@ async def submit_task(body: SubmitTaskBody, client: AuthenticatedClient) -> UUID:
     return response.task_id
 
 
-async def await_task_completion(task_id: UUID, client: AuthenticatedClient) -> TaskStatusResponse:
+async def await_task_completion(
+    task_id: UUID, client: AuthenticatedClient
+) -> TaskStatusResponse:
     max_retries = 3
     retries = 0
     while True:
@@ -85,7 +109,9 @@ async def await_task_completion(task_id: UUID, client: AuthenticatedClient) -> TaskStatusResponse:
             status_response = await get_task_status(task_id, client)
         except Exception as e:
             if retries >= max_retries:
-                raise EveryrowError(f"Failed to get task status after {max_retries} retries") from e
+                raise EveryrowError(
+                    f"Failed to get task status after {max_retries} retries"
+                ) from e
             retries += 1
         else:
             retries = 0
@@ -96,14 +122,23 @@ async def await_task_completion(task_id: UUID, client: AuthenticatedClient) -> TaskStatusResponse:
             ):
                 break
         await asyncio.sleep(1)
-    if status_response.status == TaskStatus.FAILED or status_response.artifact_id is None:
-        raise EveryrowError(f"Failed to create input in everyrow: {status_response.error}")
+    if (
+        status_response.status == TaskStatus.FAILED
+        or status_response.artifact_id is None
+    ):
+        raise EveryrowError(
+            f"Failed to create input in everyrow: {status_response.error}"
+        )
 
     return status_response
 
 
-async def get_task_status(task_id: UUID, client: AuthenticatedClient) -> TaskStatusResponse:
-    response = await get_task_status_endpoint_tasks_task_id_status_get.asyncio(client=client, task_id=task_id)
+async def get_task_status(
+    task_id: UUID, client: AuthenticatedClient
+) -> TaskStatusResponse:
+    response = await get_task_status_endpoint_tasks_task_id_status_get.asyncio(
+        client=client, task_id=task_id
+    )
     response = handle_response(response)
     return response
@@ -112,7 +147,9 @@ async def read_table_result(
     artifact_id: UUID,
     client: AuthenticatedClient,
 ) -> DataFrame:
-    response = await get_artifacts_artifacts_get.asyncio(client=client, artifact_ids=[artifact_id])
+    response = await get_artifacts_artifacts_get.asyncio(
+        client=client, artifact_ids=[artifact_id]
+    )
     response = handle_response(response)
     if len(response) != 1:
         raise EveryrowError(f"Expected 1 artifact, got {len(response)}")
@@ -130,7 +167,9 @@ async def read_scalar_result[T: BaseModel](
     response_model: type[T],
     client: AuthenticatedClient,
 ) -> T:
-    response = await get_artifacts_artifacts_get.asyncio(client=client, artifact_ids=[artifact_id])
+    response = await get_artifacts_artifacts_get.asyncio(
+        client=client, artifact_ids=[artifact_id]
+    )
     response = handle_response(response)
     if len(response) != 1:
         raise EveryrowError(f"Expected 1 artifact, got {len(response)}")
@@ -141,3 +180,48 @@ async def read_scalar_result[T: BaseModel](
     artifact = render_citations_standalone(artifact)
 
     return response_model(**artifact.data)
+
+
+async def fetch_task_data(
+    task_id: UUID | str,
+    client: AuthenticatedClient | None = None,
+) -> DataFrame:
+    """Fetch the result data for a completed task as a pandas DataFrame.
+
+    This is a convenience helper that retrieves the table-level group artifact
+    associated with a task and returns it as a DataFrame.
+
+    Args:
+        task_id: The UUID of the task to fetch data for (can be a string or UUID).
+        client: Optional authenticated client. If not provided, one will be created
+            using the EVERYROW_API_KEY environment variable.
+
+    Returns:
+        A pandas DataFrame containing the task result data.
+
+    Raises:
+        EveryrowError: If the task has not completed, failed, or has no artifact.
+
+    Example:
+        >>> from everyrow import fetch_task_data
+        >>> df = await fetch_task_data("12345678-1234-1234-1234-123456789abc")
+        >>> print(df.head())
+    """
+    if isinstance(task_id, str):
+        task_id = UUID(task_id)
+
+    if client is None:
+        client = create_client()
+
+    status_response = await get_task_status(task_id, client)
+
+    if status_response.status not in (TaskStatus.COMPLETED,):
+        raise EveryrowError(
+            f"Task {task_id} is not completed (status: {status_response.status.value}). "
+            f"Error: {status_response.error}"
+        )
+
+    if status_response.artifact_id is None:
+        raise EveryrowError(f"Task {task_id} has no associated artifact.")
+
+    return await read_table_result(status_response.artifact_id, client)
```
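
Two task.py changes work together here: `submit()` now caches the client (and session id) on the task, so `get_status()` and `await_result()` can be called with no arguments, and the new `fetch_task_data` helper recovers a finished task's table from its id alone. The sketch below shows the resulting flow, using the import form from the helper's own docstring; the input row is illustrative and `EVERYROW_API_KEY` is assumed to be set.

```python
# Sketch of the 0.1.1 background-task flow (illustrative input row).
import asyncio

from pandas import DataFrame

from everyrow import create_session, fetch_task_data
from everyrow.ops import rank_async


async def main() -> None:
    async with create_session(name="Async Ranking") as session:
        task = await rank_async(
            session=session,
            task="Score this organization",
            input=DataFrame([{"org": "Example Corp"}]),
            field_name="score",
        )
        # 0.1.0 required await_result(session.client); 0.1.1 falls back to
        # the client captured when the task was submitted.
        result = await task.await_result()
        print(result.data)

    # Later, with only the task id on hand, re-fetch the completed table.
    df = await fetch_task_data(str(task.task_id))
    print(df.head())


asyncio.run(main())
```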
everyrow-0.1.1.dist-info/METADATA
ADDED

```diff
@@ -0,0 +1,275 @@
+Metadata-Version: 2.4
+Name: everyrow
+Version: 0.1.1
+Summary: An SDK for everyrow.io: agent ops at spreadsheet scale
+License-File: LICENSE.txt
+Requires-Python: >=3.12
+Requires-Dist: attrs>=25.4.0
+Requires-Dist: pandas>=2.3.3
+Requires-Dist: pydantic>=2.12.5
+Requires-Dist: python-dotenv>=1.2.1
+Description-Content-Type: text/markdown
+
+
+
+# <picture><img src="images/future-search-logo-128.webp" alt="FutureSearch" height="24" align="bottom"></picture> everyrow SDK
+
+Python SDK for [everyrow.io](https://everyrow.io). Rank, dedupe, merge, and screen your dataframes using natural language—or run web agents to research every row.
+
+## Table of Contents
+
+New to everyrow? Head to [Getting Started](#getting-started)
+
+Looking to use our agent-backed utilities? Check out:
+- [Rank](#rank)
+- [Dedupe](#dedupe)
+- [Merge](#merge)
+- [Screen](#screen)
+- [Agent Tasks](#agent-tasks)
+
+## Getting Started
+
+Get an API key at [everyrow.io](https://everyrow.io).
+
+```bash
+export EVERYROW_API_KEY=your_api_key_here
+```
+
+### Installation
+
+```bash
+pip install everyrow
+```
+
+For development:
+
+```bash
+uv pip install -e .
+uv sync
+```
+
+Requires Python >= 3.12
+
+### Claude Code Plugin
+
+There's a plugin for [Claude Code](https://code.claude.com/) that teaches Claude how to use the SDK:
+
+```sh
+# from Claude Code
+/plugin marketplace add futuresearch/everyrow-sdk
+/plugin install everyrow@futuresearch
+
+# from terminal
+claude plugin marketplace add futuresearch/everyrow-sdk
+claude plugin install everyrow@futuresearch
+```
+
+## Rank
+
+Score rows based on criteria you can't put in a database field. The AI researches each row and assigns scores based on qualitative factors.
+
+```python
+from everyrow.ops import rank
+
+result = await rank(
+    task="Score by likelihood to need data integration solutions",
+    input=leads_dataframe,
+    field_name="integration_need_score",
+)
+```
+
+Say you want to rank leads by "likelihood to need data integration tools"—Ultramain Systems (sells software to airlines) looks similar to Ukraine International Airlines (is an airline) by industry code, but their actual needs are completely different. Traditional scoring can't tell them apart.
+
+**Case studies:** [Lead Scoring with Data Fragmentation](https://futuresearch.ai/lead-scoring-data-fragmentation/) (1,000 leads, 7 min, $13) · [Lead Scoring Without CRM](https://futuresearch.ai/lead-scoring-without-crm/) ($28 vs $145 with Clay)
+
+[Full documentation →](docs/RANK.md)
+
+### Dedupe
+
+Deduplicate when fuzzy matching falls short. The AI understands that "AbbVie Inc", "Abbvie", and "AbbVie Pharmaceutical" are the same company, or that "Big Blue" means IBM.
+
+```python
+from everyrow.ops import dedupe
+
+result = await dedupe(
+    input=crm_data,
+    equivalence_relation="Two entries are duplicates if they represent the same legal entity",
+)
+```
+
+The `equivalence_relation` tells the AI what counts as a duplicate—natural language, not regex. Results include `equivalence_class_id` (groups duplicates), `equivalence_class_name` (human-readable cluster name), and `selected` (the canonical record in each cluster).
+
+**Case studies:** [CRM Deduplication](https://futuresearch.ai/crm-deduplication/) (500→124 rows, 2 min, $1.67) · [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/) (98% accuracy with career changes)
+
+[Full documentation →](docs/DEDUPE.md)
+
+### Merge
+
+Join two tables when the keys don't match exactly—or at all. The AI knows "Photoshop" belongs to "Adobe" and "Genentech" is a Roche subsidiary, even with zero string similarity.
+
+```python
+from everyrow.ops import merge
+
+result = await merge(
+    task="Match each software product to its parent company",
+    left_table=software_products,
+    right_table=approved_suppliers,
+    merge_on_left="software_name",
+    merge_on_right="company_name",
+)
+```
+
+Handles subsidiaries, abbreviations (MSD → Merck), regional names, typos, and pseudonyms. Fuzzy matching thresholds always fail somewhere—0.9 misses "Colfi" ↔ "Dr. Ioana Colfescu", 0.7 false-positives on "John Smith" ↔ "Jane Smith".
+
+**Case studies:** [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/) (2,000 products, 91% accuracy, $9) · [HubSpot Contact Merge](https://futuresearch.ai/merge-hubspot-contacts/) (99.9% recall) · [CRM Merge Workflow](https://futuresearch.ai/crm-merge-workflow/)
+
+[Full documentation →](docs/MERGE.md)
+
+### Screen
+
+Filter rows based on criteria that require research—things you can't express in SQL. The AI actually researches each row (10-Ks, earnings reports, news) before deciding pass/fail.
+
+```python
+from everyrow.ops import screen
+from pydantic import BaseModel, Field
+
+class ScreenResult(BaseModel):
+    passes: bool = Field(description="True if company meets the criteria")
+
+result = await screen(
+    task="""
+    Find companies with >75% recurring revenue that would benefit from
+    Taiwan tensions - CHIPS Act beneficiaries, defense contractors,
+    cybersecurity firms. Exclude companies dependent on Taiwan manufacturing.
+    """,
+    input=sp500_companies,
+    response_model=ScreenResult,
+)
+```
+
+Works for investment theses, geopolitical exposure, vendor risk assessment, job posting filtering, lead qualification—anything requiring judgment. Screening 500 S&P 500 companies takes ~12 min and $3 with >90% precision. Regex gets 68%.
+
+**Case studies:** [Thematic Stock Screen](https://futuresearch.ai/thematic-stock-screening/) (63/502 passed, $3.29) · [Job Posting Screen](https://futuresearch.ai/job-posting-screening/) (>90% vs 68% regex) · [Lead Screening Workflow](https://futuresearch.ai/screening-workflow/)
+
+[Full documentation →](docs/SCREEN.md)
+
+### Agent Tasks
+
+For single-input tasks, use `single_agent`. For batch processing, use `agent_map`.
+
+```python
+from everyrow.ops import single_agent, agent_map
+from pandas import DataFrame
+
+# Single input
+result = await single_agent(
+    task="What is the capital of the given country?",
+    input={"country": "India"},
+)
+
+# Batch processing
+result = await agent_map(
+    task="What is the capital of the given country?",
+    input=DataFrame([{"country": "India"}, {"country": "USA"}]),
+)
+```
+
+Our agents are tuned on [Deep Research Bench](https://arxiv.org/abs/2506.06287), a benchmark we built for evaluating web research on questions that require extensive searching and cross-referencing.
+
+## Advanced
+
+### Sessions
+
+For quick one-off operations, sessions are created automatically:
+
+```python
+from everyrow.ops import single_agent
+
+result = await single_agent(
+    task="What is the capital of France?",
+    input={"country": "France"},
+)
+```
+
+For multiple operations, use an explicit session:
+
+```python
+from everyrow import create_session
+
+async with create_session(name="My Session") as session:
+    print(f"View session at: {session.get_url()}")
+    # All operations here share the same session
+```
+
+If you want more explicit control over the client (for example, to reuse it across sessions or configure custom settings), you can create it directly:
+
+```python
+from everyrow import create_client, create_session
+
+async with create_client() as client:
+    async with create_session(client=client, name="My Session") as session:
+        # ...
+```
+
+Sessions are visible on the [everyrow.io](https://everyrow.io) dashboard.
+
+### Async Operations
+
+All utilities have async variants for background processing. These need an explicit session since the task persists beyond the function call:
+
+```python
+from everyrow import create_session
+from everyrow.ops import rank_async
+
+async with create_session(name="Async Ranking") as session:
+    task = await rank_async(
+        session=session,
+        task="Score this organization",
+        input=dataframe,
+        field_name="score",
+    )
+
+    # Continue with other work...
+    result = await task.await_result()
+```
+
+## Case Studies
+
+More at [futuresearch.ai/solutions](https://futuresearch.ai/solutions/).
+
+**Notebooks:**
+- [CRM Deduplication](case_studies/dedupe/case_01_crm_data.ipynb)
+- [Thematic Stock Screen](case_studies/screen/thematic_stock_screen.ipynb)
+- [Oil Price Margin Screen](case_studies/screen/oil_price_margin_screen.ipynb)
+
+**On futuresearch.ai:**
+- [Lead Scoring with Data Fragmentation](https://futuresearch.ai/lead-scoring-data-fragmentation/)
+- [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/)
+- [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/)
+
+To run notebooks:
+
+```bash
+uv sync --group case-studies
+```
+
+## Development
+
+```bash
+uv sync
+lefthook install
+```
+
+```bash
+uv run pytest          # tests
+uv run ruff check .    # lint
+uv run ruff format .   # format
+uv run basedpyright    # type check
+./generate_openapi.sh  # regenerate client
+```
+
+The `everyrow/generated/` directory is excluded from linting (auto-generated code).
+
+## License
+
+This project is licensed under the MIT License - see LICENSE.txt file for details.
```
{everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/RECORD
CHANGED

```diff
@@ -1,11 +1,11 @@
-everyrow/__init__.py,sha256=
+everyrow/__init__.py,sha256=g-I6zj2wOtb_WH6l0aYdtS83OxQJy78tJfm_H0vB5qk,197
 everyrow/api_utils.py,sha256=iU1LZYjB2iPHCRZjDNEW64gEQWQbiZxiB8XVoj5SzPM,1437
-everyrow/citations.py,sha256=
+everyrow/citations.py,sha256=J5yJQ3P3g8a7kaQBluto6yK6bnLRzs4kP301bbS_KGo,1701
 everyrow/constants.py,sha256=OKsAtaodzvmPy9LNzmYl1u_axEe208NRBuAJGqghZs0,98
-everyrow/ops.py,sha256=
+everyrow/ops.py,sha256=9utuzHSgEWviiQDv7FX4aGtGwSwPxFbT-k_XKfNmL0Q,25981
 everyrow/result.py,sha256=2vCiE17kdbgkYKAdvfkpXJsSCr10U8FdO8NpS8eiofg,413
-everyrow/session.py,sha256=
-everyrow/task.py,sha256=
+everyrow/session.py,sha256=Au13oES0MPoBlfnL3LWUb45AB0vf3YtDES1YoYiZnjI,2721
+everyrow/task.py,sha256=I374zFqYQSUKmPe9MBN5Bb93uC8XdTD_zbmRr08vhCU,7605
 everyrow/generated/__init__.py,sha256=qUheje2C4lZ8b26EUHXHRJ3dWuzKiExv_JVOdVCFAek,150
 everyrow/generated/client.py,sha256=-rT3epMc77Y7QMTy5o1oH5hkGLufY9qFrD1rb7qItFU,12384
 everyrow/generated/errors.py,sha256=gO8GBmKqmSNgAg-E5oT-oOyxztvp7V_6XG7OUTT15q0,546
@@ -177,7 +177,7 @@ everyrow/generated/models/usage_response.py,sha256=k4WU5fOfyTMpXTTZ8OJG9i-TgU6Zw
 everyrow/generated/models/validation_error.py,sha256=n8d_ZobQV26pm0KyDAKvIo93uOBhz2BH59jpJAKwoPY,2180
 everyrow/generated/models/whoami_whoami_get_response_whoami_whoami_get.py,sha256=-NkKDTygoMsXFibAuU9nTRUOrsGwqm7PZ7EXfYI0G8E,1386
 everyrow/generated/models/workflow_leaf_node_input.py,sha256=TQ-y_VHus3WmpMUiFsXlD-d6Sm2nKraVvRFSWb_SzH0,1970
-everyrow-0.1.0.dist-info/METADATA,sha256=
-everyrow-0.1.0.dist-info/WHEEL,sha256=
-everyrow-0.1.0.dist-info/licenses/LICENSE.txt,sha256=
-everyrow-0.1.0.dist-info/RECORD,,
+everyrow-0.1.1.dist-info/METADATA,sha256=BAdgeuyOgo_mL1TrPGgGn0MUVEwdo2VunPZVsxD4lnM,9069
+everyrow-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+everyrow-0.1.1.dist-info/licenses/LICENSE.txt,sha256=8gN2nA06HyReyL7Mfu9nsBIpUF-B6wL5SJenlMRN8ac,1070
+everyrow-0.1.1.dist-info/RECORD,,
```
everyrow-0.1.0.dist-info/METADATA
DELETED

```diff
@@ -1,238 +0,0 @@
-Metadata-Version: 2.4
-Name: everyrow
-Version: 0.1.0
-Summary: An SDK for everyrow.io: agent ops at spreadsheet scale
-License-File: LICENSE.txt
-Requires-Python: >=3.12
-Requires-Dist: attrs>=25.4.0
-Requires-Dist: pandas>=2.3.3
-Requires-Dist: pydantic>=2.12.5
-Requires-Dist: python-dotenv>=1.2.1
-Description-Content-Type: text/markdown
-
-# everyrow SDK
-
-The everyrow SDK provides intelligent data processing utilities powered by AI agents. Transform, dedupe, merge, rank, and screen your dataframes using natural language instructions. Whether you're deduplicating research papers, merging complex datasets, ranking organizations, or screening vendors, the SDK handles the heavy lifting by combining AI research capabilities with structured data operations.
-
-## Installation
-
-```bash
-uv pip install -e .
-```
-
-Or install dependencies:
-
-```bash
-uv sync
-```
-
-## Requirements
-
-- Python >= 3.12
-
-## Configuration
-
-Get an API key from https://everyrow.io and set it to get started:
-
-```bash
-# Set in your environment or .env file
-EVERYROW_API_KEY=your_api_key_here
-```
-
-## Usage
-
-### Quick Start
-
-```python
-from everyrow import create_session
-from everyrow.ops import dedupe
-from pandas import DataFrame
-
-async with create_session() as session:
-    data = DataFrame([...])
-    result = await dedupe(
-        session=session,
-        input=data,
-        equivalence_relation="Two items are duplicates if...",
-    )
-    print(result.data)
-```
-
-### Core Utilities
-
-#### Rank: `rank`
-
-Extract and rank rows based on AI-generated scores:
-
-```python
-from everyrow.ops import rank
-
-result = await rank(
-    session=session,
-    task="Score this organization by their contribution to AI research",
-    input=dataframe,
-    field_name="contribution_score",
-    ascending_order=False,
-)
-```
-
-#### Dedupe: `dedupe`
-
-Intelligently deduplicate your data using AI-powered equivalence detection:
-
-```python
-from everyrow.ops import dedupe
-
-result = await dedupe(
-    session=session,
-    input=dataframe,
-    equivalence_relation="Two entries are duplicates if they represent the same research work",
-)
-```
-
-#### Merge: `merge`
-
-Merge two tables using AI to match related rows:
-
-```python
-from everyrow.ops import merge
-
-result = await merge(
-    session=session,
-    task="Match clinical trial sponsors with parent companies",
-    left_table=trial_data,
-    right_table=company_data,
-    merge_on_left="sponsor",
-    merge_on_right="company",
-)
-```
-
-#### Screen: `screen`
-
-Evaluate and filter rows based on criteria that require research:
-
-```python
-from everyrow.ops import screen
-from pydantic import BaseModel
-
-class Assessment(BaseModel):
-    risk_level: str
-    recommendation: str
-
-result = await screen(
-    session=session,
-    task="Evaluate vendor security and financial stability",
-    input=vendors,
-    response_model=Assessment,
-)
-```
-
-### Viewing Sessions
-
-Every session has a web interface URL:
-
-```python
-async with create_session(name="My Session") as session:
-    print(f"View session at: {session.get_url()}")
-    # ... use session for operations
-```
-
-### Agent Tasks
-
-For single-input tasks, use `single_agent`:
-
-```python
-from everyrow.ops import single_agent
-from pydantic import BaseModel
-
-class Input(BaseModel):
-    country: str
-
-result = await single_agent(
-    session=session,
-    task="What is the capital of the given country?",
-    input=Input(country="India"),
-)
-```
-
-For batch processing, use `agent_map`:
-
-```python
-from everyrow.ops import agent_map
-
-result = await agent_map(
-    session=session,
-    task="What is the capital of the given country?",
-    input=DataFrame([{"country": "India"}, {"country": "USA"}]),
-)
-```
-
-### Async Operations
-
-All utilities have async variants for background processing:
-
-```python
-from everyrow.ops import rank_async
-
-task = await rank_async(
-    session=session,
-    task="Score this organization",
-    input=dataframe,
-    field_name="score",
-)
-
-# Continue with other work...
-
-result = await task.await_result(session.client)
-```
-
-## Case Studies
-
-The `case_studies/` directory contains example workflows demonstrating real-world usage of the SDK. To run case studies, install the optional dependencies:
-
-```bash
-uv sync --group case-studies
-```
-
-Then you can run the case study scripts or open the Jupyter notebooks in your preferred environment.
-
-## Development
-
-### Setup
-
-```bash
-uv sync
-lefthook install
-```
-
-### Running Tests
-
-```bash
-uv run pytest
-```
-
-### Linting & Formatting
-
-```bash
-uv run ruff check .
-uv run ruff check --fix .
-uv run ruff format .
-```
-
-### Type Checking
-
-```bash
-uv run basedpyright
-```
-
-### Generating OpenAPI Client
-
-```bash
-./generate_openapi.sh
-```
-
-Note: The `everyrow/generated/` directory is excluded from linting as it contains auto-generated code.
-
-## License
-
-This project is licensed under the MIT License - see LICENSE.txt file for details.
```
{everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/WHEEL
File without changes

{everyrow-0.1.0.dist-info → everyrow-0.1.1.dist-info}/licenses/LICENSE.txt
File without changes