everyrow 0.1.1-py3-none-any.whl → 0.1.3-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -2,10 +2,10 @@ everyrow/__init__.py,sha256=g-I6zj2wOtb_WH6l0aYdtS83OxQJy78tJfm_H0vB5qk,197
  everyrow/api_utils.py,sha256=iU1LZYjB2iPHCRZjDNEW64gEQWQbiZxiB8XVoj5SzPM,1437
  everyrow/citations.py,sha256=J5yJQ3P3g8a7kaQBluto6yK6bnLRzs4kP301bbS_KGo,1701
  everyrow/constants.py,sha256=OKsAtaodzvmPy9LNzmYl1u_axEe208NRBuAJGqghZs0,98
- everyrow/ops.py,sha256=9utuzHSgEWviiQDv7FX4aGtGwSwPxFbT-k_XKfNmL0Q,25981
+ everyrow/ops.py,sha256=9WCsRjf7cEU22q1pJlBbVQ33cv7zKGhBR3XImxf5xvM,25477
  everyrow/result.py,sha256=2vCiE17kdbgkYKAdvfkpXJsSCr10U8FdO8NpS8eiofg,413
  everyrow/session.py,sha256=Au13oES0MPoBlfnL3LWUb45AB0vf3YtDES1YoYiZnjI,2721
- everyrow/task.py,sha256=I374zFqYQSUKmPe9MBN5Bb93uC8XdTD_zbmRr08vhCU,7605
+ everyrow/task.py,sha256=sinoK3vd4CCc2Xltkgo9jRLRwTfsvHoebJqlzYWU84Y,7649
  everyrow/generated/__init__.py,sha256=qUheje2C4lZ8b26EUHXHRJ3dWuzKiExv_JVOdVCFAek,150
  everyrow/generated/client.py,sha256=-rT3epMc77Y7QMTy5o1oH5hkGLufY9qFrD1rb7qItFU,12384
  everyrow/generated/errors.py,sha256=gO8GBmKqmSNgAg-E5oT-oOyxztvp7V_6XG7OUTT15q0,546
@@ -41,16 +41,17 @@ everyrow/generated/api/default/submit_task_tasks_post.py,sha256=60fnt4ubSi1n_nRr
  everyrow/generated/api/default/task_resource_estimation_task_resource_estimation_post.py,sha256=o4-Smjou0gD-Lzh2rSG5YTyfdqBTybX6h9rF5x9UiyM,10628
  everyrow/generated/api/default/trigger_workflow_execution_endpoint_workflows_trigger_post.py,sha256=oP5bDLciYJagXMDZF4F1ULEXJPIn1nvitOxCFOBj4oI,4688
  everyrow/generated/api/default/whoami_whoami_get.py,sha256=s1hj_NIATmg7YD3vSmce7ZPDBL1ldS5xWaSDyrW8Kqg,3428
- everyrow/generated/models/__init__.py,sha256=iGVmD5wEhYGHOtjvsi18IKdPSobE24FpqwFQHIcxXZM,12037
+ everyrow/generated/models/__init__.py,sha256=swXtiHNUdv0Tw4umQuIpxTBN1eVtyfwEF3Zg5A7NYq4,12180
  everyrow/generated/models/agent_improvement_instruction.py,sha256=M5J_4xsC-B9HPFoFi84fEEu4xGCfT4WRpEjFdzasFI8,1859
- everyrow/generated/models/agent_query_params.py,sha256=lh2Kr2lEjhutAbYzlMl7OOal-7tPU3e4FbqDrM9LeAs,14854
+ everyrow/generated/models/agent_query_params.py,sha256=-htPesmsmczaE1rBfHtgN5U8WyVgEs4gKmUPuqGjk4Q,15920
  everyrow/generated/models/agent_query_params_system_prompt_kind_type_0.py,sha256=5fkIgjOcr9pM1A3dxDe7GtPfGy-uZkKNYvDmnwNd0VU,284
  everyrow/generated/models/agent_task_args.py,sha256=DjsbTF-4be4IfsXT3vO0SWsmV5rk-2QGcgx1eRvyNnY,5984
  everyrow/generated/models/agent_task_args_processing_mode.py,sha256=p3eVdNK2hfPl1RPSlr33LECvg9aUEYbuX1gIusJ817I,170
- everyrow/generated/models/allowed_suggestions.py,sha256=vrgqbC9o7mwvSDPDrJebe3hL9TYO_VImZF2BGBV9Jcw,163
+ everyrow/generated/models/allowed_suggestions.py,sha256=4ommerhQnz-fvBE6H27dY9ZYuXUM_HJSiLjoPTMWWVw,215
  everyrow/generated/models/api_key_info.py,sha256=vNFeNHLTaE1vSqervHV2A7n2EBbh9GYIpmSJNZqyjg0,4871
  everyrow/generated/models/artifact_changed_payload.py,sha256=Olt7FdT4P99u3xexqeaWJMtm9_12pcLQ8gJIPrKWXe4,2635
- everyrow/generated/models/artifact_group_record.py,sha256=PtWNTX8WAAdSJE4wtjqVTc_WuCr_8_vbfUtoxQU_6oA,11938
+ everyrow/generated/models/artifact_group_record.py,sha256=Zcm8SbqJgyzSResBiRUNlCZl2wOo9BmFiMm4CObH1Y0,13459
+ everyrow/generated/models/artifact_group_record_analysis_type_0.py,sha256=moleLgOPJlRD5IZ0KG-VROXqdIBL2gf8o27O2b7B0C8,1353
  everyrow/generated/models/artifact_group_record_metadata_type_0.py,sha256=rGO0cmGhyQyMy81MjGwnlcv7BgRwHa5Wn2OSgRmuSpY,1353
  everyrow/generated/models/artifact_group_record_trace_mapping_type_0.py,sha256=pfu3BejCJQ9iuaKp6Yeuuf9ICOS7qE8rWvyhGrHBffU,1376
  everyrow/generated/models/artifact_status.py,sha256=F_mWQ2Zr5kvDP_w830X6Yp0jmiQ6POexDehU6oOc_Tw,325
@@ -81,14 +82,13 @@ everyrow/generated/models/create_workflow_from_artifact_request.py,sha256=V-k5ww
  everyrow/generated/models/create_workflow_from_artifact_response.py,sha256=NUhP6clDlWPjeVR01t6PbKIDv9BF-kSExfGbxyIOeLs,1875
  everyrow/generated/models/data_frame_method.py,sha256=b76Tam9r9Kfgo1tp_QTm-LuHblkFHBR-zkWILqk1t9U,364
  everyrow/generated/models/date_cutoffs.py,sha256=U7xF8GerEOhLfDlaQSMRqwRPSjtxKjAwR4gAPHZ8tCE,4611
- everyrow/generated/models/dedupe_mode.py,sha256=IKIMJHXfZQIm8FYlerPyN6mCty_cdrPmGPXSoUHV35E,161
- everyrow/generated/models/dedupe_query_params.py,sha256=2xG652gWyt_HC84PqhUKBq3MXF-Ju74AhozfgekRmJ8,6123
- everyrow/generated/models/dedupe_request_params.py,sha256=mZXx_JfV0OfoNsdKDdOen3irkhxs-F8If1rETinkFuo,11919
+ everyrow/generated/models/dedupe_public_params.py,sha256=ZRDuxHthH7Ugz3e1VadKHuSFDtt-SByc3JHN5kruN3Q,1882
+ everyrow/generated/models/dedupe_request_params.py,sha256=cOfKIwVImjmHOoDUO01PTu_AuFut4SOj9aWHQtYICD8,11923
  everyrow/generated/models/deep_merge_public_params.py,sha256=SlXSoxoN5wKsxPKAikll99VpDP-OazZ0lVrc1u4ug14,4701
  everyrow/generated/models/deep_merge_request.py,sha256=iQ8t9IIjfVXdlGLPJ26f7kvfyB734JKR4bb9jrvuHP0,12054
- everyrow/generated/models/deep_rank_public_params.py,sha256=fZz5AW1JKL4ud6IZDl5sCWpHdk_z4HM3CMGcAEV-zOw,3079
+ everyrow/generated/models/deep_rank_public_params.py,sha256=-KALHLQEfdC2zEOXGYCbE2A4zSvlqNlRlmNRVQfWiSY,3668
  everyrow/generated/models/deep_rank_request.py,sha256=wB9dq8U0d92FwtCPgdSZKxD_MNikxSSSdiyTpLKSph4,12035
- everyrow/generated/models/deep_screen_public_params.py,sha256=SzWji1Nmhb9wkQZZqBHdoU-TClbWdAABckUQTHwcJSU,4095
+ everyrow/generated/models/deep_screen_public_params.py,sha256=lekZ_5FR06EGkO7eCbL69_6TBTD6RZ4GajSyEKn6bTc,4684
  everyrow/generated/models/deep_screen_request.py,sha256=GK1b1yHmwPd1tjznTOfQlxCXlvnmTqX_8TOIShW8A8U,12073
  everyrow/generated/models/derive_expression.py,sha256=_ZZ58niRV_s8Zt0PKleYDAEVc8N_Auq34BhDbK_gr9g,1883
  everyrow/generated/models/derive_query_params.py,sha256=VN3CJ0bbXeKX9Vk7zqA-qVlQAyh36geUfU4DEyuJLpM,2171
@@ -96,7 +96,6 @@ everyrow/generated/models/derive_request.py,sha256=GbUFuWPl29DjPL6LlZm5kf_Wg9Ula
  everyrow/generated/models/document_query_tool.py,sha256=bX8S0kNIJfcfoWQ5Fh18YGue7mPHcrljAomLsVqG54Q,388
  everyrow/generated/models/drop_columns_query_params.py,sha256=Ym-sr6imPyLvJtV-csvCQ8RfIcmKf87ab6RRw-MZxZs,1679
  everyrow/generated/models/drop_columns_request.py,sha256=4fiqiPGybOGsmSwEqZ5shtraBoeevuWhRjEFH2U711E,11845
- everyrow/generated/models/embedding_models.py,sha256=1tedZFC86cPCFrFHHwaodErusZpzQQJMYfqOONzD2PE,228
  everyrow/generated/models/event_type.py,sha256=DF8cHLM7femjWYb3h_cwL0yIlLVYE-y_SIlx7MbmeAU,409
  everyrow/generated/models/execution_metadata.py,sha256=txNqFX7mlYLzdIIM_CeAtcXNX_5iBWCMIfI8lU3yqrc,4765
  everyrow/generated/models/export_request.py,sha256=CxKNBfFN1duGhIxrmuIeaYKVR80jF_D7ovoURMSH8nI,2026
@@ -143,7 +142,8 @@ everyrow/generated/models/simple_chat_message.py,sha256=itzqNz5Tp-pnR0pZnRohE0jr
  everyrow/generated/models/simple_chat_message_role.py,sha256=s4I3p5EVBQAobwuInqpQeV8qXa333gfeUO3aL-3Yun4,194
  everyrow/generated/models/simple_chat_message_with_tool_calls.py,sha256=UJqXFLTiLyFZuZ_SZSZIZ_ar0DRIds9SEWLnKyVy1sw,5508
  everyrow/generated/models/source_database_entry.py,sha256=eWDcpFTb_SMHSVZy1Op6rBztfJuVDgLeBOfZXsrvef4,2486
- everyrow/generated/models/standalone_artifact_record.py,sha256=5F-MbXe2qy4GU07VXxq9ggyrqZB1qMWfjMk5J_L2fQc,9891
+ everyrow/generated/models/standalone_artifact_record.py,sha256=c-2utZborD6HfmV99tNN6MaJK1WM2woH-QGh1Xdx37k,11468
+ everyrow/generated/models/standalone_artifact_record_analysis_type_0.py,sha256=Mxc-IximrKe2c6Bmo0L1LbNrYZAMlPRuahqr4y8UJqw,1378
  everyrow/generated/models/standalone_artifact_record_metadata_type_0.py,sha256=7BzFz1s9ecrNu56GTBot0AuqSTVRqFlZgrHl_9vA0pc,1378
  everyrow/generated/models/standalone_artifact_record_trace_mapping_type_0.py,sha256=f6RA3b3fR9MRov5VF9mJ4lmlbsIPUVu8irgdvEhroVg,1401
  everyrow/generated/models/status_count.py,sha256=avaIuYabln9HPLiQidxHjit_Az80y8x2j9xmTxhZHB8,1759
@@ -177,7 +177,7 @@ everyrow/generated/models/usage_response.py,sha256=k4WU5fOfyTMpXTTZ8OJG9i-TgU6Zw
  everyrow/generated/models/validation_error.py,sha256=n8d_ZobQV26pm0KyDAKvIo93uOBhz2BH59jpJAKwoPY,2180
  everyrow/generated/models/whoami_whoami_get_response_whoami_whoami_get.py,sha256=-NkKDTygoMsXFibAuU9nTRUOrsGwqm7PZ7EXfYI0G8E,1386
  everyrow/generated/models/workflow_leaf_node_input.py,sha256=TQ-y_VHus3WmpMUiFsXlD-d6Sm2nKraVvRFSWb_SzH0,1970
- everyrow-0.1.1.dist-info/METADATA,sha256=BAdgeuyOgo_mL1TrPGgGn0MUVEwdo2VunPZVsxD4lnM,9069
- everyrow-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- everyrow-0.1.1.dist-info/licenses/LICENSE.txt,sha256=8gN2nA06HyReyL7Mfu9nsBIpUF-B6wL5SJenlMRN8ac,1070
- everyrow-0.1.1.dist-info/RECORD,,
+ everyrow-0.1.3.dist-info/METADATA,sha256=QaC2AFXFfxxEhr7GBlsfGCeyxWJGsdmdkq6VNprkb2c,11013
+ everyrow-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ everyrow-0.1.3.dist-info/licenses/LICENSE.txt,sha256=8gN2nA06HyReyL7Mfu9nsBIpUF-B6wL5SJenlMRN8ac,1070
+ everyrow-0.1.3.dist-info/RECORD,,
@@ -1,9 +0,0 @@
- from enum import Enum
-
-
- class DedupeMode(str, Enum):
-     AGENTIC = "agentic"
-     DIRECT = "direct"
-
-     def __str__(self) -> str:
-         return str(self.value)
@@ -1,174 +0,0 @@
- from __future__ import annotations
-
- from collections.abc import Mapping
- from typing import Any, TypeVar, cast
-
- from attrs import define as _attrs_define
- from attrs import field as _attrs_field
-
- from ..models.dedupe_mode import DedupeMode
- from ..models.embedding_models import EmbeddingModels
- from ..models.llm_enum import LLMEnum
- from ..types import UNSET, Unset
-
- T = TypeVar("T", bound="DedupeQueryParams")
-
-
- @_attrs_define
- class DedupeQueryParams:
-     """Service-specific parameters for the deduplication service.
-
-     Attributes:
-         equivalence_relation (str): Description of what makes items equivalent
-         llm (LLMEnum | Unset):
-         chunk_size (int | Unset): Maximum number of items to process in a single LLM call Default: 25.
-         mode (DedupeMode | Unset):
-         preview (bool | Unset): When true, process only the first few items Default: False.
-         embedding_model (EmbeddingModels | Unset):
-         validate_groups (bool | Unset): Validate equivalence classes and split incorrectly merged groups before
-             selecting representatives Default: False.
-         use_clustering (bool | Unset): When true, cluster items by embedding similarity and only compare neighboring
-             clusters. When false, use sequential chunking and compare all chunks (O(n²)) Default: True.
-         early_stop_threshold (int | None | Unset): Stop cross-chunk comparisons for a row after this many consecutive
-             comparisons with no matches. None disables early stopping.
-     """
-
-     equivalence_relation: str
-     llm: LLMEnum | Unset = UNSET
-     chunk_size: int | Unset = 25
-     mode: DedupeMode | Unset = UNSET
-     preview: bool | Unset = False
-     embedding_model: EmbeddingModels | Unset = UNSET
-     validate_groups: bool | Unset = False
-     use_clustering: bool | Unset = True
-     early_stop_threshold: int | None | Unset = UNSET
-     additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
-
-     def to_dict(self) -> dict[str, Any]:
-         equivalence_relation = self.equivalence_relation
-
-         llm: str | Unset = UNSET
-         if not isinstance(self.llm, Unset):
-             llm = self.llm.value
-
-         chunk_size = self.chunk_size
-
-         mode: str | Unset = UNSET
-         if not isinstance(self.mode, Unset):
-             mode = self.mode.value
-
-         preview = self.preview
-
-         embedding_model: str | Unset = UNSET
-         if not isinstance(self.embedding_model, Unset):
-             embedding_model = self.embedding_model.value
-
-         validate_groups = self.validate_groups
-
-         use_clustering = self.use_clustering
-
-         early_stop_threshold: int | None | Unset
-         if isinstance(self.early_stop_threshold, Unset):
-             early_stop_threshold = UNSET
-         else:
-             early_stop_threshold = self.early_stop_threshold
-
-         field_dict: dict[str, Any] = {}
-         field_dict.update(self.additional_properties)
-         field_dict.update(
-             {
-                 "equivalence_relation": equivalence_relation,
-             }
-         )
-         if llm is not UNSET:
-             field_dict["llm"] = llm
-         if chunk_size is not UNSET:
-             field_dict["chunk_size"] = chunk_size
-         if mode is not UNSET:
-             field_dict["mode"] = mode
-         if preview is not UNSET:
-             field_dict["preview"] = preview
-         if embedding_model is not UNSET:
-             field_dict["embedding_model"] = embedding_model
-         if validate_groups is not UNSET:
-             field_dict["validate_groups"] = validate_groups
-         if use_clustering is not UNSET:
-             field_dict["use_clustering"] = use_clustering
-         if early_stop_threshold is not UNSET:
-             field_dict["early_stop_threshold"] = early_stop_threshold
-
-         return field_dict
-
-     @classmethod
-     def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
-         d = dict(src_dict)
-         equivalence_relation = d.pop("equivalence_relation")
-
-         _llm = d.pop("llm", UNSET)
-         llm: LLMEnum | Unset
-         if isinstance(_llm, Unset):
-             llm = UNSET
-         else:
-             llm = LLMEnum(_llm)
-
-         chunk_size = d.pop("chunk_size", UNSET)
-
-         _mode = d.pop("mode", UNSET)
-         mode: DedupeMode | Unset
-         if isinstance(_mode, Unset):
-             mode = UNSET
-         else:
-             mode = DedupeMode(_mode)
-
-         preview = d.pop("preview", UNSET)
-
-         _embedding_model = d.pop("embedding_model", UNSET)
-         embedding_model: EmbeddingModels | Unset
-         if isinstance(_embedding_model, Unset):
-             embedding_model = UNSET
-         else:
-             embedding_model = EmbeddingModels(_embedding_model)
-
-         validate_groups = d.pop("validate_groups", UNSET)
-
-         use_clustering = d.pop("use_clustering", UNSET)
-
-         def _parse_early_stop_threshold(data: object) -> int | None | Unset:
-             if data is None:
-                 return data
-             if isinstance(data, Unset):
-                 return data
-             return cast(int | None | Unset, data)
-
-         early_stop_threshold = _parse_early_stop_threshold(d.pop("early_stop_threshold", UNSET))
-
-         dedupe_query_params = cls(
-             equivalence_relation=equivalence_relation,
-             llm=llm,
-             chunk_size=chunk_size,
-             mode=mode,
-             preview=preview,
-             embedding_model=embedding_model,
-             validate_groups=validate_groups,
-             use_clustering=use_clustering,
-             early_stop_threshold=early_stop_threshold,
-         )
-
-         dedupe_query_params.additional_properties = d
-         return dedupe_query_params
-
-     @property
-     def additional_keys(self) -> list[str]:
-         return list(self.additional_properties.keys())
-
-     def __getitem__(self, key: str) -> Any:
-         return self.additional_properties[key]
-
-     def __setitem__(self, key: str, value: Any) -> None:
-         self.additional_properties[key] = value
-
-     def __delitem__(self, key: str) -> None:
-         del self.additional_properties[key]
-
-     def __contains__(self, key: str) -> bool:
-         return key in self.additional_properties
@@ -1,9 +0,0 @@
- from enum import Enum
-
-
- class EmbeddingModels(str, Enum):
-     TEXT_EMBEDDING_3_LARGE = "text-embedding-3-large"
-     TEXT_EMBEDDING_3_SMALL = "text-embedding-3-small"
-
-     def __str__(self) -> str:
-         return str(self.value)
@@ -1,275 +0,0 @@
- Metadata-Version: 2.4
- Name: everyrow
- Version: 0.1.1
- Summary: An SDK for everyrow.io: agent ops at spreadsheet scale
- License-File: LICENSE.txt
- Requires-Python: >=3.12
- Requires-Dist: attrs>=25.4.0
- Requires-Dist: pandas>=2.3.3
- Requires-Dist: pydantic>=2.12.5
- Requires-Dist: python-dotenv>=1.2.1
- Description-Content-Type: text/markdown
-
- ![hero](https://github.com/user-attachments/assets/254fa2ed-c1f3-4ee8-b93d-d169edf32f27)
-
- # <picture><img src="images/future-search-logo-128.webp" alt="FutureSearch" height="24" align="bottom"></picture> everyrow SDK
-
- Python SDK for [everyrow.io](https://everyrow.io). Rank, dedupe, merge, and screen your dataframes using natural language—or run web agents to research every row.
-
- ## Table of Contents
-
- New to everyrow? Head to [Getting Started](#getting-started)
-
- Looking to use our agent-backed utilities? Check out:
- - [Rank](#rank)
- - [Dedupe](#dedupe)
- - [Merge](#merge)
- - [Screen](#screen)
- - [Agent Tasks](#agent-tasks)
-
- ## Getting Started
-
- Get an API key at [everyrow.io](https://everyrow.io).
-
- ```bash
- export EVERYROW_API_KEY=your_api_key_here
- ```
-
- ### Installation
-
- ```bash
- pip install everyrow
- ```
-
- For development:
-
- ```bash
- uv pip install -e .
- uv sync
- ```
-
- Requires Python >= 3.12
-
- ### Claude Code Plugin
-
- There's a plugin for [Claude Code](https://code.claude.com/) that teaches Claude how to use the SDK:
-
- ```sh
- # from Claude Code
- /plugin marketplace add futuresearch/everyrow-sdk
- /plugin install everyrow@futuresearch
-
- # from terminal
- claude plugin marketplace add futuresearch/everyrow-sdk
- claude plugin install everyrow@futuresearch
- ```
-
- ## Rank
-
- Score rows based on criteria you can't put in a database field. The AI researches each row and assigns scores based on qualitative factors.
-
- ```python
- from everyrow.ops import rank
-
- result = await rank(
-     task="Score by likelihood to need data integration solutions",
-     input=leads_dataframe,
-     field_name="integration_need_score",
- )
- ```
-
- Say you want to rank leads by "likelihood to need data integration tools"—Ultramain Systems (sells software to airlines) looks similar to Ukraine International Airlines (is an airline) by industry code, but their actual needs are completely different. Traditional scoring can't tell them apart.
-
- **Case studies:** [Lead Scoring with Data Fragmentation](https://futuresearch.ai/lead-scoring-data-fragmentation/) (1,000 leads, 7 min, $13) · [Lead Scoring Without CRM](https://futuresearch.ai/lead-scoring-without-crm/) ($28 vs $145 with Clay)
-
- [Full documentation →](docs/RANK.md)
-
- ### Dedupe
-
- Deduplicate when fuzzy matching falls short. The AI understands that "AbbVie Inc", "Abbvie", and "AbbVie Pharmaceutical" are the same company, or that "Big Blue" means IBM.
-
- ```python
- from everyrow.ops import dedupe
-
- result = await dedupe(
-     input=crm_data,
-     equivalence_relation="Two entries are duplicates if they represent the same legal entity",
- )
- ```
-
- The `equivalence_relation` tells the AI what counts as a duplicate—natural language, not regex. Results include `equivalence_class_id` (groups duplicates), `equivalence_class_name` (human-readable cluster name), and `selected` (the canonical record in each cluster).
-
- **Case studies:** [CRM Deduplication](https://futuresearch.ai/crm-deduplication/) (500→124 rows, 2 min, $1.67) · [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/) (98% accuracy with career changes)
-
- [Full documentation →](docs/DEDUPE.md)
-
- ### Merge
-
- Join two tables when the keys don't match exactly—or at all. The AI knows "Photoshop" belongs to "Adobe" and "Genentech" is a Roche subsidiary, even with zero string similarity.
-
- ```python
- from everyrow.ops import merge
-
- result = await merge(
-     task="Match each software product to its parent company",
-     left_table=software_products,
-     right_table=approved_suppliers,
-     merge_on_left="software_name",
-     merge_on_right="company_name",
- )
- ```
-
- Handles subsidiaries, abbreviations (MSD → Merck), regional names, typos, and pseudonyms. Fuzzy matching thresholds always fail somewhere—0.9 misses "Colfi" ↔ "Dr. Ioana Colfescu", 0.7 false-positives on "John Smith" ↔ "Jane Smith".
-
- **Case studies:** [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/) (2,000 products, 91% accuracy, $9) · [HubSpot Contact Merge](https://futuresearch.ai/merge-hubspot-contacts/) (99.9% recall) · [CRM Merge Workflow](https://futuresearch.ai/crm-merge-workflow/)
-
- [Full documentation →](docs/MERGE.md)
-
- ### Screen
-
- Filter rows based on criteria that require research—things you can't express in SQL. The AI actually researches each row (10-Ks, earnings reports, news) before deciding pass/fail.
-
- ```python
- from everyrow.ops import screen
- from pydantic import BaseModel, Field
-
- class ScreenResult(BaseModel):
-     passes: bool = Field(description="True if company meets the criteria")
-
- result = await screen(
-     task="""
-     Find companies with >75% recurring revenue that would benefit from
-     Taiwan tensions - CHIPS Act beneficiaries, defense contractors,
-     cybersecurity firms. Exclude companies dependent on Taiwan manufacturing.
-     """,
-     input=sp500_companies,
-     response_model=ScreenResult,
- )
- ```
-
- Works for investment theses, geopolitical exposure, vendor risk assessment, job posting filtering, lead qualification—anything requiring judgment. Screening 500 S&P 500 companies takes ~12 min and $3 with >90% precision. Regex gets 68%.
-
- **Case studies:** [Thematic Stock Screen](https://futuresearch.ai/thematic-stock-screening/) (63/502 passed, $3.29) · [Job Posting Screen](https://futuresearch.ai/job-posting-screening/) (>90% vs 68% regex) · [Lead Screening Workflow](https://futuresearch.ai/screening-workflow/)
-
- [Full documentation →](docs/SCREEN.md)
-
- ### Agent Tasks
-
- For single-input tasks, use `single_agent`. For batch processing, use `agent_map`.
-
- ```python
- from everyrow.ops import single_agent, agent_map
- from pandas import DataFrame
-
- # Single input
- result = await single_agent(
-     task="What is the capital of the given country?",
-     input={"country": "India"},
- )
-
- # Batch processing
- result = await agent_map(
-     task="What is the capital of the given country?",
-     input=DataFrame([{"country": "India"}, {"country": "USA"}]),
- )
- ```
-
- Our agents are tuned on [Deep Research Bench](https://arxiv.org/abs/2506.06287), a benchmark we built for evaluating web research on questions that require extensive searching and cross-referencing.
-
- ## Advanced
-
- ### Sessions
-
- For quick one-off operations, sessions are created automatically:
-
- ```python
- from everyrow.ops import single_agent
-
- result = await single_agent(
-     task="What is the capital of France?",
-     input={"country": "France"},
- )
- ```
-
- For multiple operations, use an explicit session:
-
- ```python
- from everyrow import create_session
-
- async with create_session(name="My Session") as session:
-     print(f"View session at: {session.get_url()}")
-     # All operations here share the same session
- ```
-
- If you want more explicit control over the client (for example, to reuse it across sessions or configure custom settings), you can create it directly:
-
- ```python
- from everyrow import create_client, create_session
-
- async with create_client() as client:
-     async with create_session(client=client, name="My Session") as session:
-         # ...
- ```
-
- Sessions are visible on the [everyrow.io](https://everyrow.io) dashboard.
-
- ### Async Operations
-
- All utilities have async variants for background processing. These need an explicit session since the task persists beyond the function call:
-
- ```python
- from everyrow import create_session
- from everyrow.ops import rank_async
-
- async with create_session(name="Async Ranking") as session:
-     task = await rank_async(
-         session=session,
-         task="Score this organization",
-         input=dataframe,
-         field_name="score",
-     )
-
-     # Continue with other work...
-     result = await task.await_result()
- ```
-
- ## Case Studies
-
- More at [futuresearch.ai/solutions](https://futuresearch.ai/solutions/).
-
- **Notebooks:**
- - [CRM Deduplication](case_studies/dedupe/case_01_crm_data.ipynb)
- - [Thematic Stock Screen](case_studies/screen/thematic_stock_screen.ipynb)
- - [Oil Price Margin Screen](case_studies/screen/oil_price_margin_screen.ipynb)
-
- **On futuresearch.ai:**
- - [Lead Scoring with Data Fragmentation](https://futuresearch.ai/lead-scoring-data-fragmentation/)
- - [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/)
- - [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/)
-
- To run notebooks:
-
- ```bash
- uv sync --group case-studies
- ```
-
- ## Development
-
- ```bash
- uv sync
- lefthook install
- ```
-
- ```bash
- uv run pytest # tests
- uv run ruff check . # lint
- uv run ruff format . # format
- uv run basedpyright # type check
- ./generate_openapi.sh # regenerate client
- ```
-
- The `everyrow/generated/` directory is excluded from linting (auto-generated code).
-
- ## License
-
- This project is licensed under the MIT License - see LICENSE.txt file for details.