everyrow-0.1.0-py3-none-any.whl → everyrow-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- everyrow/__init__.py +2 -1
- everyrow/citations.py +6 -2
- everyrow/generated/models/__init__.py +6 -6
- everyrow/generated/models/agent_query_params.py +21 -0
- everyrow/generated/models/allowed_suggestions.py +1 -0
- everyrow/generated/models/artifact_group_record.py +42 -9
- everyrow/generated/models/artifact_group_record_analysis_type_0.py +46 -0
- everyrow/generated/models/dedupe_public_params.py +64 -0
- everyrow/generated/models/dedupe_request_params.py +5 -5
- everyrow/generated/models/deep_rank_public_params.py +10 -0
- everyrow/generated/models/deep_screen_public_params.py +10 -0
- everyrow/generated/models/standalone_artifact_record.py +33 -0
- everyrow/generated/models/standalone_artifact_record_analysis_type_0.py +46 -0
- everyrow/ops.py +186 -98
- everyrow/session.py +33 -11
- everyrow/task.py +102 -15
- everyrow-0.1.2.dist-info/METADATA +332 -0
- {everyrow-0.1.0.dist-info → everyrow-0.1.2.dist-info}/RECORD +20 -20
- everyrow/generated/models/dedupe_mode.py +0 -9
- everyrow/generated/models/dedupe_query_params.py +0 -174
- everyrow/generated/models/embedding_models.py +0 -9
- everyrow-0.1.0.dist-info/METADATA +0 -238
- {everyrow-0.1.0.dist-info → everyrow-0.1.2.dist-info}/WHEEL +0 -0
- {everyrow-0.1.0.dist-info → everyrow-0.1.2.dist-info}/licenses/LICENSE.txt +0 -0
everyrow/generated/models/dedupe_query_params.py
@@ -1,174 +0,0 @@
-from __future__ import annotations
-
-from collections.abc import Mapping
-from typing import Any, TypeVar, cast
-
-from attrs import define as _attrs_define
-from attrs import field as _attrs_field
-
-from ..models.dedupe_mode import DedupeMode
-from ..models.embedding_models import EmbeddingModels
-from ..models.llm_enum import LLMEnum
-from ..types import UNSET, Unset
-
-T = TypeVar("T", bound="DedupeQueryParams")
-
-
-@_attrs_define
-class DedupeQueryParams:
-    """Service-specific parameters for the deduplication service.
-
-    Attributes:
-        equivalence_relation (str): Description of what makes items equivalent
-        llm (LLMEnum | Unset):
-        chunk_size (int | Unset): Maximum number of items to process in a single LLM call Default: 25.
-        mode (DedupeMode | Unset):
-        preview (bool | Unset): When true, process only the first few items Default: False.
-        embedding_model (EmbeddingModels | Unset):
-        validate_groups (bool | Unset): Validate equivalence classes and split incorrectly merged groups before
-            selecting representatives Default: False.
-        use_clustering (bool | Unset): When true, cluster items by embedding similarity and only compare neighboring
-            clusters. When false, use sequential chunking and compare all chunks (O(n²)) Default: True.
-        early_stop_threshold (int | None | Unset): Stop cross-chunk comparisons for a row after this many consecutive
-            comparisons with no matches. None disables early stopping.
-    """
-
-    equivalence_relation: str
-    llm: LLMEnum | Unset = UNSET
-    chunk_size: int | Unset = 25
-    mode: DedupeMode | Unset = UNSET
-    preview: bool | Unset = False
-    embedding_model: EmbeddingModels | Unset = UNSET
-    validate_groups: bool | Unset = False
-    use_clustering: bool | Unset = True
-    early_stop_threshold: int | None | Unset = UNSET
-    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
-
-    def to_dict(self) -> dict[str, Any]:
-        equivalence_relation = self.equivalence_relation
-
-        llm: str | Unset = UNSET
-        if not isinstance(self.llm, Unset):
-            llm = self.llm.value
-
-        chunk_size = self.chunk_size
-
-        mode: str | Unset = UNSET
-        if not isinstance(self.mode, Unset):
-            mode = self.mode.value
-
-        preview = self.preview
-
-        embedding_model: str | Unset = UNSET
-        if not isinstance(self.embedding_model, Unset):
-            embedding_model = self.embedding_model.value
-
-        validate_groups = self.validate_groups
-
-        use_clustering = self.use_clustering
-
-        early_stop_threshold: int | None | Unset
-        if isinstance(self.early_stop_threshold, Unset):
-            early_stop_threshold = UNSET
-        else:
-            early_stop_threshold = self.early_stop_threshold
-
-        field_dict: dict[str, Any] = {}
-        field_dict.update(self.additional_properties)
-        field_dict.update(
-            {
-                "equivalence_relation": equivalence_relation,
-            }
-        )
-        if llm is not UNSET:
-            field_dict["llm"] = llm
-        if chunk_size is not UNSET:
-            field_dict["chunk_size"] = chunk_size
-        if mode is not UNSET:
-            field_dict["mode"] = mode
-        if preview is not UNSET:
-            field_dict["preview"] = preview
-        if embedding_model is not UNSET:
-            field_dict["embedding_model"] = embedding_model
-        if validate_groups is not UNSET:
-            field_dict["validate_groups"] = validate_groups
-        if use_clustering is not UNSET:
-            field_dict["use_clustering"] = use_clustering
-        if early_stop_threshold is not UNSET:
-            field_dict["early_stop_threshold"] = early_stop_threshold
-
-        return field_dict
-
-    @classmethod
-    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
-        d = dict(src_dict)
-        equivalence_relation = d.pop("equivalence_relation")
-
-        _llm = d.pop("llm", UNSET)
-        llm: LLMEnum | Unset
-        if isinstance(_llm, Unset):
-            llm = UNSET
-        else:
-            llm = LLMEnum(_llm)
-
-        chunk_size = d.pop("chunk_size", UNSET)
-
-        _mode = d.pop("mode", UNSET)
-        mode: DedupeMode | Unset
-        if isinstance(_mode, Unset):
-            mode = UNSET
-        else:
-            mode = DedupeMode(_mode)
-
-        preview = d.pop("preview", UNSET)
-
-        _embedding_model = d.pop("embedding_model", UNSET)
-        embedding_model: EmbeddingModels | Unset
-        if isinstance(_embedding_model, Unset):
-            embedding_model = UNSET
-        else:
-            embedding_model = EmbeddingModels(_embedding_model)
-
-        validate_groups = d.pop("validate_groups", UNSET)
-
-        use_clustering = d.pop("use_clustering", UNSET)
-
-        def _parse_early_stop_threshold(data: object) -> int | None | Unset:
-            if data is None:
-                return data
-            if isinstance(data, Unset):
-                return data
-            return cast(int | None | Unset, data)
-
-        early_stop_threshold = _parse_early_stop_threshold(d.pop("early_stop_threshold", UNSET))
-
-        dedupe_query_params = cls(
-            equivalence_relation=equivalence_relation,
-            llm=llm,
-            chunk_size=chunk_size,
-            mode=mode,
-            preview=preview,
-            embedding_model=embedding_model,
-            validate_groups=validate_groups,
-            use_clustering=use_clustering,
-            early_stop_threshold=early_stop_threshold,
-        )
-
-        dedupe_query_params.additional_properties = d
-        return dedupe_query_params
-
-    @property
-    def additional_keys(self) -> list[str]:
-        return list(self.additional_properties.keys())
-
-    def __getitem__(self, key: str) -> Any:
-        return self.additional_properties[key]
-
-    def __setitem__(self, key: str, value: Any) -> None:
-        self.additional_properties[key] = value
-
-    def __delitem__(self, key: str) -> None:
-        del self.additional_properties[key]
-
-    def __contains__(self, key: str) -> bool:
-        return key in self.additional_properties
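For orientation, here is a minimal sketch of how the removed 0.1.0 model behaved, based only on the `to_dict`/`from_dict` code in the hunk above; the import path follows the 0.1.0 package layout, and the parameter values are purely illustrative.

```python
# Sketch against the removed 0.1.0 DedupeQueryParams model; values are illustrative.
from everyrow.generated.models.dedupe_query_params import DedupeQueryParams

params = DedupeQueryParams(
    equivalence_relation="Two rows describe the same research paper",
    chunk_size=50,
    early_stop_threshold=3,
)

payload = params.to_dict()
# UNSET fields (llm, mode, embedding_model) are omitted, so only defaults and
# explicitly set values are serialized.
assert "llm" not in payload
assert payload["chunk_size"] == 50
assert payload["early_stop_threshold"] == 3

# from_dict restores the model; unknown keys land in additional_properties.
restored = DedupeQueryParams.from_dict({**payload, "note": "kept"})
assert restored["note"] == "kept"
```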
everyrow-0.1.0.dist-info/METADATA
@@ -1,238 +0,0 @@
-Metadata-Version: 2.4
-Name: everyrow
-Version: 0.1.0
-Summary: An SDK for everyrow.io: agent ops at spreadsheet scale
-License-File: LICENSE.txt
-Requires-Python: >=3.12
-Requires-Dist: attrs>=25.4.0
-Requires-Dist: pandas>=2.3.3
-Requires-Dist: pydantic>=2.12.5
-Requires-Dist: python-dotenv>=1.2.1
-Description-Content-Type: text/markdown
-
-# everyrow SDK
-
-The everyrow SDK provides intelligent data processing utilities powered by AI agents. Transform, dedupe, merge, rank, and screen your dataframes using natural language instructions. Whether you're deduplicating research papers, merging complex datasets, ranking organizations, or screening vendors, the SDK handles the heavy lifting by combining AI research capabilities with structured data operations.
-
-## Installation
-
-```bash
-uv pip install -e .
-```
-
-Or install dependencies:
-
-```bash
-uv sync
-```
-
-## Requirements
-
-- Python >= 3.12
-
-## Configuration
-
-Get an API key from https://everyrow.io and set it to get started:
-
-```bash
-# Set in your environment or .env file
-EVERYROW_API_KEY=your_api_key_here
-```
-
-## Usage
-
-### Quick Start
-
-```python
-from everyrow import create_session
-from everyrow.ops import dedupe
-from pandas import DataFrame
-
-async with create_session() as session:
-    data = DataFrame([...])
-    result = await dedupe(
-        session=session,
-        input=data,
-        equivalence_relation="Two items are duplicates if...",
-    )
-    print(result.data)
-```
-
-### Core Utilities
-
-#### Rank: `rank`
-
-Extract and rank rows based on AI-generated scores:
-
-```python
-from everyrow.ops import rank
-
-result = await rank(
-    session=session,
-    task="Score this organization by their contribution to AI research",
-    input=dataframe,
-    field_name="contribution_score",
-    ascending_order=False,
-)
-```
-
-#### Dedupe: `dedupe`
-
-Intelligently deduplicate your data using AI-powered equivalence detection:
-
-```python
-from everyrow.ops import dedupe
-
-result = await dedupe(
-    session=session,
-    input=dataframe,
-    equivalence_relation="Two entries are duplicates if they represent the same research work",
-)
-```
-
-#### Merge: `merge`
-
-Merge two tables using AI to match related rows:
-
-```python
-from everyrow.ops import merge
-
-result = await merge(
-    session=session,
-    task="Match clinical trial sponsors with parent companies",
-    left_table=trial_data,
-    right_table=company_data,
-    merge_on_left="sponsor",
-    merge_on_right="company",
-)
-```
-
-#### Screen: `screen`
-
-Evaluate and filter rows based on criteria that require research:
-
-```python
-from everyrow.ops import screen
-from pydantic import BaseModel
-
-class Assessment(BaseModel):
-    risk_level: str
-    recommendation: str
-
-result = await screen(
-    session=session,
-    task="Evaluate vendor security and financial stability",
-    input=vendors,
-    response_model=Assessment,
-)
-```
-
-### Viewing Sessions
-
-Every session has a web interface URL:
-
-```python
-async with create_session(name="My Session") as session:
-    print(f"View session at: {session.get_url()}")
-    # ... use session for operations
-```
-
-### Agent Tasks
-
-For single-input tasks, use `single_agent`:
-
-```python
-from everyrow.ops import single_agent
-from pydantic import BaseModel
-
-class Input(BaseModel):
-    country: str
-
-result = await single_agent(
-    session=session,
-    task="What is the capital of the given country?",
-    input=Input(country="India"),
-)
-```
-
-For batch processing, use `agent_map`:
-
-```python
-from everyrow.ops import agent_map
-
-result = await agent_map(
-    session=session,
-    task="What is the capital of the given country?",
-    input=DataFrame([{"country": "India"}, {"country": "USA"}]),
-)
-```
-
-### Async Operations
-
-All utilities have async variants for background processing:
-
-```python
-from everyrow.ops import rank_async
-
-task = await rank_async(
-    session=session,
-    task="Score this organization",
-    input=dataframe,
-    field_name="score",
-)
-
-# Continue with other work...
-
-result = await task.await_result(session.client)
-```
-
-## Case Studies
-
-The `case_studies/` directory contains example workflows demonstrating real-world usage of the SDK. To run case studies, install the optional dependencies:
-
-```bash
-uv sync --group case-studies
-```
-
-Then you can run the case study scripts or open the Jupyter notebooks in your preferred environment.
-
-## Development
-
-### Setup
-
-```bash
-uv sync
-lefthook install
-```
-
-### Running Tests
-
-```bash
-uv run pytest
-```
-
-### Linting & Formatting
-
-```bash
-uv run ruff check .
-uv run ruff check --fix .
-uv run ruff format .
-```
-
-### Type Checking
-
-```bash
-uv run basedpyright
-```
-
-### Generating OpenAPI Client
-
-```bash
-./generate_openapi.sh
-```
-
-Note: The `everyrow/generated/` directory is excluded from linting as it contains auto-generated code.
-
-## License
-
-This project is licensed under the MIT License - see LICENSE.txt file for details.
{everyrow-0.1.0.dist-info → everyrow-0.1.2.dist-info}/WHEEL: file without changes
{everyrow-0.1.0.dist-info → everyrow-0.1.2.dist-info}/licenses/LICENSE.txt: file without changes
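The 0.1.0 README above only shows the API key as a shell variable; a minimal end-to-end sketch of that configuration step follows, assuming `create_session()` reads `EVERYROW_API_KEY` from the environment as the Quick Start implies (the session name and printout mirror the "Viewing Sessions" example).

```python
# Sketch only: assumes create_session() picks up EVERYROW_API_KEY from the
# environment; python-dotenv is already a declared dependency of the package.
import asyncio

from dotenv import load_dotenv
from everyrow import create_session


async def main() -> None:
    load_dotenv()  # loads EVERYROW_API_KEY from a local .env file, if present
    async with create_session(name="smoke test") as session:
        print(f"View session at: {session.get_url()}")


asyncio.run(main())
```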