everyrow-mcp 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
everyrow_mcp/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""MCP server for everyrow: agent ops at spreadsheet scale."""
|
everyrow_mcp/server.py
ADDED
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
"""MCP server for everyrow SDK operations."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from contextlib import asynccontextmanager
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from everyrow.api_utils import create_client
|
|
11
|
+
from everyrow.generated.api.default.whoami_whoami_get import asyncio as whoami
|
|
12
|
+
from everyrow.ops import agent_map, dedupe, merge, rank, screen
|
|
13
|
+
from mcp.server.fastmcp import FastMCP
|
|
14
|
+
from pydantic import BaseModel, ConfigDict, Field, create_model, field_validator
|
|
15
|
+
|
|
16
|
+
from everyrow_mcp.utils import (
|
|
17
|
+
resolve_output_path,
|
|
18
|
+
save_result_to_csv,
|
|
19
|
+
validate_csv_path,
|
|
20
|
+
validate_output_path,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@asynccontextmanager
async def lifespan(_server: FastMCP):
    """Validate everyrow credentials on startup.

    Performs a `whoami` round-trip against the everyrow API before the
    server begins accepting requests, so a missing or invalid API key
    fails fast with a clear error instead of failing on the first tool call.

    Raises:
        RuntimeError: If the credential check fails for any reason.
    """
    try:
        client = create_client()
        async with client as c:
            response = await whoami(client=c)
            if response is None:
                raise RuntimeError("Failed to authenticate with everyrow API")
        # BUG FIX: this message previously went to stdout. With the stdio
        # transport (mcp.run(transport="stdio") in main), stdout carries the
        # JSON-RPC protocol stream, so any stray print corrupts it.
        # Diagnostics must go to stderr.
        print("everyrow-mcp: Authenticated successfully", file=sys.stderr)
    except Exception as e:
        raise RuntimeError(f"everyrow-mcp startup failed: {e}") from e

    yield
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Module-level server instance; the lifespan hook above validates API
# credentials before the server starts handling requests.
mcp = FastMCP("everyrow_mcp", lifespan=lifespan)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ScreenInput(BaseModel):
    """Input for the screen operation."""

    # Trim whitespace on all string fields; reject unknown keys so a
    # mistyped parameter name from the MCP client fails loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Natural-language criteria evaluated against every row.
    task: str = Field(
        ...,
        description="Natural language description of the screening criteria. "
        "Rows that meet the criteria will pass the screen.",
        min_length=1,
    )
    # Source CSV; checked by validate_input_csv before any API spend.
    input_csv: str = Field(
        ...,
        description="Absolute path to the input CSV file to screen.",
    )
    # Destination directory or explicit .csv path; checked by validate_output.
    output_path: str = Field(
        ...,
        description="Output path: either a directory (file will be named 'screened_<input_name>.csv') "
        "or a full file path ending in .csv",
    )
    # Optional JSON schema, converted to a dynamic Pydantic model by
    # _schema_to_model in the tool handler.
    response_schema: dict[str, Any] | None = Field(
        default=None,
        description="Optional JSON schema for the response model. "
        "If not provided, uses a default schema with a 'passes' boolean field. "
        "The schema should define fields that the LLM will extract for each row.",
    )

    @field_validator("input_csv")
    @classmethod
    def validate_input_csv(cls, v: str) -> str:
        # Fail fast: file must exist, be a file, and end in .csv.
        validate_csv_path(v)
        return v

    @field_validator("output_path")
    @classmethod
    def validate_output(cls, v: str) -> str:
        # Ensure the output destination (or its parent dir) already exists.
        validate_output_path(v)
        return v
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@mcp.tool(name="everyrow_screen")
async def everyrow_screen(params: ScreenInput) -> str:
    """Filter rows in a CSV based on criteria that require judgment.

    Each row is evaluated against the natural-language criteria in
    ``params.task``; only rows that pass are kept. Useful when filtering
    depends on semantic meaning rather than exact string matching.

    Examples:
        - Filter job postings for "remote-friendly AND senior-level AND salary disclosed"
        - Screen vendors for "financially stable AND good security practices"
        - Filter leads for "likely to need our product based on company description"

    Args:
        params: ScreenInput containing task, input_csv path, output_path, and optional response_schema

    Returns:
        JSON string with result summary including output file path and row counts
    """
    frame = pd.read_csv(params.input_csv)
    rows_before = len(frame)

    # Build a dynamic response model only when the client supplied a schema.
    model = (
        _schema_to_model("ScreenResult", params.response_schema)
        if params.response_schema
        else None
    )

    result = await screen(task=params.task, input=frame, response_model=model)

    destination = resolve_output_path(params.output_path, params.input_csv, "screened")
    save_result_to_csv(result.data, destination)

    rows_after = len(result.data)
    summary = {
        "status": "success",
        "output_file": str(destination),
        "input_rows": rows_before,
        "output_rows": rows_after,
        "rows_filtered": rows_before - rows_after,
    }
    return json.dumps(summary, indent=2)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class RankInput(BaseModel):
    """Input for the rank operation."""

    # Trim whitespace on all string fields; reject unknown keys so a
    # mistyped parameter name from the MCP client fails loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Natural-language scoring rubric applied to every row.
    task: str = Field(
        ...,
        description="Natural language description of the ranking criteria. "
        "Describes what makes a row score higher or lower.",
        min_length=1,
    )
    # Source CSV; checked by validate_input_csv before any API spend.
    input_csv: str = Field(
        ...,
        description="Absolute path to the input CSV file to rank.",
    )
    # Destination directory or explicit .csv path; checked by validate_output.
    output_path: str = Field(
        ...,
        description="Output path: either a directory (file will be named 'ranked_<input_name>.csv') "
        "or a full file path ending in .csv",
    )
    # Column added to the output holding the LLM-assigned scores; also the
    # sort key for the result.
    field_name: str = Field(
        ...,
        description="Name of the field to use for sorting. "
        "This field will be added to the output with the LLM-assigned scores.",
    )
    # Constrained to {'float', 'int', 'str', 'bool'} by validate_field_type.
    field_type: str = Field(
        default="float",
        description="Type of the ranking field: 'float', 'int', 'str', or 'bool'",
    )
    ascending_order: bool = Field(
        default=True,
        description="If True, sort in ascending order (lowest first). "
        "If False, sort in descending order (highest first).",
    )
    # Optional JSON schema, converted to a dynamic Pydantic model by
    # _schema_to_model in the tool handler.
    response_schema: dict[str, Any] | None = Field(
        default=None,
        description="Optional JSON schema for the response model. "
        "Must include the field_name as a property. "
        "If not provided, a simple schema with just field_name is used.",
    )

    @field_validator("input_csv")
    @classmethod
    def validate_input_csv(cls, v: str) -> str:
        # Fail fast: file must exist, be a file, and end in .csv.
        validate_csv_path(v)
        return v

    @field_validator("output_path")
    @classmethod
    def validate_output(cls, v: str) -> str:
        # Ensure the output destination (or its parent dir) already exists.
        validate_output_path(v)
        return v

    @field_validator("field_type")
    @classmethod
    def validate_field_type(cls, v: str) -> str:
        # The downstream rank() op only supports these scalar types.
        valid_types = {"float", "int", "str", "bool"}
        if v not in valid_types:
            raise ValueError(f"field_type must be one of {valid_types}")
        return v
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
@mcp.tool(name="everyrow_rank")
async def everyrow_rank(params: RankInput) -> str:
    """Score and sort rows in a CSV based on qualitative criteria.

    Each row receives an LLM-assigned score for the criteria described in
    ``params.task``; the table is then sorted by that score. Useful for
    prioritizing items based on semantic evaluation.

    Examples:
        - Rank leads by "likelihood to need data integration solutions"
        - Sort companies by "AI/ML adoption maturity"
        - Prioritize candidates by "fit for senior engineering role"

    Args:
        params: RankInput containing task, input_csv, output_path, field_name, and options

    Returns:
        JSON string with result summary including output file path
    """
    frame = pd.read_csv(params.input_csv)

    # Build a dynamic response model only when the client supplied a schema.
    model = (
        _schema_to_model("RankResult", params.response_schema)
        if params.response_schema
        else None
    )

    result = await rank(
        task=params.task,
        input=frame,
        field_name=params.field_name,
        field_type=params.field_type,  # type: ignore
        response_model=model,
        ascending_order=params.ascending_order,
    )

    destination = resolve_output_path(params.output_path, params.input_csv, "ranked")
    save_result_to_csv(result.data, destination)

    summary = {
        "status": "success",
        "output_file": str(destination),
        "rows": len(result.data),
        "sorted_by": params.field_name,
        "ascending": params.ascending_order,
    }
    return json.dumps(summary, indent=2)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class DedupeInput(BaseModel):
    """Input for the dedupe operation."""

    # Trim whitespace on all string fields; reject unknown keys so a
    # mistyped parameter name from the MCP client fails loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Natural-language definition of "duplicate" for this dataset.
    equivalence_relation: str = Field(
        ...,
        description="Natural language description of what makes two rows equivalent/duplicates. "
        "The LLM will use this to identify which rows represent the same entity.",
        min_length=1,
    )
    # Source CSV; checked by validate_input_csv before any API spend.
    input_csv: str = Field(
        ...,
        description="Absolute path to the input CSV file to deduplicate.",
    )
    # Destination directory or explicit .csv path; checked by validate_output.
    output_path: str = Field(
        ...,
        description="Output path: either a directory (file will be named 'deduped_<input_name>.csv') "
        "or a full file path ending in .csv",
    )
    select_representative: bool = Field(
        default=True,
        description="If True, select one representative row per duplicate group. "
        "If False, keep all rows but mark duplicates with equivalence class info.",
    )

    @field_validator("input_csv")
    @classmethod
    def validate_input_csv(cls, v: str) -> str:
        # Fail fast: file must exist, be a file, and end in .csv.
        validate_csv_path(v)
        return v

    @field_validator("output_path")
    @classmethod
    def validate_output(cls, v: str) -> str:
        # Ensure the output destination (or its parent dir) already exists.
        validate_output_path(v)
        return v
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@mcp.tool(name="everyrow_dedupe")
async def everyrow_dedupe(params: DedupeInput) -> str:
    """Remove duplicate rows from a CSV using semantic equivalence.

    Identifies rows that represent the same entity even when their values
    don't match exactly — fuzzy deduplication where plain string matching
    fails.

    Examples:
        - Dedupe contacts: "Same person even with name abbreviations or career changes"
        - Dedupe companies: "Same company including subsidiaries and name variations"
        - Dedupe research papers: "Same work including preprints and published versions"

    Args:
        params: DedupeInput containing equivalence_relation, input_csv, output_path, and options

    Returns:
        JSON string with result summary including output file path and dedup stats
    """
    frame = pd.read_csv(params.input_csv)
    rows_before = len(frame)

    result = await dedupe(
        equivalence_relation=params.equivalence_relation,
        input=frame,
        select_representative=params.select_representative,
    )

    destination = resolve_output_path(params.output_path, params.input_csv, "deduped")
    save_result_to_csv(result.data, destination)

    rows_after = len(result.data)
    summary = {
        "status": "success",
        "output_file": str(destination),
        "input_rows": rows_before,
        "output_rows": rows_after,
        "duplicates_removed": rows_before - rows_after,
    }
    return json.dumps(summary, indent=2)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class MergeInput(BaseModel):
    """Input for the merge operation."""

    # Trim whitespace on all string fields; reject unknown keys so a
    # mistyped parameter name from the MCP client fails loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Natural-language description of the join relationship.
    task: str = Field(
        ...,
        description="Natural language description of how to match rows between the two tables. "
        "Describes the relationship between entities in left and right tables.",
        min_length=1,
    )
    # Both CSV paths are checked by validate_csv_paths before any API spend.
    left_csv: str = Field(
        ...,
        description="Absolute path to the left/primary CSV file.",
    )
    right_csv: str = Field(
        ...,
        description="Absolute path to the right/secondary CSV file to merge in.",
    )
    # Destination directory or explicit .csv path; checked by validate_output.
    output_path: str = Field(
        ...,
        description="Output path: either a directory (file will be named 'merged_<left_name>.csv') "
        "or a full file path ending in .csv",
    )
    merge_on_left: str | None = Field(
        default=None,
        description="Optional column name in the left table to use as the merge key. "
        "If not provided, the LLM will determine the best matching strategy.",
    )
    merge_on_right: str | None = Field(
        default=None,
        description="Optional column name in the right table to use as the merge key. "
        "If not provided, the LLM will determine the best matching strategy.",
    )

    @field_validator("left_csv", "right_csv")
    @classmethod
    def validate_csv_paths(cls, v: str) -> str:
        # Fail fast: both files must exist, be files, and end in .csv.
        validate_csv_path(v)
        return v

    @field_validator("output_path")
    @classmethod
    def validate_output(cls, v: str) -> str:
        # Ensure the output destination (or its parent dir) already exists.
        validate_output_path(v)
        return v
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
@mcp.tool(name="everyrow_merge")
async def everyrow_merge(params: MergeInput) -> str:
    """Join two CSV files using intelligent entity matching.

    Combines two tables even when their keys don't match exactly; the LLM
    researches and reasons about which rows should be joined.

    Examples:
        - Match software products to parent companies (Photoshop -> Adobe)
        - Match clinical trial sponsors to pharma companies (Genentech -> Roche)
        - Join contact lists with different name formats

    Args:
        params: MergeInput containing task, left_csv, right_csv, output_path, and optional merge keys

    Returns:
        JSON string with result summary including output file path and merge stats
    """
    left_frame = pd.read_csv(params.left_csv)
    right_frame = pd.read_csv(params.right_csv)

    result = await merge(
        task=params.task,
        left_table=left_frame,
        right_table=right_frame,
        merge_on_left=params.merge_on_left,
        merge_on_right=params.merge_on_right,
    )

    # Output filename is derived from the LEFT (primary) table's name.
    destination = resolve_output_path(params.output_path, params.left_csv, "merged")
    save_result_to_csv(result.data, destination)

    summary = {
        "status": "success",
        "output_file": str(destination),
        "left_rows": len(left_frame),
        "right_rows": len(right_frame),
        "output_rows": len(result.data),
    }
    return json.dumps(summary, indent=2)
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
class AgentInput(BaseModel):
    """Input for the agent operation."""

    # Trim whitespace on all string fields; reject unknown keys so a
    # mistyped parameter name from the MCP client fails loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Natural-language task executed independently per row.
    task: str = Field(
        ...,
        description="Natural language description of the task to perform on each row. "
        "The agent will execute this task independently for each row in the input.",
        min_length=1,
    )
    # Source CSV; checked by validate_input_csv before any API spend.
    input_csv: str = Field(
        ...,
        description="Absolute path to the input CSV file. The agent will process each row.",
    )
    # Destination directory or explicit .csv path; checked by validate_output.
    output_path: str = Field(
        ...,
        description="Output path: either a directory (file will be named 'agent_<input_name>.csv') "
        "or a full file path ending in .csv",
    )
    # Optional JSON schema, converted to a dynamic Pydantic model by
    # _schema_to_model in the tool handler.
    response_schema: dict[str, Any] | None = Field(
        default=None,
        description="Optional JSON schema defining the structure of the agent's response. "
        "If not provided, uses a default schema with an 'answer' string field. "
        "The schema defines what fields the agent should extract/generate for each row.",
    )

    @field_validator("input_csv")
    @classmethod
    def validate_input_csv(cls, v: str) -> str:
        # Fail fast: file must exist, be a file, and end in .csv.
        validate_csv_path(v)
        return v

    @field_validator("output_path")
    @classmethod
    def validate_output(cls, v: str) -> str:
        # Ensure the output destination (or its parent dir) already exists.
        validate_output_path(v)
        return v
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
@mcp.tool(name="everyrow_agent")
async def everyrow_agent(params: AgentInput) -> str:
    """Run web research agents on each row of a CSV.

    Performs a web research/extraction task on each row independently.
    Useful for enriching data with information from the web.

    Examples:
        - "Find this company's latest funding round and lead investors"
        - "Research the CEO's background and previous companies"
        - "Find pricing information for this product"

    Args:
        params: AgentInput containing task, input_csv, output_path, and optional response_schema

    Returns:
        JSON string with result summary including output file path
    """
    df = pd.read_csv(params.input_csv)

    # The original code duplicated the agent_map(...) call in both branches.
    # Build kwargs instead: response_model is only passed when a schema was
    # supplied, preserving agent_map's own default behavior when it wasn't.
    call_kwargs: dict[str, Any] = {"task": params.task, "input": df}
    if params.response_schema:
        call_kwargs["response_model"] = _schema_to_model(
            "AgentResult", params.response_schema
        )
    result = await agent_map(**call_kwargs)

    output_file = resolve_output_path(params.output_path, params.input_csv, "agent")
    save_result_to_csv(result.data, output_file)

    return json.dumps(
        {
            "status": "success",
            "output_file": str(output_file),
            "rows_processed": len(result.data),
        },
        indent=2,
    )
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
# Mapping from JSON-schema type names to Python types; consumed by
# _schema_to_model when building dynamic Pydantic response models.
# Unknown type names fall back to str at the lookup site.
JSON_TYPE_MAP = {
    "string": str,
    "integer": int,
    "number": float,
    "boolean": bool,
    "array": list,
    "object": dict,
}
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _schema_to_model(name: str, schema: dict[str, Any]) -> type[BaseModel]:
    """Convert a JSON schema dict to a dynamic Pydantic model.

    This allows the MCP client to pass arbitrary response schemas without
    needing to define Python classes.

    Args:
        name: Class name for the generated model.
        schema: Either a full JSON schema (with a "properties" key) or a
            bare mapping of field name -> field definition.

    Returns:
        A dynamically created Pydantic model class.

    Raises:
        ValueError: If the schema yields no usable fields (previously this
            silently produced an empty model that validates anything).
    """
    # Accept both a full JSON schema and a bare properties mapping.
    properties = schema.get("properties", schema)
    required = set(schema.get("required", []))

    fields: dict[str, Any] = {}
    for field_name, field_def in properties.items():
        # Skip private names and non-dict entries (e.g. the "type": "object"
        # key when a full schema was passed without "properties").
        if field_name.startswith("_") or not isinstance(field_def, dict):
            continue

        field_type_str = field_def.get("type", "string")
        # Unknown JSON types fall back to str.
        python_type = JSON_TYPE_MAP.get(field_type_str, str)
        description = field_def.get("description", "")

        if field_name in required:
            fields[field_name] = (python_type, Field(..., description=description))
        else:
            # Optional fields default to None so the LLM may omit them.
            fields[field_name] = (
                python_type | None,
                Field(default=None, description=description),
            )

    if not fields:
        # Fail loudly so the client learns its schema was malformed instead
        # of receiving results validated against an empty model.
        raise ValueError(f"response_schema produced no usable fields: {schema!r}")

    return create_model(name, **fields)
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def main():
    """Run the MCP server over the stdio transport."""
    # Verify the key is configured before starting; this gives a clearer
    # message than letting the lifespan authentication check fail later.
    if "EVERYROW_API_KEY" not in os.environ:
        print("Error: EVERYROW_API_KEY environment variable is not set.", file=sys.stderr)
        print("Get an API key at https://everyrow.io/api-key", file=sys.stderr)
        sys.exit(1)

    mcp.run(transport="stdio")
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
# Allow running the server directly (e.g. `python -m everyrow_mcp.server`)
# in addition to the installed console-script entry point.
if __name__ == "__main__":
    main()
|
everyrow_mcp/utils.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Utility functions for the everyrow MCP server."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def validate_csv_path(path: str) -> None:
    """Check that *path* names an existing CSV file.

    Args:
        path: Path to the CSV file

    Raises:
        ValueError: If path is not absolute, doesn't exist, or isn't a CSV file
    """
    candidate = Path(path)

    # Checks are ordered from cheapest/most-fundamental to most specific.
    if not candidate.is_absolute():
        raise ValueError(f"Path must be absolute: {path}")
    if not candidate.exists():
        raise ValueError(f"File does not exist: {path}")
    if not candidate.is_file():
        raise ValueError(f"Path is not a file: {path}")
    if candidate.suffix.lower() != ".csv":
        raise ValueError(f"File must be a CSV file: {path}")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def validate_output_path(path: str) -> None:
    """Check an output path before any processing happens.

    The path can be either:
    - A directory (must exist)
    - A file path ending in .csv (parent directory must exist)

    Args:
        path: Output path to validate

    Raises:
        ValueError: If path is not absolute or parent directory doesn't exist
    """
    target = Path(path)

    if not target.is_absolute():
        raise ValueError(f"Output path must be absolute: {path}")

    # A .csv suffix means "exact file"; anything else is treated as a directory.
    points_at_csv = target.suffix.lower() == ".csv"
    directory = target.parent if points_at_csv else target

    if not directory.exists():
        label = "Parent directory" if points_at_csv else "Output directory"
        raise ValueError(f"{label} does not exist: {directory}")
    if not directory.is_dir():
        label = "Parent path" if points_at_csv else "Output path"
        raise ValueError(f"{label} is not a directory: {directory}")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def resolve_output_path(output_path: str, input_path: str, prefix: str) -> Path:
    """Resolve the output path, generating a filename if needed.

    A path ending in .csv is used verbatim. Anything else is treated as a
    directory, and a file named '<prefix>_<input stem>.csv' is placed in it.

    Args:
        output_path: The output path (directory or full file path)
        input_path: The input file path (used to generate output filename)
        prefix: Prefix to add to the generated filename (e.g., 'screened', 'ranked')

    Returns:
        Full path to the output file
    """
    destination = Path(output_path)
    if destination.suffix.lower() == ".csv":
        return destination

    stem = Path(input_path).stem
    return destination / f"{prefix}_{stem}.csv"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def save_result_to_csv(df: pd.DataFrame, path: Path) -> None:
    """Save a DataFrame to CSV.

    Args:
        df: DataFrame to save
        path: Path to save to
    """
    # index=False keeps the file round-trippable with pd.read_csv (no
    # spurious unnamed index column on reload).
    df.to_csv(path, index=False)
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: everyrow-mcp
|
|
3
|
+
Version: 0.1.7
|
|
4
|
+
Summary: MCP server for everyrow: agent ops at spreadsheet scale
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: everyrow>=0.1.5
|
|
7
|
+
Requires-Dist: mcp[cli]>=1.0.0
|
|
8
|
+
Requires-Dist: pandas>=2.0.0
|
|
9
|
+
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# everyrow MCP Server
|
|
13
|
+
|
|
14
|
+
MCP (Model Context Protocol) server for [everyrow](https://everyrow.io): agent ops at spreadsheet scale.
|
|
15
|
+
|
|
16
|
+
This server exposes everyrow's 5 core operations as MCP tools, allowing LLM applications to screen, rank, dedupe, merge, and run agents on CSV files.
|
|
17
|
+
|
|
18
|
+
**All tools operate on local CSV files.** Provide absolute file paths as input, and transformed results are written to new CSV files at your specified output path.
|
|
19
|
+
|
|
20
|
+
## Setup
|
|
21
|
+
|
|
22
|
+
The server requires an everyrow API key. Get one at [everyrow.io/api-key](https://everyrow.io/api-key) ($20 free credit).
|
|
23
|
+
|
|
24
|
+
Either set the API key in your shell environment, or hardcode it directly in the config below.
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
export EVERYROW_API_KEY=your_key_here
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Add this to your MCP config. If you have [uv](https://docs.astral.sh/uv/) installed:
|
|
31
|
+
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"mcpServers": {
|
|
35
|
+
"everyrow": {
|
|
36
|
+
"command": "uvx",
|
|
37
|
+
"args": ["everyrow-mcp"],
|
|
38
|
+
"env": {
|
|
39
|
+
"EVERYROW_API_KEY": "${EVERYROW_API_KEY}"
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Alternatively, install with pip (ideally in a venv) and use `"command": "everyrow-mcp"` instead of uvx.
|
|
47
|
+
|
|
48
|
+
## Available Tools
|
|
49
|
+
|
|
50
|
+
### everyrow_screen
|
|
51
|
+
|
|
52
|
+
Filter CSV rows based on criteria that require judgment.
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
Parameters:
|
|
56
|
+
- task: Natural language description of screening criteria
|
|
57
|
+
- input_csv: Absolute path to input CSV
|
|
58
|
+
- output_path: Directory or full .csv path for output
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Example: Filter job postings for "remote-friendly AND senior-level AND salary disclosed"
|
|
62
|
+
|
|
63
|
+
### everyrow_rank
|
|
64
|
+
|
|
65
|
+
Score and sort CSV rows based on qualitative criteria.
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
Parameters:
|
|
69
|
+
- task: Natural language description of ranking criteria
|
|
70
|
+
- input_csv: Absolute path to input CSV
|
|
71
|
+
- output_path: Directory or full .csv path for output
|
|
72
|
+
- field_name: Name of the score field to add
|
|
73
|
+
- field_type: Type of field (float, int, str, bool)
|
|
74
|
+
- ascending_order: Sort direction (default: true)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Example: Rank leads by "likelihood to need data integration solutions"
|
|
78
|
+
|
|
79
|
+
### everyrow_dedupe
|
|
80
|
+
|
|
81
|
+
Remove duplicate rows using semantic equivalence.
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
Parameters:
|
|
85
|
+
- equivalence_relation: Natural language description of what makes rows duplicates
|
|
86
|
+
- input_csv: Absolute path to input CSV
|
|
87
|
+
- output_path: Directory or full .csv path for output
|
|
88
|
+
- select_representative: Keep one row per duplicate group (default: true)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Example: Dedupe contacts where "same person even with name abbreviations or career changes"
|
|
92
|
+
|
|
93
|
+
### everyrow_merge
|
|
94
|
+
|
|
95
|
+
Join two CSV files using intelligent entity matching.
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
Parameters:
|
|
99
|
+
- task: Natural language description of how to match rows
|
|
100
|
+
- left_csv: Absolute path to primary CSV
|
|
101
|
+
- right_csv: Absolute path to secondary CSV
|
|
102
|
+
- output_path: Directory or full .csv path for output
|
|
103
|
+
- merge_on_left: (optional) Column name in left table
|
|
104
|
+
- merge_on_right: (optional) Column name in right table
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Example: Match software products to parent companies (Photoshop -> Adobe)
|
|
108
|
+
|
|
109
|
+
### everyrow_agent
|
|
110
|
+
|
|
111
|
+
Run web research agents on each row of a CSV.
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
Parameters:
|
|
115
|
+
- task: Natural language description of research task
|
|
116
|
+
- input_csv: Absolute path to input CSV
|
|
117
|
+
- output_path: Directory or full .csv path for output
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Example: "Find this company's latest funding round and lead investors"
|
|
121
|
+
|
|
122
|
+
## Output Path Handling
|
|
123
|
+
|
|
124
|
+
The `output_path` parameter accepts two formats:
|
|
125
|
+
|
|
126
|
+
1. **Directory**: Output file is named `{operation}_{input_name}.csv`
|
|
127
|
+
- Input: `/data/companies.csv`, Output path: `/output/`
|
|
128
|
+
- Result: `/output/screened_companies.csv`
|
|
129
|
+
|
|
130
|
+
2. **Full file path**: Use the exact path specified
|
|
131
|
+
- Output path: `/output/my_results.csv`
|
|
132
|
+
- Result: `/output/my_results.csv`
|
|
133
|
+
|
|
134
|
+
The server validates output paths before making API requests to avoid wasted costs.
|
|
135
|
+
|
|
136
|
+
## Development
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
cd everyrow-mcp
|
|
140
|
+
uv sync
|
|
141
|
+
uv run pytest
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
MIT - See [LICENSE.txt](../LICENSE.txt)
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
everyrow_mcp/__init__.py,sha256=26a6CKpWpep-gfie5kxyAJkxshp6ChjxsP0HbfcXL-0,63
|
|
2
|
+
everyrow_mcp/server.py,sha256=VE5cz2nuwItCwClfVTTeG2QfzrqY3tIw1q-d2SKP3_A,17788
|
|
3
|
+
everyrow_mcp/utils.py,sha256=I79K5y844uIT2pMkXouwLrkpHbxMXlPQRfJneWclHZU,2477
|
|
4
|
+
everyrow_mcp-0.1.7.dist-info/METADATA,sha256=d_FsvGb8zGv-80G6ztCEP9k5fnnJv9HlNfaQAGsRAB8,4064
|
|
5
|
+
everyrow_mcp-0.1.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
6
|
+
everyrow_mcp-0.1.7.dist-info/entry_points.txt,sha256=0QaEmw8HuQP8LnTYC5V3ZBFFVftn_lBicmXlmlztYZM,58
|
|
7
|
+
everyrow_mcp-0.1.7.dist-info/RECORD,,
|