airow 0.1.0a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airow-0.1.0a1/LICENSE +21 -0
- airow-0.1.0a1/PKG-INFO +118 -0
- airow-0.1.0a1/README.md +74 -0
- airow-0.1.0a1/airow/__init__.py +6 -0
- airow-0.1.0a1/airow/agent.py +43 -0
- airow-0.1.0a1/airow/airow.py +72 -0
- airow-0.1.0a1/airow/schemas.py +23 -0
- airow-0.1.0a1/airow.egg-info/PKG-INFO +118 -0
- airow-0.1.0a1/airow.egg-info/SOURCES.txt +13 -0
- airow-0.1.0a1/airow.egg-info/dependency_links.txt +1 -0
- airow-0.1.0a1/airow.egg-info/requires.txt +12 -0
- airow-0.1.0a1/airow.egg-info/top_level.txt +1 -0
- airow-0.1.0a1/pyproject.toml +60 -0
- airow-0.1.0a1/setup.cfg +4 -0
- airow-0.1.0a1/tests/test_agents.py +189 -0
airow-0.1.0a1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Dmitrii K
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
airow-0.1.0a1/PKG-INFO
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: airow
|
|
3
|
+
Version: 0.1.0a1
|
|
4
|
+
Summary: AI-powered DataFrame processing made simple
|
|
5
|
+
Author-email: Dmitrii K <dmitriik@protonmail.com>
|
|
6
|
+
Maintainer-email: Dmitrii K <dmitriik@protonmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/dmitriiweb/airow
|
|
9
|
+
Project-URL: Repository, https://github.com/dmitriiweb/airow
|
|
10
|
+
Project-URL: Documentation, https://github.com/dmitriiweb/airow
|
|
11
|
+
Project-URL: Bug Tracker, https://github.com/dmitriiweb/airow/issues
|
|
12
|
+
Keywords: ai,ai-agent,dataframe,pandas,pydantic-ai,async,data-processing
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
25
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
|
+
Classifier: Topic :: Text Processing
|
|
27
|
+
Classifier: Topic :: Database
|
|
28
|
+
Classifier: Typing :: Typed
|
|
29
|
+
Requires-Python: >=3.10
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Requires-Dist: loguru>=0.7.3
|
|
33
|
+
Requires-Dist: pandas>=2.3.2
|
|
34
|
+
Requires-Dist: pydantic>=2.11.7
|
|
35
|
+
Requires-Dist: pydantic-ai>=0.8.1
|
|
36
|
+
Requires-Dist: tqdm>=4.67.1
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: mypy>=1.17.1; extra == "dev"
|
|
39
|
+
Requires-Dist: pytest>=8.4.2; extra == "dev"
|
|
40
|
+
Requires-Dist: pytest-asyncio>=1.1.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-cov>=6.3.0; extra == "dev"
|
|
42
|
+
Requires-Dist: ruff>=0.12.12; extra == "dev"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
# Airow
|
|
46
|
+
|
|
47
|
+
**AI-powered DataFrame processing made simple**
|
|
48
|
+
|
|
49
|
+
Airow is a Python library that combines the power of pandas DataFrames with AI models to process structured data at scale. Built on top of `pydantic-ai`, it provides type-safe, async processing of DataFrames using any AI model.
|
|
50
|
+
|
|
51
|
+
## Features
|
|
52
|
+
|
|
53
|
+
- 🚀 **Async processing** with batch support for high performance
|
|
54
|
+
- 🔒 **Type-safe outputs** using Pydantic models
|
|
55
|
+
- 📊 **Progress tracking** with built-in progress bars
|
|
56
|
+
- 🔄 **Automatic retries** with configurable retry logic
|
|
57
|
+
- 🤖 **Flexible AI models** - works with OpenAI, Ollama, Anthropic, and more
|
|
58
|
+
- ⚡ **Parallel processing** within batches for maximum throughput
|
|
59
|
+
- 📝 **Structured outputs** with defined schemas and validation
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Using pip
|
|
65
|
+
pip install airow
|
|
66
|
+
|
|
67
|
+
# Using uv (recommended)
|
|
68
|
+
uv add airow
|
|
69
|
+
|
|
70
|
+
# Using conda
|
|
71
|
+
conda install -c conda-forge airow
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import pandas as pd
|
|
78
|
+
from pydantic_ai.models.openai import OpenAIChatModel
|
|
79
|
+
from pydantic_ai.providers.ollama import OllamaProvider
|
|
80
|
+
from airow import Airow, OutputColumn
|
|
81
|
+
import asyncio
|
|
82
|
+
|
|
83
|
+
async def main():
|
|
84
|
+
# Setup your AI model
|
|
85
|
+
model = OpenAIChatModel(
|
|
86
|
+
model_name="llama3.2:latest",
|
|
87
|
+
provider=OllamaProvider(base_url="http://localhost:11434/v1"),
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
# Create Airow instance
|
|
91
|
+
airow = Airow(
|
|
92
|
+
model=model,
|
|
93
|
+
system_prompt="You are an expert in wine tasting and selection.",
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Load your data
|
|
97
|
+
df = pd.read_csv("wine_data.csv")
|
|
98
|
+
|
|
99
|
+
output_columns = [
|
|
100
|
+
OutputColumn(name="sentiment", type=str, description="Positive, negative, or neutral sentiment"),
|
|
101
|
+
OutputColumn(name="confidence", type=float, description="Confidence score between 0 and 1"),
|
|
102
|
+
OutputColumn(name="keywords", type=list, description="List of key terms extracted"),
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
# Process with AI
|
|
106
|
+
result_df = await airow.run(
|
|
107
|
+
df,
|
|
108
|
+
prompt="Extract taste characteristics from the wine description",
|
|
109
|
+
input_columns=["description"],
|
|
110
|
+
output_columns=output_columns,
|
|
111
|
+
show_progress=True,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
print(result_df.head())
|
|
115
|
+
|
|
116
|
+
if __name__ == "__main__":
|
|
117
|
+
asyncio.run(main())
|
|
118
|
+
```
|
airow-0.1.0a1/README.md
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Airow
|
|
2
|
+
|
|
3
|
+
**AI-powered DataFrame processing made simple**
|
|
4
|
+
|
|
5
|
+
Airow is a Python library that combines the power of pandas DataFrames with AI models to process structured data at scale. Built on top of `pydantic-ai`, it provides type-safe, async processing of DataFrames using any AI model.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- 🚀 **Async processing** with batch support for high performance
|
|
10
|
+
- 🔒 **Type-safe outputs** using Pydantic models
|
|
11
|
+
- 📊 **Progress tracking** with built-in progress bars
|
|
12
|
+
- 🔄 **Automatic retries** with configurable retry logic
|
|
13
|
+
- 🤖 **Flexible AI models** - works with OpenAI, Ollama, Anthropic, and more
|
|
14
|
+
- ⚡ **Parallel processing** within batches for maximum throughput
|
|
15
|
+
- 📝 **Structured outputs** with defined schemas and validation
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Using pip
|
|
21
|
+
pip install airow
|
|
22
|
+
|
|
23
|
+
# Using uv (recommended)
|
|
24
|
+
uv add airow
|
|
25
|
+
|
|
26
|
+
# Using conda
|
|
27
|
+
conda install -c conda-forge airow
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import pandas as pd
|
|
34
|
+
from pydantic_ai.models.openai import OpenAIChatModel
|
|
35
|
+
from pydantic_ai.providers.ollama import OllamaProvider
|
|
36
|
+
from airow import Airow, OutputColumn
|
|
37
|
+
import asyncio
|
|
38
|
+
|
|
39
|
+
async def main():
|
|
40
|
+
# Setup your AI model
|
|
41
|
+
model = OpenAIChatModel(
|
|
42
|
+
model_name="llama3.2:latest",
|
|
43
|
+
provider=OllamaProvider(base_url="http://localhost:11434/v1"),
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Create Airow instance
|
|
47
|
+
airow = Airow(
|
|
48
|
+
model=model,
|
|
49
|
+
system_prompt="You are an expert in wine tasting and selection.",
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# Load your data
|
|
53
|
+
df = pd.read_csv("wine_data.csv")
|
|
54
|
+
|
|
55
|
+
output_columns = [
|
|
56
|
+
OutputColumn(name="sentiment", type=str, description="Positive, negative, or neutral sentiment"),
|
|
57
|
+
OutputColumn(name="confidence", type=float, description="Confidence score between 0 and 1"),
|
|
58
|
+
OutputColumn(name="keywords", type=list, description="List of key terms extracted"),
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
# Process with AI
|
|
62
|
+
result_df = await airow.run(
|
|
63
|
+
df,
|
|
64
|
+
prompt="Extract taste characteristics from the wine description",
|
|
65
|
+
input_columns=["description"],
|
|
66
|
+
output_columns=output_columns,
|
|
67
|
+
show_progress=True,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
print(result_df.head())
|
|
71
|
+
|
|
72
|
+
if __name__ == "__main__":
|
|
73
|
+
asyncio.run(main())
|
|
74
|
+
```
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field, create_model
|
|
4
|
+
from pydantic_ai import Agent
|
|
5
|
+
from pydantic_ai.models import Model
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
from . import schemas
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AirowAgent:
    """Wrap a pydantic-ai ``Agent`` and return its structured output as a dict.

    The output schema is not fixed at construction time: each call to
    :meth:`run` builds a pydantic model from the requested output columns and
    passes it to the underlying agent as ``output_type``.
    """

    def __init__(self, model: Model, system_prompt: str, retries: int = 3):
        """Store the configuration and create the underlying ``Agent``.

        Args:
            model: Model handed to the ``Agent`` constructor.
            system_prompt: System prompt handed to the ``Agent`` constructor.
            retries: Value forwarded as the ``Agent``'s ``retries`` argument.
        """
        self.model = model
        self.system_prompt = system_prompt
        self.agent = Agent(
            model=model,
            system_prompt=self.system_prompt,
            retries=retries,
        )

    async def run(
        self,
        prompt: str,
        output_columns: Iterable[schemas.OutputColumn],
    ) -> dict[str, object]:
        """Run the agent once and return the dumped output model.

        Args:
            prompt: Prompt passed to the agent.
            output_columns: Columns describing the fields of the output model.

        Returns:
            ``model_dump()`` of the agent's output, or an empty dict when the
            agent call raised (the exception is logged, not re-raised).
        """
        output_model = self.build_agent_output_type(output_columns)
        try:
            response = await self.agent.run(prompt, output_type=output_model)
        except Exception as e:
            # Best effort: a failed call is logged and reported as "no values".
            logger.error(f"{e=}")
            return {}
        else:
            # Dumping happens outside the guarded call, so errors here propagate.
            return response.output.model_dump()

    def build_agent_output_type(
        self,
        output_columns: Iterable[schemas.OutputColumn],
    ) -> type[BaseModel]:
        """Create the ``OutputColumns`` pydantic model for the given columns.

        Every column becomes a required field carrying the column's type and
        description. When two columns share a name, the later one replaces the
        earlier one.

        Args:
            output_columns: Column definitions to turn into model fields.

        Returns:
            A dynamically created ``BaseModel`` subclass named ``OutputColumns``.
        """
        field_definitions = {}
        for column in output_columns:
            field_definitions[column.name] = (
                column.type,
                Field(..., description=column.description),
            )
        return create_model("OutputColumns", **field_definitions)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import Iterable
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from pydantic_ai.models import Model
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
|
|
8
|
+
from . import schemas
|
|
9
|
+
from .agent import AirowAgent
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Airow:
    """Apply an AI agent to every row of a pandas DataFrame.

    Rows are taken in consecutive slices of ``batch_size``. The rows of one
    slice are awaited together with ``asyncio.gather``; slices are processed
    one after another.
    """

    def __init__(
        self,
        *,
        model: Model,
        system_prompt: str,
        batch_size: int = 1,
    ):
        """Store the configuration and create the row-level agent.

        Args:
            model: Model forwarded to ``AirowAgent``.
            system_prompt: System prompt forwarded to ``AirowAgent``.
            batch_size: Number of rows awaited together; must be at least 1.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # A zero step crashes ``range`` later and a negative one silently
        # skips every row, so reject both up front.
        if batch_size < 1:
            raise ValueError(
                f"batch_size must be a positive integer, got {batch_size!r}"
            )
        self.model = model
        self.system_prompt = system_prompt
        self.batch_size = batch_size
        self.agent = AirowAgent(self.model, self.system_prompt)

    async def run(
        self,
        df: pd.DataFrame,
        *,
        prompt: str,
        input_columns: Iterable[str],
        output_columns: schemas.OutputColumn | Iterable[schemas.OutputColumn],
        show_progress: bool = False,
    ) -> pd.DataFrame:
        """Run the agent on each row and write its outputs back into ``df``.

        Args:
            df: Frame to process. It is modified in place and also returned.
            prompt: Base prompt; each row's input values are appended to it.
            input_columns: Columns whose values are shown to the agent.
            output_columns: One column definition or an iterable of them.
            show_progress: Wrap the batch loop in a ``tqdm`` progress bar.

        Returns:
            The same ``df`` object, with one cell assigned per key of each
            row's result dict. Rows whose result dict is empty are left as is.
        """
        if isinstance(output_columns, schemas.OutputColumn):
            output_columns = [output_columns]

        # Materialize both iterables once: they are reused for every row, and
        # a generator would be exhausted after the first one.
        output_columns = list(output_columns)
        input_columns = list(input_columns)

        batch_ranges = [
            (start, start + self.batch_size)
            for start in range(0, df.shape[0], self.batch_size)
        ]
        if show_progress:
            batch_ranges = tqdm(batch_ranges)

        for start, stop in batch_ranges:
            batch = df.iloc[start:stop]

            row_indices = []
            tasks = []
            for idx, row in batch.iterrows():
                # Build a fresh prompt per row. Rebinding ``prompt`` here would
                # leak the data of earlier rows into every later request.
                row_prompt = self._build_row_prompt(prompt, row, input_columns)
                tasks.append(self.agent.run(row_prompt, output_columns))
                row_indices.append(idx)

            # Await every row of this batch together; gather keeps task order.
            results = await asyncio.gather(*tasks)

            # NOTE(review): ``.loc`` is label-based, so duplicate index labels
            # would receive the same values, and list/dict values may not
            # assign cleanly to a single cell - confirm against real data.
            for row_idx, result in zip(row_indices, results):
                for col_name, value in result.items():
                    df.loc[row_idx, col_name] = value

        return df

    @staticmethod
    def _build_row_prompt(
        prompt: str,
        row: pd.Series,
        input_columns: list[str],
    ) -> str:
        """Return ``prompt`` followed by one ``Column/Value`` line per input column."""
        input_data = {col: row[col] for col in input_columns}
        input_data_str = "\n".join(
            f"Column: {name}, Value: {value}" for name, value in input_data.items()
        )
        return f"{prompt}\n\n{input_data_str}"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Any, Type
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class OutputColumn:
    """
    Output column for the AI model.

    All three fields are required; none has a default value.

    Args:
        name: Name of the output column.
        type: Type of the output column.
        description: Free-text description of the output column.

    Examples:
        >>> col = OutputColumn(name="sentiment", type=str, description="Positive or negative")
        >>> col.name
        'sentiment'
        >>> OutputColumn(name="score", type=int, description="Rating from 1 to 10").type
        <class 'int'>
    """

    name: str
    type: Type[Any]
    description: str
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: airow
|
|
3
|
+
Version: 0.1.0a1
|
|
4
|
+
Summary: AI-powered DataFrame processing made simple
|
|
5
|
+
Author-email: Dmitrii K <dmitriik@protonmail.com>
|
|
6
|
+
Maintainer-email: Dmitrii K <dmitriik@protonmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/dmitriiweb/airow
|
|
9
|
+
Project-URL: Repository, https://github.com/dmitriiweb/airow
|
|
10
|
+
Project-URL: Documentation, https://github.com/dmitriiweb/airow
|
|
11
|
+
Project-URL: Bug Tracker, https://github.com/dmitriiweb/airow/issues
|
|
12
|
+
Keywords: ai,ai-agent,dataframe,pandas,pydantic-ai,async,data-processing
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
25
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
|
+
Classifier: Topic :: Text Processing
|
|
27
|
+
Classifier: Topic :: Database
|
|
28
|
+
Classifier: Typing :: Typed
|
|
29
|
+
Requires-Python: >=3.10
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Requires-Dist: loguru>=0.7.3
|
|
33
|
+
Requires-Dist: pandas>=2.3.2
|
|
34
|
+
Requires-Dist: pydantic>=2.11.7
|
|
35
|
+
Requires-Dist: pydantic-ai>=0.8.1
|
|
36
|
+
Requires-Dist: tqdm>=4.67.1
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: mypy>=1.17.1; extra == "dev"
|
|
39
|
+
Requires-Dist: pytest>=8.4.2; extra == "dev"
|
|
40
|
+
Requires-Dist: pytest-asyncio>=1.1.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-cov>=6.3.0; extra == "dev"
|
|
42
|
+
Requires-Dist: ruff>=0.12.12; extra == "dev"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
# Airow
|
|
46
|
+
|
|
47
|
+
**AI-powered DataFrame processing made simple**
|
|
48
|
+
|
|
49
|
+
Airow is a Python library that combines the power of pandas DataFrames with AI models to process structured data at scale. Built on top of `pydantic-ai`, it provides type-safe, async processing of DataFrames using any AI model.
|
|
50
|
+
|
|
51
|
+
## Features
|
|
52
|
+
|
|
53
|
+
- 🚀 **Async processing** with batch support for high performance
|
|
54
|
+
- 🔒 **Type-safe outputs** using Pydantic models
|
|
55
|
+
- 📊 **Progress tracking** with built-in progress bars
|
|
56
|
+
- 🔄 **Automatic retries** with configurable retry logic
|
|
57
|
+
- 🤖 **Flexible AI models** - works with OpenAI, Ollama, Anthropic, and more
|
|
58
|
+
- ⚡ **Parallel processing** within batches for maximum throughput
|
|
59
|
+
- 📝 **Structured outputs** with defined schemas and validation
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Using pip
|
|
65
|
+
pip install airow
|
|
66
|
+
|
|
67
|
+
# Using uv (recommended)
|
|
68
|
+
uv add airow
|
|
69
|
+
|
|
70
|
+
# Using conda
|
|
71
|
+
conda install -c conda-forge airow
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import pandas as pd
|
|
78
|
+
from pydantic_ai.models.openai import OpenAIChatModel
|
|
79
|
+
from pydantic_ai.providers.ollama import OllamaProvider
|
|
80
|
+
from airow import Airow, OutputColumn
|
|
81
|
+
import asyncio
|
|
82
|
+
|
|
83
|
+
async def main():
|
|
84
|
+
# Setup your AI model
|
|
85
|
+
model = OpenAIChatModel(
|
|
86
|
+
model_name="llama3.2:latest",
|
|
87
|
+
provider=OllamaProvider(base_url="http://localhost:11434/v1"),
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
# Create Airow instance
|
|
91
|
+
airow = Airow(
|
|
92
|
+
model=model,
|
|
93
|
+
system_prompt="You are an expert in wine tasting and selection.",
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Load your data
|
|
97
|
+
df = pd.read_csv("wine_data.csv")
|
|
98
|
+
|
|
99
|
+
output_columns = [
|
|
100
|
+
OutputColumn(name="sentiment", type=str, description="Positive, negative, or neutral sentiment"),
|
|
101
|
+
OutputColumn(name="confidence", type=float, description="Confidence score between 0 and 1"),
|
|
102
|
+
OutputColumn(name="keywords", type=list, description="List of key terms extracted"),
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
# Process with AI
|
|
106
|
+
result_df = await airow.run(
|
|
107
|
+
df,
|
|
108
|
+
prompt="Extract taste characteristics from the wine description",
|
|
109
|
+
input_columns=["description"],
|
|
110
|
+
output_columns=output_columns,
|
|
111
|
+
show_progress=True,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
print(result_df.head())
|
|
115
|
+
|
|
116
|
+
if __name__ == "__main__":
|
|
117
|
+
asyncio.run(main())
|
|
118
|
+
```
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
airow/__init__.py
|
|
5
|
+
airow/agent.py
|
|
6
|
+
airow/airow.py
|
|
7
|
+
airow/schemas.py
|
|
8
|
+
airow.egg-info/PKG-INFO
|
|
9
|
+
airow.egg-info/SOURCES.txt
|
|
10
|
+
airow.egg-info/dependency_links.txt
|
|
11
|
+
airow.egg-info/requires.txt
|
|
12
|
+
airow.egg-info/top_level.txt
|
|
13
|
+
tests/test_agents.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
airow
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "airow"
|
|
3
|
+
dynamic = ["version"]
|
|
4
|
+
description = "AI-powered DataFrame processing made simple"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = {text = "MIT"}
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
authors = [
|
|
9
|
+
{name = "Dmitrii K", email = "dmitriik@protonmail.com"},
|
|
10
|
+
]
|
|
11
|
+
maintainers = [
|
|
12
|
+
{name = "Dmitrii K", email = "dmitriik@protonmail.com"},
|
|
13
|
+
]
|
|
14
|
+
keywords = ["ai", "ai-agent", "dataframe", "pandas", "pydantic-ai", "async", "data-processing"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
27
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
28
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
29
|
+
"Topic :: Text Processing",
|
|
30
|
+
"Topic :: Database",
|
|
31
|
+
"Typing :: Typed",
|
|
32
|
+
]
|
|
33
|
+
dependencies = [
|
|
34
|
+
"loguru>=0.7.3",
|
|
35
|
+
"pandas>=2.3.2",
|
|
36
|
+
"pydantic>=2.11.7",
|
|
37
|
+
"pydantic-ai>=0.8.1",
|
|
38
|
+
"tqdm>=4.67.1",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://github.com/dmitriiweb/airow"
|
|
43
|
+
Repository = "https://github.com/dmitriiweb/airow"
|
|
44
|
+
Documentation = "https://github.com/dmitriiweb/airow"
|
|
45
|
+
"Bug Tracker" = "https://github.com/dmitriiweb/airow/issues"
|
|
46
|
+
|
|
47
|
+
[project.optional-dependencies]
|
|
48
|
+
dev = [
|
|
49
|
+
"mypy>=1.17.1",
|
|
50
|
+
"pytest>=8.4.2",
|
|
51
|
+
"pytest-asyncio>=1.1.0",
|
|
52
|
+
"pytest-cov>=6.3.0",
|
|
53
|
+
"ruff>=0.12.12",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.dynamic]
|
|
57
|
+
version = {attr = "airow.__version__"}
|
|
58
|
+
|
|
59
|
+
[tool.pytest.ini_options]
|
|
60
|
+
asyncio_mode = "auto"
|
airow-0.1.0a1/setup.cfg
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from pydantic import BaseModel, ValidationError
|
|
3
|
+
|
|
4
|
+
from airow.agent import AirowAgent
|
|
5
|
+
from airow.schemas import OutputColumn
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_single_output_column():
    """A single column produces an ``OutputColumns`` model with that field."""
    description = "The result of processing"
    column = OutputColumn(name="result", type=str, description=description)

    builder = AirowAgent(model=None, system_prompt="test")
    output_model = builder.build_agent_output_type([column])

    # The generated class is a pydantic model with the fixed class name.
    assert issubclass(output_model, BaseModel)
    assert output_model.__name__ == "OutputColumns"

    # The requested field is declared ...
    declared = output_model.model_fields
    assert "result" in declared

    # ... and carries the column's type and description, and is required.
    result_field = declared["result"]
    assert result_field.annotation is str
    assert result_field.description == description
    assert result_field.is_required()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_multiple_output_columns():
    """Several columns each become a field with matching type and description."""
    expected = {
        "name": (str, "Person's name"),
        "age": (int, "Person's age"),
        "is_active": (bool, "Whether person is active"),
    }
    columns = [
        OutputColumn(name=field_name, type=field_type, description=text)
        for field_name, (field_type, text) in expected.items()
    ]

    builder = AirowAgent(model=None, system_prompt="test")
    declared = builder.build_agent_output_type(columns).model_fields

    # Every requested field is present.
    for field_name in expected:
        assert field_name in declared

    # Each field keeps the type and description of its column.
    for field_name, (field_type, text) in expected.items():
        field = declared[field_name]
        assert field.annotation is field_type
        assert field.description == text
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_different_data_types():
    """Scalar and container column types are carried over unchanged."""
    specs = [
        ("text", str, "Text field"),
        ("number", int, "Integer field"),
        ("float_val", float, "Float field"),
        ("flag", bool, "Boolean field"),
        ("items", list, "List field"),
        ("metadata", dict, "Dictionary field"),
    ]
    columns = [
        OutputColumn(name=field_name, type=field_type, description=text)
        for field_name, field_type, text in specs
    ]

    builder = AirowAgent(model=None, system_prompt="test")
    declared = builder.build_agent_output_type(columns).model_fields

    # Each field's annotation is the exact type object given for its column.
    for field_name, field_type, _ in specs:
        assert declared[field_name].annotation is field_type
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_empty_output_columns():
    """No columns still yields a valid, field-less ``OutputColumns`` model."""
    builder = AirowAgent(model=None, system_prompt="test")

    output_model = builder.build_agent_output_type([])

    assert issubclass(output_model, BaseModel)
    assert output_model.__name__ == "OutputColumns"
    assert len(output_model.model_fields) == 0
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_model_validation():
    """The generated model accepts valid data and rejects invalid data."""
    columns = [
        OutputColumn(name="name", type=str, description="Person's name"),
        OutputColumn(name="age", type=int, description="Person's age"),
    ]
    builder = AirowAgent(model=None, system_prompt="test")
    person_model = builder.build_agent_output_type(columns)

    # A complete, correctly typed payload round-trips into attributes.
    person = person_model(name="John Doe", age=30)
    assert person.name == "John Doe"
    assert person.age == 30

    invalid_payloads = (
        {"name": "John Doe"},  # required "age" is missing
        {"name": "John Doe", "age": "thirty"},  # "age" is not an integer
    )
    for payload in invalid_payloads:
        with pytest.raises(ValidationError):
            person_model(**payload)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_model_serialization():
    """Instances of the generated model dump to a dict and to JSON."""
    columns = [
        OutputColumn(name="result", type=str, description="Processing result"),
        OutputColumn(name="score", type=float, description="Processing score"),
    ]
    builder = AirowAgent(model=None, system_prompt="test")
    output_model = builder.build_agent_output_type(columns)

    record = output_model(result="success", score=0.95)

    # Dict form contains exactly the two field values.
    assert record.model_dump() == {"result": "success", "score": 0.95}

    # JSON form contains both key/value pairs in compact notation.
    serialized = record.model_dump_json()
    for fragment in ('"result":"success"', '"score":0.95'):
        assert fragment in serialized
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_field_descriptions_preserved():
    """A description with special characters reaches the field unchanged."""
    description = "This is a complex field with special characters: @#$%^&*()"
    column = OutputColumn(name="complex_field", type=str, description=description)

    builder = AirowAgent(model=None, system_prompt="test")
    output_model = builder.build_agent_output_type([column])

    assert output_model.model_fields["complex_field"].description == description
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def test_duplicate_field_names():
    """With duplicate column names, the last definition replaces earlier ones."""
    first = OutputColumn(name="field", type=str, description="First field")
    second = OutputColumn(name="field", type=int, description="Second field")

    builder = AirowAgent(model=None, system_prompt="test")
    declared = builder.build_agent_output_type([first, second]).model_fields

    # Only one field survives under the shared name.
    assert len(declared) == 1
    assert "field" in declared

    # It carries the type and description of the later column.
    surviving = declared["field"]
    assert surviving.annotation is int
    assert surviving.description == "Second field"
|