airow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
airow/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """airow package public API and version information.
2
+
3
+ Exposes `Airow` for batched, row-wise LLM inference over pandas DataFrames
4
+ and `OutputColumn` for declaring structured outputs.
5
+ """
6
+ __version__ = "0.1.0"
7
+
8
+ from .airow import Airow
9
+ from .schemas import OutputColumn
10
+
11
+ __all__ = ["Airow", "OutputColumn"]
airow/agent.py ADDED
@@ -0,0 +1,71 @@
1
+ """Agent utilities for executing pydantic-ai models with structured outputs."""
2
+
3
+ from typing import Iterable
4
+
5
+ from pydantic import BaseModel, Field, create_model
6
+ from pydantic_ai import Agent
7
+ from pydantic_ai.models import Model
8
+ from loguru import logger
9
+
10
+ from . import schemas
11
+
12
+
13
class AirowAgent:
    """Thin adapter over `pydantic_ai.Agent` that returns structured outputs.

    For every run a pydantic model is generated from the requested output
    columns and handed to the wrapped agent as its output type.
    """

    def __init__(
        self,
        model: Model | str,
        system_prompt: str,
        retries: int = 3,
    ):
        """Create the wrapped pydantic-ai agent.

        Args:
            model: Model instance or model identifier string understood by
                pydantic-ai.
            system_prompt: System prompt shared by every run of this agent.
            retries: Retry count forwarded to `pydantic_ai.Agent`.
        """
        self.model = model
        self.system_prompt = system_prompt
        self.agent = Agent(
            model=model,
            system_prompt=self.system_prompt,
            retries=retries,
        )

    async def run(
        self,
        prompt: str,
        output_columns: Iterable[schemas.OutputColumn],
    ) -> dict[str, object]:
        """Execute one model call and return its structured result.

        Args:
            prompt: User prompt sent to the model.
            output_columns: Specifications of the values the model must
                produce.

        Returns:
            A mapping of output column name to parsed value. If the model
            call raises, the error is logged and an empty dict is returned.
        """
        output_model = self.build_agent_output_type(output_columns)
        try:
            response = await self.agent.run(prompt, output_type=output_model)
        except Exception as e:
            # Best-effort contract: a failed row is reported, not raised.
            logger.error(f"{e=}")
            return {}
        else:
            return response.output.model_dump()

    def build_agent_output_type(
        self,
        output_columns: Iterable[schemas.OutputColumn],
    ) -> type[BaseModel]:
        """Build the pydantic model describing the requested outputs.

        Args:
            output_columns: Output column specifications; each becomes one
                required field carrying the column's description.

        Returns:
            A dynamically created `BaseModel` subclass named `OutputColumns`.
        """
        field_definitions = {}
        for column in output_columns:
            # `...` marks the field as required.
            field_definitions[column.name] = (
                column.type,
                Field(..., description=column.description),
            )
        return create_model("OutputColumns", **field_definitions)
airow/airow.py ADDED
@@ -0,0 +1,110 @@
1
+ """High-level API to run LLMs over DataFrame rows in batches."""
2
+
3
+ import asyncio
4
+ from typing import Iterable
5
+
6
+ import pandas as pd
7
+ from pydantic_ai.models import Model
8
+ from tqdm import tqdm
9
+
10
+ from . import schemas
11
+ from .agent import AirowAgent
12
+
13
+
14
class Airow:
    """Apply an LLM to each row of a DataFrame and collect structured results.

    Uses `AirowAgent` internally. Rows are processed concurrently,
    `batch_size` rows at a time.
    """

    def __init__(
        self,
        *,
        model: Model | str,
        system_prompt: str,
        batch_size: int = 1,
        retries: int = 3,
    ):
        """Configure the runner.

        Args:
            model: The pydantic-ai model (or model identifier string) to use.
            system_prompt: System prompt applied to each request.
            batch_size: Number of DataFrame rows to process concurrently.
            retries: Number of retries for a run.

        Raises:
            ValueError: If `batch_size` is smaller than 1.
        """
        # Fail fast: 0 would break `range()` later and a negative value would
        # silently process no rows at all.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        self.model = model
        self.system_prompt = system_prompt
        self.batch_size = batch_size
        self.agent = AirowAgent(self.model, self.system_prompt, retries)

    async def run(
        self,
        df: pd.DataFrame,
        *,
        prompt: str,
        input_columns: Iterable[str],
        output_columns: schemas.OutputColumn | Iterable[schemas.OutputColumn],
        show_progress: bool = False,
    ) -> pd.DataFrame:
        """Run the model across the DataFrame and return results.

        For each row, the values from `input_columns` are appended to `prompt`
        as labeled lines and the model is asked to produce `output_columns`.
        Results are written into a copy of `df`; the DataFrame passed in is
        not modified.

        Args:
            df: Input DataFrame.
            prompt: Base prompt text provided to the model.
            input_columns: Columns whose values are passed as context to the
                model.
            output_columns: One or more output column specifications.
            show_progress: Whether to display a progress bar (one tick per
                batch).

        Returns:
            A copy of the input DataFrame with the output columns populated.
            Rows whose model call failed keep their previous value, or NaN
            when the output column is new.
        """
        if isinstance(output_columns, schemas.OutputColumn):
            output_columns = [output_columns]
        else:
            # The same specs are reused for every row; a one-shot iterator
            # would be exhausted after the first row.
            output_columns = list(output_columns)
        input_columns = list(input_columns)

        # Work on a copy so the caller's DataFrame is left untouched.
        df = df.copy()
        total_rows = df.shape[0]

        batch_starts = range(0, total_rows, self.batch_size)
        if show_progress:
            batch_starts = tqdm(batch_starts)

        # One result dict per row, stored in positional order.
        row_results: list[dict[str, object]] = []
        for start in batch_starts:
            stop = min(start + self.batch_size, total_rows)
            tasks = [
                self.agent.run(
                    self._build_prompt(prompt, df.iloc[position], input_columns),
                    output_columns,
                )
                for position in range(start, stop)
            ]
            # All rows of the batch run concurrently; gather keeps task order.
            row_results.extend(await asyncio.gather(*tasks))

        self._write_results(df, row_results)
        return df

    @staticmethod
    def _build_prompt(
        prompt: str,
        row: pd.Series,
        input_columns: list[str],
    ) -> str:
        """Append the row's input values to the base prompt as labeled lines."""
        input_data = {col: row[col] for col in input_columns}
        context = "\n".join(
            f"Column: {name}, Value: {value}" for name, value in input_data.items()
        )
        return f"{prompt}\n\n{context}"

    @staticmethod
    def _write_results(
        df: pd.DataFrame,
        row_results: list[dict[str, object]],
    ) -> None:
        """Store per-row results in `df`, matching rows by position.

        Rows are matched by position rather than by index label, so frames
        with a non-default or duplicated index are filled correctly. Whole
        columns are assigned at once, which also allows list-like values.
        """
        # Column names in first-seen order, without duplicates.
        column_names = dict.fromkeys(
            name for result in row_results for name in result
        )
        for name in column_names:
            if name in df.columns:
                values = df[name].tolist()
            else:
                values = [float("nan")] * len(row_results)
            for position, result in enumerate(row_results):
                if name in result:
                    values[position] = result[name]
            df[name] = pd.Series(values, index=df.index)
airow/schemas.py ADDED
@@ -0,0 +1,25 @@
1
+ """Data structures for declaring model outputs."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Type
5
+
6
+ from pydantic import BaseModel
7
+
8
+
9
@dataclass
class OutputColumn:
    """Specification of one output column the AI model must produce.

    Args:
        name: Name of the output column (also the field name the model fills).
        type: Python type of the output value, e.g. `str`, `int`, `float`.
        description: Human-readable explanation of the expected value, passed
            to the model as the field description. Defaults to an empty
            string.

    Examples:
        >>> OutputColumn(name="sentiment", type=str).description
        ''
        >>> OutputColumn(
        ...     name="confidence",
        ...     type=float,
        ...     description="Confidence score between 0 and 1",
        ... ).name
        'confidence'
    """

    name: str
    type: Type[Any]
    # Optional, so the two-argument form shown in the examples is valid.
    description: str = ""
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.4
2
+ Name: airow
3
+ Version: 0.1.0
4
+ Summary: AI-powered DataFrame processing made simple
5
+ Author-email: Dmitrii K <dmitriik@protonmail.com>
6
+ Maintainer-email: Dmitrii K <dmitriik@protonmail.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/dmitriiweb/airow
9
+ Project-URL: Repository, https://github.com/dmitriiweb/airow
10
+ Project-URL: Documentation, https://github.com/dmitriiweb/airow
11
+ Project-URL: Bug Tracker, https://github.com/dmitriiweb/airow/issues
12
+ Keywords: ai,ai-agent,dataframe,pandas,pydantic-ai,async,data-processing
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Topic :: Text Processing
27
+ Classifier: Topic :: Database
28
+ Classifier: Typing :: Typed
29
+ Requires-Python: >=3.10
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE
32
+ Requires-Dist: loguru>=0.7.3
33
+ Requires-Dist: pandas>=2.3.2
34
+ Requires-Dist: pydantic>=2.11.7
35
+ Requires-Dist: pydantic-ai>=0.8.1
36
+ Requires-Dist: tqdm>=4.67.1
37
+ Provides-Extra: dev
38
+ Requires-Dist: mypy>=1.17.1; extra == "dev"
39
+ Requires-Dist: pytest>=8.4.2; extra == "dev"
40
+ Requires-Dist: pytest-asyncio>=1.1.0; extra == "dev"
41
+ Requires-Dist: pytest-cov>=6.3.0; extra == "dev"
42
+ Requires-Dist: ruff>=0.12.12; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # Airow
46
+
47
+ **AI-powered DataFrame processing made simple**
48
+
49
+ Airow is a Python library that combines the power of pandas DataFrames with AI models to process structured data at scale. Built on top of `pydantic-ai`, it provides type-safe, async processing of DataFrames using any model supported by `pydantic-ai`.
50
+
51
+ ## Features
52
+
53
+ - 🚀 **Async processing** with batch support for high performance
54
+ - 🔒 **Type-safe outputs** using Pydantic models
55
+ - 📊 **Progress tracking** with built-in progress bars
56
+ - 🔄 **Automatic retries** with configurable retry logic
57
+ - 🤖 **Flexible AI models** - works with OpenAI, Ollama, Anthropic, and more
58
+ - ⚡ **Parallel processing** within batches for maximum throughput
59
+ - 📝 **Structured outputs** with defined schemas and validation
60
+
61
+ ## Installation
62
+
63
+ ```bash
64
+ # Using pip
65
+ pip install airow
66
+
67
+ # Using uv (recommended)
68
+ uv add airow
69
+
70
+ # Using conda
71
+ conda install -c conda-forge airow
72
+ ```
73
+
74
+ ## Quick Start
75
+
76
+ ```python
77
+ import pandas as pd
78
+ from pydantic_ai.models.openai import OpenAIChatModel
79
+ from pydantic_ai.providers.ollama import OllamaProvider
80
+ from airow import Airow, OutputColumn
81
+ import asyncio
82
+
83
+ async def main():
84
+ # Setup your AI model
85
+ model = OpenAIChatModel(
86
+ model_name="llama3.2:latest",
87
+ provider=OllamaProvider(base_url="http://localhost:11434/v1"),
88
+ )
89
+ # Alternatively, pass a model identifier string instead (pick one; each assignment below replaces the previous one):
90
+ model = "openai:gpt-5"
91
+ model = "anthropic:claude-sonnet-4-0"
92
+
93
+ # Create Airow instance
94
+ airow = Airow(
95
+ model=model,
96
+ system_prompt="You are an expert in wine tasting and selection.",
97
+ )
98
+
99
+ # Load your data
100
+ df = pd.read_csv("wine_data.csv")
101
+
102
+ output_columns = [
103
+ OutputColumn(name="sentiment", type=str, description="Positive, negative, or neutral sentiment"),
104
+ OutputColumn(name="confidence", type=float, description="Confidence score between 0 and 1"),
105
+ OutputColumn(name="keywords", type=list, description="List of key terms extracted"),
106
+ ]
107
+
108
+ # Process with AI
109
+ result_df = await airow.run(
110
+ df,
111
+ prompt="Analyze the wine description and provide sentiment analysis, confidence score, and extract key terms.",
112
+ input_columns=["description"],
113
+ output_columns=output_columns,
114
+ show_progress=True,
115
+ )
116
+
117
+ print(result_df.head())
118
+
119
+ if __name__ == "__main__":
120
+ asyncio.run(main())
121
+ ```
@@ -0,0 +1,9 @@
1
+ airow/__init__.py,sha256=SOIZ4vEvMRj1fnoEpFOqH9WJyqXexjVwNYqJPbxsoag,305
2
+ airow/agent.py,sha256=Qp77KCzNwF84dwkbqbDonmw7Yqp-IU_OJ9rwOVMUuE8,2331
3
+ airow/airow.py,sha256=ykrIizSVy_bEFKUgvebCYRqw0nx_9dlY0hyyAVWkEaY,3723
4
+ airow/schemas.py,sha256=2qCW5DR4tcDcY3M8IQihuR9rqtfiYwkp2z_xEOB6vA0,510
5
+ airow-0.1.0.dist-info/licenses/LICENSE,sha256=vTx38XxI12rNKWp4ebsj-Yc9eke56CBetDgHW2L2Cfg,1066
6
+ airow-0.1.0.dist-info/METADATA,sha256=jO7g4QxGJGLGkKULWiI60UVXMHUGzqXksCH_PGrn7y0,4196
7
+ airow-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ airow-0.1.0.dist-info/top_level.txt,sha256=Ux5WLwH3HuMGPpvHKYkBIbrbmxWQd_z-aIR2R_mHG5U,6
9
+ airow-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Dmitrii K
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ airow