airow 0.1.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
airow-0.1.0a1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Dmitrii K
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
airow-0.1.0a1/PKG-INFO ADDED
@@ -0,0 +1,118 @@
1
+ Metadata-Version: 2.4
2
+ Name: airow
3
+ Version: 0.1.0a1
4
+ Summary: AI-powered DataFrame processing made simple
5
+ Author-email: Dmitrii K <dmitriik@protonmail.com>
6
+ Maintainer-email: Dmitrii K <dmitriik@protonmail.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/dmitriiweb/airow
9
+ Project-URL: Repository, https://github.com/dmitriiweb/airow
10
+ Project-URL: Documentation, https://github.com/dmitriiweb/airow
11
+ Project-URL: Bug Tracker, https://github.com/dmitriiweb/airow/issues
12
+ Keywords: ai,ai-agent,dataframe,pandas,pydantic-ai,async,data-processing
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Topic :: Text Processing
27
+ Classifier: Topic :: Database
28
+ Classifier: Typing :: Typed
29
+ Requires-Python: >=3.10
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE
32
+ Requires-Dist: loguru>=0.7.3
33
+ Requires-Dist: pandas>=2.3.2
34
+ Requires-Dist: pydantic>=2.11.7
35
+ Requires-Dist: pydantic-ai>=0.8.1
36
+ Requires-Dist: tqdm>=4.67.1
37
+ Provides-Extra: dev
38
+ Requires-Dist: mypy>=1.17.1; extra == "dev"
39
+ Requires-Dist: pytest>=8.4.2; extra == "dev"
40
+ Requires-Dist: pytest-asyncio>=1.1.0; extra == "dev"
41
+ Requires-Dist: pytest-cov>=6.3.0; extra == "dev"
42
+ Requires-Dist: ruff>=0.12.12; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # Airow
46
+
47
+ **AI-powered DataFrame processing made simple**
48
+
49
+ Airow is a Python library that combines the power of pandas DataFrames with AI models to process structured data at scale. Built on top of `pydantic-ai`, it provides type-safe, async processing of DataFrames using any AI model.
50
+
51
+ ## Features
52
+
53
+ - 🚀 **Async processing** with batch support for high performance
54
+ - 🔒 **Type-safe outputs** using Pydantic models
55
+ - 📊 **Progress tracking** with built-in progress bars
56
+ - 🔄 **Automatic retries** with configurable retry logic
57
+ - 🤖 **Flexible AI models** - works with OpenAI, Ollama, Anthropic, and more
58
+ - ⚡ **Parallel processing** within batches for maximum throughput
59
+ - 📝 **Structured outputs** with defined schemas and validation
60
+
61
+ ## Installation
62
+
63
+ ```bash
64
+ # Using pip
65
+ pip install airow
66
+
67
+ # Using uv (recommended)
68
+ uv add airow
69
+
70
+ # Using conda
71
+ conda install -c conda-forge airow
72
+ ```
73
+
74
+ ## Quick Start
75
+
76
+ ```python
77
+ import pandas as pd
78
+ from pydantic_ai.models.openai import OpenAIChatModel
79
+ from pydantic_ai.providers.ollama import OllamaProvider
80
+ from airow import Airow, OutputColumn
81
+ import asyncio
82
+
83
+ async def main():
84
+ # Setup your AI model
85
+ model = OpenAIChatModel(
86
+ model_name="llama3.2:latest",
87
+ provider=OllamaProvider(base_url="http://localhost:11434/v1"),
88
+ )
89
+
90
+ # Create Airow instance
91
+ airow = Airow(
92
+ model=model,
93
+ system_prompt="You are an expert in wine tasting and selection.",
94
+ )
95
+
96
+ # Load your data
97
+ df = pd.read_csv("wine_data.csv")
98
+
99
+ output_columns = [
100
+ OutputColumn(name="sentiment", type=str, description="Positive, negative, or neutral sentiment"),
101
+ OutputColumn(name="confidence", type=float, description="Confidence score between 0 and 1"),
102
+ OutputColumn(name="keywords", type=list, description="List of key terms extracted"),
103
+ ]
104
+
105
+ # Process with AI
106
+ result_df = await airow.run(
107
+ df,
108
+ prompt="Extract taste characteristics from the wine description",
109
+ input_columns=["description"],
110
+ output_columns=output_columns,
111
+ show_progress=True,
112
+ )
113
+
114
+ print(result_df.head())
115
+
116
+ if __name__ == "__main__":
117
+ asyncio.run(main())
118
+ ```
@@ -0,0 +1,74 @@
1
+ # Airow
2
+
3
+ **AI-powered DataFrame processing made simple**
4
+
5
+ Airow is a Python library that combines the power of pandas DataFrames with AI models to process structured data at scale. Built on top of `pydantic-ai`, it provides type-safe, async processing of DataFrames using any AI model.
6
+
7
+ ## Features
8
+
9
+ - 🚀 **Async processing** with batch support for high performance
10
+ - 🔒 **Type-safe outputs** using Pydantic models
11
+ - 📊 **Progress tracking** with built-in progress bars
12
+ - 🔄 **Automatic retries** with configurable retry logic
13
+ - 🤖 **Flexible AI models** - works with OpenAI, Ollama, Anthropic, and more
14
+ - ⚡ **Parallel processing** within batches for maximum throughput
15
+ - 📝 **Structured outputs** with defined schemas and validation
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ # Using pip
21
+ pip install airow
22
+
23
+ # Using uv (recommended)
24
+ uv add airow
25
+
26
+ # Using conda
27
+ conda install -c conda-forge airow
28
+ ```
29
+
30
+ ## Quick Start
31
+
32
+ ```python
33
+ import pandas as pd
34
+ from pydantic_ai.models.openai import OpenAIChatModel
35
+ from pydantic_ai.providers.ollama import OllamaProvider
36
+ from airow import Airow, OutputColumn
37
+ import asyncio
38
+
39
+ async def main():
40
+ # Setup your AI model
41
+ model = OpenAIChatModel(
42
+ model_name="llama3.2:latest",
43
+ provider=OllamaProvider(base_url="http://localhost:11434/v1"),
44
+ )
45
+
46
+ # Create Airow instance
47
+ airow = Airow(
48
+ model=model,
49
+ system_prompt="You are an expert in wine tasting and selection.",
50
+ )
51
+
52
+ # Load your data
53
+ df = pd.read_csv("wine_data.csv")
54
+
55
+ output_columns = [
56
+ OutputColumn(name="sentiment", type=str, description="Positive, negative, or neutral sentiment"),
57
+ OutputColumn(name="confidence", type=float, description="Confidence score between 0 and 1"),
58
+ OutputColumn(name="keywords", type=list, description="List of key terms extracted"),
59
+ ]
60
+
61
+ # Process with AI
62
+ result_df = await airow.run(
63
+ df,
64
+ prompt="Extract taste characteristics from the wine description",
65
+ input_columns=["description"],
66
+ output_columns=output_columns,
67
+ show_progress=True,
68
+ )
69
+
70
+ print(result_df.head())
71
+
72
+ if __name__ == "__main__":
73
+ asyncio.run(main())
74
+ ```
@@ -0,0 +1,6 @@
1
+ __version__ = "0.1.0a1"
2
+
3
+ from .airow import Airow
4
+ from .schemas import OutputColumn
5
+
6
+ __all__ = ["Airow", "OutputColumn"]
@@ -0,0 +1,43 @@
1
+ from typing import Iterable
2
+
3
+ from pydantic import BaseModel, Field, create_model
4
+ from pydantic_ai import Agent
5
+ from pydantic_ai.models import Model
6
+ from loguru import logger
7
+
8
+ from . import schemas
9
+
10
+
11
class AirowAgent:
    """Wrap a pydantic-ai ``Agent`` and return its structured output as a plain dict."""

    def __init__(self, model: Model, system_prompt: str, retries: int = 3):
        """
        Store the configuration and build the underlying ``Agent``.

        Args:
            model: Model handed to the ``Agent`` constructor.
            system_prompt: System prompt handed to the ``Agent`` constructor.
            retries: Value forwarded as the ``Agent``'s ``retries`` argument.
        """
        self.model = model
        self.system_prompt = system_prompt
        self.agent = Agent(
            model=model,
            system_prompt=self.system_prompt,
            retries=retries,
        )

    async def run(
        self,
        prompt: str,
        output_columns: Iterable[schemas.OutputColumn],
    ) -> dict[str, object]:
        """
        Run the agent on ``prompt`` and return the output fields as a dict.

        Args:
            prompt: User prompt passed to the agent.
            output_columns: Column definitions that shape the output model.

        Returns:
            The dumped output model, or an empty dict when the agent call
            raised (the exception is logged, not propagated).
        """
        # Built outside the ``try`` so schema errors are not swallowed below.
        output_type = self.build_agent_output_type(output_columns)
        try:
            response = await self.agent.run(prompt, output_type=output_type)
        except Exception as e:
            logger.error(f"{e=}")
            return {}
        return response.output.model_dump()

    def build_agent_output_type(
        self,
        output_columns: Iterable[schemas.OutputColumn],
    ) -> type[BaseModel]:
        """
        Create a pydantic model named ``OutputColumns`` with one required field per column.

        Args:
            output_columns: Column definitions; each supplies the field name,
                type and description.

        Returns:
            The dynamically created model class. When two columns share a
            name, the later definition replaces the earlier one.
        """
        field_definitions = {}
        for column in output_columns:
            field_definitions[column.name] = (
                column.type,
                Field(..., description=column.description),
            )
        return create_model("OutputColumns", **field_definitions)
@@ -0,0 +1,72 @@
1
+ import asyncio
2
+ from typing import Iterable
3
+
4
+ import pandas as pd
5
+ from pydantic_ai.models import Model
6
+ from tqdm import tqdm
7
+
8
+ from . import schemas
9
+ from .agent import AirowAgent
10
+
11
+
12
class Airow:
    """
    Run an AI agent over the rows of a pandas DataFrame.

    Each row is turned into a prompt, sent to the agent, and the returned
    fields are written back into the DataFrame as columns.
    """

    def __init__(
        self,
        *,
        model: Model,
        system_prompt: str,
        batch_size: int = 1,
        retries: int = 3,
    ):
        """
        Store the configuration and create the underlying ``AirowAgent``.

        Args:
            model: Model passed to ``AirowAgent``.
            system_prompt: System prompt passed to ``AirowAgent``.
            batch_size: Number of rows whose agent calls are awaited together.
            retries: Retry count forwarded to ``AirowAgent``.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # A zero step would make ``range`` raise later and a negative one
        # would silently process nothing, so reject both up front.
        if batch_size < 1:
            raise ValueError(
                f"batch_size must be a positive integer, got {batch_size!r}"
            )
        self.model = model
        self.system_prompt = system_prompt
        self.batch_size = batch_size
        self.agent = AirowAgent(self.model, self.system_prompt, retries=retries)

    async def run(
        self,
        df: pd.DataFrame,
        *,
        prompt: str,
        input_columns: Iterable[str],
        output_columns: schemas.OutputColumn | Iterable[schemas.OutputColumn],
        show_progress: bool = False,
    ) -> pd.DataFrame:
        """
        Process every row of ``df`` with the agent and store the results.

        Rows are handled in consecutive slices of ``batch_size`` rows. The
        agent calls of one slice are awaited together with ``asyncio.gather``;
        slices are processed one after another.

        Args:
            df: DataFrame to process. It is modified in place.
            prompt: Base prompt; each row's input values are appended to it.
            input_columns: Names of the columns whose values go into the prompt.
            output_columns: One column definition or an iterable of them.
            show_progress: When true, wrap the batch iteration in ``tqdm``.

        Returns:
            The same ``df`` object, with one column per key returned by the agent.
        """
        # Materialise both iterables: they are reused for every row, which
        # would exhaust a one-shot iterator after the first row.
        if isinstance(output_columns, schemas.OutputColumn):
            output_columns = [output_columns]
        else:
            output_columns = list(output_columns)
        input_columns = list(input_columns)

        total_rows = df.shape[0]
        batch_ranges = [
            (start, start + self.batch_size)
            for start in range(0, total_rows, self.batch_size)
        ]
        if show_progress:
            batch_ranges = tqdm(batch_ranges)

        for start, stop in batch_ranges:
            batch = df.iloc[start:stop]
            tasks = []
            row_indices = []

            for idx, row in batch.iterrows():
                input_data = {col: row[col] for col in input_columns}
                input_data_str = "\n".join(
                    f"Column: {k}, Value: {v}" for k, v in input_data.items()
                )
                # Build a per-row prompt. Reassigning ``prompt`` here would
                # leak the data of earlier rows into every later request.
                row_prompt = f"{prompt}\n\n{input_data_str}"
                tasks.append(self.agent.run(row_prompt, output_columns))
                row_indices.append(idx)

            # Await all calls of this batch together; results keep task order.
            results = await asyncio.gather(*tasks)

            # An empty result dict writes nothing for that row.
            # NOTE(review): ``.loc`` is label-based; with a non-unique index it
            # presumably addresses every row sharing the label - confirm.
            # NOTE(review): list/dict values may not be stored as a single cell
            # through ``.loc`` - verify against pandas before relying on it.
            for row_idx, result in zip(row_indices, results):
                for col_name, value in result.items():
                    df.loc[row_idx, col_name] = value

        return df
@@ -0,0 +1,23 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Type
3
+
4
+ from pydantic import BaseModel
5
+
6
+
7
@dataclass
class OutputColumn:
    """
    Output column for the AI model.

    All three fields are required.

    Args:
        name: Name of the output column.
        type: Type of the output column.
        description: Description of the output column.

    Examples:
        >>> OutputColumn(name="label", type=str, description="Short label")
        OutputColumn(name='label', type=<class 'str'>, description='Short label')
        >>> OutputColumn(name="count", type=int, description="Item count").type
        <class 'int'>
    """

    name: str
    type: Type[Any]
    description: str
@@ -0,0 +1,118 @@
1
+ Metadata-Version: 2.4
2
+ Name: airow
3
+ Version: 0.1.0a1
4
+ Summary: AI-powered DataFrame processing made simple
5
+ Author-email: Dmitrii K <dmitriik@protonmail.com>
6
+ Maintainer-email: Dmitrii K <dmitriik@protonmail.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/dmitriiweb/airow
9
+ Project-URL: Repository, https://github.com/dmitriiweb/airow
10
+ Project-URL: Documentation, https://github.com/dmitriiweb/airow
11
+ Project-URL: Bug Tracker, https://github.com/dmitriiweb/airow/issues
12
+ Keywords: ai,ai-agent,dataframe,pandas,pydantic-ai,async,data-processing
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Topic :: Text Processing
27
+ Classifier: Topic :: Database
28
+ Classifier: Typing :: Typed
29
+ Requires-Python: >=3.10
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE
32
+ Requires-Dist: loguru>=0.7.3
33
+ Requires-Dist: pandas>=2.3.2
34
+ Requires-Dist: pydantic>=2.11.7
35
+ Requires-Dist: pydantic-ai>=0.8.1
36
+ Requires-Dist: tqdm>=4.67.1
37
+ Provides-Extra: dev
38
+ Requires-Dist: mypy>=1.17.1; extra == "dev"
39
+ Requires-Dist: pytest>=8.4.2; extra == "dev"
40
+ Requires-Dist: pytest-asyncio>=1.1.0; extra == "dev"
41
+ Requires-Dist: pytest-cov>=6.3.0; extra == "dev"
42
+ Requires-Dist: ruff>=0.12.12; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # Airow
46
+
47
+ **AI-powered DataFrame processing made simple**
48
+
49
+ Airow is a Python library that combines the power of pandas DataFrames with AI models to process structured data at scale. Built on top of `pydantic-ai`, it provides type-safe, async processing of DataFrames using any AI model.
50
+
51
+ ## Features
52
+
53
+ - 🚀 **Async processing** with batch support for high performance
54
+ - 🔒 **Type-safe outputs** using Pydantic models
55
+ - 📊 **Progress tracking** with built-in progress bars
56
+ - 🔄 **Automatic retries** with configurable retry logic
57
+ - 🤖 **Flexible AI models** - works with OpenAI, Ollama, Anthropic, and more
58
+ - ⚡ **Parallel processing** within batches for maximum throughput
59
+ - 📝 **Structured outputs** with defined schemas and validation
60
+
61
+ ## Installation
62
+
63
+ ```bash
64
+ # Using pip
65
+ pip install airow
66
+
67
+ # Using uv (recommended)
68
+ uv add airow
69
+
70
+ # Using conda
71
+ conda install -c conda-forge airow
72
+ ```
73
+
74
+ ## Quick Start
75
+
76
+ ```python
77
+ import pandas as pd
78
+ from pydantic_ai.models.openai import OpenAIChatModel
79
+ from pydantic_ai.providers.ollama import OllamaProvider
80
+ from airow import Airow, OutputColumn
81
+ import asyncio
82
+
83
+ async def main():
84
+ # Setup your AI model
85
+ model = OpenAIChatModel(
86
+ model_name="llama3.2:latest",
87
+ provider=OllamaProvider(base_url="http://localhost:11434/v1"),
88
+ )
89
+
90
+ # Create Airow instance
91
+ airow = Airow(
92
+ model=model,
93
+ system_prompt="You are an expert in wine tasting and selection.",
94
+ )
95
+
96
+ # Load your data
97
+ df = pd.read_csv("wine_data.csv")
98
+
99
+ output_columns = [
100
+ OutputColumn(name="sentiment", type=str, description="Positive, negative, or neutral sentiment"),
101
+ OutputColumn(name="confidence", type=float, description="Confidence score between 0 and 1"),
102
+ OutputColumn(name="keywords", type=list, description="List of key terms extracted"),
103
+ ]
104
+
105
+ # Process with AI
106
+ result_df = await airow.run(
107
+ df,
108
+ prompt="Extract taste characteristics from the wine description",
109
+ input_columns=["description"],
110
+ output_columns=output_columns,
111
+ show_progress=True,
112
+ )
113
+
114
+ print(result_df.head())
115
+
116
+ if __name__ == "__main__":
117
+ asyncio.run(main())
118
+ ```
@@ -0,0 +1,13 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ airow/__init__.py
5
+ airow/agent.py
6
+ airow/airow.py
7
+ airow/schemas.py
8
+ airow.egg-info/PKG-INFO
9
+ airow.egg-info/SOURCES.txt
10
+ airow.egg-info/dependency_links.txt
11
+ airow.egg-info/requires.txt
12
+ airow.egg-info/top_level.txt
13
+ tests/test_agents.py
@@ -0,0 +1,12 @@
1
+ loguru>=0.7.3
2
+ pandas>=2.3.2
3
+ pydantic>=2.11.7
4
+ pydantic-ai>=0.8.1
5
+ tqdm>=4.67.1
6
+
7
+ [dev]
8
+ mypy>=1.17.1
9
+ pytest>=8.4.2
10
+ pytest-asyncio>=1.1.0
11
+ pytest-cov>=6.3.0
12
+ ruff>=0.12.12
@@ -0,0 +1 @@
1
+ airow
@@ -0,0 +1,60 @@
1
+ [project]
2
+ name = "airow"
3
+ dynamic = ["version"]
4
+ description = "AI-powered DataFrame processing made simple"
5
+ readme = "README.md"
6
+ license = {text = "MIT"}
7
+ requires-python = ">=3.10"
8
+ authors = [
9
+ {name = "Dmitrii K", email = "dmitriik@protonmail.com"},
10
+ ]
11
+ maintainers = [
12
+ {name = "Dmitrii K", email = "dmitriik@protonmail.com"},
13
+ ]
14
+ keywords = ["ai", "ai-agent", "dataframe", "pandas", "pydantic-ai", "async", "data-processing"]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "Intended Audience :: Science/Research",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Operating System :: OS Independent",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
27
+ "Topic :: Scientific/Engineering :: Information Analysis",
28
+ "Topic :: Software Development :: Libraries :: Python Modules",
29
+ "Topic :: Text Processing",
30
+ "Topic :: Database",
31
+ "Typing :: Typed",
32
+ ]
33
+ dependencies = [
34
+ "loguru>=0.7.3",
35
+ "pandas>=2.3.2",
36
+ "pydantic>=2.11.7",
37
+ "pydantic-ai>=0.8.1",
38
+ "tqdm>=4.67.1",
39
+ ]
40
+
41
+ [project.urls]
42
+ Homepage = "https://github.com/dmitriiweb/airow"
43
+ Repository = "https://github.com/dmitriiweb/airow"
44
+ Documentation = "https://github.com/dmitriiweb/airow"
45
+ "Bug Tracker" = "https://github.com/dmitriiweb/airow/issues"
46
+
47
+ [project.optional-dependencies]
48
+ dev = [
49
+ "mypy>=1.17.1",
50
+ "pytest>=8.4.2",
51
+ "pytest-asyncio>=1.1.0",
52
+ "pytest-cov>=6.3.0",
53
+ "ruff>=0.12.12",
54
+ ]
55
+
56
+ [tool.setuptools.dynamic]
57
+ version = {attr = "airow.__version__"}
58
+
59
+ [tool.pytest.ini_options]
60
+ asyncio_mode = "auto"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,189 @@
1
+ import pytest
2
+ from pydantic import BaseModel, ValidationError
3
+
4
+ from airow.agent import AirowAgent
5
+ from airow.schemas import OutputColumn
6
+
7
+
8
def test_single_output_column():
    """One OutputColumn produces a model exposing that field as required and described."""
    columns = [
        OutputColumn(name="result", type=str, description="The result of processing")
    ]
    built = AirowAgent(model=None, system_prompt="test").build_agent_output_type(columns)

    # The generated class is a pydantic model carrying the fixed name.
    assert issubclass(built, BaseModel)
    assert built.__name__ == "OutputColumns"

    # The requested field is present ...
    assert "result" in built.model_fields

    # ... and configured exactly as declared.
    result_field = built.model_fields["result"]
    assert result_field.annotation is str
    assert result_field.description == "The result of processing"
    assert result_field.is_required()
32
+
33
+
34
def test_multiple_output_columns():
    """Several OutputColumns each become a field with the declared type and description."""
    expected = {
        "name": (str, "Person's name"),
        "age": (int, "Person's age"),
        "is_active": (bool, "Whether person is active"),
    }
    columns = [
        OutputColumn(name=field_name, type=field_type, description=text)
        for field_name, (field_type, text) in expected.items()
    ]
    built = AirowAgent(model=None, system_prompt="test").build_agent_output_type(columns)

    # Every declared column shows up as a model field.
    for field_name in expected:
        assert field_name in built.model_fields

    # Each field keeps its declared annotation and description.
    for field_name, (field_type, text) in expected.items():
        info = built.model_fields[field_name]
        assert info.annotation is field_type
        assert info.description == text
63
+
64
+
65
def test_different_data_types():
    """Scalar and container builtin types are carried over as field annotations."""
    declared = [
        ("text", str, "Text field"),
        ("number", int, "Integer field"),
        ("float_val", float, "Float field"),
        ("flag", bool, "Boolean field"),
        ("items", list, "List field"),
        ("metadata", dict, "Dictionary field"),
    ]
    columns = [
        OutputColumn(name=field_name, type=field_type, description=text)
        for field_name, field_type, text in declared
    ]
    built = AirowAgent(model=None, system_prompt="test").build_agent_output_type(columns)

    # Each field must exist and be annotated with the very type object declared.
    for field_name, field_type, _ in declared:
        assert built.model_fields[field_name].annotation is field_type
87
+
88
+
89
def test_empty_output_columns():
    """An empty column list still yields a valid, field-less model class."""
    built = AirowAgent(model=None, system_prompt="test").build_agent_output_type([])

    assert issubclass(built, BaseModel)
    assert built.__name__ == "OutputColumns"
    assert len(built.model_fields) == 0
101
+
102
+
103
def test_model_validation():
    """The generated model accepts valid data and rejects missing or mistyped fields."""
    columns = [
        OutputColumn(name="name", type=str, description="Person's name"),
        OutputColumn(name="age", type=int, description="Person's age"),
    ]
    built = AirowAgent(model=None, system_prompt="test").build_agent_output_type(columns)

    # Well-formed input round-trips into attributes.
    person = built(name="John Doe", age=30)
    assert person.name == "John Doe"
    assert person.age == 30

    # A missing required field ("age") is rejected.
    with pytest.raises(ValidationError):
        built(name="John Doe")

    # A value of the wrong type for "age" is rejected.
    with pytest.raises(ValidationError):
        built(name="John Doe", age="thirty")
127
+
128
+
129
def test_model_serialization():
    """Instances of the generated model serialize to a dict and to JSON."""
    columns = [
        OutputColumn(name="result", type=str, description="Processing result"),
        OutputColumn(name="score", type=float, description="Processing score"),
    ]
    built = AirowAgent(model=None, system_prompt="test").build_agent_output_type(columns)
    record = built(result="success", score=0.95)

    # Dict serialization returns exactly the supplied values.
    assert record.model_dump() == {"result": "success", "score": 0.95}

    # JSON serialization contains both key/value pairs.
    as_json = record.model_dump_json()
    assert '"result":"success"' in as_json
    assert '"score":0.95' in as_json
151
+
152
+
153
def test_field_descriptions_preserved():
    """A description containing special characters is stored on the field unchanged."""
    text = "This is a complex field with special characters: @#$%^&*()"
    columns = [OutputColumn(name="complex_field", type=str, description=text)]

    built = AirowAgent(model=None, system_prompt="test").build_agent_output_type(columns)

    assert built.model_fields["complex_field"].description == text
169
+
170
+
171
def test_duplicate_field_names():
    """When two columns share a name, the later definition replaces the earlier one."""
    first = OutputColumn(name="field", type=str, description="First field")
    second = OutputColumn(name="field", type=int, description="Second field")

    built = AirowAgent(model=None, system_prompt="test").build_agent_output_type(
        [first, second]
    )

    # Only a single field survives the name clash.
    assert len(built.model_fields) == 1
    assert "field" in built.model_fields

    # The surviving field carries the second (last) definition.
    surviving = built.model_fields["field"]
    assert surviving.annotation is int
    assert surviving.description == "Second field"