airopa-automation 0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
+ Metadata-Version: 2.4
+ Name: airopa-automation
+ Version: 0.1.0
+ Summary: AI-powered automation workflows
+ Requires-Python: >=3.12
@@ -0,0 +1,174 @@
+ # AIropa Automation Layer
+
+ AIropa is an AI-powered automation framework designed to streamline and automate complex workflows.
+
+ ## 🚀 Quick Start
+
+ ### Prerequisites
+ - Python 3.12+ (matching `requires-python` in the package metadata)
+ - SQLite (included with Python)
+
+ ### Installation
+
+ ```bash
+ # Clone the repository
+ git clone https://github.com/your-repo/airopa-automation.git
+ cd airopa-automation
+
+ # Install dependencies
+ pip install -r requirements.txt
+
+ # Run the demo
+ python main.py
+ ```
+
+ ## 📦 Features
+
+ - **Agent Framework**: Base classes for creating custom automation agents
+ - **Configuration Management**: Flexible configuration system with file and environment variable support
+ - **Database Integration**: SQLite support with easy extensibility to other databases
+ - **Task Management**: Track and manage automation tasks and their execution history
+
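+ As a quick illustration, the shipped agents can be chained into a simple pipeline. This sketch is based on the classes in `airopa_automation/agents.py`; it is not a documented entry point, and the quality threshold is arbitrary:
+
+ ```python
+ # Illustrative pipeline: scrape, classify, score, then publish as markdown.
+ from airopa_automation.agents import (
+     CategoryClassifierAgent,
+     ContentGeneratorAgent,
+     QualityScoreAgent,
+     ScraperAgent,
+ )
+
+ scraper = ScraperAgent()
+ classifier = CategoryClassifierAgent()
+ scorer = QualityScoreAgent()
+ generator = ContentGeneratorAgent()
+
+ for article in scraper.scrape_rss_feeds():
+     article = classifier.classify(article)
+     article = scorer.assess_quality(article)
+     if article.quality_score >= 0.5:  # arbitrary cutoff for the example
+         generator.generate_markdown(article)
+ ```
+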
+ ## 🏗️ Architecture
+
+ ```
+ airopa-automation/
+ ├── airopa_automation/      # Core package
+ │   ├── __init__.py         # Package initialization
+ │   ├── agents.py           # Agent base classes
+ │   ├── config.py           # Configuration management
+ │   └── database.py         # Database operations
+ ├── database/               # Database files
+ │   └── schema.sql          # Database schema
+ ├── main.py                 # Main entry point
+ ├── README.md               # Documentation
+ └── requirements.txt        # Dependencies
+ ```
+
+ ## 🤖 Creating Agents
+
+ ```python
+ from airopa_automation.agents import BaseAgent
+
+ class MyCustomAgent(BaseAgent):
+     def execute(self, *args, **kwargs):
+         # Your automation logic here
+         return {"status": "completed", "result": "success"}
+
+ # Usage
+ agent = MyCustomAgent(name="my_agent", description="My custom automation agent")
+ result = agent.execute({"task": "example"})
+ ```
+
+ ## 🔧 Configuration
+
+ Create a `config.json` file:
+
+ ```json
+ {
+   "debug": true,
+   "log_level": "DEBUG",
+   "database": {
+     "type": "sqlite",
+     "path": "database/airopa.db"
+   },
+   "agents": {
+     "default_timeout": 120
+   }
+ }
+ ```
+
+ Or use environment variables:
+
+ ```bash
+ export AIROPA_DEBUG=true
+ export AIROPA_DATABASE__TYPE=sqlite
+ export AIROPA_DATABASE__PATH=database/airopa.db
+ ```
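+
+ Note that the `Config` model shipped in `airopa_automation/config.py` builds its defaults in code and reads only `DEBUG` and `GROQ_API_KEY` from the environment; wiring `config.json` or the `AIROPA_`-prefixed variables above into it is left to the caller. A minimal loading sketch, assuming you map the JSON keys onto the model's actual field names (`DatabaseConfig` exposes `db_path`, not `type`/`path`):
+
+ ```python
+ # Minimal sketch: build the pydantic Config from config.json by hand.
+ import json
+
+ from airopa_automation.config import Config, DatabaseConfig
+
+ with open("config.json", encoding="utf-8") as f:
+     raw = json.load(f)
+
+ config = Config(
+     debug=raw.get("debug", False),
+     database=DatabaseConfig(
+         db_path=raw.get("database", {}).get("path", "database/airopa.db")
+     ),
+ )
+ ```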
+
+ ## 📊 Database
+
+ The system includes a SQLite database with tables for:
+ - Tasks (automation tasks)
+ - Task executions (execution history)
+ - Agents (registered automation agents)
+
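+ The schema itself ships in `database/schema.sql`, which this diff does not include, so the exact columns cannot be reproduced here. For orientation, a table can be created through the shipped `Database` wrapper like this (illustrative columns only):
+
+ ```python
+ # Illustrative: create one of the tables via the Database wrapper.
+ # The column layout is a guess; the authoritative DDL is database/schema.sql.
+ from airopa_automation.database import Database
+
+ with Database({"type": "sqlite", "path": "database/airopa.db"}) as db:
+     db.execute(
+         """
+         CREATE TABLE IF NOT EXISTS tasks (
+             id INTEGER PRIMARY KEY AUTOINCREMENT,
+             name TEXT NOT NULL,
+             created_at TEXT DEFAULT CURRENT_TIMESTAMP
+         )
+         """
+     )
+     db.commit()
+ ```
+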
+ ## 🎯 Roadmap
+
+ - [x] Core agent framework
+ - [x] Configuration management
+ - [x] Database integration
+ - [ ] Advanced agent types
+ - [ ] Web interface
+ - [ ] API endpoints
+ - [ ] Scheduling system
+
+ ## 🤝 Contributing
+
+ Contributions are welcome! Please open issues and pull requests.
+
+ ## 📄 License
+
+ MIT License - see LICENSE file for details.
@@ -0,0 +1,26 @@
+ """
+ AIropa Automation Layer - Core Package
+
+ This package provides the foundation for AI-powered automation workflows.
+ """
+
+ from .agents import (
+     CategoryClassifierAgent,
+     ContentGeneratorAgent,
+     GitCommitAgent,
+     QualityScoreAgent,
+     ScraperAgent,
+ )
+ from .config import Config
+ from .database import Database
+
+ __version__ = "0.1.0"
+ __all__ = [
+     "ScraperAgent",
+     "CategoryClassifierAgent",
+     "QualityScoreAgent",
+     "ContentGeneratorAgent",
+     "GitCommitAgent",
+     "Config",
+     "Database",
+ ]
@@ -0,0 +1,358 @@
+ # AIropa Automation Agents - Base Classes
+
+ import hashlib
+ import time
+ from datetime import datetime
+ from pathlib import Path
+ from typing import List, Optional
+
+ import feedparser
+ import requests
+ from bs4 import BeautifulSoup
+ from newspaper import Article as NewspaperArticle
+ from pydantic import BaseModel, Field
+ from slugify import slugify
+
+ from airopa_automation.config import config
+
+
+ class Article(BaseModel):
+     title: str
+     url: str
+     source: str
+     content: str
+     summary: str = ""
+     published_date: Optional[datetime] = None
+     scraped_date: datetime = Field(default_factory=datetime.now)  # per instance, not at import time
+     category: str = ""
+     country: str = ""
+     quality_score: float = 0.0
+
+     def generate_hash(self) -> str:
+         """Generate a unique hash for this article"""
+         hash_input = f"{self.title}{self.url}{self.source}".encode("utf-8")
+         return hashlib.sha256(hash_input).hexdigest()
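+
+     # Illustrative usage (not executed by the package): the hash keys on
+     # title, url and source, so content edits alone do not change identity.
+     #
+     #     a = Article(title="T", url="http://x.com", source="S", content="c")
+     #     a.generate_hash()  # 64-character SHA-256 hex digest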
+
+
+ class ScraperAgent:
+     def __init__(self):
+         self.session = requests.Session()
+         self.session.headers.update(
+             {
+                 "User-Agent": config.scraper.user_agent,
+                 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",  # noqa: E501
+                 "Accept-Language": "en-US,en;q=0.5",
+             }
+         )
+
+     def scrape_rss_feeds(self) -> List[Article]:
+         """Scrape articles from RSS feeds"""
+         articles = []
+
+         for feed_url in config.scraper.rss_feeds:
+             try:
+                 feed = feedparser.parse(feed_url)
+
+                 for entry in feed.entries[: config.scraper.max_articles_per_source]:
+                     try:
+                         article = Article(
+                             title=entry.get("title", "No title"),
+                             url=entry.get("link", ""),
+                             source=feed.feed.get("title", feed_url),
+                             content=self._extract_article_content(
+                                 entry.get("link", "")
+                             ),
+                             summary=entry.get("summary", ""),
+                             published_date=self._parse_date(entry.get("published", "")),
+                             scraped_date=datetime.now(),
+                         )
+                         articles.append(article)
+
+                         # Rate limiting
+                         time.sleep(config.scraper.rate_limit_delay)
+
+                     except Exception as e:
+                         print(
+                             f"Error processing RSS entry {entry.get('title', 'unknown')}: {e}"  # noqa: E501
+                         )
+                         continue
+
+             except Exception as e:
+                 print(f"Error scraping RSS feed {feed_url}: {e}")
+                 continue
+
+         return articles
+
+     def scrape_web_sources(self) -> List[Article]:
+         """Scrape articles from web sources"""
+         articles = []
+
+         for source_url in config.scraper.web_sources:
+             try:
+                 response = self.session.get(source_url, timeout=10)
+                 response.raise_for_status()
+
+                 soup = BeautifulSoup(response.text, "html.parser")
+                 article_links = self._extract_article_links(soup, source_url)
+
+                 for link in article_links[: config.scraper.max_articles_per_source]:
+                     try:
+                         article = self._scrape_article_page(link, source_url)
+                         if article:
+                             articles.append(article)
+
+                         # Rate limiting
+                         time.sleep(config.scraper.rate_limit_delay)
+
+                     except Exception as e:
+                         print(f"Error scraping article {link}: {e}")
+                         continue
+
+             except Exception as e:
+                 print(f"Error accessing web source {source_url}: {e}")
+                 continue
+
+         return articles
+
+     def _extract_article_links(self, soup: BeautifulSoup, source_url: str) -> List[str]:
+         """Extract article links from a webpage"""
+         links = []
+
+         # Look for common article link patterns
+         for a in soup.find_all("a", href=True):
+             href = a["href"]
+             if any(
+                 keyword in href.lower()
+                 for keyword in ["article", "news", "post", "blog"]
+             ):
+                 if href.startswith("http"):
+                     links.append(href)
+                 else:
+                     # Handle relative URLs
+                     from urllib.parse import urljoin
+
+                     links.append(urljoin(source_url, href))
+
+         return list(set(links))  # Remove duplicates
+
+     def _scrape_article_page(self, url: str, source: str) -> Optional[Article]:
+         """Scrape content from a single article page"""
+         try:
+             # Use newspaper3k for article extraction
+             newspaper_article = NewspaperArticle(url)
+             newspaper_article.download()
+             newspaper_article.parse()
+
+             return Article(
+                 title=newspaper_article.title,
+                 url=url,
+                 source=source,
+                 content=newspaper_article.text,
+                 summary=newspaper_article.summary,
+                 published_date=newspaper_article.publish_date,
+                 scraped_date=datetime.now(),
+             )
+
+         except Exception as e:
+             print(f"Error scraping article page {url}: {e}")
+             return None
+
+     def _extract_article_content(self, url: str) -> str:
+         """Extract main content from an article URL"""
+         try:
+             newspaper_article = NewspaperArticle(url)
+             newspaper_article.download()
+             newspaper_article.parse()
+             return str(newspaper_article.text)
+         except Exception as e:
+             print(f"Error extracting content from {url}: {e}")
+             return ""
+
+     def _parse_date(self, date_str: str) -> Optional[datetime]:
+         """Parse various date formats"""
+         if not date_str:
+             return None
+
+         # Try multiple date formats
+         from dateutil import parser as dateutil_parser
+
+         try:
+             parsed: datetime = dateutil_parser.parse(date_str)
+             return parsed
+         except Exception:
+             return None
+
+
+ class CategoryClassifierAgent:
+     def __init__(self):
+         # Initialize AI client (will be implemented)
+         pass
+
+     def classify(self, article: Article) -> Article:
+         """Classify article into appropriate category"""
+         # This will use AI/ML for classification
+         # For now, implement basic keyword-based classification
+
+         title_lower = article.title.lower()
+         content_lower = article.content.lower()
+
+         # Category classification
+         if any(
+             keyword in title_lower or keyword in content_lower
+             for keyword in ["startup", "company", "funding", "investment"]
+         ):
+             article.category = "startups"
+         elif any(
+             keyword in title_lower or keyword in content_lower
+             for keyword in ["policy", "regulation", "law", "act", "government"]
+         ):
+             article.category = "policy"
+         elif any(
+             country in title_lower or country in content_lower
+             for country in ["france", "germany", "netherlands", "europe", "eu"]
+         ):
+             article.category = "country"
+         else:
+             article.category = "stories"
+
+         # Country classification
+         if "france" in title_lower or "france" in content_lower:
+             article.country = "France"
+         elif "germany" in title_lower or "germany" in content_lower:
+             article.country = "Germany"
+         elif "netherlands" in title_lower or "netherlands" in content_lower:
+             article.country = "Netherlands"
+         elif "europe" in title_lower or "eu" in title_lower:
+             article.country = "Europe"
+         else:
+             article.country = ""
+
+         return article
+
+
+ class QualityScoreAgent:
+     def __init__(self):
+         pass
+
+     def assess_quality(self, article: Article) -> Article:
+         """Assess article quality and relevance"""
+         # Basic quality scoring algorithm
+         score = 0.0
+
+         # Title quality
+         if len(article.title.split()) > 3:
+             score += 0.2
+
+         # Content length
+         word_count = len(article.content.split())
+         if word_count > 200:
+             score += 0.3
+         if word_count > 500:
+             score += 0.2
+
+         # Source credibility
+         if any(source in article.source.lower() for source in ["europa.eu", "airopa"]):
+             score += 0.3
+
+         # Category relevance
+         if article.category:
+             score += 0.1
+
+         # Country relevance
+         if article.country:
+             score += 0.1
+
+         article.quality_score = min(score, 1.0)
+         return article
+
+
+ class ContentGeneratorAgent:
+     def __init__(self):
+         self.output_dir = Path(config.content.output_dir)
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+
+     def generate_markdown(self, article: Article) -> Optional[Path]:
+         """Generate markdown file for an article"""
+         try:
+             # Generate filename
+             title_slug: str = slugify(article.title)
+             date_str = (
+                 article.published_date.strftime("%Y-%m-%d")
+                 if article.published_date
+                 else datetime.now().strftime("%Y-%m-%d")
+             )
+             filename = f"{date_str}-{title_slug}.md"
+             filepath: Path = self.output_dir / filename
+
+             # Generate frontmatter
+             frontmatter = self._generate_frontmatter(article)
+
+             # Write markdown file
+             with open(filepath, "w", encoding="utf-8") as f:
+                 f.write(frontmatter)
+                 f.write(f"\n\n{article.content}")
+
+             return filepath
+
+         except Exception as e:
+             print(f"Error generating markdown for {article.title}: {e}")
+             return None
+
+     def _generate_frontmatter(self, article: Article) -> str:
+         """Generate YAML frontmatter for markdown file"""
+         frontmatter = "---\n"  # caveat: double quotes inside values are not escaped below
+         frontmatter += f'title: "{article.title}"\n'
+         frontmatter += f"date: \"{article.published_date.strftime('%Y-%m-%d') if article.published_date else datetime.now().strftime('%Y-%m-%d')}\"\n"  # noqa: E501
+         frontmatter += f'author: "{config.content.default_author}"\n'
+         frontmatter += f'source: "{article.source}"\n'
+         frontmatter += f'url: "{article.url}"\n'
+         frontmatter += f'pillar: "{article.category}"\n'
+
+         if article.country:
+             frontmatter += f'country: "{article.country}"\n'
+
+         if article.summary:
+             frontmatter += f'description: "{article.summary[:160]}"\n'
+
+         frontmatter += f'coverImage: "{config.content.default_cover_image}"\n'
+         frontmatter += "isFeatured: false\n"
+         frontmatter += "isAiGenerated: true\n"
+         frontmatter += "---"
+
+         return frontmatter
+
+
+ class GitCommitAgent:
+     def __init__(self):
+         import git  # lazy import: GitPython is only needed by this agent
+
+         self.repo_path = Path(config.git.repo_path)
+         self.repo = git.Repo(self.repo_path)
+
+     def commit_new_content(self, files: List[Path]) -> bool:
+         """Commit new content files to git repository"""
+         try:
+             # Add files to git
+             for file in files:
+                 relative_path = file.relative_to(self.repo_path)
+                 self.repo.index.add([str(relative_path)])
+
+             # Commit changes
+             import git  # re-import: the name bound in __init__ is not in scope here
+
+             self.repo.index.commit(
+                 config.git.commit_message,
+                 author=git.Actor(config.git.author_name, config.git.author_email),
+             )
+
+             return True
+
+         except Exception as e:
+             print(f"Error committing files to git: {e}")
+             return False
@@ -0,0 +1,78 @@
+ # AIropa Automation Configuration
+
+ import os
+ from pathlib import Path
+
+ from dotenv import load_dotenv
+ from pydantic import BaseModel
+
+ # Load environment variables
+ load_dotenv()
+
+
+ class ScraperConfig(BaseModel):
+     rss_feeds: list[str] = [
+         "https://sifted.eu/feed/?post_type=article",
+         "https://tech.eu/category/deep-tech/feed",
+         "https://european-champions.org/feed",
+         "https://tech.eu/category/robotics/feed",
+     ]
+     web_sources: list[str] = [
+         "https://sifted.eu",
+         "https://tech.eu",
+         "https://european-champions.org",
+     ]
+     max_articles_per_source: int = 10
+     rate_limit_delay: float = 1.0  # seconds between requests
+     user_agent: str = "AIropaBot/1.0 (+https://airopa.eu)"
+
+
+ class AIConfig(BaseModel):
+     model: str = "llama3-70b-8192"
+     temperature: float = 0.7
+     max_tokens: int = 1024
+     api_key: str = os.getenv("GROQ_API_KEY", "")
+     # Note: AI features will be limited due to Python 3.13 compatibility issues
+
+
+ class DatabaseConfig(BaseModel):
+     db_path: str = "database/airopa.db"
+     max_connections: int = 5
+     timeout: float = 10.0
+
+
+ class ContentConfig(BaseModel):
+     output_dir: str = "../airopa/src/content/post"
+     default_author: str = "AIropa Bot"
+     default_cover_image: str = "/assets/featured-story.jpg"
+
+
+ class GitConfig(BaseModel):
+     repo_path: str = ".."
+     commit_message: str = "chore(content): add automated AI news articles"
+     author_name: str = "AIropa Bot"
+     author_email: str = "bot@airopa.eu"
+
+
+ class Config(BaseModel):
+     scraper: ScraperConfig = ScraperConfig()
+     ai: AIConfig = AIConfig()
+     database: DatabaseConfig = DatabaseConfig()
+     content: ContentConfig = ContentConfig()
+     git: GitConfig = GitConfig()
+     debug: bool = os.getenv("DEBUG", "false").lower() == "true"
+
+
+ # Global configuration instance
+ config = Config()
+
+
+ def ensure_directories() -> None:
+     """Ensure required directories exist"""
+     Path(config.content.output_dir).mkdir(parents=True, exist_ok=True)
+     Path(config.database.db_path).parent.mkdir(parents=True, exist_ok=True)
+
+
+ if __name__ == "__main__":
+     ensure_directories()
+     print("Configuration loaded successfully")
@@ -0,0 +1,155 @@
+ """
+ Database Module - Database connectivity and operations
+
+ This module provides a unified interface for database operations
+ across different database backends (SQLite, PostgreSQL, etc.).
+ """
+
+ import os
+ import sqlite3
+ from typing import Any, Optional
+
+
+ class Database:
+     """
+     Database connection and operations manager.
+
+     Provides a unified interface for database operations with support
+     for multiple database backends.
+     """
+
+     def __init__(self, config: dict[str, Any]):
+         """
+         Initialize database connection.
+
+         Args:
+             config (dict[str, Any]): Database configuration
+         """
+         self.config = config
+         self.connection: Optional[sqlite3.Connection] = None
+         self.cursor: Optional[sqlite3.Cursor] = None
+
+     def connect(self) -> bool:
+         """
+         Establish database connection.
+
+         Returns:
+             bool: True if connection successful, False otherwise
+         """
+         try:
+             db_type = self.config.get("type", "sqlite")
+
+             if db_type == "sqlite":
+                 db_path = self.config.get("path", "database/airopa.db")
+                 # Ensure directory exists (dirname is empty for bare filenames)
+                 db_dir = os.path.dirname(db_path)
+                 if db_dir:
+                     os.makedirs(db_dir, exist_ok=True)
+                 self.connection = sqlite3.connect(db_path)
+                 self.cursor = self.connection.cursor()
+                 return True
+
+             raise ValueError(f"Unsupported database type: {db_type}")
+
+         except Exception as e:
+             print(f"Error connecting to database: {e}")
+             return False
+
+     def disconnect(self) -> None:
+         """Close database connection."""
+         if self.connection:
+             self.connection.close()
+             self.connection = None
+             self.cursor = None
+
+     def execute(self, query: str, params: tuple[Any, ...] | None = None) -> bool:
+         """
+         Execute a SQL query.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             bool: True if execution successful, False otherwise
+         """
+         try:
+             if not self.connection:
+                 if not self.connect():
+                     return False
+
+             if self.cursor is None:
+                 return False
+
+             if params:
+                 self.cursor.execute(query, params)
+             else:
+                 self.cursor.execute(query)
+
+             return True
+
+         except Exception as e:
+             print(f"Error executing query: {e}")
+             return False
+
+     def fetch_one(
+         self, query: str, params: tuple[Any, ...] | None = None
+     ) -> Optional[tuple[Any, ...]]:
+         """
+         Execute query and fetch one result.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             Optional[tuple[Any, ...]]: First result row or None
+         """
+         if self.execute(query, params) and self.cursor is not None:
+             result: Optional[tuple[Any, ...]] = self.cursor.fetchone()
+             return result
+         return None
+
+     def fetch_all(
+         self, query: str, params: tuple[Any, ...] | None = None
+     ) -> list[tuple[Any, ...]]:
+         """
+         Execute query and fetch all results.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             list[tuple[Any, ...]]: All result rows
+         """
+         if self.execute(query, params) and self.cursor is not None:
+             result: list[tuple[Any, ...]] = self.cursor.fetchall()
+             return result
+         return []
+
+     def commit(self) -> None:
+         """Commit pending transactions."""
+         if self.connection:
+             self.connection.commit()
+
+     def rollback(self) -> None:
+         """Rollback pending transactions."""
+         if self.connection:
+             self.connection.rollback()
+
+     def __enter__(self) -> "Database":
+         """Context manager entry."""
+         self.connect()
+         return self
+
+     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         """Context manager exit."""
+         self.disconnect()
@@ -0,0 +1,5 @@
+ Metadata-Version: 2.4
+ Name: airopa-automation
+ Version: 0.1.0
+ Summary: AI-powered automation workflows
+ Requires-Python: >=3.12
@@ -0,0 +1,12 @@
+ README.md
+ pyproject.toml
+ airopa_automation/__init__.py
+ airopa_automation/agents.py
+ airopa_automation/config.py
+ airopa_automation/database.py
+ airopa_automation.egg-info/PKG-INFO
+ airopa_automation.egg-info/SOURCES.txt
+ airopa_automation.egg-info/dependency_links.txt
+ airopa_automation.egg-info/top_level.txt
+ tests/test_agents.py
+ tests/test_config.py
@@ -0,0 +1 @@
+ airopa_automation
@@ -0,0 +1,31 @@
+ [build-system]
+ requires = ["setuptools>=61.0"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "airopa-automation"
+ version = "0.1.0"
+ description = "AI-powered automation workflows"
+ requires-python = ">=3.12"
+
+ [tool.setuptools.packages.find]
+ include = ["airopa_automation*"]
+
+ [tool.black]
+ line-length = 88
+ target-version = ["py312"]
+
+ [tool.isort]
+ profile = "black"
+ line_length = 88
+
+ [tool.mypy]
+ python_version = "3.12"
+ ignore_missing_imports = true
+ follow_imports = "silent"
+ strict_optional = true
+ warn_redundant_casts = true
+ warn_unused_ignores = true
+ warn_no_return = true
+ warn_return_any = true
+ warn_unreachable = true
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,245 @@
+ from datetime import datetime
+ from unittest.mock import MagicMock, patch
+
+ from airopa_automation.agents import (
+     Article,
+     CategoryClassifierAgent,
+     ContentGeneratorAgent,
+     QualityScoreAgent,
+     ScraperAgent,
+ )
+
+
+ class TestArticle:
+     """Test Article model"""
+
+     def test_article_creation(self):
+         """Test creating an Article instance"""
+         article = Article(
+             title="Test Article",
+             url="http://example.com/article",
+             source="Test Source",
+             content="This is the article content.",
+         )
+
+         assert article.title == "Test Article"
+         assert article.url == "http://example.com/article"
+         assert article.source == "Test Source"
+         assert article.content == "This is the article content."
+         assert article.category == ""
+         assert article.quality_score == 0.0
+
+     def test_article_generate_hash(self):
+         """Test Article hash generation"""
+         article = Article(
+             title="Test Article",
+             url="http://example.com/article",
+             source="Test Source",
+             content="Content",
+         )
+
+         hash1 = article.generate_hash()
+         assert len(hash1) == 64  # SHA256 hex digest length
+
+         # Same article should generate same hash
+         article2 = Article(
+             title="Test Article",
+             url="http://example.com/article",
+             source="Test Source",
+             content="Different content",
+         )
+         hash2 = article2.generate_hash()
+         assert hash1 == hash2  # Hash is based on title, url, source
+
+     def test_article_with_optional_fields(self):
+         """Test Article with optional fields populated"""
+         article = Article(
+             title="Test",
+             url="http://test.com",
+             source="Source",
+             content="Content",
+             summary="Summary text",
+             published_date=datetime(2024, 1, 15),
+             category="policy",
+             country="France",
+             quality_score=0.8,
+         )
+
+         assert article.summary == "Summary text"
+         assert article.published_date == datetime(2024, 1, 15)
+         assert article.category == "policy"
+         assert article.country == "France"
+         assert article.quality_score == 0.8
+
+
+ class TestCategoryClassifierAgent:
+     """Test CategoryClassifierAgent"""
+
+     def test_classify_startup_category(self):
+         """Test classification of startup-related content"""
+         classifier = CategoryClassifierAgent()
+         article = Article(
+             title="New AI Startup Raises Funding",
+             url="http://test.com",
+             source="Test",
+             content="A new startup company has received investment.",
+         )
+
+         result = classifier.classify(article)
+
+         assert result.category == "startups"
+
+     def test_classify_policy_category(self):
+         """Test classification of policy-related content"""
+         classifier = CategoryClassifierAgent()
+         article = Article(
+             title="New AI Regulation Proposed",
+             url="http://test.com",
+             source="Test",
+             content="The government has proposed new policy for AI.",
+         )
+
+         result = classifier.classify(article)
+
+         assert result.category == "policy"
+
+     def test_classify_country(self):
+         """Test country classification"""
+         classifier = CategoryClassifierAgent()
+         article = Article(
+             title="AI Development in France",
+             url="http://test.com",
+             source="Test",
+             content="France is leading AI innovation.",
+         )
+
+         result = classifier.classify(article)
+
+         assert result.country == "France"
+
+     def test_classify_default_category(self):
+         """Test default category for unclassified content"""
+         classifier = CategoryClassifierAgent()
+         article = Article(
+             title="Random Title",
+             url="http://test.com",
+             source="Test",
+             content="Some random content without keywords.",
+         )
+
+         result = classifier.classify(article)
+
+         assert result.category == "stories"
+
+
+ class TestQualityScoreAgent:
+     """Test QualityScoreAgent"""
+
+     def test_quality_score_short_content(self):
+         """Test quality score for short content"""
+         scorer = QualityScoreAgent()
+         article = Article(
+             title="Short",
+             url="http://test.com",
+             source="Test",
+             content="Very short content.",
+         )
+
+         result = scorer.assess_quality(article)
+
+         assert result.quality_score < 0.5
+
+     def test_quality_score_good_content(self):
+         """Test quality score for good content"""
+         scorer = QualityScoreAgent()
+         long_content = " ".join(["word"] * 600)  # >500 words
+         article = Article(
+             title="A Good Article Title Here",
+             url="http://test.com",
+             source="Test",
+             content=long_content,
+             category="policy",
+             country="Europe",
+         )
+
+         result = scorer.assess_quality(article)
+
+         assert result.quality_score > 0.5
+
+     def test_quality_score_max_is_one(self):
+         """Test that quality score doesn't exceed 1.0"""
+         scorer = QualityScoreAgent()
+         article = Article(
+             title="Excellent Article With Many Words",
+             url="http://test.com",
+             source="europa.eu",  # credible source
+             content=" ".join(["word"] * 1000),
+             category="policy",
+             country="France",
+         )
+
+         result = scorer.assess_quality(article)
+
+         assert result.quality_score <= 1.0
+
+
+ class TestScraperAgent:
+     """Test ScraperAgent"""
+
+     def test_scraper_init(self):
+         """Test ScraperAgent initialization"""
+         scraper = ScraperAgent()
+
+         assert scraper.session is not None
+         assert "User-Agent" in scraper.session.headers
+
+     @patch("airopa_automation.agents.feedparser.parse")
+     def test_scrape_rss_feeds_empty(self, mock_parse):
+         """Test RSS scraping with empty config"""
+         mock_parse.return_value = MagicMock(entries=[])
+
+         with patch("airopa_automation.agents.config") as mock_config:
+             mock_config.scraper.rss_feeds = []
+             mock_config.scraper.user_agent = "Test"
+
+             scraper = ScraperAgent()
+             articles = scraper.scrape_rss_feeds()
+
+             assert articles == []
+
+
+ class TestContentGeneratorAgent:
+     """Test ContentGeneratorAgent"""
+
+     def test_content_generator_init(self):
+         """Test ContentGeneratorAgent initialization"""
+         with patch("airopa_automation.agents.config") as mock_config:
+             mock_config.content.output_dir = "/tmp/test_output"
+
+             generator = ContentGeneratorAgent()
+
+             assert generator.output_dir.exists()
+
+     def test_generate_frontmatter(self):
+         """Test frontmatter generation"""
+         with patch("airopa_automation.agents.config") as mock_config:
+             mock_config.content.output_dir = "/tmp/test_output"
+             mock_config.content.default_author = "Test Author"
+             mock_config.content.default_cover_image = "/test.jpg"
+
+             generator = ContentGeneratorAgent()
+             article = Article(
+                 title="Test Article",
+                 url="http://test.com",
+                 source="Test Source",
+                 content="Content",
+                 category="policy",
+                 published_date=datetime(2024, 1, 15),
+             )
+
+             frontmatter = generator._generate_frontmatter(article)
+
+             assert "title:" in frontmatter
+             assert "Test Article" in frontmatter
+             assert "policy" in frontmatter
+             assert "---" in frontmatter
@@ -0,0 +1,111 @@
+ from airopa_automation.config import (
+     Config,
+     ContentConfig,
+     DatabaseConfig,
+     GitConfig,
+     ScraperConfig,
+ )
+
+
+ def test_scraper_config_defaults():
+     """Test ScraperConfig default values"""
+     config = ScraperConfig()
+
+     assert len(config.rss_feeds) == 4  # Updated to 4 RSS feeds
+     assert len(config.web_sources) == 3  # Updated to 3 web sources
+     assert config.max_articles_per_source == 10
+     assert config.rate_limit_delay == 1.0
+     assert "AIropaBot" in config.user_agent
+     # Test that the new URLs are present
+     assert any("sifted.eu" in url for url in config.rss_feeds)
+     assert any("tech.eu" in url for url in config.rss_feeds)
+     assert any("european-champions.org" in url for url in config.rss_feeds)
+
+
+ def test_scraper_config_custom():
+     """Test ScraperConfig with custom values"""
+     config = ScraperConfig(
+         rss_feeds=["http://test.com/rss"],
+         web_sources=["http://test.com"],
+         max_articles_per_source=5,
+         rate_limit_delay=2.0,
+         user_agent="Test Agent/1.0",
+     )
+
+     assert config.rss_feeds == ["http://test.com/rss"]
+     assert config.web_sources == ["http://test.com"]
+     assert config.max_articles_per_source == 5
+     assert config.rate_limit_delay == 2.0
+     assert config.user_agent == "Test Agent/1.0"
+
+
+ def test_database_config_defaults():
+     """Test DatabaseConfig default values"""
+     config = DatabaseConfig()
+
+     assert config.db_path == "database/airopa.db"
+     assert config.max_connections == 5
+     assert config.timeout == 10.0
+
+
+ def test_content_config_defaults():
+     """Test ContentConfig default values"""
+     config = ContentConfig()
+
+     assert "content/post" in config.output_dir
+     assert config.default_author == "AIropa Bot"
+     assert config.default_cover_image != ""
+
+
+ def test_git_config_defaults():
+     """Test GitConfig default values"""
+     config = GitConfig()
+
+     assert config.repo_path == ".."
+     assert "content" in config.commit_message.lower()
+     assert config.author_name == "AIropa Bot"
+     assert "@" in config.author_email
+
+
+ def test_git_config_custom():
+     """Test GitConfig with custom values"""
+     config = GitConfig(
+         repo_path="./repo",
+         commit_message="Test commit message",
+         author_name="Test Author",
+         author_email="test@example.com",
+     )
+
+     assert config.repo_path == "./repo"
+     assert config.commit_message == "Test commit message"
+     assert config.author_name == "Test Author"
+     assert config.author_email == "test@example.com"
+
+
+ def test_full_config():
+     """Test full Config integration"""
+     config = Config()
+
+     # Test that all sub-configs are present
+     assert config.scraper is not None
+     assert config.ai is not None
+     assert config.database is not None
+     assert config.content is not None
+     assert config.git is not None
+
+     # Test some default values
+     assert config.scraper.max_articles_per_source == 10
+     assert config.database.db_path == "database/airopa.db"
+
+
+ def test_config_override():
+     """Test config with overridden sub-configs"""
+     config = Config(
+         scraper=ScraperConfig(max_articles_per_source=20),
+         git=GitConfig(author_name="Custom Bot"),
+     )
+
+     assert config.scraper.max_articles_per_source == 20
+     assert config.git.author_name == "Custom Bot"
+     # Other defaults should remain
+     assert config.database.db_path == "database/airopa.db"