airopa-automation 0.1.0 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airopa_automation-0.1.0/PKG-INFO +5 -0
- airopa_automation-0.1.0/README.md +112 -0
- airopa_automation-0.1.0/airopa_automation/__init__.py +26 -0
- airopa_automation-0.1.0/airopa_automation/agents.py +352 -0
- airopa_automation-0.1.0/airopa_automation/config.py +78 -0
- airopa_automation-0.1.0/airopa_automation/database.py +146 -0
- airopa_automation-0.1.0/airopa_automation.egg-info/PKG-INFO +5 -0
- airopa_automation-0.1.0/airopa_automation.egg-info/SOURCES.txt +12 -0
- airopa_automation-0.1.0/airopa_automation.egg-info/dependency_links.txt +1 -0
- airopa_automation-0.1.0/airopa_automation.egg-info/top_level.txt +1 -0
- airopa_automation-0.1.0/pyproject.toml +31 -0
- airopa_automation-0.1.0/setup.cfg +4 -0
- airopa_automation-0.1.0/tests/test_agents.py +245 -0
- airopa_automation-0.1.0/tests/test_config.py +111 -0

@@ -0,0 +1,112 @@ airopa_automation-0.1.0/README.md
# AIropa Automation Layer

AIropa is an AI-powered automation framework designed to streamline and automate complex workflows.

## 🚀 Quick Start

### Prerequisites
- Python 3.7+
- SQLite (included with Python)

### Installation

```bash
# Clone the repository
git clone https://github.com/your-repo/airopa-automation.git
cd airopa-automation

# Install dependencies
pip install -r requirements.txt

# Run the demo
python main.py
```

## 📦 Features

- **Agent Framework**: Base classes for creating custom automation agents
- **Configuration Management**: Flexible configuration system with file and environment variable support
- **Database Integration**: SQLite support with easy extensibility to other databases
- **Task Management**: Track and manage automation tasks and their execution history

## 🏗️ Architecture

```
airopa-automation/
├── airopa_automation/        # Core package
│   ├── __init__.py           # Package initialization
│   ├── agents.py             # Agent base classes
│   ├── config.py             # Configuration management
│   └── database.py           # Database operations
├── database/                 # Database files
│   └── schema.sql            # Database schema
├── main.py                   # Main entry point
├── README.md                 # Documentation
└── requirements.txt          # Dependencies
```

## 🤖 Creating Agents

```python
from airopa_automation.agents import BaseAgent

class MyCustomAgent(BaseAgent):
    def execute(self, *args, **kwargs):
        # Your automation logic here
        return {"status": "completed", "result": "success"}

# Usage
agent = MyCustomAgent(name="my_agent", description="My custom automation agent")
result = agent.execute(task_data)
```
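
Note that `BaseAgent` is not part of the `agents.py` shipped in this 0.1.0 sdist; the module defines concrete agents instead. A minimal pipeline sketch using only the classes this version actually exports (the loop and the 0.5 quality threshold are illustrative assumptions, not taken from the package):

```python
from airopa_automation import (
    CategoryClassifierAgent,
    ContentGeneratorAgent,
    QualityScoreAgent,
    ScraperAgent,
)

scraper = ScraperAgent()
classifier = CategoryClassifierAgent()
scorer = QualityScoreAgent()
generator = ContentGeneratorAgent()

for article in scraper.scrape_rss_feeds():
    article = classifier.classify(article)    # fills in category and country
    article = scorer.assess_quality(article)  # fills in quality_score (0.0-1.0)
    if article.quality_score >= 0.5:          # illustrative threshold
        path = generator.generate_markdown(article)
        if path:
            print(f"Wrote {path}")
```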

## 🔧 Configuration

Create a `config.json` file:

```json
{
  "debug": true,
  "log_level": "DEBUG",
  "database": {
    "type": "sqlite",
    "path": "database/airopa.db"
  },
  "agents": {
    "default_timeout": 120
  }
}
```

Or use environment variables:

```bash
export AIROPA_DEBUG=true
export AIROPA_DATABASE__TYPE=sqlite
export AIROPA_DATABASE__PATH=database/airopa.db
```
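
As a point of reference for this release: the shipped `config.py` (included below in this diff) reads only `DEBUG` and `GROQ_API_KEY` from the environment via `python-dotenv`, so the `AIROPA_*` variables above appear to be a planned interface rather than implemented behaviour. A minimal sketch of how the module-level `config` object is consumed in 0.1.0:

```python
from airopa_automation.config import config, ensure_directories

# Typed sub-configs defined in config.py
print(config.scraper.max_articles_per_source)  # 10 by default
print(config.database.db_path)                 # "database/airopa.db"
print(config.content.output_dir)               # "../airopa/src/content/post"
print(config.debug)                            # True only when DEBUG=true is set

# Creates the content output and database directories declared in the config
ensure_directories()
```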

## 📊 Database

The system includes a SQLite database with tables for:
- Tasks (automation tasks)
- Task executions (execution history)
- Agents (registered automation agents)
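
A minimal sketch of the bundled `Database` wrapper (`airopa_automation/database.py`, shown later in this diff). The `tasks` table definition is a hypothetical stand-in: `database/schema.sql` is referenced by the architecture section but is not part of this sdist, so the real column layout may differ.

```python
from airopa_automation import Database

# The wrapper takes a plain dict; "type" and "path" mirror the config.json keys above.
db = Database({"type": "sqlite", "path": "database/airopa.db"})

with db:  # __enter__ connects, __exit__ disconnects
    db.execute(
        "CREATE TABLE IF NOT EXISTS tasks (id INTEGER PRIMARY KEY, name TEXT, status TEXT)"
    )
    db.execute("INSERT INTO tasks (name, status) VALUES (?, ?)", ("demo", "pending"))
    db.commit()  # __exit__ does not commit, so commit explicitly
    print(db.fetch_all("SELECT id, name, status FROM tasks"))
```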

## 🎯 Roadmap

- [x] Core agent framework
- [x] Configuration management
- [x] Database integration
- [ ] Advanced agent types
- [ ] Web interface
- [ ] API endpoints
- [ ] Scheduling system

## 🤝 Contributing

Contributions are welcome! Please open issues and pull requests.

## 📄 License

MIT License - see LICENSE file for details.

@@ -0,0 +1,26 @@ airopa_automation-0.1.0/airopa_automation/__init__.py
"""
AIropa Automation Layer - Core Package

This package provides the foundation for AI-powered automation workflows.
"""

from .agents import (
    CategoryClassifierAgent,
    ContentGeneratorAgent,
    GitCommitAgent,
    QualityScoreAgent,
    ScraperAgent,
)
from .config import Config
from .database import Database

__version__ = "0.1.0"
__all__ = [
    "ScraperAgent",
    "CategoryClassifierAgent",
    "QualityScoreAgent",
    "ContentGeneratorAgent",
    "GitCommitAgent",
    "Config",
    "Database",
]

@@ -0,0 +1,352 @@ airopa_automation-0.1.0/airopa_automation/agents.py
# AIropa Automation Agents - Base Classes

import hashlib
import time
from datetime import datetime
from pathlib import Path
from typing import List, Optional

import feedparser
import requests
from bs4 import BeautifulSoup
from newspaper import Article as NewspaperArticle
from pydantic import BaseModel
from slugify import slugify

from airopa_automation.config import config


class Article(BaseModel):
    title: str
    url: str
    source: str
    content: str
    summary: str = ""
    published_date: Optional[datetime] = None
    scraped_date: datetime = datetime.now()
    category: str = ""
    country: str = ""
    quality_score: float = 0.0

    def generate_hash(self) -> str:
        """Generate a unique hash for this article"""
        hash_input = f"{self.title}{self.url}{self.source}".encode("utf-8")
        return hashlib.sha256(hash_input).hexdigest()


class ScraperAgent:
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": config.scraper.user_agent,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",  # noqa: E501
                "Accept-Language": "en-US,en;q=0.5",
            }
        )

    def scrape_rss_feeds(self) -> List[Article]:
        """Scrape articles from RSS feeds"""
        articles = []

        for feed_url in config.scraper.rss_feeds:
            try:
                feed = feedparser.parse(feed_url)

                for entry in feed.entries[: config.scraper.max_articles_per_source]:
                    try:
                        article = Article(
                            title=entry.get("title", "No title"),
                            url=entry.get("link", ""),
                            source=feed.feed.get("title", feed_url),
                            content=self._extract_article_content(
                                entry.get("link", "")
                            ),
                            summary=entry.get("summary", ""),
                            published_date=self._parse_date(entry.get("published", "")),
                            scraped_date=datetime.now(),
                        )
                        articles.append(article)

                        # Rate limiting
                        time.sleep(config.scraper.rate_limit_delay)

                    except Exception as e:
                        print(
                            f"Error processing RSS entry {entry.get('title', 'unknown')}: {e}"  # noqa: E501
                        )
                        continue

            except Exception as e:
                print(f"Error scraping RSS feed {feed_url}: {e}")
                continue

        return articles

    def scrape_web_sources(self) -> List[Article]:
        """Scrape articles from web sources"""
        articles = []

        for source_url in config.scraper.web_sources:
            try:
                response = self.session.get(source_url, timeout=10)
                response.raise_for_status()

                soup = BeautifulSoup(response.text, "html.parser")
                article_links = self._extract_article_links(soup, source_url)

                for link in article_links[: config.scraper.max_articles_per_source]:
                    try:
                        article = self._scrape_article_page(link, source_url)
                        if article:
                            articles.append(article)

                        # Rate limiting
                        time.sleep(config.scraper.rate_limit_delay)

                    except Exception as e:
                        print(f"Error scraping article {link}: {e}")
                        continue

            except Exception as e:
                print(f"Error accessing web source {source_url}: {e}")
                continue

        return articles

    def _extract_article_links(self, soup: BeautifulSoup, source_url: str) -> List[str]:
        """Extract article links from a webpage"""
        links = []

        # Look for common article link patterns
        for a in soup.find_all("a", href=True):
            href = a["href"]
            if any(
                keyword in href.lower()
                for keyword in ["article", "news", "post", "blog"]
            ):
                if href.startswith("http"):
                    links.append(href)
                else:
                    # Handle relative URLs
                    from urllib.parse import urljoin

                    links.append(urljoin(source_url, href))

        return list(set(links))  # Remove duplicates

    def _scrape_article_page(self, url: str, source: str) -> Optional[Article]:
        """Scrape content from a single article page"""
        try:
            # Use newspaper3k for article extraction
            newspaper_article = NewspaperArticle(url)
            newspaper_article.download()
            newspaper_article.parse()

            return Article(
                title=newspaper_article.title,
                url=url,
                source=source,
                content=newspaper_article.text,
                summary=newspaper_article.summary,
                published_date=newspaper_article.publish_date,
                scraped_date=datetime.now(),
            )

        except Exception as e:
            print(f"Error scraping article page {url}: {e}")
            return None

    def _extract_article_content(self, url: str) -> str:
        """Extract main content from an article URL"""
        try:
            newspaper_article = NewspaperArticle(url)
            newspaper_article.download()
            newspaper_article.parse()
            return str(newspaper_article.text)
        except Exception as e:
            print(f"Error extracting content from {url}: {e}")
            return ""

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Parse various date formats"""
        if not date_str:
            return None

        # Try multiple date formats
        from dateutil import parser as dateutil_parser

        try:
            parsed: datetime = dateutil_parser.parse(date_str)
            return parsed
        except Exception:
            return None


class CategoryClassifierAgent:
    def __init__(self):
        # Initialize AI client (will be implemented)
        pass

    def classify(self, article: Article) -> Article:
        """Classify article into appropriate category"""
        # This will use AI/ML for classification
        # For now, implement basic keyword-based classification

        title_lower = article.title.lower()
        content_lower = article.content.lower()

        # Category classification
        if any(
            keyword in title_lower or keyword in content_lower
            for keyword in ["startup", "company", "funding", "investment"]
        ):
            article.category = "startups"
        elif any(
            keyword in title_lower or keyword in content_lower
            for keyword in ["policy", "regulation", "law", "act", "government"]
        ):
            article.category = "policy"
        elif any(
            country in title_lower or country in content_lower
            for country in ["france", "germany", "netherlands", "europe", "eu"]
        ):
            article.category = "country"
        else:
            article.category = "stories"

        # Country classification
        if "france" in title_lower or "france" in content_lower:
            article.country = "France"
        elif "germany" in title_lower or "germany" in content_lower:
            article.country = "Germany"
        elif "netherlands" in title_lower or "netherlands" in content_lower:
            article.country = "Netherlands"
        elif "europe" in title_lower or "eu" in title_lower:
            article.country = "Europe"
        else:
            article.country = ""

        return article


class QualityScoreAgent:
    def __init__(self):
        pass

    def assess_quality(self, article: Article) -> Article:
        """Assess article quality and relevance"""
        # Basic quality scoring algorithm
        score = 0.0

        # Title quality
        if len(article.title.split()) > 3:
            score += 0.2

        # Content length
        word_count = len(article.content.split())
        if word_count > 200:
            score += 0.3
        if word_count > 500:
            score += 0.2

        # Source credibility
        if any(source in article.source.lower() for source in ["europa.eu", "airopa"]):
            score += 0.3

        # Category relevance
        if article.category:
            score += 0.1

        # Country relevance
        if article.country:
            score += 0.1

        article.quality_score = min(score, 1.0)
        return article


class ContentGeneratorAgent:
    def __init__(self):
        self.output_dir = Path(config.content.output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate_markdown(self, article: Article) -> Optional[Path]:
        """Generate markdown file for an article"""
        try:
            # Generate filename
            title_slug: str = slugify(article.title)
            date_str = (
                article.published_date.strftime("%Y-%m-%d")
                if article.published_date
                else datetime.now().strftime("%Y-%m-%d")
            )
            filename = f"{date_str}-{title_slug}.md"
            filepath: Path = self.output_dir / filename

            # Generate frontmatter
            frontmatter = self._generate_frontmatter(article)

            # Write markdown file
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(frontmatter)
                f.write(f"\n\n{article.content}")

            return filepath

        except Exception as e:
            print(f"Error generating markdown for {article.title}: {e}")
            return None

    def _generate_frontmatter(self, article: Article) -> str:
        """Generate YAML frontmatter for markdown file"""
        frontmatter = "---\n"
        frontmatter += f'title: "{article.title}"\n'
        frontmatter += f"date: \"{article.published_date.strftime('%Y-%m-%d') if article.published_date else datetime.now().strftime('%Y-%m-%d')}\"\n"  # noqa: E501
        frontmatter += f'author: "{config.content.default_author}"\n'
        frontmatter += f'source: "{article.source}"\n'
        frontmatter += f'url: "{article.url}"\n'
        frontmatter += f'pillar: "{article.category}"\n'

        if article.country:
            frontmatter += f'country: "{article.country}"\n'

        if article.summary:
            frontmatter += f'description: "{article.summary[:160]}"\n'

        frontmatter += f'coverImage: "{config.content.default_cover_image}"\n'
        frontmatter += "isFeatured: false\n"
        frontmatter += "isAiGenerated: true\n"
        frontmatter += "---"

        return frontmatter


class GitCommitAgent:
    def __init__(self):
        import git

        self.repo_path = Path(config.git.repo_path)
        self.repo = git.Repo(self.repo_path)

    def commit_new_content(self, files: List[Path]) -> bool:
        """Commit new content files to git repository"""
        try:
            # Add files to git
            for file in files:
                relative_path = file.relative_to(self.repo_path)
                self.repo.index.add([str(relative_path)])

            # Commit changes
            import git

            self.repo.index.commit(
                config.git.commit_message,
                author=git.Actor(config.git.author_name, config.git.author_email),
            )

            return True

        except Exception as e:
            print(f"Error committing files to git: {e}")
            return False
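
`GitCommitAgent` above is the one shipped agent the README does not demonstrate. A sketch of how it is presumably fed (the orchestrating `main.py` mentioned in the README is not part of this sdist, so the wiring below is an assumption; it also requires the package to sit inside a git checkout, since `config.git.repo_path` defaults to ".."):

```python
from pathlib import Path

from airopa_automation import ContentGeneratorAgent, GitCommitAgent, ScraperAgent

generator = ContentGeneratorAgent()
committer = GitCommitAgent()  # opens git.Repo(config.git.repo_path)

new_files: list[Path] = []
for article in ScraperAgent().scrape_rss_feeds():
    path = generator.generate_markdown(article)
    if path is not None:
        new_files.append(path)

# commit_new_content() stages the files relative to the repo root and commits
# them with the author and message configured in GitConfig, returning True on success.
if new_files and committer.commit_new_content(new_files):
    print(f"Committed {len(new_files)} new article(s)")
```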

@@ -0,0 +1,78 @@ airopa_automation-0.1.0/airopa_automation/config.py
# AIropa Automation Configuration

import os
from pathlib import Path

from dotenv import load_dotenv
from pydantic import BaseModel

# Load environment variables
load_dotenv()


class ScraperConfig(BaseModel):
    rss_feeds: list[str] = [
        "https://sifted.eu/feed/?post_type=article",
        "https://tech.eu/category/deep-tech/feed",
        "https://european-champions.org/feed",
        "https://tech.eu/category/robotics/feed",
    ]
    web_sources: list[str] = [
        "https://sifted.eu",
        "https://tech.eu",
        "https://european-champions.org",
    ]
    max_articles_per_source: int = 10
    rate_limit_delay: float = 1.0  # seconds between requests
    user_agent: str = "AIropaBot/1.0 (+https://airopa.eu)"


class AIConfig(BaseModel):
    model: str = "llama3-70b-8192"
    temperature: float = 0.7
    max_tokens: int = 1024
    api_key: str = os.getenv("GROQ_API_KEY", "")
    # Note: AI features will be limited due to Python 3.13 compatibility issues


class DatabaseConfig(BaseModel):
    db_path: str = "database/airopa.db"
    max_connections: int = 5
    timeout: float = 10.0


class ContentConfig(BaseModel):
    output_dir: str = "../airopa/src/content/post"
    default_author: str = "AIropa Bot"
    default_cover_image: str = "/assets/featured-story.jpg"


class GitConfig(BaseModel):
    repo_path: str = ".."
    commit_message: str = "chore(content): add automated AI news articles"
    author_name: str = "AIropa Bot"
    author_email: str = "bot@airopa.eu"


class Config(BaseModel):
    scraper: ScraperConfig = ScraperConfig()
    ai: AIConfig = AIConfig()
    database: DatabaseConfig = DatabaseConfig()
    content: ContentConfig = ContentConfig()
    git: GitConfig = GitConfig()
    debug: bool = os.getenv("DEBUG", "false").lower() == "true"


# Global configuration instance
config = Config()


def ensure_directories() -> None:
    """Ensure required directories exist"""
    Path(config.content.output_dir).mkdir(parents=True, exist_ok=True)
    Path(config.database.db_path).parent.mkdir(parents=True, exist_ok=True)


if __name__ == "__main__":
    ensure_directories()
    print("Configuration loaded successfully")

@@ -0,0 +1,146 @@ airopa_automation-0.1.0/airopa_automation/database.py
"""
Database Module - Database connectivity and operations

This module provides a unified interface for database operations
across different database backends (SQLite, PostgreSQL, etc.).
"""

import os
import sqlite3
from typing import Any, Optional


class Database:
    """
    Database connection and operations manager.

    Provides a unified interface for database operations with support
    for multiple database backends.
    """

    def __init__(self, config: dict[str, Any]):
        """
        Initialize database connection.

        Args:
            config (dict[str, Any]): Database configuration
        """
        self.config = config
        self.connection: Optional[sqlite3.Connection] = None
        self.cursor: Optional[sqlite3.Cursor] = None

    def connect(self) -> bool:
        """
        Establish database connection.

        Returns:
            bool: True if connection successful, False otherwise
        """
        try:
            db_type = self.config.get("type", "sqlite")

            if db_type == "sqlite":
                db_path = self.config.get("path", "database/airopa.db")
                # Ensure directory exists
                os.makedirs(os.path.dirname(db_path), exist_ok=True)
                self.connection = sqlite3.connect(db_path)
                self.cursor = self.connection.cursor()
                return True

            raise ValueError(f"Unsupported database type: {db_type}")

        except Exception as e:
            print(f"Error connecting to database: {e}")
            return False

    def disconnect(self) -> None:
        """Close database connection."""
        if self.connection:
            self.connection.close()
            self.connection = None
            self.cursor = None

    def execute(self, query: str, params: tuple[Any, ...] | None = None) -> bool:
        """
        Execute a SQL query.

        Args:
            query (str): SQL query to execute
            params (tuple[Any, ...] | None): Parameters for the query

        Returns:
            bool: True if execution successful, False otherwise
        """
        try:
            if not self.connection:
                if not self.connect():
                    return False

            if self.cursor is None:
                return False

            if params:
                self.cursor.execute(query, params)
            else:
                self.cursor.execute(query)

            return True

        except Exception as e:
            print(f"Error executing query: {e}")
            return False

    def fetch_one(
        self, query: str, params: tuple[Any, ...] | None = None
    ) -> Optional[tuple[Any, ...]]:
        """
        Execute query and fetch one result.

        Args:
            query (str): SQL query to execute
            params (tuple[Any, ...] | None): Parameters for the query

        Returns:
            Optional[tuple[Any, ...]]: First result row or None
        """
        if self.execute(query, params) and self.cursor is not None:
            result: Optional[tuple[Any, ...]] = self.cursor.fetchone()
            return result
        return None

    def fetch_all(
        self, query: str, params: tuple[Any, ...] | None = None
    ) -> list[tuple[Any, ...]]:
        """
        Execute query and fetch all results.

        Args:
            query (str): SQL query to execute
            params (tuple[Any, ...] | None): Parameters for the query

        Returns:
            list[tuple[Any, ...]]: All result rows
        """
        if self.execute(query, params) and self.cursor is not None:
            result: list[tuple[Any, ...]] = self.cursor.fetchall()
            return result
        return []

    def commit(self) -> None:
        """Commit pending transactions."""
        if self.connection:
            self.connection.commit()

    def rollback(self) -> None:
        """Rollback pending transactions."""
        if self.connection:
            self.connection.rollback()

    def __enter__(self):
        """Context manager entry."""
        self.connect()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Context manager exit."""
        self.disconnect()
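
One usage note on the class above: `connect()` and `execute()` report failure through their boolean return values rather than raising, so callers are expected to check them. A brief sketch outside the context-manager form (the `agents` table here is illustrative; the sdist ships no schema):

```python
from airopa_automation.database import Database

db = Database({"type": "sqlite", "path": "database/airopa.db"})

if not db.connect():  # False signals failure; no exception is raised
    raise SystemExit("could not open database/airopa.db")

if db.execute("CREATE TABLE IF NOT EXISTS agents (id INTEGER PRIMARY KEY, name TEXT)"):
    db.execute("INSERT INTO agents (name) VALUES (?)", ("scraper",))
    db.commit()

print(db.fetch_one("SELECT COUNT(*) FROM agents"))  # e.g. (1,)
db.disconnect()
```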

@@ -0,0 +1,12 @@ airopa_automation-0.1.0/airopa_automation.egg-info/SOURCES.txt
README.md
pyproject.toml
airopa_automation/__init__.py
airopa_automation/agents.py
airopa_automation/config.py
airopa_automation/database.py
airopa_automation.egg-info/PKG-INFO
airopa_automation.egg-info/SOURCES.txt
airopa_automation.egg-info/dependency_links.txt
airopa_automation.egg-info/top_level.txt
tests/test_agents.py
tests/test_config.py

@@ -0,0 +1 @@ airopa_automation-0.1.0/airopa_automation.egg-info/dependency_links.txt


@@ -0,0 +1 @@ airopa_automation-0.1.0/airopa_automation.egg-info/top_level.txt
airopa_automation

@@ -0,0 +1,31 @@ airopa_automation-0.1.0/pyproject.toml
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "airopa-automation"
version = "0.1.0"
description = "AI-powered automation workflows"
requires-python = ">=3.12"

[tool.setuptools.packages.find]
include = ["airopa_automation*"]

[tool.black]
line-length = 88
target-version = ["py312"]

[tool.isort]
profile = "black"
line_length = 88

[tool.mypy]
python_version = "3.12"
ignore_missing_imports = true
follow_imports = "silent"
strict_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_return_any = true
warn_unreachable = true

@@ -0,0 +1,245 @@ airopa_automation-0.1.0/tests/test_agents.py
from datetime import datetime
from unittest.mock import MagicMock, patch

from airopa_automation.agents import (
    Article,
    CategoryClassifierAgent,
    ContentGeneratorAgent,
    QualityScoreAgent,
    ScraperAgent,
)


class TestArticle:
    """Test Article model"""

    def test_article_creation(self):
        """Test creating an Article instance"""
        article = Article(
            title="Test Article",
            url="http://example.com/article",
            source="Test Source",
            content="This is the article content.",
        )

        assert article.title == "Test Article"
        assert article.url == "http://example.com/article"
        assert article.source == "Test Source"
        assert article.content == "This is the article content."
        assert article.category == ""
        assert article.quality_score == 0.0

    def test_article_generate_hash(self):
        """Test Article hash generation"""
        article = Article(
            title="Test Article",
            url="http://example.com/article",
            source="Test Source",
            content="Content",
        )

        hash1 = article.generate_hash()
        assert len(hash1) == 64  # SHA256 hex digest length

        # Same article should generate same hash
        article2 = Article(
            title="Test Article",
            url="http://example.com/article",
            source="Test Source",
            content="Different content",
        )
        hash2 = article2.generate_hash()
        assert hash1 == hash2  # Hash is based on title, url, source

    def test_article_with_optional_fields(self):
        """Test Article with optional fields populated"""
        article = Article(
            title="Test",
            url="http://test.com",
            source="Source",
            content="Content",
            summary="Summary text",
            published_date=datetime(2024, 1, 15),
            category="policy",
            country="France",
            quality_score=0.8,
        )

        assert article.summary == "Summary text"
        assert article.published_date == datetime(2024, 1, 15)
        assert article.category == "policy"
        assert article.country == "France"
        assert article.quality_score == 0.8


class TestCategoryClassifierAgent:
    """Test CategoryClassifierAgent"""

    def test_classify_startup_category(self):
        """Test classification of startup-related content"""
        classifier = CategoryClassifierAgent()
        article = Article(
            title="New AI Startup Raises Funding",
            url="http://test.com",
            source="Test",
            content="A new startup company has received investment.",
        )

        result = classifier.classify(article)

        assert result.category == "startups"

    def test_classify_policy_category(self):
        """Test classification of policy-related content"""
        classifier = CategoryClassifierAgent()
        article = Article(
            title="New AI Regulation Proposed",
            url="http://test.com",
            source="Test",
            content="The government has proposed new policy for AI.",
        )

        result = classifier.classify(article)

        assert result.category == "policy"

    def test_classify_country(self):
        """Test country classification"""
        classifier = CategoryClassifierAgent()
        article = Article(
            title="AI Development in France",
            url="http://test.com",
            source="Test",
            content="France is leading AI innovation.",
        )

        result = classifier.classify(article)

        assert result.country == "France"

    def test_classify_default_category(self):
        """Test default category for unclassified content"""
        classifier = CategoryClassifierAgent()
        article = Article(
            title="Random Title",
            url="http://test.com",
            source="Test",
            content="Some random content without keywords.",
        )

        result = classifier.classify(article)

        assert result.category == "stories"


class TestQualityScoreAgent:
    """Test QualityScoreAgent"""

    def test_quality_score_short_content(self):
        """Test quality score for short content"""
        scorer = QualityScoreAgent()
        article = Article(
            title="Short",
            url="http://test.com",
            source="Test",
            content="Very short content.",
        )

        result = scorer.assess_quality(article)

        assert result.quality_score < 0.5

    def test_quality_score_good_content(self):
        """Test quality score for good content"""
        scorer = QualityScoreAgent()
        long_content = " ".join(["word"] * 600)  # >500 words
        article = Article(
            title="A Good Article Title Here",
            url="http://test.com",
            source="Test",
            content=long_content,
            category="policy",
            country="Europe",
        )

        result = scorer.assess_quality(article)

        assert result.quality_score > 0.5

    def test_quality_score_max_is_one(self):
        """Test that quality score doesn't exceed 1.0"""
        scorer = QualityScoreAgent()
        article = Article(
            title="Excellent Article With Many Words",
            url="http://test.com",
            source="europa.eu",  # credible source
            content=" ".join(["word"] * 1000),
            category="policy",
            country="France",
        )

        result = scorer.assess_quality(article)

        assert result.quality_score <= 1.0


class TestScraperAgent:
    """Test ScraperAgent"""

    def test_scraper_init(self):
        """Test ScraperAgent initialization"""
        scraper = ScraperAgent()

        assert scraper.session is not None
        assert "User-Agent" in scraper.session.headers

    @patch("airopa_automation.agents.feedparser.parse")
    def test_scrape_rss_feeds_empty(self, mock_parse):
        """Test RSS scraping with empty config"""
        mock_parse.return_value = MagicMock(entries=[])

        with patch("airopa_automation.agents.config") as mock_config:
            mock_config.scraper.rss_feeds = []
            mock_config.scraper.user_agent = "Test"

            scraper = ScraperAgent()
            articles = scraper.scrape_rss_feeds()

            assert articles == []


class TestContentGeneratorAgent:
    """Test ContentGeneratorAgent"""

    def test_content_generator_init(self):
        """Test ContentGeneratorAgent initialization"""
        with patch("airopa_automation.agents.config") as mock_config:
            mock_config.content.output_dir = "/tmp/test_output"

            generator = ContentGeneratorAgent()

            assert generator.output_dir.exists()

    def test_generate_frontmatter(self):
        """Test frontmatter generation"""
        with patch("airopa_automation.agents.config") as mock_config:
            mock_config.content.output_dir = "/tmp/test_output"
            mock_config.content.default_author = "Test Author"
            mock_config.content.default_cover_image = "/test.jpg"

            generator = ContentGeneratorAgent()
            article = Article(
                title="Test Article",
                url="http://test.com",
                source="Test Source",
                content="Content",
                category="policy",
                published_date=datetime(2024, 1, 15),
            )

            frontmatter = generator._generate_frontmatter(article)

            assert "title:" in frontmatter
            assert "Test Article" in frontmatter
            assert "policy" in frontmatter
            assert "---" in frontmatter

@@ -0,0 +1,111 @@ airopa_automation-0.1.0/tests/test_config.py
from airopa_automation.config import (
    Config,
    ContentConfig,
    DatabaseConfig,
    GitConfig,
    ScraperConfig,
)


def test_scraper_config_defaults():
    """Test ScraperConfig default values"""
    config = ScraperConfig()

    assert len(config.rss_feeds) == 4  # Updated to 4 RSS feeds
    assert len(config.web_sources) == 3  # Updated to 3 web sources
    assert config.max_articles_per_source == 10
    assert config.rate_limit_delay == 1.0
    assert "AIropaBot" in config.user_agent
    # Test that the new URLs are present
    assert any("sifted.eu" in url for url in config.rss_feeds)
    assert any("tech.eu" in url for url in config.rss_feeds)
    assert any("european-champions.org" in url for url in config.rss_feeds)


def test_scraper_config_custom():
    """Test ScraperConfig with custom values"""
    config = ScraperConfig(
        rss_feeds=["http://test.com/rss"],
        web_sources=["http://test.com"],
        max_articles_per_source=5,
        rate_limit_delay=2.0,
        user_agent="Test Agent/1.0",
    )

    assert config.rss_feeds == ["http://test.com/rss"]
    assert config.web_sources == ["http://test.com"]
    assert config.max_articles_per_source == 5
    assert config.rate_limit_delay == 2.0
    assert config.user_agent == "Test Agent/1.0"


def test_database_config_defaults():
    """Test DatabaseConfig default values"""
    config = DatabaseConfig()

    assert config.db_path == "database/airopa.db"
    assert config.max_connections == 5
    assert config.timeout == 10.0


def test_content_config_defaults():
    """Test ContentConfig default values"""
    config = ContentConfig()

    assert "content/post" in config.output_dir
    assert config.default_author == "AIropa Bot"
    assert config.default_cover_image != ""


def test_git_config_defaults():
    """Test GitConfig default values"""
    config = GitConfig()

    assert config.repo_path == ".."
    assert "content" in config.commit_message.lower()
    assert config.author_name == "AIropa Bot"
    assert "@" in config.author_email


def test_git_config_custom():
    """Test GitConfig with custom values"""
    config = GitConfig(
        repo_path="./repo",
        commit_message="Test commit message",
        author_name="Test Author",
        author_email="test@example.com",
    )

    assert config.repo_path == "./repo"
    assert config.commit_message == "Test commit message"
    assert config.author_name == "Test Author"
    assert config.author_email == "test@example.com"


def test_full_config():
    """Test full Config integration"""
    config = Config()

    # Test that all sub-configs are present
    assert config.scraper is not None
    assert config.ai is not None
    assert config.database is not None
    assert config.content is not None
    assert config.git is not None

    # Test some default values
    assert config.scraper.max_articles_per_source == 10
    assert config.database.db_path == "database/airopa.db"


def test_config_override():
    """Test config with overridden sub-configs"""
    config = Config(
        scraper=ScraperConfig(max_articles_per_source=20),
        git=GitConfig(author_name="Custom Bot"),
    )

    assert config.scraper.max_articles_per_source == 20
    assert config.git.author_name == "Custom Bot"
    # Other defaults should remain
    assert config.database.db_path == "database/airopa.db"