airopa-automation 0.1.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.

airopa_automation/__init__.py
@@ -0,0 +1,26 @@
+ """
+ AIropa Automation Layer - Core Package
+
+ This package provides the foundation for AI-powered automation workflows.
+ """
+
+ from .agents import (
+     CategoryClassifierAgent,
+     ContentGeneratorAgent,
+     GitCommitAgent,
+     QualityScoreAgent,
+     ScraperAgent,
+ )
+ from .config import Config
+ from .database import Database
+
+ __version__ = "0.1.0"
+ __all__ = [
+     "ScraperAgent",
+     "CategoryClassifierAgent",
+     "QualityScoreAgent",
+     "ContentGeneratorAgent",
+     "GitCommitAgent",
+     "Config",
+     "Database",
+ ]
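
The package root simply re-exports the agent, configuration, and database classes. As a quick sanity check (assuming the wheel and its dependencies are installed in the current environment):

import airopa_automation

# Prints "0.1.0" and the exported names listed in __all__
print(airopa_automation.__version__)
print(airopa_automation.__all__)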

airopa_automation/agents.py
@@ -0,0 +1,352 @@
+ # AIropa Automation Agents - Base Classes
+
+ import hashlib
+ import time
+ from datetime import datetime
+ from pathlib import Path
+ from typing import List, Optional
+
+ import feedparser
+ import requests
+ from bs4 import BeautifulSoup
+ from newspaper import Article as NewspaperArticle
+ from pydantic import BaseModel
+ from slugify import slugify
+
+ from airopa_automation.config import config
+
+
+ class Article(BaseModel):
+     title: str
+     url: str
+     source: str
+     content: str
+     summary: str = ""
+     published_date: Optional[datetime] = None
+     scraped_date: datetime = datetime.now()
+     category: str = ""
+     country: str = ""
+     quality_score: float = 0.0
+
+     def generate_hash(self) -> str:
+         """Generate a unique hash for this article"""
+         hash_input = f"{self.title}{self.url}{self.source}".encode("utf-8")
+         return hashlib.sha256(hash_input).hexdigest()
+
+
+ class ScraperAgent:
+     def __init__(self):
+         self.session = requests.Session()
+         self.session.headers.update(
+             {
+                 "User-Agent": config.scraper.user_agent,
+                 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",  # noqa: E501
+                 "Accept-Language": "en-US,en;q=0.5",
+             }
+         )
+
+     def scrape_rss_feeds(self) -> List[Article]:
+         """Scrape articles from RSS feeds"""
+         articles = []
+
+         for feed_url in config.scraper.rss_feeds:
+             try:
+                 feed = feedparser.parse(feed_url)
+
+                 for entry in feed.entries[: config.scraper.max_articles_per_source]:
+                     try:
+                         article = Article(
+                             title=entry.get("title", "No title"),
+                             url=entry.get("link", ""),
+                             source=feed.feed.get("title", feed_url),
+                             content=self._extract_article_content(
+                                 entry.get("link", "")
+                             ),
+                             summary=entry.get("summary", ""),
+                             published_date=self._parse_date(entry.get("published", "")),
+                             scraped_date=datetime.now(),
+                         )
+                         articles.append(article)
+
+                         # Rate limiting
+                         time.sleep(config.scraper.rate_limit_delay)
+
+                     except Exception as e:
+                         print(
+                             f"Error processing RSS entry {entry.get('title', 'unknown')}: {e}"  # noqa: E501
+                         )
+                         continue
+
+             except Exception as e:
+                 print(f"Error scraping RSS feed {feed_url}: {e}")
+                 continue
+
+         return articles
+
+     def scrape_web_sources(self) -> List[Article]:
+         """Scrape articles from web sources"""
+         articles = []
+
+         for source_url in config.scraper.web_sources:
+             try:
+                 response = self.session.get(source_url, timeout=10)
+                 response.raise_for_status()
+
+                 soup = BeautifulSoup(response.text, "html.parser")
+                 article_links = self._extract_article_links(soup, source_url)
+
+                 for link in article_links[: config.scraper.max_articles_per_source]:
+                     try:
+                         article = self._scrape_article_page(link, source_url)
+                         if article:
+                             articles.append(article)
+
+                         # Rate limiting
+                         time.sleep(config.scraper.rate_limit_delay)
+
+                     except Exception as e:
+                         print(f"Error scraping article {link}: {e}")
+                         continue
+
+             except Exception as e:
+                 print(f"Error accessing web source {source_url}: {e}")
+                 continue
+
+         return articles
+
+     def _extract_article_links(self, soup: BeautifulSoup, source_url: str) -> List[str]:
+         """Extract article links from a webpage"""
+         links = []
+
+         # Look for common article link patterns
+         for a in soup.find_all("a", href=True):
+             href = a["href"]
+             if any(
+                 keyword in href.lower()
+                 for keyword in ["article", "news", "post", "blog"]
+             ):
+                 if href.startswith("http"):
+                     links.append(href)
+                 else:
+                     # Handle relative URLs
+                     from urllib.parse import urljoin
+
+                     links.append(urljoin(source_url, href))
+
+         return list(set(links))  # Remove duplicates
+
+     def _scrape_article_page(self, url: str, source: str) -> Optional[Article]:
+         """Scrape content from a single article page"""
+         try:
+             # Use newspaper3k for article extraction
+             newspaper_article = NewspaperArticle(url)
+             newspaper_article.download()
+             newspaper_article.parse()
+
+             return Article(
+                 title=newspaper_article.title,
+                 url=url,
+                 source=source,
+                 content=newspaper_article.text,
+                 summary=newspaper_article.summary,
+                 published_date=newspaper_article.publish_date,
+                 scraped_date=datetime.now(),
+             )
+
+         except Exception as e:
+             print(f"Error scraping article page {url}: {e}")
+             return None
+
+     def _extract_article_content(self, url: str) -> str:
+         """Extract main content from an article URL"""
+         try:
+             newspaper_article = NewspaperArticle(url)
+             newspaper_article.download()
+             newspaper_article.parse()
+             return str(newspaper_article.text)
+         except Exception as e:
+             print(f"Error extracting content from {url}: {e}")
+             return ""
+
+     def _parse_date(self, date_str: str) -> Optional[datetime]:
+         """Parse various date formats"""
+         if not date_str:
+             return None
+
+         # Try multiple date formats
+         from dateutil import parser as dateutil_parser
+
+         try:
+             parsed: datetime = dateutil_parser.parse(date_str)
+             return parsed
+         except Exception:
+             return None
+
+
+ class CategoryClassifierAgent:
+     def __init__(self):
+         # Initialize AI client (will be implemented)
+         pass
+
+     def classify(self, article: Article) -> Article:
+         """Classify article into appropriate category"""
+         # This will use AI/ML for classification
+         # For now, implement basic keyword-based classification
+
+         title_lower = article.title.lower()
+         content_lower = article.content.lower()
+
+         # Category classification
+         if any(
+             keyword in title_lower or keyword in content_lower
+             for keyword in ["startup", "company", "funding", "investment"]
+         ):
+             article.category = "startups"
+         elif any(
+             keyword in title_lower or keyword in content_lower
+             for keyword in ["policy", "regulation", "law", "act", "government"]
+         ):
+             article.category = "policy"
+         elif any(
+             country in title_lower or country in content_lower
+             for country in ["france", "germany", "netherlands", "europe", "eu"]
+         ):
+             article.category = "country"
+         else:
+             article.category = "stories"
+
+         # Country classification
+         if "france" in title_lower or "france" in content_lower:
+             article.country = "France"
+         elif "germany" in title_lower or "germany" in content_lower:
+             article.country = "Germany"
+         elif "netherlands" in title_lower or "netherlands" in content_lower:
+             article.country = "Netherlands"
+         elif "europe" in title_lower or "eu" in title_lower:
+             article.country = "Europe"
+         else:
+             article.country = ""
+
+         return article
+
+
+ class QualityScoreAgent:
+     def __init__(self):
+         pass
+
+     def assess_quality(self, article: Article) -> Article:
+         """Assess article quality and relevance"""
+         # Basic quality scoring algorithm
+         score = 0.0
+
+         # Title quality
+         if len(article.title.split()) > 3:
+             score += 0.2
+
+         # Content length
+         word_count = len(article.content.split())
+         if word_count > 200:
+             score += 0.3
+         if word_count > 500:
+             score += 0.2
+
+         # Source credibility
+         if any(source in article.source.lower() for source in ["europa.eu", "airopa"]):
+             score += 0.3
+
+         # Category relevance
+         if article.category:
+             score += 0.1
+
+         # Country relevance
+         if article.country:
+             score += 0.1
+
+         article.quality_score = min(score, 1.0)
+         return article
+
+
+ class ContentGeneratorAgent:
+     def __init__(self):
+         self.output_dir = Path(config.content.output_dir)
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+
+     def generate_markdown(self, article: Article) -> Optional[Path]:
+         """Generate markdown file for an article"""
+         try:
+             # Generate filename
+             title_slug: str = slugify(article.title)
+             date_str = (
+                 article.published_date.strftime("%Y-%m-%d")
+                 if article.published_date
+                 else datetime.now().strftime("%Y-%m-%d")
+             )
+             filename = f"{date_str}-{title_slug}.md"
+             filepath: Path = self.output_dir / filename
+
+             # Generate frontmatter
+             frontmatter = self._generate_frontmatter(article)
+
+             # Write markdown file
+             with open(filepath, "w", encoding="utf-8") as f:
+                 f.write(frontmatter)
+                 f.write(f"\n\n{article.content}")
+
+             return filepath
+
+         except Exception as e:
+             print(f"Error generating markdown for {article.title}: {e}")
+             return None
+
+     def _generate_frontmatter(self, article: Article) -> str:
+         """Generate YAML frontmatter for markdown file"""
+         frontmatter = "---\n"
+         frontmatter += f'title: "{article.title}"\n'
+         frontmatter += f"date: \"{article.published_date.strftime('%Y-%m-%d') if article.published_date else datetime.now().strftime('%Y-%m-%d')}\"\n"  # noqa: E501
+         frontmatter += f'author: "{config.content.default_author}"\n'
+         frontmatter += f'source: "{article.source}"\n'
+         frontmatter += f'url: "{article.url}"\n'
+         frontmatter += f'pillar: "{article.category}"\n'
+
+         if article.country:
+             frontmatter += f'country: "{article.country}"\n'
+
+         if article.summary:
+             frontmatter += f'description: "{article.summary[:160]}"\n'
+
+         frontmatter += f'coverImage: "{config.content.default_cover_image}"\n'
+         frontmatter += "isFeatured: false\n"
+         frontmatter += "isAiGenerated: true\n"
+         frontmatter += "---"
+
+         return frontmatter
+
+
+ class GitCommitAgent:
+     def __init__(self):
+         import git
+
+         self.repo_path = Path(config.git.repo_path)
+         self.repo = git.Repo(self.repo_path)
+
+     def commit_new_content(self, files: List[Path]) -> bool:
+         """Commit new content files to git repository"""
+         try:
+             # Add files to git
+             for file in files:
+                 relative_path = file.relative_to(self.repo_path)
+                 self.repo.index.add([str(relative_path)])
+
+             # Commit changes
+             import git
+
+             self.repo.index.commit(
+                 config.git.commit_message,
+                 author=git.Actor(config.git.author_name, config.git.author_email),
+             )
+
+             return True
+
+         except Exception as e:
+             print(f"Error committing files to git: {e}")
+             return False
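
Taken together, these classes form a scrape → classify → score → publish pipeline, although the wheel ships no orchestrator module of its own (see the RECORD file below). A minimal sketch of how they could be chained, assuming the dependencies are installed, the default output directory is writable, and using an illustrative quality threshold of 0.5 that is not defined anywhere in the package:

from airopa_automation.agents import (
    CategoryClassifierAgent,
    ContentGeneratorAgent,
    QualityScoreAgent,
    ScraperAgent,
)

scraper = ScraperAgent()
classifier = CategoryClassifierAgent()
scorer = QualityScoreAgent()
generator = ContentGeneratorAgent()  # creates config.content.output_dir if missing

# Scrape RSS feeds, enrich each article, and write out the ones that clear the bar.
for article in scraper.scrape_rss_feeds():
    article = classifier.classify(article)
    article = scorer.assess_quality(article)
    if article.quality_score >= 0.5:  # illustrative threshold, not part of the package
        path = generator.generate_markdown(article)
        print(f"Wrote {path}")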

airopa_automation/config.py
@@ -0,0 +1,78 @@
+ # AIropa Automation Configuration
+
+ import os
+ from pathlib import Path
+
+ from dotenv import load_dotenv
+ from pydantic import BaseModel
+
+ # Load environment variables
+ load_dotenv()
+
+
+ class ScraperConfig(BaseModel):
+     rss_feeds: list[str] = [
+         "https://sifted.eu/feed/?post_type=article",
+         "https://tech.eu/category/deep-tech/feed",
+         "https://european-champions.org/feed",
+         "https://tech.eu/category/robotics/feed",
+     ]
+     web_sources: list[str] = [
+         "https://sifted.eu",
+         "https://tech.eu",
+         "https://european-champions.org",
+     ]
+     max_articles_per_source: int = 10
+     rate_limit_delay: float = 1.0  # seconds between requests
+     user_agent: str = "AIropaBot/1.0 (+https://airopa.eu)"
+
+
+ class AIConfig(BaseModel):
+     model: str = "llama3-70b-8192"
+     temperature: float = 0.7
+     max_tokens: int = 1024
+     api_key: str = os.getenv("GROQ_API_KEY", "")
+     # Note: AI features will be limited due to Python 3.13 compatibility issues
+
+
+ class DatabaseConfig(BaseModel):
+     db_path: str = "database/airopa.db"
+     max_connections: int = 5
+     timeout: float = 10.0
+
+
+ class ContentConfig(BaseModel):
+     output_dir: str = "../airopa/src/content/post"
+     default_author: str = "AIropa Bot"
+     default_cover_image: str = "/assets/featured-story.jpg"
+
+
+ class GitConfig(BaseModel):
+     repo_path: str = ".."
+     commit_message: str = "chore(content): add automated AI news articles"
+     author_name: str = "AIropa Bot"
+     author_email: str = "bot@airopa.eu"
+
+
+ class Config(BaseModel):
+     scraper: ScraperConfig = ScraperConfig()
+     ai: AIConfig = AIConfig()
+     database: DatabaseConfig = DatabaseConfig()
+     content: ContentConfig = ContentConfig()
+     git: GitConfig = GitConfig()
+     debug: bool = os.getenv("DEBUG", "false").lower() == "true"
+
+
+ # Global configuration instance
+ config = Config()
+
+
+ def ensure_directories() -> None:
+     """Ensure required directories exist"""
+     Path(config.content.output_dir).mkdir(parents=True, exist_ok=True)
+     Path(config.database.db_path).parent.mkdir(parents=True, exist_ok=True)
+
+
+ if __name__ == "__main__":
+     ensure_directories()
+     print("Configuration loaded successfully")

airopa_automation/database.py
@@ -0,0 +1,146 @@
+ """
+ Database Module - Database connectivity and operations
+
+ This module provides a unified interface for database operations
+ across different database backends (SQLite, PostgreSQL, etc.).
+ """
+
+ import os
+ import sqlite3
+ from typing import Any, Optional
+
+
+ class Database:
+     """
+     Database connection and operations manager.
+
+     Provides a unified interface for database operations with support
+     for multiple database backends.
+     """
+
+     def __init__(self, config: dict[str, Any]):
+         """
+         Initialize database connection.
+
+         Args:
+             config (dict[str, Any]): Database configuration
+         """
+         self.config = config
+         self.connection: Optional[sqlite3.Connection] = None
+         self.cursor: Optional[sqlite3.Cursor] = None
+
+     def connect(self) -> bool:
+         """
+         Establish database connection.
+
+         Returns:
+             bool: True if connection successful, False otherwise
+         """
+         try:
+             db_type = self.config.get("type", "sqlite")
+
+             if db_type == "sqlite":
+                 db_path = self.config.get("path", "database/airopa.db")
+                 # Ensure directory exists
+                 os.makedirs(os.path.dirname(db_path), exist_ok=True)
+                 self.connection = sqlite3.connect(db_path)
+                 self.cursor = self.connection.cursor()
+                 return True
+
+             raise ValueError(f"Unsupported database type: {db_type}")
+
+         except Exception as e:
+             print(f"Error connecting to database: {e}")
+             return False
+
+     def disconnect(self) -> None:
+         """Close database connection."""
+         if self.connection:
+             self.connection.close()
+             self.connection = None
+             self.cursor = None
+
+     def execute(self, query: str, params: tuple[Any, ...] | None = None) -> bool:
+         """
+         Execute a SQL query.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             bool: True if execution successful, False otherwise
+         """
+         try:
+             if not self.connection:
+                 if not self.connect():
+                     return False
+
+             if self.cursor is None:
+                 return False
+
+             if params:
+                 self.cursor.execute(query, params)
+             else:
+                 self.cursor.execute(query)
+
+             return True
+
+         except Exception as e:
+             print(f"Error executing query: {e}")
+             return False
+
+     def fetch_one(
+         self, query: str, params: tuple[Any, ...] | None = None
+     ) -> Optional[tuple[Any, ...]]:
+         """
+         Execute query and fetch one result.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             Optional[tuple[Any, ...]]: First result row or None
+         """
+         if self.execute(query, params) and self.cursor is not None:
+             result: Optional[tuple[Any, ...]] = self.cursor.fetchone()
+             return result
+         return None
+
+     def fetch_all(
+         self, query: str, params: tuple[Any, ...] | None = None
+     ) -> list[tuple[Any, ...]]:
+         """
+         Execute query and fetch all results.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             list[tuple[Any, ...]]: All result rows
+         """
+         if self.execute(query, params) and self.cursor is not None:
+             result: list[tuple[Any, ...]] = self.cursor.fetchall()
+             return result
+         return []
+
+     def commit(self) -> None:
+         """Commit pending transactions."""
+         if self.connection:
+             self.connection.commit()
+
+     def rollback(self) -> None:
+         """Rollback pending transactions."""
+         if self.connection:
+             self.connection.rollback()
+
+     def __enter__(self):
+         """Context manager entry."""
+         self.connect()
+         return self
+
+     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         """Context manager exit."""
+         self.disconnect()
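
Because Database implements __enter__/__exit__, it can be used as a context manager around the SQLite backend. A minimal sketch; the table name and columns are illustrative, since the package does not define a schema:

from airopa_automation.database import Database

with Database({"type": "sqlite", "path": "database/airopa.db"}) as db:
    db.execute(
        "CREATE TABLE IF NOT EXISTS articles (url TEXT PRIMARY KEY, title TEXT)"
    )
    db.execute(
        "INSERT OR IGNORE INTO articles (url, title) VALUES (?, ?)",
        ("https://example.com/post", "Example article"),
    )
    db.commit()  # __exit__ only disconnects, so commit explicitly
    print(db.fetch_all("SELECT url, title FROM articles"))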

airopa_automation-0.1.0.dist-info/METADATA
@@ -0,0 +1,5 @@
+ Metadata-Version: 2.4
+ Name: airopa-automation
+ Version: 0.1.0
+ Summary: AI-powered automation workflows
+ Requires-Python: >=3.12

airopa_automation-0.1.0.dist-info/RECORD
@@ -0,0 +1,8 @@
+ airopa_automation/__init__.py,sha256=gTmfJIXfCh1_6EZWuQXkRjnELqjnAKRdu5by_f8g9u4,516
+ airopa_automation/agents.py,sha256=xAcrv7eH0oYACrqFoSLtY3oYSjfCRQCZuZLgGuuSM84,11891
+ airopa_automation/config.py,sha256=s9rU6NCeMskEN07iy86ohehnjH8thF9JffJEV5XTYvE,2183
+ airopa_automation/database.py,sha256=qQHaNlUgvu7jD34YztVsoXBJcU1MOyl-MkJ1d9TLM8k,4250
+ airopa_automation-0.1.0.dist-info/METADATA,sha256=IVY601nQpJAJSeMWXzubh-2n6qrafrbr8cjfkBheblg,126
+ airopa_automation-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ airopa_automation-0.1.0.dist-info/top_level.txt,sha256=BUlz2sUjwlNLgRKGEIBPa4Ju69E-foQKfTT2C_8eEdk,18
+ airopa_automation-0.1.0.dist-info/RECORD,,

airopa_automation-0.1.0.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.10.2)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+

airopa_automation-0.1.0.dist-info/top_level.txt
@@ -0,0 +1 @@
+ airopa_automation