airopa-automation 0.1.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.

airopa_automation/__init__.py
@@ -0,0 +1,26 @@
+ """
+ AIropa Automation Layer - Core Package
+
+ This package provides the foundation for AI-powered automation workflows.
+ """
+
+ from .agents import (
+     CategoryClassifierAgent,
+     ContentGeneratorAgent,
+     GitCommitAgent,
+     QualityScoreAgent,
+     ScraperAgent,
+ )
+ from .config import Config
+ from .database import Database
+
+ __version__ = "0.1.0"
+ __all__ = [
+     "ScraperAgent",
+     "CategoryClassifierAgent",
+     "QualityScoreAgent",
+     "ContentGeneratorAgent",
+     "GitCommitAgent",
+     "Config",
+     "Database",
+ ]
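
The package root simply re-exports the agent, configuration, and database classes. As a quick sanity check (assuming the wheel and its dependencies are installed in the current environment):

import airopa_automation

# Prints "0.1.0" and the exported names listed in __all__
print(airopa_automation.__version__)
print(airopa_automation.__all__)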

airopa_automation/agents.py
@@ -0,0 +1,352 @@
+ # AIropa Automation Agents - Base Classes
+
+ import hashlib
+ import time
+ from datetime import datetime
+ from pathlib import Path
+ from typing import List, Optional
+
+ import feedparser
+ import requests
+ from bs4 import BeautifulSoup
+ from newspaper import Article as NewspaperArticle
+ from pydantic import BaseModel
+ from slugify import slugify
+
+ from airopa_automation.config import config
+
+
+ class Article(BaseModel):
+     title: str
+     url: str
+     source: str
+     content: str
+     summary: str = ""
+     published_date: Optional[datetime] = None
+     scraped_date: datetime = datetime.now()
+     category: str = ""
+     country: str = ""
+     quality_score: float = 0.0
+
+     def generate_hash(self) -> str:
+         """Generate a unique hash for this article"""
+         hash_input = f"{self.title}{self.url}{self.source}".encode("utf-8")
+         return hashlib.sha256(hash_input).hexdigest()
+
+
+ class ScraperAgent:
+     def __init__(self):
+         self.session = requests.Session()
+         self.session.headers.update(
+             {
+                 "User-Agent": config.scraper.user_agent,
+                 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",  # noqa: E501
+                 "Accept-Language": "en-US,en;q=0.5",
+             }
+         )
+
+     def scrape_rss_feeds(self) -> List[Article]:
+         """Scrape articles from RSS feeds"""
+         articles = []
+
+         for feed_url in config.scraper.rss_feeds:
+             try:
+                 feed = feedparser.parse(feed_url)
+
+                 for entry in feed.entries[: config.scraper.max_articles_per_source]:
+                     try:
+                         article = Article(
+                             title=entry.get("title", "No title"),
+                             url=entry.get("link", ""),
+                             source=feed.feed.get("title", feed_url),
+                             content=self._extract_article_content(
+                                 entry.get("link", "")
+                             ),
+                             summary=entry.get("summary", ""),
+                             published_date=self._parse_date(entry.get("published", "")),
+                             scraped_date=datetime.now(),
+                         )
+                         articles.append(article)
+
+                         # Rate limiting
+                         time.sleep(config.scraper.rate_limit_delay)
+
+                     except Exception as e:
+                         print(
+                             f"Error processing RSS entry {entry.get('title', 'unknown')}: {e}"  # noqa: E501
+                         )
+                         continue
+
+             except Exception as e:
+                 print(f"Error scraping RSS feed {feed_url}: {e}")
+                 continue
+
+         return articles
+
+     def scrape_web_sources(self) -> List[Article]:
+         """Scrape articles from web sources"""
+         articles = []
+
+         for source_url in config.scraper.web_sources:
+             try:
+                 response = self.session.get(source_url, timeout=10)
+                 response.raise_for_status()
+
+                 soup = BeautifulSoup(response.text, "html.parser")
+                 article_links = self._extract_article_links(soup, source_url)
+
+                 for link in article_links[: config.scraper.max_articles_per_source]:
+                     try:
+                         article = self._scrape_article_page(link, source_url)
+                         if article:
+                             articles.append(article)
+
+                         # Rate limiting
+                         time.sleep(config.scraper.rate_limit_delay)
+
+                     except Exception as e:
+                         print(f"Error scraping article {link}: {e}")
+                         continue
+
+             except Exception as e:
+                 print(f"Error accessing web source {source_url}: {e}")
+                 continue
+
+         return articles
+
+     def _extract_article_links(self, soup: BeautifulSoup, source_url: str) -> List[str]:
+         """Extract article links from a webpage"""
+         links = []
+
+         # Look for common article link patterns
+         for a in soup.find_all("a", href=True):
+             href = a["href"]
+             if any(
+                 keyword in href.lower()
+                 for keyword in ["article", "news", "post", "blog"]
+             ):
+                 if href.startswith("http"):
+                     links.append(href)
+                 else:
+                     # Handle relative URLs
+                     from urllib.parse import urljoin
+
+                     links.append(urljoin(source_url, href))
+
+         return list(set(links))  # Remove duplicates
+
+     def _scrape_article_page(self, url: str, source: str) -> Optional[Article]:
+         """Scrape content from a single article page"""
+         try:
+             # Use newspaper3k for article extraction
+             newspaper_article = NewspaperArticle(url)
+             newspaper_article.download()
+             newspaper_article.parse()
+
+             return Article(
+                 title=newspaper_article.title,
+                 url=url,
+                 source=source,
+                 content=newspaper_article.text,
+                 summary=newspaper_article.summary,
+                 published_date=newspaper_article.publish_date,
+                 scraped_date=datetime.now(),
+             )
+
+         except Exception as e:
+             print(f"Error scraping article page {url}: {e}")
+             return None
+
+     def _extract_article_content(self, url: str) -> str:
+         """Extract main content from an article URL"""
+         try:
+             newspaper_article = NewspaperArticle(url)
+             newspaper_article.download()
+             newspaper_article.parse()
+             return str(newspaper_article.text)
+         except Exception as e:
+             print(f"Error extracting content from {url}: {e}")
+             return ""
+
+     def _parse_date(self, date_str: str) -> Optional[datetime]:
+         """Parse various date formats"""
+         if not date_str:
+             return None
+
+         # Try multiple date formats
+         from dateutil import parser as dateutil_parser
+
+         try:
+             parsed: datetime = dateutil_parser.parse(date_str)
+             return parsed
+         except Exception:
+             return None
+
+
+ class CategoryClassifierAgent:
+     def __init__(self):
+         # Initialize AI client (will be implemented)
+         pass
+
+     def classify(self, article: Article) -> Article:
+         """Classify article into appropriate category"""
+         # This will use AI/ML for classification
+         # For now, implement basic keyword-based classification
+
+         title_lower = article.title.lower()
+         content_lower = article.content.lower()
+
+         # Category classification
+         if any(
+             keyword in title_lower or keyword in content_lower
+             for keyword in ["startup", "company", "funding", "investment"]
+         ):
+             article.category = "startups"
+         elif any(
+             keyword in title_lower or keyword in content_lower
+             for keyword in ["policy", "regulation", "law", "act", "government"]
+         ):
+             article.category = "policy"
+         elif any(
+             country in title_lower or country in content_lower
+             for country in ["france", "germany", "netherlands", "europe", "eu"]
+         ):
+             article.category = "country"
+         else:
+             article.category = "stories"
+
+         # Country classification
+         if "france" in title_lower or "france" in content_lower:
+             article.country = "France"
+         elif "germany" in title_lower or "germany" in content_lower:
+             article.country = "Germany"
+         elif "netherlands" in title_lower or "netherlands" in content_lower:
+             article.country = "Netherlands"
+         elif "europe" in title_lower or "eu" in title_lower:
+             article.country = "Europe"
+         else:
+             article.country = ""
+
+         return article
+
+
+ class QualityScoreAgent:
+     def __init__(self):
+         pass
+
+     def assess_quality(self, article: Article) -> Article:
+         """Assess article quality and relevance"""
+         # Basic quality scoring algorithm
+         score = 0.0
+
+         # Title quality
+         if len(article.title.split()) > 3:
+             score += 0.2
+
+         # Content length
+         word_count = len(article.content.split())
+         if word_count > 200:
+             score += 0.3
+         if word_count > 500:
+             score += 0.2
+
+         # Source credibility
+         if any(source in article.source.lower() for source in ["europa.eu", "airopa"]):
+             score += 0.3
+
+         # Category relevance
+         if article.category:
+             score += 0.1
+
+         # Country relevance
+         if article.country:
+             score += 0.1
+
+         article.quality_score = min(score, 1.0)
+         return article
+
+
+ class ContentGeneratorAgent:
+     def __init__(self):
+         self.output_dir = Path(config.content.output_dir)
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+
+     def generate_markdown(self, article: Article) -> Optional[Path]:
+         """Generate markdown file for an article"""
+         try:
+             # Generate filename
+             title_slug: str = slugify(article.title)
+             date_str = (
+                 article.published_date.strftime("%Y-%m-%d")
+                 if article.published_date
+                 else datetime.now().strftime("%Y-%m-%d")
+             )
+             filename = f"{date_str}-{title_slug}.md"
+             filepath: Path = self.output_dir / filename
+
+             # Generate frontmatter
+             frontmatter = self._generate_frontmatter(article)
+
+             # Write markdown file
+             with open(filepath, "w", encoding="utf-8") as f:
+                 f.write(frontmatter)
+                 f.write(f"\n\n{article.content}")
+
+             return filepath
+
+         except Exception as e:
+             print(f"Error generating markdown for {article.title}: {e}")
+             return None
+
+     def _generate_frontmatter(self, article: Article) -> str:
+         """Generate YAML frontmatter for markdown file"""
+         frontmatter = "---\n"
+         frontmatter += f'title: "{article.title}"\n'
+         frontmatter += f"date: \"{article.published_date.strftime('%Y-%m-%d') if article.published_date else datetime.now().strftime('%Y-%m-%d')}\"\n"  # noqa: E501
+         frontmatter += f'author: "{config.content.default_author}"\n'
+         frontmatter += f'source: "{article.source}"\n'
+         frontmatter += f'url: "{article.url}"\n'
+         frontmatter += f'pillar: "{article.category}"\n'
+
+         if article.country:
+             frontmatter += f'country: "{article.country}"\n'
+
+         if article.summary:
+             frontmatter += f'description: "{article.summary[:160]}"\n'
+
+         frontmatter += f'coverImage: "{config.content.default_cover_image}"\n'
+         frontmatter += "isFeatured: false\n"
+         frontmatter += "isAiGenerated: true\n"
+         frontmatter += "---"
+
+         return frontmatter
+
+
+ class GitCommitAgent:
+     def __init__(self):
+         import git
+
+         self.repo_path = Path(config.git.repo_path)
+         self.repo = git.Repo(self.repo_path)
+
+     def commit_new_content(self, files: List[Path]) -> bool:
+         """Commit new content files to git repository"""
+         try:
+             # Add files to git
+             for file in files:
+                 relative_path = file.relative_to(self.repo_path)
+                 self.repo.index.add([str(relative_path)])
+
+             # Commit changes
+             import git
+
+             self.repo.index.commit(
+                 config.git.commit_message,
+                 author=git.Actor(config.git.author_name, config.git.author_email),
+             )
+
+             return True
+
+         except Exception as e:
+             print(f"Error committing files to git: {e}")
+             return False
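
Taken together, these classes form a scrape → classify → score → publish pipeline, although the wheel ships no orchestrator module of its own (see the RECORD file below). A minimal sketch of how they could be chained, assuming the dependencies are installed, the default output directory is writable, and using an illustrative quality threshold of 0.5 that is not defined anywhere in the package:

from airopa_automation.agents import (
    CategoryClassifierAgent,
    ContentGeneratorAgent,
    QualityScoreAgent,
    ScraperAgent,
)

scraper = ScraperAgent()
classifier = CategoryClassifierAgent()
scorer = QualityScoreAgent()
generator = ContentGeneratorAgent()  # creates config.content.output_dir if missing

# Scrape RSS feeds, enrich each article, and write out the ones that clear the bar.
for article in scraper.scrape_rss_feeds():
    article = classifier.classify(article)
    article = scorer.assess_quality(article)
    if article.quality_score >= 0.5:  # illustrative threshold, not part of the package
        path = generator.generate_markdown(article)
        print(f"Wrote {path}")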

airopa_automation/config.py
@@ -0,0 +1,78 @@
+ # AIropa Automation Configuration
+
+ import os
+ from pathlib import Path
+
+ from dotenv import load_dotenv
+ from pydantic import BaseModel
+
+ # Load environment variables
+ load_dotenv()
+
+
+ class ScraperConfig(BaseModel):
+     rss_feeds: list[str] = [
+         "https://sifted.eu/feed/?post_type=article",
+         "https://tech.eu/category/deep-tech/feed",
+         "https://european-champions.org/feed",
+         "https://tech.eu/category/robotics/feed",
+     ]
+     web_sources: list[str] = [
+         "https://sifted.eu",
+         "https://tech.eu",
+         "https://european-champions.org",
+     ]
+     max_articles_per_source: int = 10
+     rate_limit_delay: float = 1.0  # seconds between requests
+     user_agent: str = "AIropaBot/1.0 (+https://airopa.eu)"
+
+
+ class AIConfig(BaseModel):
+     model: str = "llama3-70b-8192"
+     temperature: float = 0.7
+     max_tokens: int = 1024
+     api_key: str = os.getenv("GROQ_API_KEY", "")
+     # Note: AI features will be limited due to Python 3.13 compatibility issues
+
+
+ class DatabaseConfig(BaseModel):
+     db_path: str = "database/airopa.db"
+     max_connections: int = 5
+     timeout: float = 10.0
+
+
+ class ContentConfig(BaseModel):
+     output_dir: str = "../airopa/src/content/post"
+     default_author: str = "AIropa Bot"
+     default_cover_image: str = "/assets/featured-story.jpg"
+
+
+ class GitConfig(BaseModel):
+     repo_path: str = ".."
+     commit_message: str = "chore(content): add automated AI news articles"
+     author_name: str = "AIropa Bot"
+     author_email: str = "bot@airopa.eu"
+
+
+ class Config(BaseModel):
+     scraper: ScraperConfig = ScraperConfig()
+     ai: AIConfig = AIConfig()
+     database: DatabaseConfig = DatabaseConfig()
+     content: ContentConfig = ContentConfig()
+     git: GitConfig = GitConfig()
+     debug: bool = os.getenv("DEBUG", "false").lower() == "true"
+
+
+ # Global configuration instance
+ config = Config()
+
+
+ def ensure_directories() -> None:
+     """Ensure required directories exist"""
+     Path(config.content.output_dir).mkdir(parents=True, exist_ok=True)
+     Path(config.database.db_path).parent.mkdir(parents=True, exist_ok=True)
+
+
+ if __name__ == "__main__":
+     ensure_directories()
+     print("Configuration loaded successfully")

airopa_automation/database.py
@@ -0,0 +1,146 @@
+ """
+ Database Module - Database connectivity and operations
+
+ This module provides a unified interface for database operations
+ across different database backends (SQLite, PostgreSQL, etc.).
+ """
+
+ import os
+ import sqlite3
+ from typing import Any, Optional
+
+
+ class Database:
+     """
+     Database connection and operations manager.
+
+     Provides a unified interface for database operations with support
+     for multiple database backends.
+     """
+
+     def __init__(self, config: dict[str, Any]):
+         """
+         Initialize database connection.
+
+         Args:
+             config (dict[str, Any]): Database configuration
+         """
+         self.config = config
+         self.connection: Optional[sqlite3.Connection] = None
+         self.cursor: Optional[sqlite3.Cursor] = None
+
+     def connect(self) -> bool:
+         """
+         Establish database connection.
+
+         Returns:
+             bool: True if connection successful, False otherwise
+         """
+         try:
+             db_type = self.config.get("type", "sqlite")
+
+             if db_type == "sqlite":
+                 db_path = self.config.get("path", "database/airopa.db")
+                 # Ensure directory exists
+                 os.makedirs(os.path.dirname(db_path), exist_ok=True)
+                 self.connection = sqlite3.connect(db_path)
+                 self.cursor = self.connection.cursor()
+                 return True
+
+             raise ValueError(f"Unsupported database type: {db_type}")
+
+         except Exception as e:
+             print(f"Error connecting to database: {e}")
+             return False
+
+     def disconnect(self) -> None:
+         """Close database connection."""
+         if self.connection:
+             self.connection.close()
+             self.connection = None
+             self.cursor = None
+
+     def execute(self, query: str, params: tuple[Any, ...] | None = None) -> bool:
+         """
+         Execute a SQL query.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             bool: True if execution successful, False otherwise
+         """
+         try:
+             if not self.connection:
+                 if not self.connect():
+                     return False
+
+             if self.cursor is None:
+                 return False
+
+             if params:
+                 self.cursor.execute(query, params)
+             else:
+                 self.cursor.execute(query)
+
+             return True
+
+         except Exception as e:
+             print(f"Error executing query: {e}")
+             return False
+
+     def fetch_one(
+         self, query: str, params: tuple[Any, ...] | None = None
+     ) -> Optional[tuple[Any, ...]]:
+         """
+         Execute query and fetch one result.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             Optional[tuple[Any, ...]]: First result row or None
+         """
+         if self.execute(query, params) and self.cursor is not None:
+             result: Optional[tuple[Any, ...]] = self.cursor.fetchone()
+             return result
+         return None
+
+     def fetch_all(
+         self, query: str, params: tuple[Any, ...] | None = None
+     ) -> list[tuple[Any, ...]]:
+         """
+         Execute query and fetch all results.
+
+         Args:
+             query (str): SQL query to execute
+             params (tuple[Any, ...] | None): Parameters for the query
+
+         Returns:
+             list[tuple[Any, ...]]: All result rows
+         """
+         if self.execute(query, params) and self.cursor is not None:
+             result: list[tuple[Any, ...]] = self.cursor.fetchall()
+             return result
+         return []
+
+     def commit(self) -> None:
+         """Commit pending transactions."""
+         if self.connection:
+             self.connection.commit()
+
+     def rollback(self) -> None:
+         """Rollback pending transactions."""
+         if self.connection:
+             self.connection.rollback()
+
+     def __enter__(self):
+         """Context manager entry."""
+         self.connect()
+         return self
+
+     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         """Context manager exit."""
+         self.disconnect()
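
Because Database implements __enter__/__exit__, it can be used as a context manager around the SQLite backend. A minimal sketch; the table name and columns are illustrative, since the package does not define a schema:

from airopa_automation.database import Database

with Database({"type": "sqlite", "path": "database/airopa.db"}) as db:
    db.execute(
        "CREATE TABLE IF NOT EXISTS articles (url TEXT PRIMARY KEY, title TEXT)"
    )
    db.execute(
        "INSERT OR IGNORE INTO articles (url, title) VALUES (?, ?)",
        ("https://example.com/post", "Example article"),
    )
    db.commit()  # __exit__ only disconnects, so commit explicitly
    print(db.fetch_all("SELECT url, title FROM articles"))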

airopa_automation-0.1.0.dist-info/METADATA
@@ -0,0 +1,5 @@
+ Metadata-Version: 2.4
+ Name: airopa-automation
+ Version: 0.1.0
+ Summary: AI-powered automation workflows
+ Requires-Python: >=3.12

airopa_automation-0.1.0.dist-info/RECORD
@@ -0,0 +1,8 @@
+ airopa_automation/__init__.py,sha256=gTmfJIXfCh1_6EZWuQXkRjnELqjnAKRdu5by_f8g9u4,516
+ airopa_automation/agents.py,sha256=xAcrv7eH0oYACrqFoSLtY3oYSjfCRQCZuZLgGuuSM84,11891
+ airopa_automation/config.py,sha256=s9rU6NCeMskEN07iy86ohehnjH8thF9JffJEV5XTYvE,2183
+ airopa_automation/database.py,sha256=qQHaNlUgvu7jD34YztVsoXBJcU1MOyl-MkJ1d9TLM8k,4250
+ airopa_automation-0.1.0.dist-info/METADATA,sha256=IVY601nQpJAJSeMWXzubh-2n6qrafrbr8cjfkBheblg,126
+ airopa_automation-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ airopa_automation-0.1.0.dist-info/top_level.txt,sha256=BUlz2sUjwlNLgRKGEIBPa4Ju69E-foQKfTT2C_8eEdk,18
+ airopa_automation-0.1.0.dist-info/RECORD,,

airopa_automation-0.1.0.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.10.2)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+

airopa_automation-0.1.0.dist-info/top_level.txt
@@ -0,0 +1 @@
+ airopa_automation