anysite-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of anysite-cli might be problematic. Click here for more details.
- anysite/__init__.py +4 -0
- anysite/__main__.py +6 -0
- anysite/api/__init__.py +21 -0
- anysite/api/client.py +271 -0
- anysite/api/errors.py +137 -0
- anysite/api/schemas.py +333 -0
- anysite/batch/__init__.py +1 -0
- anysite/batch/executor.py +176 -0
- anysite/batch/input.py +160 -0
- anysite/batch/rate_limiter.py +98 -0
- anysite/cli/__init__.py +1 -0
- anysite/cli/config.py +176 -0
- anysite/cli/executor.py +388 -0
- anysite/cli/options.py +249 -0
- anysite/config/__init__.py +11 -0
- anysite/config/paths.py +46 -0
- anysite/config/settings.py +187 -0
- anysite/dataset/__init__.py +37 -0
- anysite/dataset/analyzer.py +268 -0
- anysite/dataset/cli.py +644 -0
- anysite/dataset/collector.py +686 -0
- anysite/dataset/db_loader.py +248 -0
- anysite/dataset/errors.py +30 -0
- anysite/dataset/exporters.py +121 -0
- anysite/dataset/history.py +153 -0
- anysite/dataset/models.py +245 -0
- anysite/dataset/notifications.py +87 -0
- anysite/dataset/scheduler.py +107 -0
- anysite/dataset/storage.py +171 -0
- anysite/dataset/transformer.py +213 -0
- anysite/db/__init__.py +38 -0
- anysite/db/adapters/__init__.py +1 -0
- anysite/db/adapters/base.py +158 -0
- anysite/db/adapters/postgres.py +201 -0
- anysite/db/adapters/sqlite.py +183 -0
- anysite/db/cli.py +687 -0
- anysite/db/config.py +92 -0
- anysite/db/manager.py +166 -0
- anysite/db/operations/__init__.py +1 -0
- anysite/db/operations/insert.py +199 -0
- anysite/db/operations/query.py +43 -0
- anysite/db/schema/__init__.py +1 -0
- anysite/db/schema/inference.py +213 -0
- anysite/db/schema/types.py +71 -0
- anysite/db/utils/__init__.py +1 -0
- anysite/db/utils/sanitize.py +99 -0
- anysite/main.py +498 -0
- anysite/models/__init__.py +1 -0
- anysite/output/__init__.py +11 -0
- anysite/output/console.py +45 -0
- anysite/output/formatters.py +301 -0
- anysite/output/templates.py +76 -0
- anysite/py.typed +0 -0
- anysite/streaming/__init__.py +1 -0
- anysite/streaming/progress.py +121 -0
- anysite/streaming/writer.py +130 -0
- anysite/utils/__init__.py +1 -0
- anysite/utils/fields.py +242 -0
- anysite/utils/retry.py +109 -0
- anysite_cli-0.1.0.dist-info/METADATA +437 -0
- anysite_cli-0.1.0.dist-info/RECORD +64 -0
- anysite_cli-0.1.0.dist-info/WHEEL +4 -0
- anysite_cli-0.1.0.dist-info/entry_points.txt +2 -0
- anysite_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Record transformer — filter, field selection, and column injection.
|
|
2
|
+
|
|
3
|
+
Applies per-source transforms to collected records before Parquet storage.
|
|
4
|
+
The filter parser is intentionally safe: no ``eval()``, only tokenize → parse.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from anysite.dataset.models import TransformConfig
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FilterParseError(Exception):
    """Signals that a filter expression could not be parsed into a predicate."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RecordTransformer:
    """Apply a transform pipeline to records: filter → select fields → add columns.

    The stages run in a fixed order:

    1. Drop records that fail the (optional) filter expression.
    2. Project each record down to the configured field list.
    3. Merge in static columns from ``add_columns``.

    Input records are never mutated: stage 3 builds new dicts, so callers
    can safely reuse the list they passed in.
    """

    def __init__(self, config: TransformConfig) -> None:
        """Store the config and compile the filter expression once.

        Raises:
            FilterParseError: If ``config.filter`` is set but unparsable.
        """
        self.config = config
        self._filter_fn = _parse_filter(config.filter) if config.filter else None

    def apply(self, records: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Run the full pipeline and return the transformed records."""
        result = records

        # 1. Filter
        if self._filter_fn:
            result = [r for r in result if self._filter_fn(r)]

        # 2. Select fields (produces fresh dicts)
        if self.config.fields:
            result = [_select_fields(r, self.config.fields) for r in result]

        # 3. Add static columns. Build new dicts instead of calling
        #    r.update(...) so the caller's original records are left
        #    untouched (the previous implementation mutated them in place
        #    whenever no field selection had run).
        if self.config.add_columns:
            result = [{**r, **self.config.add_columns} for r in result]

        return result
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
# Safe filter parser
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
_TOKEN_RE = re.compile(
    r"""
    \s*(?:
    (?P<field>\.[a-zA-Z_][a-zA-Z0-9_.]*) |  # .field.path
    (?P<string>"[^"]*"|'[^']*') |           # quoted string
    (?P<number>-?\d+(?:\.\d+)?) |           # number
    (?P<op>==|!=|>=|<=|>|<) |               # comparison
    (?P<logic>and|or) |                     # logical
    (?P<null>null|none|None)                # null literal
    )\s*
    """,
    re.VERBOSE,
)


def _tokenize(expr: str) -> list[tuple[str, str]]:
    """Tokenize a filter expression into (type, value) pairs.

    Raises:
        FilterParseError: On any character no token pattern matches.
    """
    tokens: list[tuple[str, str]] = []
    pos = 0
    while pos < len(expr):
        m = _TOKEN_RE.match(expr, pos)
        if not m:
            raise FilterParseError(f"Unexpected character at position {pos}: {expr[pos:]!r}")
        # Exactly one named group matched; record its (type, text).
        for name in ("field", "string", "number", "op", "logic", "null"):
            val = m.group(name)
            if val is not None:
                tokens.append((name, val))
                break
        pos = m.end()
    return tokens


def _parse_filter(expr: str) -> Any:
    """Parse a filter expression into a callable predicate.

    Supported syntax:
        .field > 10
        .name != ""
        .status == "active" and .count > 0
        .field != null

    ``and`` binds tighter than ``or``, matching conventional boolean
    precedence.

    Returns:
        A ``record -> bool`` callable, or None for a blank expression.

    Raises:
        FilterParseError: If the expression is malformed.
    """
    if not expr or not expr.strip():
        return None

    tokens = _tokenize(expr)
    if not tokens:
        raise FilterParseError(f"Empty filter expression: {expr!r}")

    # Parse into comparisons joined by and/or
    comparisons: list[tuple[str, str, Any]] = []  # (field, op, value)
    connectors: list[str] = []  # 'and' | 'or'

    i = 0
    while i < len(tokens):
        # Expect: field op value
        if i >= len(tokens) or tokens[i][0] != "field":
            raise FilterParseError(f"Expected field, got {tokens[i] if i < len(tokens) else 'end'}")
        field_path = tokens[i][1][1:]  # strip leading dot
        i += 1

        if i >= len(tokens) or tokens[i][0] != "op":
            raise FilterParseError(f"Expected operator after .{field_path}")
        op = tokens[i][1]
        i += 1

        if i >= len(tokens):
            raise FilterParseError(f"Expected value after .{field_path} {op}")

        tok_type, tok_val = tokens[i]
        if tok_type == "string":
            value: Any = tok_val[1:-1]  # strip quotes
        elif tok_type == "number":
            value = float(tok_val) if "." in tok_val else int(tok_val)
        elif tok_type == "null":
            value = None
        else:
            raise FilterParseError(f"Expected value, got {tokens[i]}")
        i += 1

        comparisons.append((field_path, op, value))

        # Check for connector
        if i < len(tokens):
            if tokens[i][0] == "logic":
                connectors.append(tokens[i][1])
                i += 1
            else:
                raise FilterParseError(f"Expected 'and'/'or', got {tokens[i]}")

    def _eval_comparison(record: dict[str, Any], field: str, op: str, val: Any) -> bool:
        """Evaluate one comparison against a record; missing values never match."""
        actual = _get_dot_value(record, field)
        if val is None:
            # Null comparisons only support (in)equality.
            if op == "==":
                return actual is None
            if op == "!=":
                return actual is not None
            return False
        if actual is None:
            return False
        try:
            if op == "==":
                return actual == val
            if op == "!=":
                return actual != val
            if op == ">":
                return actual > val
            if op == "<":
                return actual < val
            if op == ">=":
                return actual >= val
            if op == "<=":
                return actual <= val
        except TypeError:
            # Incomparable types (e.g. str vs int ordering) never match.
            return False
        return False

    def predicate(record: dict[str, Any]) -> bool:
        results = [_eval_comparison(record, f, o, v) for f, o, v in comparisons]
        if not connectors:
            return results[0]
        # 'and' binds tighter than 'or': fold the results into OR-separated
        # groups of AND-ed terms.  (The previous implementation evaluated
        # strictly left to right, which mis-handled "a or b and c".)
        group = results[0]
        disjunction = False
        for idx, conn in enumerate(connectors):
            if conn == "and":
                group = group and results[idx + 1]
            else:  # 'or' closes the current AND-group
                disjunction = disjunction or group
                group = results[idx + 1]
        return disjunction or group

    return predicate


def _get_dot_value(record: dict[str, Any], path: str) -> Any:
    """Resolve a dotted path against nested dicts; None when any hop fails."""
    current: Any = record
    for part in path.split("."):
        if isinstance(current, dict):
            current = current.get(part)
        else:
            return None
    return current
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _select_fields(record: dict[str, Any], fields: list[str]) -> dict[str, Any]:
    """Project *record* down to *fields*, supporting dot paths and aliases.

    Each entry is either a plain path (``a.b``) or an aliased form
    (``a.b AS name`` / ``a.b as name``).  Un-aliased dotted paths are
    flattened by replacing dots with underscores.
    """
    selected: dict[str, Any] = {}
    for spec in fields:
        # Aliased form: "path.to.field AS alias" (upper- or lower-case).
        if " AS " in spec:
            path, _, alias = spec.partition(" AS ")
            path = path.strip()
            alias = alias.strip()
        elif " as " in spec:
            path, _, alias = spec.partition(" as ")
            path = path.strip()
            alias = alias.strip()
        else:
            path = spec
            alias = spec.replace(".", "_") if "." in spec else spec

        selected[alias] = _get_dot_value(record, path)
    return selected
|
anysite/db/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Database integration subsystem for storing API data in SQL databases."""
|
|
2
|
+
|
|
3
|
+
from typing import NoReturn
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def check_db_deps(db_type: str | None = None) -> None:
    """Check that optional database dependencies are installed.

    Args:
        db_type: Specific database type to check ('postgres', 'mysql').
            If None, only checks that the db module itself is usable.

    Raises:
        SystemExit: If required packages are not installed.
    """
    # Map each optional backend to the driver package it needs.
    drivers = {"postgres": "psycopg", "mysql": "pymysql"}
    module = drivers.get(db_type or "")
    if module is None:
        return
    try:
        __import__(module)
    except ImportError:
        _missing_deps_error([module], extra=db_type)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _missing_deps_error(missing: list[str], extra: str = "db") -> NoReturn:
    """Print an install hint for *missing* packages and exit with status 1."""
    import typer

    package_list = ", ".join(missing)
    message = (
        f"Error: Missing required packages: {package_list}\n"
        f"Install with: pip install anysite-cli[{extra}]"
    )
    typer.echo(message, err=True)
    raise typer.Exit(1)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Database adapters."""
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Abstract base class for database adapters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from collections.abc import Generator
|
|
7
|
+
from contextlib import contextmanager
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from anysite.db.config import OnConflict
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DatabaseAdapter(ABC):
    """Common interface implemented by every concrete database backend.

    Adapters are synchronous.  They support the context-manager protocol,
    which handles connect/disconnect automatically:

        with SQLiteAdapter(config) as db:
            db.execute("CREATE TABLE ...")
    """

    @abstractmethod
    def connect(self) -> None:
        """Establish the underlying database connection."""

    @abstractmethod
    def disconnect(self) -> None:
        """Tear down the underlying database connection."""

    @abstractmethod
    def execute(self, sql: str, params: tuple[Any, ...] | None = None) -> None:
        """Run a single SQL statement, discarding any result set.

        Args:
            sql: Statement text containing parameter placeholders.
            params: Values to bind to the placeholders.
        """

    @abstractmethod
    def fetch_one(self, sql: str, params: tuple[Any, ...] | None = None) -> dict[str, Any] | None:
        """Run a query and return only its first row.

        Args:
            sql: Query text containing parameter placeholders.
            params: Values to bind to the placeholders.

        Returns:
            The first result row as a dictionary, or None when the query
            produced no rows.
        """

    @abstractmethod
    def fetch_all(self, sql: str, params: tuple[Any, ...] | None = None) -> list[dict[str, Any]]:
        """Run a query and return every row it produces.

        Args:
            sql: Query text containing parameter placeholders.
            params: Values to bind to the placeholders.

        Returns:
            All result rows, each as a dictionary.
        """

    @abstractmethod
    def insert_batch(
        self,
        table: str,
        rows: list[dict[str, Any]],
        on_conflict: OnConflict = OnConflict.ERROR,
        conflict_columns: list[str] | None = None,
    ) -> int:
        """Bulk-insert rows into *table*.

        Args:
            table: Target table name.
            rows: Row dictionaries to insert.
            on_conflict: Strategy applied when a row collides with an
                existing one.
            conflict_columns: Columns defining uniqueness for upserts.

        Returns:
            Count of rows inserted or otherwise affected.
        """

    @abstractmethod
    def table_exists(self, table: str) -> bool:
        """Report whether *table* is present in the database.

        Args:
            table: Table name to look up.

        Returns:
            True when the table exists.
        """

    @abstractmethod
    def get_table_schema(self, table: str) -> list[dict[str, str]]:
        """Describe the columns of *table*.

        Args:
            table: Table name to inspect.

        Returns:
            One dict per column with 'name', 'type', 'nullable', and
            'primary_key' keys.
        """

    @abstractmethod
    def create_table(self, table: str, columns: dict[str, str], primary_key: str | None = None) -> None:
        """Create *table* with the given column definitions.

        Args:
            table: Table name.
            columns: Mapping of column name to its SQL type.
            primary_key: Column to promote to primary key, if any.
        """

    @abstractmethod
    def get_server_info(self) -> dict[str, str]:
        """Describe the connected server.

        Returns:
            Server metadata (version, type, and so on).
        """

    @contextmanager
    def transaction(self) -> Generator[None, None, None]:
        """Group the enclosed statements into one transaction.

        Commits when the block (including the commit itself) completes;
        rolls back and re-raises on any exception.

        Usage:
            with adapter.transaction():
                adapter.execute("INSERT ...")
                adapter.execute("UPDATE ...")
        """
        self._begin_transaction()
        try:
            yield
            self._commit_transaction()
        except Exception:
            self._rollback_transaction()
            raise

    @abstractmethod
    def _begin_transaction(self) -> None:
        """Start a new transaction."""

    @abstractmethod
    def _commit_transaction(self) -> None:
        """Commit the in-progress transaction."""

    @abstractmethod
    def _rollback_transaction(self) -> None:
        """Abort the in-progress transaction."""

    def __enter__(self) -> DatabaseAdapter:
        self.connect()
        return self

    def __exit__(self, exc_type: type | None, exc_val: Exception | None, exc_tb: Any) -> None:
        self.disconnect()
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""PostgreSQL database adapter using psycopg v3."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from anysite.db.adapters.base import DatabaseAdapter
|
|
9
|
+
from anysite.db.config import ConnectionConfig, OnConflict
|
|
10
|
+
from anysite.db.utils.sanitize import sanitize_identifier, sanitize_table_name
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PostgresAdapter(DatabaseAdapter):
    """PostgreSQL adapter using psycopg v3 (sync mode).

    Connects lazily via :meth:`connect`; prefer using the adapter as a
    context manager so the connection is always released.
    """

    def __init__(self, config: ConnectionConfig) -> None:
        self.config = config
        self._conn: Any = None  # psycopg.Connection once connected

    def connect(self) -> None:
        """Open the connection, preferring a full URL over discrete fields.

        Idempotent: calling connect() on an open adapter is a no-op.
        """
        if self._conn is not None:
            return

        import psycopg
        from psycopg.rows import dict_row

        url = self.config.get_url()
        if url:
            self._conn = psycopg.connect(url, row_factory=dict_row)
        else:
            password = self.config.get_password()
            connect_kwargs: dict[str, Any] = {
                "host": self.config.host,
                "dbname": self.config.database,
                "row_factory": dict_row,
            }
            if self.config.user:
                connect_kwargs["user"] = self.config.user
            if password:
                connect_kwargs["password"] = password
            if self.config.port:
                connect_kwargs["port"] = self.config.port

            self._conn = psycopg.connect(**connect_kwargs)

        # Set autocommit for non-transactional operations
        self._conn.autocommit = True

    def disconnect(self) -> None:
        """Close the connection if open; safe to call repeatedly."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    @property
    def conn(self) -> Any:
        """The live psycopg connection; raises if connect() was never called."""
        if self._conn is None:
            raise RuntimeError("Not connected. Call connect() first or use as context manager.")
        return self._conn

    def execute(self, sql: str, params: tuple[Any, ...] | None = None) -> None:
        """Execute a statement, discarding any result set."""
        self.conn.execute(sql, params)

    def fetch_one(self, sql: str, params: tuple[Any, ...] | None = None) -> dict[str, Any] | None:
        """Execute a query and return the first row as a dict, or None."""
        cursor = self.conn.execute(sql, params)
        return cursor.fetchone()

    def fetch_all(self, sql: str, params: tuple[Any, ...] | None = None) -> list[dict[str, Any]]:
        """Execute a query and return all rows as dicts."""
        cursor = self.conn.execute(sql, params)
        return cursor.fetchall()

    def insert_batch(
        self,
        table: str,
        rows: list[dict[str, Any]],
        on_conflict: OnConflict = OnConflict.ERROR,
        conflict_columns: list[str] | None = None,
    ) -> int:
        """Insert rows in one transaction, honoring the conflict strategy.

        dict/list values are serialized to JSON strings before insert.

        Returns:
            Number of rows submitted (0 for an empty batch).
        """
        if not rows:
            return 0

        safe_table = sanitize_table_name(table)

        # Collect the union of column names across all rows, preserving
        # first-seen order so the generated SQL is deterministic.
        all_columns: list[str] = []
        seen: set[str] = set()
        for row in rows:
            for col in row:
                if col not in seen:
                    seen.add(col)
                    all_columns.append(col)

        safe_columns = [sanitize_identifier(col) for col in all_columns]
        placeholders = ", ".join(f"%({col})s" for col in all_columns)
        col_list = ", ".join(safe_columns)
        base_insert = f"INSERT INTO {safe_table} ({col_list}) VALUES ({placeholders})"

        # Build the INSERT statement
        if on_conflict == OnConflict.IGNORE:
            # Fix: Postgres allows ON CONFLICT DO NOTHING *without* a conflict
            # target, so IGNORE works even when conflict_columns is omitted.
            # (Previously IGNORE silently degraded to a plain INSERT -- i.e.
            # ERROR semantics -- in that case.)
            if conflict_columns:
                safe_conflict = [sanitize_identifier(c) for c in conflict_columns]
                sql = f"{base_insert} ON CONFLICT ({', '.join(safe_conflict)}) DO NOTHING"
            else:
                sql = f"{base_insert} ON CONFLICT DO NOTHING"
        elif on_conflict in (OnConflict.UPDATE, OnConflict.REPLACE) and conflict_columns:
            safe_conflict = [sanitize_identifier(c) for c in conflict_columns]
            conflict_list = ", ".join(safe_conflict)
            update_cols = [c for c in safe_columns if c not in safe_conflict]
            if update_cols:
                update_clause = ", ".join(f"{c} = EXCLUDED.{c}" for c in update_cols)
                sql = f"{base_insert} ON CONFLICT ({conflict_list}) DO UPDATE SET {update_clause}"
            else:
                # Fix: every column is part of the conflict target, so there is
                # nothing to update -- "DO UPDATE SET" with an empty column
                # list would be a syntax error. Fall back to DO NOTHING.
                sql = f"{base_insert} ON CONFLICT ({conflict_list}) DO NOTHING"
        else:
            sql = base_insert

        # Prepare rows, serializing complex types to JSON
        prepared_rows: list[dict[str, Any]] = []
        for row in rows:
            prepared: dict[str, Any] = {}
            for col in all_columns:
                val = row.get(col)
                if isinstance(val, (dict, list)):
                    val = json.dumps(val)
                prepared[col] = val
            prepared_rows.append(prepared)

        # Use executemany inside a single transaction for atomicity.
        with self.conn.transaction():
            cursor = self.conn.cursor()
            cursor.executemany(sql, prepared_rows)
        return len(prepared_rows)

    def table_exists(self, table: str) -> bool:
        """Check the information schema for *table* in the public schema."""
        row = self.fetch_one(
            "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = %s AND table_schema = 'public')",
            (table,),
        )
        return bool(row and row.get("exists"))

    def get_table_schema(self, table: str) -> list[dict[str, str]]:
        """Return column name/type/nullable/primary_key info for *table*."""
        rows = self.fetch_all(
            """
            SELECT c.column_name, c.data_type, c.is_nullable,
                   CASE WHEN tc.constraint_type = 'PRIMARY KEY' THEN 'YES' ELSE 'NO' END as primary_key
            FROM information_schema.columns c
            LEFT JOIN information_schema.key_column_usage kcu
                ON c.table_name = kcu.table_name AND c.column_name = kcu.column_name
            LEFT JOIN information_schema.table_constraints tc
                ON kcu.constraint_name = tc.constraint_name AND tc.constraint_type = 'PRIMARY KEY'
            WHERE c.table_name = %s AND c.table_schema = 'public'
            ORDER BY c.ordinal_position
            """,
            (table,),
        )
        return [
            {
                "name": r["column_name"],
                "type": r["data_type"],
                "nullable": r["is_nullable"],
                "primary_key": r["primary_key"],
            }
            for r in rows
        ]

    def create_table(
        self,
        table: str,
        columns: dict[str, str],
        primary_key: str | None = None,
    ) -> None:
        """Create *table* if absent, marking *primary_key* when given."""
        safe_table = sanitize_table_name(table)
        col_defs: list[str] = []
        for col_name, col_type in columns.items():
            safe_col = sanitize_identifier(col_name)
            pk_suffix = " PRIMARY KEY" if col_name == primary_key else ""
            col_defs.append(f"{safe_col} {col_type}{pk_suffix}")

        cols_sql = ", ".join(col_defs)
        sql = f"CREATE TABLE IF NOT EXISTS {safe_table} ({cols_sql})"
        self.execute(sql)

    def get_server_info(self) -> dict[str, str]:
        """Return server version plus the configured host/database."""
        row = self.fetch_one("SELECT version()")
        version = row["version"] if row else "unknown"
        return {
            "type": "postgres",
            "version": version,
            "host": self.config.host or "unknown",
            "database": self.config.database or "unknown",
        }

    def _begin_transaction(self) -> None:
        # Dropping autocommit makes the next statement open a transaction.
        self.conn.autocommit = False

    def _commit_transaction(self) -> None:
        self.conn.commit()
        self.conn.autocommit = True

    def _rollback_transaction(self) -> None:
        self.conn.rollback()
        self.conn.autocommit = True
|