PyPI - chadselect - Versions diffs - 0.2.0__py3-none-any.whl - Mend

chadselect 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

chadselect/__init__.py +40 -0
chadselect/_chadselect.py +218 -0
chadselect/_functions.py +134 -0
chadselect/_query.py +69 -0
chadselect/engine/__init__.py +1 -0
chadselect/engine/css.py +135 -0
chadselect/engine/json.py +66 -0
chadselect/engine/regex.py +49 -0
chadselect/engine/xpath.py +48 -0
chadselect/py.typed +23 -0
chadselect-0.2.0.dist-info/METADATA +113 -0
chadselect-0.2.0.dist-info/RECORD +13 -0
chadselect-0.2.0.dist-info/WHEEL +4 -0

chadselect/__init__.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""
+ChadSelect — Unified data extraction.
+CSS Selectors, XPath 1.0, Regex, and JMESPath behind one query interface
+with chainable post-processing functions.
+Usage::
+    from chadselect import ChadSelect
+    cs = ChadSelect()
+    cs.add_html('<span class="price">$49.99</span>')
+    price = cs.select(0, "css:.price")
+    # "$49.99"
+Query prefixes::
+    css:       → CSS Selectors (selectolax/lexbor)
+    xpath:     → XPath 1.0 (lxml/libxml2)
+    json:      → JMESPath
+    regex:     → Regex (re stdlib)
+    (no prefix) → Regex (default)
+Post-processing functions (pipe with >>)::
+    cs.select(0, "css:.price >> normalize-space() >> uppercase()")
+"""
+from chadselect._chadselect import ChadSelect
+from chadselect._query import FUNCTION_PIPE, QueryType, parse_query
+from chadselect._functions import supported_text_functions
+__all__ = [
+    "ChadSelect",
+    "FUNCTION_PIPE",
+    "QueryType",
+    "parse_query",
+    "supported_text_functions",
+]
+__version__ = "0.2.0"

chadselect/_chadselect.py ADDED Viewed

@@ -0,0 +1,218 @@
+"""
+ChadSelect — the main extraction class.
+API-compatible with the Rust ``chadselect`` crate.
+"""
+from __future__ import annotations
+import logging
+from typing import Any, Callable, List, Optional, Sequence, Tuple
+from chadselect._query import ContentType, QueryType, parse_query, is_query_compatible
+from chadselect.engine import css as css_engine
+from chadselect.engine import xpath as xpath_engine
+from chadselect.engine import regex as regex_engine
+from chadselect.engine import json as json_engine
+logger = logging.getLogger(__name__)
+def _default_valid(s: str) -> bool:
+    """Report whether *s* holds at least one non-whitespace character."""
+    if not s:
+        return False
+    return bool(s.strip())
+class _ContentItem:
+    """Pair a raw content string with the content type it was loaded as."""
+    # Slots: instances carry exactly these two attributes and no __dict__.
+    __slots__ = ("content", "content_type")
+    def __init__(self, content: str, content_type: ContentType) -> None:
+        self.content, self.content_type = content, content_type
+class ChadSelect:
+    """One query interface over CSS, XPath, Regex, and JMESPath extraction.
+    Content is loaded first and queried afterwards with a prefixed string::
+        cs = ChadSelect()
+        cs.add_html('<span class="price">$49.99</span>')
+        price = cs.select(0, "css:.price")  # "$49.99"
+    Recognised prefixes:
+        - ``css:``   → CSS selector engine
+        - ``xpath:`` → XPath engine
+        - ``json:``  → JMESPath engine
+        - ``regex:`` → regex engine
+        - *(none)*   → regex engine (default)
+    Post-processing functions are appended with ``>>``::
+        cs.select(0, "css:.price >> normalize-space() >> uppercase()")
+    """
+    __slots__ = ("_content_list",)
+    def __init__(self) -> None:
+        # Loaded items, kept in insertion order; queries walk this list.
+        self._content_list: List[_ContentItem] = []
+    # ── Content management ──────────────────────────────────────────────
+    def add_text(self, content: str) -> None:
+        """Load *content* tagged as plain text."""
+        item = _ContentItem(content, ContentType.TEXT)
+        self._content_list.append(item)
+    def add_html(self, content: str) -> None:
+        """Load *content* tagged as HTML."""
+        item = _ContentItem(content, ContentType.HTML)
+        self._content_list.append(item)
+    def add_json(self, content: str) -> None:
+        """Load *content* tagged as JSON."""
+        item = _ContentItem(content, ContentType.JSON)
+        self._content_list.append(item)
+    def content_count(self) -> int:
+        """Return how many content items are currently loaded."""
+        return len(self._content_list)
+    def clear(self) -> None:
+        """Drop every loaded content item."""
+        self._content_list.clear()
+    # ── Querying ────────────────────────────────────────────────────────
+    def query(self, index: int, query_str: str) -> List[str]:
+        """Run *query_str* against every compatible content item.
+        Args:
+            index: ``-1`` returns every match. ``>= 0`` returns a one-element
+                list holding the match at that position in the combined
+                results of all compatible items (``[]`` if out of bounds).
+            query_str: Prefixed query string (e.g. ``"css:.price"``).
+        Returns:
+            The selected matches as a list of strings.
+        """
+        query_type, expression = parse_query(query_str)
+        # One engine module per query type; each exposes ``process``.
+        engines = {
+            QueryType.CSS: css_engine,
+            QueryType.XPATH: xpath_engine,
+            QueryType.REGEX: regex_engine,
+            QueryType.JSON: json_engine,
+        }
+        engine = engines.get(query_type)
+        collected: List[str] = []
+        for item in self._content_list:
+            # Items the query type cannot run against are skipped.
+            if is_query_compatible(query_type, item.content_type) and engine is not None:
+                collected.extend(engine.process(expression, item.content))
+        return _select_by_index(collected, index)
+    def select(self, index: int, query_str: str) -> str:
+        """Return one result string for the query, or ``""``.
+        The result is accepted only when it is non-empty and non-whitespace.
+        """
+        return self.select_where(index, query_str, _default_valid)
+    def select_where(
+        self,
+        index: int,
+        query_str: str,
+        valid: Callable[[str], bool],
+    ) -> str:
+        """Like :meth:`select`, with a caller-supplied validity check.
+        Args:
+            valid: Called with the first string returned by :meth:`query`;
+                a truthy return accepts it, otherwise ``""`` is returned.
+        """
+        matches = self.query(index, query_str)
+        if not matches:
+            return ""
+        head = matches[0]
+        return head if valid(head) else ""
+    def select_first(
+        self, queries: Sequence[Tuple[int, str]]
+    ) -> List[str]:
+        """Try each ``(index, query)`` pair in order; return the first valid set.
+        A result set is valid when it is non-empty and every element is
+        non-empty and non-whitespace.
+        """
+        return self.select_first_where(queries, _default_valid)
+    def select_first_where(
+        self,
+        queries: Sequence[Tuple[int, str]],
+        valid: Callable[[str], bool],
+    ) -> List[str]:
+        """Like :meth:`select_first`, with a caller-supplied validity check."""
+        for index, query_str in queries:
+            matches = self.query(index, query_str)
+            if not matches:
+                continue
+            if all(valid(m) for m in matches):
+                return matches
+        return []
+    def select_many(
+        self, queries: Sequence[Tuple[int, str]]
+    ) -> List[str]:
+        """Run several queries and merge their results without duplicates."""
+        return self.select_many_where(queries, _default_valid)
+    def select_many_where(
+        self,
+        queries: Sequence[Tuple[int, str]],
+        valid: Callable[[str], bool],
+    ) -> List[str]:
+        """Like :meth:`select_many`, with a caller-supplied validity check."""
+        # A dict keeps first-seen order while giving O(1) duplicate checks.
+        ordered = {}
+        for index, query_str in queries:
+            for candidate in self.query(index, query_str):
+                if valid(candidate) and candidate not in ordered:
+                    ordered[candidate] = None
+        return list(ordered)
+    def query_batch(
+        self, queries: Sequence[Tuple[int, str]]
+    ) -> List[List[str]]:
+        """Execute several queries in one call.
+        Returns a list of result lists, one per input query, in input order.
+        Each pair is run independently through :meth:`query`.
+        """
+        batched: List[List[str]] = []
+        for index, q in queries:
+            batched.append(self.query(index, q))
+        return batched
+    # ── Dunder ──────────────────────────────────────────────────────────
+    def __repr__(self) -> str:
+        return f"ChadSelect(content_count={self.content_count()})"
+    def __len__(self) -> int:
+        # len(cs) mirrors content_count().
+        return self.content_count()
+def _select_by_index(results: List[str], index: int) -> List[str]:
+    """Pick results by position.
+    ``-1`` returns *results* unchanged, ``>= 0`` returns a one-element list
+    (or ``[]`` with a warning when out of range), and any other value logs
+    a warning and returns ``[]``.
+    """
+    if index == -1:
+        return results
+    if not (index >= 0):
+        logger.warning("Invalid index: %d", index)
+        return []
+    if index < len(results):
+        return [results[index]]
+    logger.warning(
+        "Index %d out of range (have %d results)", index, len(results)
+    )
+    return []

chadselect/_functions.py ADDED Viewed

@@ -0,0 +1,134 @@
+"""
+Post-processing text functions — shared by all engines.
+Functions are chained using the ``>>`` delimiter after a selector expression::
+    css:.price >> normalize-space() >> uppercase()
+    xpath://div/text() >> substring-after('VIN: ') >> substring(0, 3)
+Mirrors the Rust crate's ``functions.rs`` exactly.
+"""
+from __future__ import annotations
+import re
+from typing import List, Tuple
+from chadselect._query import FUNCTION_PIPE
+def supported_text_functions() -> List[str]:
+    """Enumerate the signatures of every post-processing text function."""
+    signatures = (
+        "normalize-space()",
+        "trim()",
+        "uppercase()",
+        "lowercase()",
+        "substring(start, length)",
+        "substring-after('delimiter')",
+        "substring-before('delimiter')",
+        "replace('find', 'replace')",
+        "get-attr('attribute')",
+    )
+    # A fresh list on every call, so callers may mutate what they receive.
+    return list(signatures)
+def split_functions(input_str: str) -> Tuple[str, str]:
+    """Separate a selector expression from its ``>>`` function chain.
+    The split happens at the first pipe only. The expression is stripped;
+    the chain is returned exactly as written (``""`` when there is no pipe).
+    """
+    expression, pipe, chain = input_str.partition(FUNCTION_PIPE)
+    if not pipe:
+        return input_str.strip(), ""
+    return expression.strip(), chain
+def parse_and_apply(results: List[str], func_chain_str: str) -> List[str]:
+    """Run every function named in *func_chain_str* over *results*, in order.
+    A blank chain returns *results* untouched. Blank segments between pipes
+    are ignored.
+    """
+    if not func_chain_str.strip():
+        return results
+    steps = (segment.strip() for segment in func_chain_str.split(FUNCTION_PIPE))
+    for step in steps:
+        if step:
+            # Values a step reduced to "" are dropped before the next step
+            # (the upstream note says this matches the Rust crate).
+            results = [value for value in _apply_one(results, step) if value]
+    return results
+def _apply_one(results: List[str], func_str: str) -> List[str]:
+    """Apply one text function to every string in *results*.
+    Args:
+        results: Strings produced by an engine or by the previous step.
+        func_str: A single call such as ``"trim"``, ``"uppercase()"`` or
+            ``"substring-after('VIN: ')"``.
+    Returns:
+        The transformed strings. Unknown functions, ``get-attr`` and calls
+        whose arguments cannot be used return *results* unchanged.
+    """
+    paren = func_str.find("(")
+    if paren == -1:
+        # Shorthand without parens — e.g. "trim"
+        name = func_str.strip()
+        args_str = ""
+    else:
+        name = func_str[:paren].strip()
+        end = func_str.rfind(")")
+        # A missing closing paren is tolerated: read to the end of the call.
+        args_str = func_str[paren + 1: end if end != -1 else len(func_str)]
+    if name == "normalize-space":
+        return [re.sub(r"\s+", " ", s).strip() for s in results]
+    if name == "trim":
+        return [s.strip() for s in results]
+    if name == "uppercase":
+        return [s.upper() for s in results]
+    if name == "lowercase":
+        return [s.lower() for s in results]
+    if name == "substring":
+        args = [a.strip() for a in args_str.split(",")]
+        if len(args) < 2:
+            return results
+        try:
+            start, length = int(args[0]), int(args[1])
+        except ValueError:
+            return results
+        if start < 0 or length < 0:
+            # Negative values would wrap around through Python's slice
+            # semantics (e.g. s[-3:-1]) instead of meaning "start, length",
+            # so they are treated like any other unusable argument.
+            return results
+        return [s[start: start + length] for s in results]
+    if name == "substring-after":
+        delim = args_str.strip().strip("\"'")
+        out = []
+        for s in results:
+            idx = s.find(delim)
+            out.append(s[idx + len(delim):] if idx != -1 else "")
+        # Strings without the delimiter (or with nothing after it) are dropped.
+        return [r for r in out if r]
+    if name == "substring-before":
+        delim = args_str.strip().strip("\"'")
+        out = []
+        for s in results:
+            idx = s.find(delim)
+            # Unlike substring-after, a missing delimiter keeps the string whole.
+            out.append(s[:idx] if idx != -1 else s)
+        return out
+    if name == "replace":
+        args = _parse_two_string_args(args_str)
+        if args:
+            find, repl = args
+            return [s.replace(find, repl) for s in results]
+        return results
+    if name == "get-attr":
+        # Handled specially by the CSS engine — pass through here
+        # (the attr name is extracted at the engine level)
+        return results
+    # Unknown function — skip silently
+    return results
+def _parse_two_string_args(args_str: str) -> Tuple[str, str] | None:
+    """Parse ``'find', 'replace'`` from an argument string.
+    Each argument may use single or double quotes, and must be closed by the
+    same quote character that opened it, so an argument may contain the other
+    kind of quote (``"it's"``). Whitespace around the comma is ignored.
+    Returns:
+        ``(find, replace)``, or ``None`` when the text does not start with
+        two quoted arguments.
+    """
+    # \1 and \3 force each closing quote to match its opening quote.
+    m = re.match(
+        r"""(['"])(.*?)\1\s*,\s*(['"])(.*?)\3""", args_str.strip()
+    )
+    if m:
+        return m.group(2), m.group(4)
+    return None

chadselect/_query.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""
+Query type parsing — prefix-based routing to the correct extraction engine.
+Mirrors the Rust crate's ``query.rs`` exactly.
+"""
+from __future__ import annotations
+from enum import Enum, auto
+from typing import Tuple
+#: The function-pipe delimiter used to separate a selector expression from its
+#: post-processing function chain.
+#:
+#: We use ``>>`` instead of ``|`` because ``|`` is a union operator in XPath 1.0
+#: and a pipe operator in JMESPath, which would create ambiguity.
+#:
+#: NOTE(review): an expression that itself contains ``>>`` (for example inside
+#: a regex) looks like it would be cut at the first occurrence — confirm.
+FUNCTION_PIPE: str = ">>"
+class QueryType(Enum):
+    """The extraction engine a parsed query is routed to."""
+    #: Regex pattern — compatible with every content type.
+    REGEX = 1
+    #: XPath 1.0 expression — compatible with HTML and Text.
+    XPATH = 2
+    #: JMESPath expression — compatible with JSON.
+    JSON = 3
+    #: CSS selector — compatible with HTML.
+    CSS = 4
+#: Tags recording how a piece of content was loaded.
+class ContentType(Enum):
+    TEXT = 1
+    HTML = 2
+    JSON = 3
+#: For each query type, the content types it is allowed to run against.
+_COMPAT = {
+    # Regex is compatible with every content type.
+    QueryType.REGEX: set(ContentType),
+    QueryType.XPATH: {ContentType.HTML, ContentType.TEXT},
+    QueryType.JSON: {ContentType.JSON},
+    QueryType.CSS: {ContentType.HTML},
+}
+def parse_query(query: str) -> Tuple[QueryType, str]:
+    """Split a prefixed query string into ``(QueryType, expression)``.
+    Recognised prefixes are ``regex:``, ``json:``, ``xpath:`` and ``css:``;
+    the prefix is removed from the returned expression. A query without a
+    recognised prefix is returned whole and treated as Regex.
+    """
+    prefixes = (
+        ("regex:", QueryType.REGEX),
+        ("json:", QueryType.JSON),
+        ("xpath:", QueryType.XPATH),
+        ("css:", QueryType.CSS),
+    )
+    for prefix, query_type in prefixes:
+        if query.startswith(prefix):
+            return query_type, query[len(prefix):]
+    # No recognised prefix: default to regex.
+    return QueryType.REGEX, query
+def is_query_compatible(query_type: QueryType, content_type: ContentType) -> bool:
+    """Tell whether *query_type* may be run against *content_type*."""
+    allowed = _COMPAT[query_type]
+    return content_type in allowed

chadselect/engine/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Engine modules — one per query type."""

chadselect/engine/css.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""CSS selector engine — powered by selectolax (lexbor).
+Includes custom text pseudo-selectors that mirror the Rust crate:
+- ``:has-text('...')``  — select elements whose subtree *contains* the text
+- ``:contains-text('...')`` — select elements whose *own* text contains the substring
+- ``:text-equals('...')``  — select elements whose text *equals* the argument
+- ``:text-starts('...')``  — select elements whose text *starts with* the argument
+- ``:text-ends('...')``    — select elements whose text *ends with* the argument
+"""
+from __future__ import annotations
+import logging
+import re as _re
+from typing import List, Optional, Tuple
+from selectolax.parser import HTMLParser, Node
+from chadselect._functions import split_functions, parse_and_apply
+logger = logging.getLogger(__name__)
+# ── pseudo-selector regex ────────────────────────────────────────────────────
+# Matches  :pseudo-name('argument')  (with optional trailing selectors)
+# The argument may be wrapped in single or double quotes, but it cannot itself
+# contain either quote character: the ``[^'\"]*`` class stops at the first one.
+_PSEUDO_RE = _re.compile(
+    r":(?P<pseudo>has-text|contains-text|text-equals|text-starts|text-ends)"
+    r"\(['\"](?P<arg>[^'\"]*)['\"]\)"
+)
+def _extract_pseudo(selector: str) -> Tuple[str, Optional[str], Optional[str], str]:
+    """Break a selector into (base_selector, pseudo_name, pseudo_arg, trailing).
+    Only the first custom pseudo-selector found is extracted. Without one the
+    result is ``(selector, None, None, "")``.
+    """
+    found = _PSEUDO_RE.search(selector)
+    if found is None:
+        return selector, None, None, ""
+    before, after = selector[: found.start()], selector[found.end() :]
+    # ``after`` is whatever follows the pseudo call, e.g. " .value".
+    return before, found.group("pseudo"), found.group("arg"), after.strip()
+def _node_text(node: Node) -> str:
+    """Return the node's text as given by ``node.text(strip=True)``, stripped.
+    A falsy result from ``node.text`` becomes ``""``.
+    """
+    raw = node.text(strip=True)
+    if not raw:
+        return ""
+    return raw.strip()
+def _matches_pseudo(node: Node, pseudo: str, arg: str) -> bool:
+    """Evaluate one custom text pseudo-selector against *node*.
+    Args:
+        node: The candidate element.
+        pseudo: One of ``has-text``, ``contains-text``, ``text-equals``,
+            ``text-starts`` or ``text-ends``.
+        arg: The text argument given to the pseudo-selector.
+    Returns:
+        ``True`` when the node satisfies the pseudo-selector; ``False`` for
+        a non-match or an unrecognised pseudo name.
+    """
+    if pseudo == "contains-text":
+        # Own text only: deep=False leaves out text that belongs to
+        # descendant elements, which is what separates this from has-text.
+        own_text = (node.text(deep=False, strip=True) or "").strip()
+        return arg in own_text
+    text = _node_text(node)
+    if pseudo == "has-text":
+        # Subtree text: the node's own text plus that of its descendants.
+        return arg in text
+    if pseudo == "text-equals":
+        return text == arg
+    if pseudo == "text-starts":
+        return text.startswith(arg)
+    if pseudo == "text-ends":
+        return text.endswith(arg)
+    return False
+def process(selector_with_functions: str, html: str) -> List[str]:
+    """Run a CSS selector against HTML content, with optional ``>>`` functions.
+    Args:
+        selector_with_functions: A CSS selector, optionally containing one
+            custom text pseudo-selector and followed by a ``>>`` chain.
+        html: The HTML document to search.
+    Returns:
+        The non-empty text of each matched node, or — when the chain contains
+        ``get-attr('name')`` — each node's non-empty value of that attribute,
+        after the remaining chain has been applied. A selector the parser
+        rejects is logged and yields ``[]``.
+    """
+    selector, func_chain = split_functions(selector_with_functions)
+    # get-attr changes WHAT is extracted from each node, so it is pulled out
+    # of the chain here rather than applied as a text function.
+    attr_name = _extract_get_attr(func_chain)
+    if attr_name:
+        func_chain = _remove_get_attr(func_chain)
+    tree = HTMLParser(html)
+    # ── custom pseudo-selector handling ──────────────────────────────────
+    base, pseudo, pseudo_arg, trailing = _extract_pseudo(selector)
+    if pseudo:
+        try:
+            if base:
+                candidates = tree.css(base)
+            else:
+                # No base selector: test <body> itself. Guard against a
+                # missing body so the pseudo check never receives None.
+                body = tree.body
+                candidates = [body] if body is not None else []
+        except Exception as e:
+            logger.warning("CSS selector failed for '%s': %s", base, e)
+            return []
+        matched_nodes: List[Node] = []
+        for node in candidates:
+            if not _matches_pseudo(node, pseudo, pseudo_arg):  # type: ignore[arg-type]
+                continue
+            if not trailing:
+                matched_nodes.append(node)
+                continue
+            # e.g. ":has-text('Exterior:') .value" — query inside the match
+            try:
+                matched_nodes.extend(node.css(trailing))
+            except Exception as e:
+                # Best effort: skip this node, but leave a trace in the log.
+                logger.warning("CSS selector failed for '%s': %s", trailing, e)
+        nodes = matched_nodes
+    else:
+        try:
+            nodes = tree.css(selector)
+        except Exception as e:
+            logger.warning("CSS selector failed for '%s': %s", selector, e)
+            return []
+    # ── extract text / attributes ────────────────────────────────────────
+    results: List[str] = []
+    for node in nodes:
+        if attr_name:
+            val = node.attributes.get(attr_name, "")
+            if val:
+                results.append(val)
+        else:
+            text = node.text(strip=True)
+            if text:
+                results.append(text)
+    if func_chain.strip():
+        results = parse_and_apply(results, func_chain)
+    return results
+def _extract_get_attr(func_chain: str) -> str | None:
+    """Return the attribute named by a ``get-attr('name')`` call, if any.
+    Yields ``None`` when *func_chain* contains no such call.
+    """
+    found = _re.search(r"get-attr\(['\"](\w[\w-]*?)['\"]\)", func_chain)
+    if found is None:
+        return None
+    return found.group(1)
+def _remove_get_attr(func_chain: str) -> str:
+    """Return *func_chain* with its ``get-attr(...)`` call taken out."""
+    without_call = _re.sub(
+        r"\s*get-attr\(['\"][\w-]+?['\"]\)\s*", " ", func_chain
+    )
+    # Removing the call can leave pipes back to back; fold each run into one.
+    collapsed = _re.sub(r"(>>\s*)+", ">> ", without_call)
+    # str.strip takes a set of characters, so ">" trims a pipe left dangling
+    # at either end of the chain.
+    return collapsed.strip().strip(">").strip()

chadselect/engine/json.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""JMESPath engine — powered by the ``jmespath`` library."""
+from __future__ import annotations
+import json
+import logging
+from typing import List
+import jmespath
+from chadselect._functions import split_functions, parse_and_apply
+logger = logging.getLogger(__name__)
+def process(jmespath_with_functions: str, raw_json: str) -> List[str]:
+    """Evaluate a JMESPath expression against a JSON document.
+    The search result is converted to a list of strings and then passed
+    through any ``>>`` function chain. Content that is not valid JSON, or an
+    expression the JMESPath library rejects, is logged and yields ``[]``.
+    """
+    expr, func_chain = split_functions(jmespath_with_functions)
+    try:
+        document = json.loads(raw_json)
+    except json.JSONDecodeError as e:
+        logger.warning("Invalid JSON content: %s", e)
+        return []
+    try:
+        found = jmespath.search(expr, document)
+    except Exception as e:
+        logger.warning("JMESPath failed for '%s': %s", expr, e)
+        return []
+    values = _to_string_list(found)
+    if not func_chain.strip():
+        return values
+    return parse_and_apply(values, func_chain)
+def _to_string_list(value) -> List[str]:
+    """Turn a JMESPath result into a list of non-empty strings.
+    ``None`` gives ``[]``. A list is converted item by item; anything else is
+    treated as a single item. Items that stringify to ``""`` are left out.
+    """
+    if value is None:
+        return []
+    items = value if isinstance(value, list) else [value]
+    return [text for text in map(_stringify, items) if text]
+def _stringify(value) -> str:
+    """Convert a single JMESPath result value to a string.
+    ``None`` becomes ``""``, booleans become ``"true"`` / ``"false"``, dicts
+    and lists become compact JSON, and everything else goes through ``str``.
+    """
+    if value is None:
+        return ""
+    # bool is tested before the generic fallback so the result uses JSON
+    # spelling ("true") rather than Python's ("True").
+    if isinstance(value, bool):
+        return str(value).lower()
+    if isinstance(value, (dict, list)):
+        # ensure_ascii=False keeps non-ASCII text readable, consistent with
+        # plain string values, which are returned without escaping.
+        return json.dumps(value, separators=(",", ":"), ensure_ascii=False)
+    return str(value)

chadselect/engine/regex.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""Regex engine — powered by the ``re`` stdlib module."""
+from __future__ import annotations
+import logging
+import re
+from typing import List
+from chadselect._functions import split_functions, parse_and_apply
+logger = logging.getLogger(__name__)
+def process(pattern_with_functions: str, content: str) -> List[str]:
+    """Search *content* with a regex and return what it matched.
+    - A pattern with capture groups yields every group that took part in a
+      match, in match order.
+    - A pattern without capture groups yields the full match strings.
+    Values are stripped, blank ones are dropped, and any ``>>`` function
+    chain is applied last. A pattern that fails to compile is logged and
+    yields ``[]``.
+    """
+    pattern_str, func_chain = split_functions(pattern_with_functions)
+    try:
+        compiled = re.compile(pattern_str)
+    except re.error as e:
+        logger.warning("Invalid regex '%s': %s", pattern_str, e)
+        return []
+    if compiled.groups:
+        # Groups that did not take part in a match are None and are skipped.
+        found: List[str] = []
+        for match in compiled.finditer(content):
+            found.extend(g for g in match.groups() if g is not None)
+    else:
+        # Without groups, findall returns the full match strings.
+        found = compiled.findall(content)
+    stripped = (value.strip() for value in found)
+    results = [value for value in stripped if value]
+    if not func_chain.strip():
+        return results
+    return parse_and_apply(results, func_chain)

chadselect/engine/xpath.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""XPath 1.0 engine — powered by lxml (libxml2)."""
+from __future__ import annotations
+import logging
+from typing import List
+from lxml import html as lxml_html
+from chadselect._functions import split_functions, parse_and_apply
+logger = logging.getLogger(__name__)
+def process(xpath_with_functions: str, content: str) -> List[str]:
+    """Run an XPath 1.0 expression against HTML/text content.
+    Args:
+        xpath_with_functions: The XPath expression, optionally followed by a
+            ``>>`` function chain.
+        content: The markup to parse.
+    Returns:
+        One stripped, non-empty string per result: the text content of
+        elements, or the string form of other values. Booleans are rendered
+        as ``"true"`` / ``"false"`` and whole numbers without a fractional
+        part. Parse or evaluation failures are logged and yield ``[]``.
+    """
+    xpath_expr, func_chain = split_functions(xpath_with_functions)
+    try:
+        tree = lxml_html.fromstring(content)
+        raw = tree.xpath(xpath_expr)
+    except Exception as e:
+        logger.warning("XPath failed for '%s': %s", xpath_expr, e)
+        return []
+    # XPath can evaluate to a scalar instead of a list of nodes: a string
+    # (normalize-space(), string()), a float (count(), sum()) or a bool.
+    # Wrap any of those so the loop below never iterates a non-list.
+    if not isinstance(raw, list):
+        raw = [raw]
+    results: List[str] = []
+    for item in raw:
+        if hasattr(item, "text_content"):
+            # It's an Element
+            text = item.text_content().strip()
+        elif isinstance(item, bool):
+            # Spelled the way XPath's own string() renders booleans.
+            text = "true" if item else "false"
+        elif isinstance(item, float) and item.is_integer():
+            # count() and friends give 3.0; report it as "3".
+            text = str(int(item))
+        else:
+            # It's a string (text node or attribute) or a non-integral number
+            text = str(item).strip()
+        if text:
+            results.append(text)
+    if func_chain.strip():
+        results = parse_and_apply(results, func_chain)
+    return results

chadselect/py.typed ADDED Viewed

@@ -0,0 +1,23 @@
+from typing import Callable, List, Sequence, Tuple
+# Typing stub for the public ChadSelect class: signatures only, every body
+# is elided with ``...``.
+# NOTE(review): PEP 561 treats py.typed as a marker file and stub
+# declarations normally ship in a .pyi — confirm type checkers read these.
+class ChadSelect:
+    def __init__(self) -> None: ...
+    # Content loading and bookkeeping.
+    def add_text(self, content: str) -> None: ...
+    def add_html(self, content: str) -> None: ...
+    def add_json(self, content: str) -> None: ...
+    def content_count(self) -> int: ...
+    def clear(self) -> None: ...
+    # Single-query entry points.
+    def query(self, index: int, query_str: str) -> List[str]: ...
+    def select(self, index: int, query_str: str) -> str: ...
+    def select_where(self, index: int, query_str: str, valid: Callable[[str], bool]) -> str: ...
+    # Multi-query entry points; each query is an (index, query_str) pair.
+    def select_first(self, queries: Sequence[Tuple[int, str]]) -> List[str]: ...
+    def select_first_where(self, queries: Sequence[Tuple[int, str]], valid: Callable[[str], bool]) -> List[str]: ...
+    def select_many(self, queries: Sequence[Tuple[int, str]]) -> List[str]: ...
+    def select_many_where(self, queries: Sequence[Tuple[int, str]], valid: Callable[[str], bool]) -> List[str]: ...
+    def query_batch(self, queries: Sequence[Tuple[int, str]]) -> List[List[str]]: ...
+    def __repr__(self) -> str: ...
+    def __len__(self) -> int: ...
+FUNCTION_PIPE: str
+__version__: str
+__all__: List[str]

chadselect-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,113 @@
+Metadata-Version: 2.4
+Name: chadselect
+Version: 0.2.0
+Summary: Unified data extraction — CSS, XPath, Regex, and JMESPath behind one query interface.
+Project-URL: Homepage, https://github.com/markjacksoncerberus/chadselect
+Project-URL: Repository, https://github.com/markjacksoncerberus/chadselect
+Project-URL: Issues, https://github.com/markjacksoncerberus/chadselect/issues
+Author: Mark Jackson
+License: MIT
+Keywords: css,extraction,jmespath,parsing,regex,scraping,xpath
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Programming Language :: Python :: 3.15
+Classifier: Topic :: Text Processing :: Markup :: HTML
+Classifier: Topic :: Text Processing :: Markup :: XML
+Classifier: Typing :: Typed
+Requires-Python: >=3.9
+Requires-Dist: jmespath>=1.0
+Requires-Dist: lxml>=5.0
+Requires-Dist: selectolax>=0.3.21
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
+Requires-Dist: pytest>=7.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# ChadSelect
+Unified data extraction — CSS, XPath, Regex, and JMESPath behind one query interface.
+```python
+from chadselect import ChadSelect
+cs = ChadSelect()
+cs.add_html(html)
+cs.add_json(json_str)
+# One syntax, four engines
+title = cs.select(0, "css:h1.title")
+author = cs.select(0, "xpath://span[@class='author']/text()")
+vin = cs.select(0, r"regex:[A-HJ-NPR-Z0-9]{17}")
+name = cs.select(0, "json:data.products[0].name")
+# Function piping
+clean = cs.select(0, "css:.price >> trim >> uppercase()")
+```
+## Install
+```bash
+pip install chadselect
+```
+## Query Syntax
+Queries use an `engine:expression` prefix:
+| Prefix | Engine | Best For |
+|--------|--------|----------|
+| `css:` | CSS Selectors (selectolax) | HTML element selection |
+| `xpath:` | XPath 1.0 (lxml) | Complex HTML/XML traversal |
+| `regex:` | Regular Expressions (re) | Pattern matching on raw text |
+| `json:` | JMESPath (jmespath) | JSON field extraction |
+No prefix defaults to regex.
+## Function Piping
+Chain text transformations with `>>`:
+```python
+cs.select(0, "css:.price >> trim >> substring-after('$') >> uppercase()")
+```
+Available functions: `trim`, `uppercase()`, `lowercase()`, `normalize-space()`,
+`substring-after('delim')`, `substring-before('delim')`, `substring(start, len)`,
+`replace('old', 'new')`, `get-attr('name')`.
+## API
+```python
+cs = ChadSelect()
+# Load content
+cs.add_html(html_string)
+cs.add_json(json_string)
+cs.add_text(plain_text)
+# Query (index: -1 = all matches, N >= 0 = the match at position N, so 0 = first)
+results = cs.query(-1, "css:.price")          # List[str] — all matches
+value = cs.select(0, "css:.price")            # str — first match or ""
+# Multi-query
+first_hit = cs.select_first([(0, "css:#id"), (0, "xpath://fallback")])
+combined = cs.select_many([(-1, "css:.a"), (-1, "css:.b")])
+# Batch (fastest for many fields)
+results = cs.query_batch([(-1, "css:.title"), (-1, "json:data.name")])
+# With validators
+results = cs.select_where(0, "css:.vin", lambda v: len(v) == 17)
+```
+## License
+MIT

chadselect-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+chadselect/__init__.py,sha256=EOImbkqxe148OUS8ZKHAlAnw0zsXyh5vCUjJcJjmJfc,989
+chadselect/_chadselect.py,sha256=sZ7YJnPt7WDogzVTJPLI3FIjThd4Ge5ZjELeP7IIJys,7571
+chadselect/_functions.py,sha256=JeM5a8ZAkoDEOrLilP_ub90eF8fUP-q9n4dgUX_lDcQ,4149
+chadselect/_query.py,sha256=7nNtkcArP8_SAYtkKod-Jrop_9as8RhDIofdwRSROW4,2032
+chadselect/py.typed,sha256=oodZR5L_XXc9i-XuKipIh9EKO__Be3NSzuJcaHrzj_o,1158
+chadselect/engine/__init__.py,sha256=iJuOnTANwq4jWk6lNbjmvRrGYRXgcJYnXsVhDdgMTHY,45
+chadselect/engine/css.py,sha256=Xs_5T9WfIxSpyD1eD458g6g4Te3E7bfmpnmYlEh6hFA,4995
+chadselect/engine/json.py,sha256=ZSx8j2t7n-QJgvslaKwqBAo7TyxAHLcfs04NqoZSS3k,1702
+chadselect/engine/regex.py,sha256=wL7FaX0-3Q7Ib7ZR-8lhfdulPnPxGvQo4A-mbjNPJtc,1363
+chadselect/engine/xpath.py,sha256=QHJvs0xxprJg_htyBTvsBurnY8TXzemav03aaZAFH0w,1338
+chadselect-0.2.0.dist-info/METADATA,sha256=VFdzqtfF5ch6l3erISQHLLeJ_08_Pt0cSY_w6L_a0pk,3411
+chadselect-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+chadselect-0.2.0.dist-info/RECORD,,

chadselect-0.2.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.28.0
+Root-Is-Purelib: true
+Tag: py3-none-any