PyPI - aix - Versions diffs - 0.0.22__py3-none-any.whl → 0.0.23__py3-none-any.whl - Mend

aix 0.0.22__py3-none-any.whl → 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

aix/contexts.py CHANGED Viewed

@@ -548,6 +548,8 @@ def notebook_to_markdown(
     return target_file
+dflt_converters["ipynb"] = notebook_to_markdown
 # --------------------------------------------------------------------------------------
 # Download articles from a markdown string and save them as PDF files
@@ -559,11 +561,123 @@ def notebook_to_markdown(
 import os
 import re
+from typing import Callable, Iterator, Pattern, Tuple, Optional
 import requests
 DFLT_SAVE_DIR = os.path.expanduser("~/Downloads")
+def extract_urls(
+    markdown: str,
+    pattern: Optional[Pattern] = None,
+    extractor: Optional[Callable[[re.Match], Tuple[str, str]]] = None,
+) -> Iterator[Tuple[str, str]]:
+    """
+    Extract URLs and their context from a markdown string.
+    Args:
+        markdown: The markdown string to process
+        pattern: A compiled regex pattern to match URLs and their context
+                 Defaults to matching markdown hyperlinks [context](url)
+        extractor: A function that extracts (context, url) from a match
+                  Defaults to extracting from markdown hyperlinks
+    Returns:
+        Iterator of (context, url) pairs
+    >>> text = "[Google](https://google.com) and [GitHub](https://github.com)"
+    >>> list(extract_urls(text))
+    [('Google', 'https://google.com'), ('GitHub', 'https://github.com')]
+    """
+    if pattern is None:
+        # Default pattern matches markdown hyperlinks: [context](url)
+        pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+    if extractor is None:
+        # Default extractor for markdown hyperlinks
+        def extractor(match: re.Match) -> Tuple[str, str]:
+            return match.group(1), match.group(2)
+    for match in pattern.finditer(markdown):
+        yield extractor(match)
+# Example alternative patterns and extractors
+def extract_with_surrounding_context(
+    markdown: str, context_chars: int = 30
+) -> Iterator[Tuple[str, str]]:
+    """
+    Extract URLs with surrounding text as context.
+    Args:
+        markdown: The markdown string to process
+        context_chars: Number of characters to include before and after URL
+    Returns:
+        Iterator of (context, url) pairs
+    """
+    # Pattern to match URLs with a simple validation
+    pattern = re.compile(r"https?://[^\s]+")
+    def surrounding_context_extractor(match: re.Match) -> Tuple[str, str]:
+        url = match.group(0)
+        start = max(0, match.start() - context_chars)
+        end = min(len(markdown), match.end() + context_chars)
+        context = markdown[start:end].strip()
+        return context, url
+    return extract_urls(markdown, pattern, surrounding_context_extractor)
+def extract_urls_only(markdown: str) -> Iterator[Tuple[str, str]]:
+    """
+    Extract URLs with empty context.
+    Args:
+        markdown: The markdown string to process
+    Returns:
+        Iterator of (empty_context, url) pairs
+    """
+    # More comprehensive URL pattern
+    pattern = re.compile(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+(?:/[^\s]*)?")
+    def url_only_extractor(match: re.Match) -> Tuple[str, str]:
+        url = match.group(0)
+        return "", url
+    return extract_urls(markdown, pattern, url_only_extractor)
+def extract_html_links(markdown: str) -> Iterator[Tuple[str, str]]:
+    """
+    Extract URLs from HTML anchor tags.
+    Args:
+        markdown: The markdown or HTML string to process
+    Returns:
+        Iterator of (anchor_text, url) pairs
+    """
+    # Simple pattern for HTML anchor tags
+    pattern = re.compile(r'<a\s+(?:[^>]*?\s+)?href="([^"]*)"[^>]*>(.*?)</a>')
+    def html_link_extractor(match: re.Match) -> Tuple[str, str]:
+        # Note the order is reversed in HTML: href first, then text
+        return match.group(2), match.group(1)
+    return extract_urls(markdown, pattern, html_link_extractor)
+extract_urls.with_surrounding_context = extract_with_surrounding_context
+extract_urls.only_urls = extract_urls_only
+extract_urls.html_links = extract_html_links
+DFLT_SAVE_DIR = os.path.expanduser("~/Downloads")
 def download_articles(
     md_string: str,
     save_dir: str = DFLT_SAVE_DIR,

{aix-0.0.22.dist-info → aix-0.0.23.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: aix
-Version: 0.0.22
+Version: 0.0.23
 Summary: Artificial Intelligence eXtensions
 Home-page: https://github.com/thorwhalen/aix
 Author: Thor Whalen

{aix-0.0.22.dist-info → aix-0.0.23.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 aix/__init__.py,sha256=5_Ktv6lJbdwGf6c94mLYGi00zMTn2HkrUOvHAawhr_4,1921
-aix/contexts.py,sha256=_SPPR1oMa59nh-BV1jdKtY27UdAqDxiSL65yps5Opkk,33942
+aix/contexts.py,sha256=uLjAkIlumPrBECEOfTrFrvNsjBIFJFppNVZ__Pe3aBE,37576
 aix/misc.py,sha256=evC4FqE63z_gnZ_4vCLsfKZkksuPBDlfK0fI8jHEbGg,204
 aix/np.py,sha256=D6uTumkK5Y9kB_XbSqtMzzBsnuai9WZWLVa6-sWybls,194
 aix/pd.py,sha256=LqJ13OEOox6K6vs9hMYkhBRgCu0EMPiYSnd2no4RdDc,197
@@ -9,8 +9,8 @@ aix/util.py,sha256=d0VjSbpTNzjGFH_upNOnaUnrRawrVbXdlFBan1Q9CRo,107
 aix/gen_ai/__init__.py,sha256=ky5WRID0rIb8KLxtulB9t2CN_GKUxu1KdiRN-n92q2U,2341
 aix/gen_ai/google_genai.py,sha256=KRYc52DQtn-V5vycULyoNpoHcR3lcBt1Z0DHj0XYcuI,966
 aix/gen_ai/openai_genai.py,sha256=RzJy7pIu4dngUThEJdALqZpexHK_quDkug-SjAXm41E,539
-aix-0.0.22.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-aix-0.0.22.dist-info/METADATA,sha256=1k3kyzwTw_iZj7fqnwj1SZRbQhgto3rKAWFGJmLqIHs,6010
-aix-0.0.22.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-aix-0.0.22.dist-info/top_level.txt,sha256=JV67V91ws1X6NwMtcBSxqB7HJx0xOuo_of1K7yg33Z0,4
-aix-0.0.22.dist-info/RECORD,,
+aix-0.0.23.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+aix-0.0.23.dist-info/METADATA,sha256=Ff5jkUtLowI3yTkaVT8tbzyet-cDeuVSfda_KyfWs_U,6010
+aix-0.0.23.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+aix-0.0.23.dist-info/top_level.txt,sha256=JV67V91ws1X6NwMtcBSxqB7HJx0xOuo_of1K7yg33Z0,4
+aix-0.0.23.dist-info/RECORD,,

{aix-0.0.22.dist-info → aix-0.0.23.dist-info}/LICENSE RENAMED Viewed

File without changes

{aix-0.0.22.dist-info → aix-0.0.23.dist-info}/WHEEL RENAMED Viewed

File without changes

{aix-0.0.22.dist-info → aix-0.0.23.dist-info}/top_level.txt RENAMED Viewed

File without changes

aix 0.0.22__py3-none-any.whl → 0.0.23__py3-none-any.whl

aix 0.0.22__py3-none-any.whl → 0.0.23__py3-none-any.whl