instapaper-scraper 1.1.0rc1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- instapaper_scraper/api.py +75 -20
- instapaper_scraper/auth.py +5 -4
- instapaper_scraper/cli.py +36 -11
- instapaper_scraper/constants.py +1 -0
- instapaper_scraper/output.py +69 -17
- {instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/METADATA +156 -65
- instapaper_scraper-1.2.0.dist-info/RECORD +13 -0
- {instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/WHEEL +1 -1
- instapaper_scraper-1.1.0rc1.dist-info/RECORD +0 -13
- {instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/entry_points.txt +0 -0
- {instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/top_level.txt +0 -0
instapaper_scraper/api.py
CHANGED

@@ -1,13 +1,20 @@
 import os
 import logging
 import time
-from typing import List, Dict, Tuple, Optional
+from typing import List, Dict, Tuple, Optional, Any
 
 import requests
 from bs4 import BeautifulSoup
+from bs4.element import Tag
 
 from .exceptions import ScraperStructureChanged
-from .constants import
+from .constants import (
+    INSTAPAPER_BASE_URL,
+    KEY_ID,
+    KEY_TITLE,
+    KEY_URL,
+    KEY_ARTICLE_PREVIEW,
+)
 
 
 class InstapaperClient:
@@ -33,6 +40,7 @@ class InstapaperClient:
     PAGINATE_OLDER_CLASS = "paginate_older"
     ARTICLE_TITLE_CLASS = "article_title"
     TITLE_META_CLASS = "title_meta"
+    ARTICLE_PREVIEW_CLASS = "article_preview"
 
     # URL paths
     URL_PATH_USER = "/u/"
@@ -101,12 +109,14 @@ class InstapaperClient:
         self,
         page: int = DEFAULT_PAGE_START,
         folder_info: Optional[Dict[str, str]] = None,
+        add_article_preview: bool = False,
     ) -> Tuple[List[Dict[str, str]], bool]:
         """
         Fetches a single page of articles and determines if there are more pages.
         Args:
             page: The page number to fetch.
             folder_info: A dictionary containing 'id' and 'slug' of the folder to fetch articles from.
+            add_article_preview: Whether to include the article preview.
         Returns:
             A tuple containing:
             - A list of article data (dictionaries with id, title, url).
@@ -123,16 +133,32 @@ class InstapaperClient:
         soup = BeautifulSoup(response.text, self.HTML_PARSER)
 
         article_list = soup.find(id=self.ARTICLE_LIST_ID)
-        if not article_list:
+        if not isinstance(article_list, Tag):
             raise ScraperStructureChanged(self.MSG_ARTICLE_LIST_NOT_FOUND)
 
         articles = article_list.find_all(self.ARTICLE_TAG)
-        article_ids = [
-
-
-
+        article_ids = []
+        for article in articles:
+            if not isinstance(article, Tag):
+                continue
+            article_id_val = article.get(KEY_ID)
+
+            # Ensure article_id_val is a string before calling replace
+            # If it's a list, take the first element. This is a pragmatic
+            # approach since 'id' attributes should ideally be unique strings.
+            if isinstance(article_id_val, list):
+                article_id_val = article_id_val[0] if article_id_val else None
+
+            if isinstance(article_id_val, str) and article_id_val.startswith(
+                self.ARTICLE_ID_PREFIX
+            ):
+                article_ids.append(
+                    article_id_val.replace(self.ARTICLE_ID_PREFIX, "")
+                )
 
-        data = self._parse_article_data(
+        data = self._parse_article_data(
+            soup, article_ids, page, add_article_preview
+        )
         has_more = soup.find(class_=self.PAGINATE_OLDER_CLASS) is not None
 
         return data, has_more
@@ -170,13 +196,17 @@ class InstapaperClient:
             raise Exception(self.MSG_SCRAPING_FAILED_UNKNOWN)
 
     def get_all_articles(
-        self,
+        self,
+        limit: Optional[int] = None,
+        folder_info: Optional[Dict[str, str]] = None,
+        add_article_preview: bool = False,
     ) -> List[Dict[str, str]]:
         """
         Iterates through pages and fetches articles up to a specified limit.
         Args:
             limit: The maximum number of pages to scrape. If None, scrapes all pages.
             folder_info: A dictionary containing 'id' and 'slug' of the folder to fetch articles from.
+            add_article_preview: Whether to include the article preview.
         """
         all_articles = []
         page = self.DEFAULT_PAGE_START
@@ -187,7 +217,11 @@ class InstapaperClient:
                 break
 
             logging.info(self.MSG_SCRAPING_PAGE.format(page=page))
-            data, has_more = self.get_articles(
+            data, has_more = self.get_articles(
+                page=page,
+                folder_info=folder_info,
+                add_article_preview=add_article_preview,
+            )
             if data:
                 all_articles.extend(data)
             page += 1
@@ -202,15 +236,19 @@ class InstapaperClient:
         return f"{INSTAPAPER_BASE_URL}{self.URL_PATH_USER}{page}"
 
     def _parse_article_data(
-        self,
-
+        self,
+        soup: BeautifulSoup,
+        article_ids: List[str],
+        page: int,
+        add_article_preview: bool = False,
+    ) -> List[Dict[str, Any]]:
         """Parses the raw HTML to extract structured data for each article."""
         data = []
         for article_id in article_ids:
             article_id_full = f"{self.ARTICLE_ID_PREFIX}{article_id}"
             article_element = soup.find(id=article_id_full)
             try:
-                if not article_element:
+                if not isinstance(article_element, Tag):
                     raise AttributeError(
                         self.MSG_ARTICLE_ELEMENT_NOT_FOUND.format(
                             article_id_full=article_id_full
@@ -218,18 +256,35 @@ class InstapaperClient:
                 )
 
                 title_element = article_element.find(class_=self.ARTICLE_TITLE_CLASS)
-                if not title_element:
+                if not isinstance(title_element, Tag):
                     raise AttributeError(self.MSG_TITLE_ELEMENT_NOT_FOUND)
                 title = title_element.get_text().strip()
 
-
-
-
-
+                meta_element = article_element.find(class_=self.TITLE_META_CLASS)
+                if not isinstance(meta_element, Tag):
+                    raise AttributeError(self.MSG_LINK_ELEMENT_NOT_FOUND)
+
+                link_element = meta_element.find("a")
+                if (
+                    not isinstance(link_element, Tag)
+                    or "href" not in link_element.attrs
+                ):
                     raise AttributeError(self.MSG_LINK_ELEMENT_NOT_FOUND)
                 link = link_element["href"]
 
-
+                article_data = {KEY_ID: article_id, KEY_TITLE: title, KEY_URL: link}
+
+                if add_article_preview:
+                    preview_element = article_element.find(
+                        class_=self.ARTICLE_PREVIEW_CLASS
+                    )
+                    article_data[KEY_ARTICLE_PREVIEW] = (
+                        preview_element.get_text().strip()
+                        if isinstance(preview_element, Tag)
+                        else ""
+                    )
+
+                data.append(article_data)
             except AttributeError as e:
                 logging.warning(
                     self.MSG_PARSE_ARTICLE_WARNING.format(
@@ -281,7 +336,7 @@ class InstapaperClient:
             )
             return False
 
-    def _wait_for_retry(self, attempt: int, reason: str):
+    def _wait_for_retry(self, attempt: int, reason: str) -> None:
         """Calculates and waits for an exponential backoff period."""
         sleep_time = self.backoff_factor * (2**attempt)
         logging.warning(
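Most of the api.py churn swaps truthiness checks for `isinstance(..., Tag)` guards. That is the standard pattern for passing mypy with `types-beautifulsoup4`: `find()` is annotated as returning `Tag | NavigableString | None`, and `Tag.get()` as returning `str | list[str] | None`, so neither result can be used without narrowing first. A minimal sketch of the pattern the diff applies throughout (the HTML snippet is illustrative, not Instapaper's actual markup):

```python
from bs4 import BeautifulSoup
from bs4.element import Tag

html = '<div id="article_999901234"><a class="article_title" href="/read/999901234">t</a></div>'
soup = BeautifulSoup(html, "html.parser")

element = soup.find(id="article_999901234")
# A plain `if not element:` does not narrow Tag | NavigableString | None
# for the type checker; isinstance() does, unlocking .get() and .find().
if isinstance(element, Tag):
    id_val = element.get("id")  # annotated as str | list[str] | None
    if isinstance(id_val, list):  # multi-valued attributes arrive as lists
        id_val = id_val[0] if id_val else None
    if isinstance(id_val, str) and id_val.startswith("article_"):
        print(id_val.replace("article_", ""))  # -> 999901234
```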
instapaper_scraper/auth.py
CHANGED

@@ -3,7 +3,7 @@ import getpass
 import logging
 import stat
 from pathlib import Path
-from typing import Union
+from typing import Union, Optional
 
 from cryptography.fernet import Fernet
 import requests
@@ -67,11 +67,12 @@ class InstapaperAuthenticator:
         session: requests.Session,
         session_file: Union[str, Path],
         key_file: Union[str, Path],
-        username: str = None,
-        password: str = None,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
     ):
         self.session = session
         self.session_file = Path(session_file)
+        self.key_file = Path(key_file)
         self.key = get_encryption_key(key_file)
         self.fernet = Fernet(self.key)
         self.username = username
@@ -175,7 +176,7 @@ class InstapaperAuthenticator:
         logging.error(self.LOG_LOGIN_FAILED)
         return False
 
-    def _save_session(self):
+    def _save_session(self) -> None:
         """Saves the current session cookies to an encrypted file."""
         required_cookies = self.REQUIRED_COOKIES
         cookies_to_save = [
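The `username: str = None` → `username: Optional[str] = None` change reflects the retirement of implicit Optional: PEP 484 deprecated that shorthand, and mypy rejects it under its default configuration (since release 0.990). A quick illustration, not code from the package:

```python
from typing import Optional

# Flagged by default-configuration mypy (implicit Optional is disallowed):
#     def greet(name: str = None) -> str: ...

def greet(name: Optional[str] = None) -> str:
    # The explicit Optional makes the None branch visible to the checker.
    return f"Hello, {name}" if name is not None else "Hello, stranger"

print(greet())       # Hello, stranger
print(greet("Ada"))  # Hello, Ada
```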
instapaper_scraper/cli.py
CHANGED

@@ -3,7 +3,7 @@ import logging
 import argparse
 import requests
 from pathlib import Path
-from typing import Union
+from typing import Union, List, Dict, Any, Optional, cast
 
 if sys.version_info >= (3, 11):
     import tomllib
@@ -39,7 +39,7 @@ def _resolve_path(
     return user_dir_filename
 
 
-def load_config(config_path_str: Union[str, None] = None) -> Union[dict, None]:
+def load_config(config_path_str: Union[str, None] = None) -> Optional[Dict[str, Any]]:
     """
     Loads configuration from a TOML file.
     It checks the provided path, then config.toml in the project root,
@@ -50,7 +50,7 @@ def load_config(config_path_str: Union[str, None] = None) -> Union[dict, None]:
         CONFIG_DIR / CONFIG_FILENAME,
     ]
 
-    paths_to_check = []
+    paths_to_check: List[Path] = []
     if config_path_str:
         paths_to_check.insert(0, Path(config_path_str).expanduser())
     paths_to_check.extend(default_paths)
@@ -60,7 +60,7 @@ def load_config(config_path_str: Union[str, None] = None) -> Union[dict, None]:
     try:
         with open(path, "rb") as f:
             logging.info(f"Loading configuration from {path}")
-            return tomllib.load(f)
+            return cast(Dict[str, Any], tomllib.load(f))
     except tomllib.TOMLDecodeError as e:
         logging.error(f"Error decoding TOML file at {path}: {e}")
         return None
@@ -68,7 +68,7 @@ def load_config(config_path_str: Union[str, None] = None) -> Union[dict, None]:
     return None
 
 
-def main():
+def main() -> None:
     """
     Main entry point for the Instapaper scraper CLI.
     """
@@ -102,9 +102,18 @@ def main():
     parser.add_argument("--username", help="Instapaper username.")
     parser.add_argument("--password", help="Instapaper password.")
     parser.add_argument(
-        "--
-
-
+        "--read-url",  # New, preferred flag
+        "--add-instapaper-url",  # Old, for backward compatibility
+        dest="add_instapaper_url",
+        action=argparse.BooleanOptionalAction,
+        help="Include the Instapaper read URL. Overrides config.",
+    )
+    parser.add_argument(
+        "--article-preview",  # New, preferred flag
+        "--add-article-preview",  # Old, for backward compatibility
+        dest="add_article_preview",
+        action=argparse.BooleanOptionalAction,
+        help="Include the article preview text. Overrides config.",
     )
     parser.add_argument(
         "--limit",
@@ -120,8 +129,21 @@ def main():
 
     config = load_config(args.config_path)
     folders = config.get("folders", []) if config else []
+    fields_config = config.get("fields", {}) if config else {}
     selected_folder = None
 
+    # Resolve boolean flags, giving CLI priority over config
+    final_add_instapaper_url = (
+        args.add_instapaper_url
+        if args.add_instapaper_url is not None
+        else fields_config.get("read_url", False)
+    )
+    final_add_article_preview = (
+        args.add_article_preview
+        if args.add_article_preview is not None
+        else fields_config.get("article_preview", False)
+    )
+
     if args.folder:
         if args.folder.lower() == "none":
             selected_folder = None
@@ -144,7 +166,7 @@ def main():
         print("  0: none (non-folder mode)")
         for i, folder in enumerate(folders):
             display_name = folder.get("key") or folder.get("slug") or folder.get("id")
-            print(f"  {i+1}: {display_name}")
+            print(f"  {i + 1}: {display_name}")
 
         try:
             choice = int(input("Select a folder (enter a number): "))
@@ -196,7 +218,9 @@ def main():
     try:
         folder_info = selected_folder if selected_folder else None
         all_articles = client.get_all_articles(
-            limit=args.limit,
+            limit=args.limit,
+            folder_info=folder_info,
+            add_article_preview=final_add_article_preview,
        )
     except ScraperStructureChanged as e:
         logging.error(f"Stopping scraper due to an unrecoverable error: {e}")
@@ -214,7 +238,8 @@ def main():
             all_articles,
             args.format,
             output_filename,
-            add_instapaper_url=
+            add_instapaper_url=final_add_instapaper_url,
+            add_article_preview=final_add_article_preview,
         )
         logging.info("Articles scraped and saved successfully.")
     except Exception as e:
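The new flag handling leans on `argparse.BooleanOptionalAction` (available since Python 3.9, matching the package's floor): it registers a `--no-` variant for every alias and, with no explicit default, leaves the destination as `None` when neither form is passed. That `None` is what lets the CLI distinguish "unset" from "explicitly disabled" and fall back to `config.toml`. A condensed sketch of the resolution logic:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--read-url",            # preferred spelling
    "--add-instapaper-url",  # legacy alias kept for compatibility
    dest="add_instapaper_url",
    action=argparse.BooleanOptionalAction,  # also registers --no-read-url
)

fields_config = {"read_url": True}  # stand-in for the [fields] table in config.toml

args = parser.parse_args([])  # neither --read-url nor --no-read-url given
# None means "not set on the command line", so the config value wins;
# an explicit --no-read-url would yield False and override the config.
resolved = (
    args.add_instapaper_url
    if args.add_instapaper_url is not None
    else fields_config.get("read_url", False)
)
print(resolved)  # True, taken from the config
print(parser.parse_args(["--no-read-url"]).add_instapaper_url)  # False
```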
instapaper_scraper/constants.py
CHANGED
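The viewer collapses this one-line change (+1, −0). The new `KEY_ARTICLE_PREVIEW` imports in api.py and output.py, plus the constants.py size change in the RECORD files (403 → 443 bytes, exactly a 40-byte line), strongly suggest what was added — reconstructed here as an assumption, since the hunk itself is not shown:

```python
# Inferred addition; the actual line is collapsed in this diff view.
KEY_ARTICLE_PREVIEW = "article_preview"
```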
instapaper_scraper/output.py
CHANGED

@@ -1,11 +1,14 @@
 import os
-import json
-import sqlite3
 import logging
-import csv
-from typing import List, Dict, Any
+from typing import List, Dict, Any, TYPE_CHECKING
 
-from .constants import
+from .constants import (
+    INSTAPAPER_READ_URL,
+    KEY_ID,
+    KEY_TITLE,
+    KEY_URL,
+    KEY_ARTICLE_PREVIEW,
+)
 
 # Constants for file operations
 JSON_INDENT = 4
@@ -19,8 +22,17 @@ LOG_NO_ARTICLES = "No articles found to save."
 LOG_SAVED_ARTICLES = "Saved {count} articles to {filename}"
 LOG_UNKNOWN_FORMAT = "Unknown output format: {format}"
 
+if TYPE_CHECKING:
+    # Import for type-checking purposes, and use an alias
+    # to signal to linters like ruff that it is being used.
+    import sqlite3 as sqlite3
 
-def get_sqlite_create_table_sql(add_instapaper_url: bool = False) -> str:
+    __all__ = ["sqlite3"]
+
+
+def get_sqlite_create_table_sql(
+    add_instapaper_url: bool = False, add_article_preview: bool = False
+) -> str:
     """Returns the SQL statement to create the articles table."""
     columns = [
         f"{KEY_ID} TEXT PRIMARY KEY",
@@ -28,6 +40,8 @@ def get_sqlite_create_table_sql(add_instapaper_url: bool = False) -> str:
         f"{KEY_URL} TEXT NOT NULL",
     ]
     if add_instapaper_url:
+        import sqlite3
+
         # The GENERATED ALWAYS AS syntax was added in SQLite 3.31.0
         if sqlite3.sqlite_version_info >= (3, 31, 0):
             columns.append(
@@ -36,10 +50,15 @@ def get_sqlite_create_table_sql(add_instapaper_url: bool = False) -> str:
         else:
             columns.append(f"{SQLITE_INSTAPAPER_URL_COL} TEXT")
 
+    if add_article_preview:
+        columns.append(f"{KEY_ARTICLE_PREVIEW} TEXT")
+
     return f"CREATE TABLE IF NOT EXISTS {SQLITE_TABLE_NAME} ({', '.join(columns)})"
 
 
-def get_sqlite_insert_sql(add_instapaper_url_manually: bool = False) -> str:
+def get_sqlite_insert_sql(
+    add_instapaper_url_manually: bool = False, add_article_preview: bool = False
+) -> str:
     """Returns the SQL statement to insert an article."""
     cols = [KEY_ID, KEY_TITLE, KEY_URL]
     placeholders = [f":{KEY_ID}", f":{KEY_TITLE}", f":{KEY_URL}"]
@@ -48,19 +67,30 @@ def get_sqlite_insert_sql(add_instapaper_url_manually: bool = False) -> str:
         cols.append(SQLITE_INSTAPAPER_URL_COL)
         placeholders.append(f":{SQLITE_INSTAPAPER_URL_COL}")
 
+    if add_article_preview:
+        cols.append(KEY_ARTICLE_PREVIEW)
+        placeholders.append(f":{KEY_ARTICLE_PREVIEW}")
+
     return f"INSERT OR REPLACE INTO {SQLITE_TABLE_NAME} ({', '.join(cols)}) VALUES ({', '.join(placeholders)})"
 
 
 def save_to_csv(
-    data: List[Dict[str, Any]],
-
+    data: List[Dict[str, Any]],
+    filename: str,
+    add_instapaper_url: bool = False,
+    add_article_preview: bool = False,
+) -> None:
     """Saves a list of articles to a CSV file."""
+    import csv
+
     os.makedirs(os.path.dirname(filename), exist_ok=True)
     with open(filename, "w", newline="", encoding="utf-8") as f:
         fieldnames = [KEY_ID, KEY_TITLE, KEY_URL]
         if add_instapaper_url:
             # Insert instapaper_url after the id column
             fieldnames.insert(1, SQLITE_INSTAPAPER_URL_COL)
+        if add_article_preview:
+            fieldnames.append(KEY_ARTICLE_PREVIEW)
 
         writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
         writer.writeheader()
@@ -69,8 +99,13 @@ def save_to_csv(
     logging.info(LOG_SAVED_ARTICLES.format(count=len(data), filename=filename))
 
 
-def save_to_json(data: List[Dict[str, Any]], filename: str):
+def save_to_json(
+    data: List[Dict[str, Any]],
+    filename: str,
+) -> None:
     """Saves a list of articles to a JSON file."""
+    import json
+
     os.makedirs(os.path.dirname(filename), exist_ok=True)
     with open(filename, "w", encoding="utf-8") as f:
         json.dump(data, f, indent=JSON_INDENT, ensure_ascii=False)
@@ -78,13 +113,18 @@ def save_to_json(data: List[Dict[str, Any]], filename: str):
 
 
 def save_to_sqlite(
-    data: List[Dict[str, Any]],
-
+    data: List[Dict[str, Any]],
+    db_name: str,
+    add_instapaper_url: bool = False,
+    add_article_preview: bool = False,
+) -> None:
     """Saves a list of articles to a SQLite database."""
+    import sqlite3
+
     os.makedirs(os.path.dirname(db_name), exist_ok=True)
     conn = sqlite3.connect(db_name)
     cursor = conn.cursor()
-    cursor.execute(get_sqlite_create_table_sql(add_instapaper_url))
+    cursor.execute(get_sqlite_create_table_sql(add_instapaper_url, add_article_preview))
 
     # For older SQLite versions, we need to manually add the URL
     manual_insert_required = add_instapaper_url and sqlite3.sqlite_version_info < (
@@ -104,7 +144,8 @@ def save_to_sqlite(
         data_to_insert = data
 
     insert_sql = get_sqlite_insert_sql(
-        add_instapaper_url_manually=manual_insert_required
+        add_instapaper_url_manually=manual_insert_required,
+        add_article_preview=add_article_preview,
     )
     cursor.executemany(insert_sql, data_to_insert)
 
@@ -131,7 +172,8 @@ def save_articles(
     format: str,
     filename: str,
     add_instapaper_url: bool = False,
-
+    add_article_preview: bool = False,
+) -> None:
     """
     Dispatches to the correct save function based on the format.
     """
@@ -152,10 +194,20 @@ def save_articles(
     ]
 
     if format == "csv":
-        save_to_csv(
+        save_to_csv(
+            data,
+            filename=filename,
+            add_instapaper_url=add_instapaper_url,
+            add_article_preview=add_article_preview,
+        )
     elif format == "json":
         save_to_json(data, filename=filename)
     elif format == "sqlite":
-        save_to_sqlite(
+        save_to_sqlite(
+            data,
+            db_name=filename,
+            add_instapaper_url=add_instapaper_url,
+            add_article_preview=add_article_preview,
+        )
     else:
         logging.error(LOG_UNKNOWN_FORMAT.format(format=format))
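Two things in the output.py diff deserve a note. The deferred `import sqlite3` / `import csv` / `import json` statements move stdlib imports into the functions that need them (the `TYPE_CHECKING` block keeps the `sqlite3` name importable for annotations only). And the schema builder branches on `sqlite3.sqlite_version_info` because `GENERATED ALWAYS AS` columns only exist from SQLite 3.31.0. A standalone sketch of that version-gated schema, using illustrative table and column names rather than the package's constants:

```python
import sqlite3

READ_URL = "https://www.instapaper.com/read/"

if sqlite3.sqlite_version_info >= (3, 31, 0):
    # Modern SQLite: derive the column from `id` on the fly; nothing extra
    # is stored and the value can never drift out of sync.
    create = (
        "CREATE TABLE IF NOT EXISTS articles (id TEXT PRIMARY KEY, title TEXT, url TEXT, "
        f"instapaper_url TEXT GENERATED ALWAYS AS ('{READ_URL}' || id) VIRTUAL)"
    )
    insert = "INSERT OR REPLACE INTO articles (id, title, url) VALUES (:id, :title, :url)"
else:
    # Older SQLite: fall back to a plain column that the caller fills in.
    create = (
        "CREATE TABLE IF NOT EXISTS articles "
        "(id TEXT PRIMARY KEY, title TEXT, url TEXT, instapaper_url TEXT)"
    )
    insert = (
        "INSERT OR REPLACE INTO articles (id, title, url, instapaper_url) "
        "VALUES (:id, :title, :url, :instapaper_url)"
    )

conn = sqlite3.connect(":memory:")
conn.execute(create)
row = {"id": "999901234", "title": "Article 1", "url": "https://www.example.com/page-1/"}
if ":instapaper_url" in insert:  # manual-fallback branch only
    row["instapaper_url"] = READ_URL + row["id"]
conn.execute(insert, row)
print(conn.execute("SELECT instapaper_url FROM articles").fetchone()[0])
# -> https://www.instapaper.com/read/999901234
```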
{instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: instapaper-scraper
-Version: 1.1.0rc1
+Version: 1.2.0
 Summary: A tool to scrape articles from Instapaper.
 Project-URL: Homepage, https://github.com/chriskyfung/InstapaperScraper
 Project-URL: Source, https://github.com/chriskyfung/InstapaperScraper
@@ -21,7 +21,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: beautifulsoup4~=4.14.2
-Requires-Dist: certifi
+Requires-Dist: certifi<2026.2.0,>=2025.11.12
 Requires-Dist: charset-normalizer~=3.4.3
 Requires-Dist: cryptography~=46.0.3
 Requires-Dist: guara~=0.0.14
@@ -35,30 +35,58 @@ Requires-Dist: tomli~=2.0.1; python_version < "3.11"
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
-Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: types-requests; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"
 Requires-Dist: requests-mock; extra == "dev"
 Requires-Dist: build; extra == "dev"
 Requires-Dist: twine; extra == "dev"
+Requires-Dist: mypy; extra == "dev"
+Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: licensecheck; extra == "dev"
 Dynamic: license-file
 
 # Instapaper Scraper
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+<!-- Badges -->
+<p align="center">
+  <a href="https://pypi.org/project/instapaper-scraper/">
+    <img src="https://img.shields.io/pypi/v/instapaper-scraper.svg" alt="PyPI version">
+  </a>
+  <a href="https://pepy.tech/projects/instapaper-scraper">
+    <img src="https://static.pepy.tech/personalized-badge/instapaper-scraper?period=total&left_text=downloads" alt="PyPI Downloads">
+  </a>
+  <a href="https://github.com/chriskyfung/InstapaperScraper">
+    <img src="https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fchriskyfung%2FInstapaperScraper%2Frefs%2Fheads%2Fmaster%2Fpyproject.toml" alt="Python Version from PEP 621 TOML">
+  </a>
+  <a href="https://github.com/astral-sh/ruff">
+    <img src="https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fastral-sh%2Fruff%2Fmain%2Fassets%2Fbadge%2Fv2.json" alt="Ruff">
+  </a>
+  <a href="https://codecov.io/gh/chriskyfung/InstapaperScraper">
+    <img src="https://codecov.io/gh/chriskyfung/InstapaperScraper/graph/badge.svg" alt="Code Coverage">
+  </a>
+  <wbr />
+  <a href="https://github.com/chriskyfung/InstapaperScraper/actions/workflows/ci.yml">
+    <img src="https://github.com/chriskyfung/InstapaperScraper/actions/workflows/ci.yml/badge.svg" alt="CI Status">
+  </a>
+  <a href="https://www.gnu.org/licenses/gpl-3.0.en.html">
+    <img src="https://img.shields.io/github/license/chriskyfung/InstapaperScraper" alt="GitHub License">
+  </a>
+</p>
+
+A powerful and reliable Python tool to automate the export of all your saved Instapaper bookmarks into various formats, giving you full ownership of your data.
+
+<!-- Sponsors -->
+<p align="center">
+  <a href="https://github.com/sponsors/chriskyfung" title="Sponsor on GitHub">
+    <img src="https://img.shields.io/badge/Sponsor-GitHub-blue?style=for-the-badge&logo=github-sponsors&colorA=263238&colorB=EC407A" alt="GitHub Sponsors Default">
+  </a>
+  <a href="https://www.buymeacoffee.com/chriskyfung" title="Support Coffee">
+    <img src="https://img.shields.io/badge/Support-Coffee-ffdd00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=ffdd00&colorA=263238" alt="Buy Me A Coffee">
+  </a>
+</p>
+
+## ✨ Features
 
 - Scrapes all bookmarks from your Instapaper account.
 - Supports scraping from specific folders.
@@ -66,13 +94,13 @@ A Python tool to scrape all your saved Instapaper bookmarks and export them to various formats.
 - Securely stores your session for future runs.
 - Modern, modular, and tested architecture.
 
-## Getting Started
+## 🚀 Getting Started
 
-### 1. Requirements
+### 📋 1. Requirements
 
 - Python 3.9+
 
-### 2. Installation
+### 📦 2. Installation
 
 This package is available on PyPI and can be installed with pip:
 
@@ -80,7 +108,7 @@ This package is available on PyPI and can be installed with pip:
 pip install instapaper-scraper
 ```
 
-### 3. Usage
+### 💻 3. Usage
 
 Run the tool from the command line, specifying your desired output format:
 
@@ -95,35 +123,35 @@ instapaper-scraper --format json
 instapaper-scraper --format sqlite --output my_articles.db
 ```
 
-## Configuration
+## ⚙️ Configuration
 
-### Authentication
+### 🔐 Authentication
 
 The script authenticates using one of the following methods, in order of priority:
 
-1.
+1. **Command-line Arguments**: Provide your username and password directly when running the script:
 
    ```sh
   instapaper-scraper --username your_username --password your_password
   ```
 
-2.
+2. **Session Files (`.session_key`, `.instapaper_session`)**: The script attempts to load these files in the following order:
   a. Path specified by `--session-file` or `--key-file` arguments.
   b. Files in the current working directory (e.g., `./.session_key`).
   c. Files in the user's configuration directory (`~/.config/instapaper-scraper/`).
   After the first successful login, the script creates an encrypted `.instapaper_session` file and a `.session_key` file to reuse your session securely.
 
-3.
+3. **Interactive Prompt**: If no other method is available, the script will prompt you for your username and password.
 
 > **Note on Security:** Your session file (`.instapaper_session`) and the encryption key (`.session_key`) are stored with secure permissions (read/write for the owner only) to protect your credentials.
 
-### Folder Configuration
+### 📁 Folder and Field Configuration
 
-You can define and quickly access your Instapaper folders using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):
+You can define and quickly access your Instapaper folders and set default output fields using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):
 
-1.
-2.
-3.
+1. The path specified by the `--config-path` argument.
+2. `config.toml` in the current working directory.
+3. `~/.config/instapaper-scraper/config.toml`
 
 Here is an example of `config.toml`:
 
@@ -131,6 +159,12 @@ Here is an example of `config.toml`:
 # Default output filename for non-folder mode
 output_filename = "home-articles.csv"
 
+# Optional fields to include in the output.
+# These can be overridden by command-line flags.
+[fields]
+read_url = false
+article_preview = false
+
 [[folders]]
 key = "ml"
 id = "1234567"
@@ -145,14 +179,18 @@ output_filename = "python-articles.db"
 ```
 
 - **output_filename (top-level)**: The default output filename to use when not in folder mode.
-- **
-  -
-  -
-- **
+- **[fields]**: A section to control which optional data fields are included in the output.
+  - `read_url`: Set to `true` to include the Instapaper read URL for each article.
+  - `article_preview`: Set to `true` to include the article's text preview.
+- **[[folders]]**: Each `[[folders]]` block defines a specific folder.
+  - **key**: A short alias for the folder.
+  - **id**: The folder ID from the Instapaper URL.
+  - **slug**: The human-readable part of the folder URL.
+  - **output_filename (folder-specific)**: A preset output filename for scraped articles from this specific folder.
 
 When a `config.toml` file is present and no `--folder` argument is provided, the scraper will prompt you to select a folder. You can also specify a folder directly using the `--folder` argument with its key, ID, or slug. Use `--folder=none` to explicitly disable folder mode and scrape all articles.
 
-### Command-line Arguments
+### 💻 Command-line Arguments
 
 | Argument | Description |
 | --- | --- |
@@ -162,9 +200,10 @@ When a `config.toml` file is present and no `--folder` argument is provided, the scraper will prompt you to select a folder.
 | `--output <filename>` | Specify a custom output filename. The file extension will be automatically corrected to match the selected format. |
 | `--username <user>` | Your Instapaper account username. |
 | `--password <pass>` | Your Instapaper account password. |
-| `--
+| `--[no-]read-url` | Includes the Instapaper read URL. (Old flag `--add-instapaper-url` is deprecated but supported). Can be set in `config.toml`. Overrides config. |
+| `--[no-]article-preview` | Includes the article preview text. (Old flag `--add-article-preview` is deprecated but supported). Can be set in `config.toml`. Overrides config. |
 
-### Output Formats
+### 📄 Output Formats
 
 You can control the output format using the `--format` argument. The supported formats are:
 
@@ -176,19 +215,19 @@ If the `--format` flag is omitted, the script will default to `csv`.
 
 When using `--output <filename>`, the file extension is automatically corrected to match the chosen format. For example, `instapaper-scraper --format json --output my_articles.txt` will create `my_articles.json`.
 
-#### Opening Articles in Instapaper
+#### 📖 Opening Articles in Instapaper
 
 The output data includes a unique `id` for each article. You can use this ID to construct a URL to the article's reader view: `https://www.instapaper.com/read/<article_id>`.
 
-For convenience, you can use the `--
+For convenience, you can use the `--read-url` flag to have the script include a full, clickable URL in the output.
 
 ```sh
-instapaper-scraper --
+instapaper-scraper --read-url
 ```
 
 This adds a `instapaper_url` field to each article in the JSON output and a `instapaper_url` column in the CSV and SQLite outputs. The original `id` field is preserved.
 
-## How It Works
+## 🛠️ How It Works
 
 The tool is designed with a modular architecture for reliability and maintainability.
 
@@ -197,17 +236,17 @@ The tool is designed with a modular architecture for reliability and maintainability.
 3. **Data Collection**: All fetched articles are aggregated into a single list.
 4. **Export**: Finally, the collected data is written to a file in your chosen format (`.csv`, `.json`, or `.db`).
 
-## Example Output
+## 📊 Example Output
 
-### CSV (`output/bookmarks.csv`) (with --add-instapaper-url)
+### 📄 CSV (`output/bookmarks.csv`) (with --add-instapaper-url and --add-article-preview)
 
 ```csv
-"id","instapaper_url","title","url"
-"999901234","https://www.instapaper.com/read/999901234","Article 1","https://www.example.com/page-1/"
-"999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/"
+"id","instapaper_url","title","url","article_preview"
+"999901234","https://www.instapaper.com/read/999901234","Article 1","https://www.example.com/page-1/","This is a preview of article 1."
+"999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/","This is a preview of article 2."
 ```
 
-### JSON (`output/bookmarks.json`) (with --add-instapaper-url)
+### 📄 JSON (`output/bookmarks.json`) (with --add-instapaper-url and --add-article-preview)
 
 ```json
 [
@@ -215,26 +254,57 @@ The tool is designed with a modular architecture for reliability and maintainability.
     "id": "999901234",
     "title": "Article 1",
     "url": "https://www.example.com/page-1/",
-    "instapaper_url": "https://www.instapaper.com/read/999901234"
+    "instapaper_url": "https://www.instapaper.com/read/999901234",
+    "article_preview": "This is a preview of article 1."
   },
   {
     "id": "999002345",
     "title": "Article 2",
     "url": "https://www.example.com/page-2/",
-    "instapaper_url": "https://www.instapaper.com/read/999002345"
+    "instapaper_url": "https://www.instapaper.com/read/999002345",
+    "article_preview": "This is a preview of article 2."
   }
 ]
 ```
 
-### SQLite (`output/bookmarks.db`)
+### 🗄️ SQLite (`output/bookmarks.db`)
 
 A SQLite database file is created with an `articles` table. The table includes `id`, `title`, and `url` columns. If the `--add-instapaper-url` flag is used, a `instapaper_url` column is also included. This feature is fully backward-compatible and will automatically adapt to the user's installed SQLite version, using an efficient generated column on modern versions (3.31.0+) and a fallback for older versions.
 
-##
+## 🤗 Support and Community
+
+- **🐛 Bug Reports:** For any bugs or unexpected behavior, please [open an issue on GitHub](https://github.com/chriskyfung/InstapaperScraper/issues).
+- **💬 Questions & General Discussion:** For questions, feature requests, or general discussion, please use our [GitHub Discussions](https://github.com/chriskyfung/InstapaperScraper/discussions).
+
+## 🙏 Support the Project
+
+`Instapaper Scraper` is a free and open-source project that requires significant time and effort to maintain and improve. If you find this tool useful, please consider supporting its development. Your contribution helps ensure the project stays healthy, active, and continuously updated.
+
+- **[Sponsor on GitHub](https://github.com/sponsors/chriskyfung):** The best way to support the project with recurring monthly donations. Tiers with special rewards like priority support are available!
+- **[Buy Me a Coffee](https://www.buymeacoffee.com/chriskyfung):** Perfect for a one-time thank you.
+
+## 🤝 Contributing
+
+Contributions are welcome! Whether it's a bug fix, a new feature, or documentation improvements, please feel free to open a pull request.
+
+Please read the **[Contribution Guidelines](CONTRIBUTING.md)** before you start.
+
+## 🧑‍💻 Development & Testing
+
+This project uses `pytest` for testing, `ruff` for code formatting and linting, and `mypy` for static type checking. A `Makefile` is provided to simplify common development tasks.
+
+### 🚀 Using the Makefile
+
+The most common commands are:
+- `make install`: Installs development dependencies.
+- `make format`: Formats the entire codebase.
+- `make check`: Runs the linter, type checker, and test suite.
+- `make test`: Runs the test suite.
+- `make build`: Builds the distributable packages.
 
-
+Run `make help` to see all available commands.
 
-### Setup
+### 🔧 Setup
 
 To install the development dependencies:
 
@@ -242,7 +312,13 @@ To install the development dependencies:
 pip install -e .[dev]
 ```
 
-
+To set up the pre-commit hooks:
+
+```sh
+pre-commit install
+```
+
+### ▶️ Running the Scraper
 
 To run the scraper directly without installing the package:
 
@@ -250,26 +326,28 @@ To run the scraper directly without installing the package:
 python -m src.instapaper_scraper.cli
 ```
 
-### Testing
+### ✅ Testing
 
-To run the tests, execute the following command from the project root:
+To run the tests, execute the following command from the project root (or use `make test`):
 
 ```sh
 pytest
 ```
 
-To check test coverage:
+To check test coverage (or use `make test-cov`):
 
 ```sh
 pytest --cov=src/instapaper_scraper --cov-report=term-missing
 ```
 
-### Code Quality
+### ✨ Code Quality
 
-
+You can use the `Makefile` for convenience (e.g., `make format`, `make lint`).
+
+To format the code with `ruff`:
 
 ```sh
-
+ruff format .
 ```
 
 To check for linting errors with `ruff`:
@@ -278,16 +356,29 @@ To check for linting errors with `ruff`:
 ruff check .
 ```
 
-To
+To run static type checking with `mypy`:
 
 ```sh
-
+mypy src
 ```
 
-
+To run license checks:
+
+```sh
+licensecheck --zero
+```
+
+
+## 📜 Disclaimer
 
 This script requires valid Instapaper credentials. Use it responsibly and in accordance with Instapaper’s Terms of Service.
 
-## License
+## 📄 License
+
+This project is licensed under the terms of the **GNU General Public License v3.0**. See the [LICENSE](LICENSE) file for the full license text.
+
+## Contributors
+
+[![Contributors](https://contrib.rocks/image?repo=chriskyfung/InstapaperScraper)](https://github.com/chriskyfung/InstapaperScraper/graphs/contributors)
 
-
+Made with [contrib.rocks](https://contrib.rocks).
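The README above documents the new `[fields]` table next to `[[folders]]`. For reference, this is how that layout parses with stdlib `tomllib`, falling back to the `tomli` backport on Python 3.9/3.10 exactly as cli.py does; the folder values echo the README's example, and the `slug` value here is a made-up placeholder:

```python
import sys

if sys.version_info >= (3, 11):
    import tomllib  # standard library from Python 3.11
else:
    import tomli as tomllib  # backport; see the tomli dependency above

CONFIG = """\
output_filename = "home-articles.csv"

[fields]
read_url = false
article_preview = false

[[folders]]
key = "ml"
id = "1234567"
slug = "machine-learning"
output_filename = "ml-articles.csv"
"""

config = tomllib.loads(CONFIG)
print(config["fields"]["read_url"])   # False
print(config["folders"][0]["key"])    # ml
```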
instapaper_scraper-1.2.0.dist-info/RECORD
ADDED

@@ -0,0 +1,13 @@
+instapaper_scraper/__init__.py,sha256=qdcT3tp4KLufWH1u6tOuPVUQaXwakQD0gdjkwY4ljfg,206
+instapaper_scraper/api.py,sha256=q5cxikx3bmRfGUcgLbYjPtpMkrAE-A6vWjZ_KKwOmAU,13701
+instapaper_scraper/auth.py,sha256=OpgjbdI697FitumiyznWjey5-R2ZuxAEATaMz9NNnTc,7092
+instapaper_scraper/cli.py,sha256=MklUuxCVzoOGdT4jtMH0unY7D50qqJvU3XKatdfvGbg,8588
+instapaper_scraper/constants.py,sha256=hiWriGWAQjDlx_Jn14dTkJIg4I--5ltzOOwD0ywFmwg,443
+instapaper_scraper/exceptions.py,sha256=CptHoZe4NOhdjOoyXkZEMFgQC6oKtzjRljywwDEtsTg,134
+instapaper_scraper/output.py,sha256=6UdeKUubG_Yn-lCX0Pk8vG1zzc00xWg_5uNRWedOA30,6454
+instapaper_scraper-1.2.0.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+instapaper_scraper-1.2.0.dist-info/METADATA,sha256=5m285Un8lmlLiY6aIFH_ANg9w_Gyofnw2CV4XymPbF0,15887
+instapaper_scraper-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+instapaper_scraper-1.2.0.dist-info/entry_points.txt,sha256=7AvRgN5fvtas_Duxdz-JPbDN6A1Lq2GaTfTSv54afxA,67
+instapaper_scraper-1.2.0.dist-info/top_level.txt,sha256=kiU9nLkqPOVPLsP4QMHuBFjAmoIKfftYmGV05daLrcc,19
+instapaper_scraper-1.2.0.dist-info/RECORD,,
instapaper_scraper-1.1.0rc1.dist-info/RECORD
DELETED

@@ -1,13 +0,0 @@
-instapaper_scraper/__init__.py,sha256=qdcT3tp4KLufWH1u6tOuPVUQaXwakQD0gdjkwY4ljfg,206
-instapaper_scraper/api.py,sha256=-Dq5fOAGSGopb-qonIbETd9ZlxWdULKRgl1DCOuVemY,11618
-instapaper_scraper/auth.py,sha256=VTBE9KhGGJm0KbMT5DCTMCbh-N3HiJuJ9wMDb8CyZT4,7015
-instapaper_scraper/cli.py,sha256=wsQxTVFIyJq3EQiAtz7dCjg1vI2_Y9quZv4ifuEPDU8,7495
-instapaper_scraper/constants.py,sha256=ubFWa47985lIz58qokMC0xQzTmCB6NOa17KFgWLn65E,403
-instapaper_scraper/exceptions.py,sha256=CptHoZe4NOhdjOoyXkZEMFgQC6oKtzjRljywwDEtsTg,134
-instapaper_scraper/output.py,sha256=lxJgW71-m1YuMYJHeK6nu479pk_3bQGc0axzNCvxtZQ,5338
-instapaper_scraper-1.1.0rc1.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-instapaper_scraper-1.1.0rc1.dist-info/METADATA,sha256=O-VJZg1yN3cuPRfBCevmD9_IrOR07NGpzrgZXI2-6hk,11637
-instapaper_scraper-1.1.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-instapaper_scraper-1.1.0rc1.dist-info/entry_points.txt,sha256=7AvRgN5fvtas_Duxdz-JPbDN6A1Lq2GaTfTSv54afxA,67
-instapaper_scraper-1.1.0rc1.dist-info/top_level.txt,sha256=kiU9nLkqPOVPLsP4QMHuBFjAmoIKfftYmGV05daLrcc,19
-instapaper_scraper-1.1.0rc1.dist-info/RECORD,,
{instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/entry_points.txt
RENAMED
File without changes

{instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/licenses/LICENSE
RENAMED
File without changes

{instapaper_scraper-1.1.0rc1.dist-info → instapaper_scraper-1.2.0.dist-info}/top_level.txt
RENAMED
File without changes