PyPI - audioarxiv - Versions diffs - 0.1.0rc46.post1__py3-none-any.whl - Mend

audioarxiv 0.1.0rc46.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

audioarxiv/__init__.py +169 -0
audioarxiv/audio/__init__.py +12 -0
audioarxiv/audio/base.py +172 -0
audioarxiv/preprocess/__init__.py +13 -0
audioarxiv/preprocess/article.py +22 -0
audioarxiv/preprocess/math_equation.py +36 -0
audioarxiv/resources/__init__.py +10 -0
audioarxiv/resources/paper.py +204 -0
audioarxiv/tools/__init__.py +3 -0
audioarxiv/tools/main.py +194 -0
audioarxiv-0.1.0rc46.post1.dist-info/METADATA +114 -0
audioarxiv-0.1.0rc46.post1.dist-info/RECORD +15 -0
audioarxiv-0.1.0rc46.post1.dist-info/WHEEL +4 -0
audioarxiv-0.1.0rc46.post1.dist-info/entry_points.txt +3 -0
audioarxiv-0.1.0rc46.post1.dist-info/licenses/LICENSE +21 -0

audioarxiv/__init__.py ADDED Viewed

@@ -0,0 +1,169 @@
+"""
+audioarxiv
+==========
+audioarxiv is a Python package designed to make staying up to date with the latest research more accessible and
+convenient.
+It allows you to fetch research papers directly from `arXiv <https://arxiv.org>`_ and converts them into speech,
+so you can listen to them on the go—whether you're commuting, working out, or simply prefer auditory learning.
+With support for customizable text-to-speech settings and a streamlined interface, audioarxiv offers researchers,
+students, and enthusiasts a hands-free way to engage with scientific literature.
+**Please note**: the package is still in its early stages of development,
+and some features may be limited or not fully mature yet.
+"""
+from __future__ import annotations
+import json
+import logging
+import subprocess
+import sys
+from pathlib import Path
+from pandas import DataFrame
+from . import audio, preprocess, resources
+__version__ = "0.1.0-rc46-post1"
+def get_version_information() -> str:
+    """Report the version of the audioarxiv package.
+    Returns:
+        str: The module-level ``__version__`` string.
+    """
+    version: str = __version__
+    return version
+def setup_logger(logger_: logging.Logger, outdir='.', label=None, log_level='INFO', print_version=False):
+    """Configure the level and handlers of a logger: call at the start of the script.
+    A console handler is always attached.  A file handler writing to
+    ``outdir/label.log`` is attached only when ``label`` is given.  Calling the
+    function again does not duplicate handlers; it only refreshes their level.
+    Args:
+        logger_ (logging.Logger): The logger instance to be configured.
+        outdir (str): Directory of the log file. Only used when ``label`` is supplied.
+        label (str): If supplied, write the logging output to outdir/label.log
+        log_level (str | int, optional): Either a level name such as 'debug', 'info' or
+            'warning' (case-insensitive), or an integer as specified in
+            https://docs.python.org/3/library/logging.html#logging-levels
+        print_version (bool): If true, log the version information.
+    Raises:
+        ValueError: If ``log_level`` is a string that does not name a logging level.
+    """
+    if isinstance(log_level, str):
+        level = getattr(logging, log_level.upper(), None)
+        # ``logging`` also exposes upper-case attributes that are not levels
+        # (e.g. BASIC_FORMAT), so the looked-up value must really be an integer.
+        if not isinstance(level, int):
+            raise ValueError(f'log_level {log_level} not understood')
+    else:
+        level = int(log_level)
+    logger_.propagate = False
+    logger_.setLevel(level)
+    # FileHandler subclasses StreamHandler, so a plain isinstance() test would mistake
+    # an existing file handler for the console handler and never attach the latter.
+    has_console_handler = any(
+        isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler)
+        for h in logger_.handlers)
+    if not has_console_handler:
+        stream_handler = logging.StreamHandler()
+        stream_handler.setFormatter(logging.Formatter(
+            '%(asctime)s %(name)s %(levelname)-8s: %(message)s', datefmt='%H:%M'))
+        logger_.addHandler(stream_handler)
+    has_file_handler = any(isinstance(h, logging.FileHandler) for h in logger_.handlers)
+    if label and not has_file_handler:
+        Path(outdir).mkdir(parents=True, exist_ok=True)
+        log_file = f'{outdir}/{label}.log'
+        file_handler = logging.FileHandler(log_file)
+        file_handler.setFormatter(logging.Formatter(
+            '%(asctime)s %(levelname)-8s: %(message)s', datefmt='%H:%M'))
+        logger_.addHandler(file_handler)
+    # Apply the level to every handler, including ones attached by an earlier call.
+    for handler in logger_.handlers:
+        handler.setLevel(level)
+    if print_version:
+        version = get_version_information()
+        logger_.info('Running audioarxiv version: %s', version)
+def loaded_modules_dict() -> dict:
+    """Map every imported top-level module to its version string.
+    Submodules (names containing a dot) are skipped.  A module without a
+    ``__version__`` attribute is reported as ``"N/A"``.
+    Returns:
+        dict: A dictionary of the modules and the versions.
+    """
+    # Iterate over a snapshot of the names rather than the live ``sys.modules`` mapping.
+    return {
+        name: str(getattr(sys.modules[name], "__version__", "N/A"))
+        for name in tuple(sys.modules)
+        if "." not in str(name)
+    }
+def env_package_list(as_dataframe: bool = False) -> list | DataFrame:
+    """Get the list of packages installed in the system prefix.
+    If it is detected that the system prefix is part of a Conda environment,
+    a call to ``conda list --prefix {sys.prefix} --json`` will be made, otherwise
+    the call will be to ``{sys.executable} -m pip list --format json``.
+    When neither tool can be used, the currently loaded top-level modules are
+    reported instead.
+    Args:
+        as_dataframe (bool): return output as a `pandas.DataFrame`
+    Returns:
+    Union[list, DataFrame]:
+    If ``as_dataframe=False`` is given, the output is a `list` of `dict`,
+    one for each package, at least with ``'name'`` and ``'version'`` keys
+    (more if `conda` is used).
+    If ``as_dataframe=True`` is given, the output is a `DataFrame`
+    created from the `list` of `dicts`.
+    """
+    prefix = sys.prefix
+    pkgs = None
+    # if a conda-meta directory exists, this is a conda environment, so
+    # use conda to print the package list
+    if (Path(prefix) / "conda-meta").is_dir():
+        try:
+            pkgs = json.loads(subprocess.check_output([
+                "conda",
+                "list",
+                "--prefix", prefix,
+                "--json"
+            ]))
+        except (FileNotFoundError, subprocess.CalledProcessError):
+            # When a conda env is in use but conda is unavailable
+            pkgs = None
+    # otherwise try and use Pip
+    if pkgs is None:
+        try:
+            import pip  # noqa: F401 # pylint: disable=unused-import, import-outside-toplevel
+        except ModuleNotFoundError:  # no pip?
+            pip_output = None
+        else:
+            try:
+                # ``pip list`` takes no positional argument, so none is passed.
+                pip_output = subprocess.check_output([
+                    sys.executable,
+                    "-m", "pip",
+                    "list",
+                    "--format", "json",
+                ])
+            except subprocess.CalledProcessError:
+                # pip is importable but failed to run; fall back like the conda branch does.
+                pip_output = None
+        if pip_output is None:
+            # no usable conda or pip, so just report the list of loaded modules
+            modules = loaded_modules_dict()
+            pkgs = [{"name": x, "version": y} for x, y in modules.items()]
+        else:
+            pkgs = json.loads(pip_output)
+    if as_dataframe:
+        return DataFrame(pkgs)
+    return pkgs
+# Package-wide logger; the submodules obtain the same instance via logging.getLogger('audioarxiv').
+logger = logging.getLogger('audioarxiv')
+# Configured at import time with the setup_logger defaults (INFO level, no log file).
+setup_logger(logger)
+# Names exported by ``from audioarxiv import *``.
+__all__ = [
+    'audio',
+    'preprocess',
+    'resources',
+    'logger',
+    '__version__'
+]

audioarxiv/audio/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""
+Handles text-to-speech conversion using various engines to read research papers aloud.
+"""
+from __future__ import annotations
+from . import base
+from .base import Audio
+__all__ = [
+    'base',
+    'Audio',
+]

audioarxiv/audio/base.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""
+A base class for audio.
+"""
+from __future__ import annotations
+import logging
+import time
+import pyttsx3
+from ..preprocess import get_sentences
+logger = logging.getLogger('audioarxiv')
+def validate_audio_arguments(rate: float, volume: float, voice: int | str | None, pause_seconds: float) -> dict:
+    """Validate the arguments for Audio, replacing invalid values with usable ones.
+    ``rate`` is clamped to [50, 500] and ``volume`` to [0.0, 1.0].  An invalid
+    ``voice`` becomes None, which makes Audio keep the engine's current voice,
+    and a negative ``pause_seconds`` becomes 0.1.  Problems are logged, never raised.
+    Args:
+        rate (float): Number of words per minute.
+        volume (float): Volume.
+        voice (int | str | None): If it is int, it is interpreted as the index of the available voices.
+            If it is str, it is interpreted as the ID of the voice.
+            The available voice ids can be found with `list_voices()`.
+        pause_seconds (float): Duration of pause between sentences.
+    Returns:
+        dict: rate, volume, voice, pause_seconds
+    """
+    engine = pyttsx3.init()
+    available_voices = engine.getProperty('voices')
+    rate = max(50, min(500, rate))
+    volume = max(0.0, min(1.0, volume))
+    # Each problem is logged BEFORE the value is reset so the message shows what was rejected.
+    if isinstance(voice, int):
+        if 0 <= voice < len(available_voices):
+            voice = available_voices[voice].id
+        else:
+            logger.error('Invalid voice index = %s. Keeping current voice.', voice)
+            voice = None
+    elif isinstance(voice, str):
+        if voice not in [v.id for v in available_voices]:
+            logger.error('Invalid voice ID = %s. Keeping current voice.', voice)
+            voice = None
+    elif voice is not None:
+        logger.error('Unsupported datatype of voice = %s. It must be either int or str.', type(voice))
+        # Do not hand a value of an unsupported type on to the speech engine.
+        voice = None
+    if pause_seconds < 0:
+        logger.error('pause = %s must be non-negative. Keeping the current pause.', pause_seconds)
+        pause_seconds = 0.1
+    return {'rate': rate,
+            'volume': volume,
+            'voice': voice,
+            'pause_seconds': pause_seconds}
+class Audio:
+    """A class to generate audio from text with a pyttsx3 speech engine.
+    """
+    def __init__(self, rate: float = 140,  # noqa: R0913,E1120,E501 # pylint: disable=too-many-arguments,too-many-positional-arguments,C0301
+                 volume: float = 0.9,
+                 voice: str | None = None,
+                 pause_seconds: float = 0.1,
+                 validate_arguments: bool = True):
+        """A class to configure the audio.
+        Args:
+            rate (float, optional): Number of words per minute. Defaults to 140.
+            volume (float, optional): Volume. Defaults to 0.9.
+            voice (Optional[str], optional): Voice id.
+                The available voice ids can be found with `list_voices()`.
+                Defaults to None.
+            pause_seconds (float, optional): Duration of pause between sentences. Defaults to 0.1.
+            validate_arguments (bool): If True, pass the arguments through
+                `validate_audio_arguments` before applying them.
+        """
+        if validate_arguments:
+            arguments = validate_audio_arguments(rate=rate,
+                                                 volume=volume,
+                                                 voice=voice,
+                                                 pause_seconds=pause_seconds)
+            rate = arguments['rate']
+            volume = arguments['volume']
+            voice = arguments['voice']
+            pause_seconds = arguments['pause_seconds']
+        self.engine = pyttsx3.init()
+        # A value of None leaves the engine's own setting untouched.
+        for name, value in (('rate', rate), ('volume', volume), ('voice', voice)):
+            if value is not None:
+                self.engine.setProperty(name, value)
+        # Seed the backing attribute first: the setter rejects negative values, which would
+        # otherwise leave ``_pause_seconds`` undefined when validation is skipped.
+        self._pause_seconds = 0.1
+        self.pause_seconds = pause_seconds
+    @property
+    def available_voices(self) -> list:
+        """Get the available voices.
+        Returns:
+            list: The available voices as reported by the engine.
+        """
+        return self.engine.getProperty('voices')
+    @property
+    def pause_seconds(self) -> float:
+        """The duration of pause between sentences.
+        Returns:
+            float: Duration of pause between sentences in second.
+        """
+        return self._pause_seconds
+    @pause_seconds.setter
+    def pause_seconds(self, value: float):
+        """Set the duration of pause between sentences.
+        A negative value is rejected with an error message and the current pause is kept.
+        Args:
+            value (float): Duration of pause between sentences.
+        """
+        if value < 0:
+            logger.error('pause = %s must be non-negative. Keeping the current pause.', value)
+            return
+        self._pause_seconds = value
+    def list_voices(self):
+        """Log the available voices with their index and details."""
+        for i, voice in enumerate(self.available_voices):
+            logger.info("Index %s: %s (ID: %s)", i, voice.name, voice.id)
+    def clean_text(self, text: str) -> str:
+        """Clean the text for smoother reading.
+        Every run of whitespace, including line breaks, is collapsed to a single
+        space and leading/trailing whitespace is removed.
+        Args:
+            text (str): Text.
+        Returns:
+            str: Cleaned text.
+        """
+        return " ".join(text.split())
+    def read_article(self,
+                     article: str):
+        """Read the article aloud, sentence by sentence, pausing between sentences.
+        Args:
+            article (str): Article. Anything that is not a str is skipped with a warning.
+        """
+        if not isinstance(article, str):
+            logger.warning('article = %s is not str. Skipping.', article)
+            return
+        cleaned_text = self.clean_text(article)
+        sentences = get_sentences(cleaned_text)
+        for sentence in sentences:
+            self.engine.say(sentence)
+            self.engine.runAndWait()
+            time.sleep(self.pause_seconds)
+    def save_article(self,
+                     filename: str,
+                     article: str):
+        """Save the article to an audio file.
+        Args:
+            filename (str): File name.
+            article (str): Article. Anything that is not a str is skipped with a warning.
+        """
+        # Same guard as read_article, so both entry points treat bad input alike.
+        if not isinstance(article, str):
+            logger.warning('article = %s is not str. Skipping.', article)
+            return
+        cleaned_text = self.clean_text(article)
+        self.engine.save_to_file(cleaned_text, filename)
+        self.engine.runAndWait()
+    def stop(self):
+        """Stop the current speech."""
+        self.engine.stop()

audioarxiv/preprocess/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""
+Processes and cleans the extracted text from papers,
+including sentence segmentation, symbol handling, and formatting for better audio output.
+"""
+from __future__ import annotations
+from .article import get_sentences
+from .math_equation import process_math_equations
+__all__ = [
+    'get_sentences',
+    'process_math_equations',
+]

audioarxiv/preprocess/article.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""
+Functions to preprocess articles.
+"""
+from __future__ import annotations
+import nltk
+from nltk.tokenize import sent_tokenize
+# Download the tokenizer data only when it is not installed yet, so that importing
+# this module does not have to contact the NLTK download server on every run.
+try:
+    nltk.data.find('tokenizers/punkt_tab')
+except LookupError:
+    nltk.download('punkt_tab')
+def get_sentences(text: str) -> list:
+    """Split the text into sentences with NLTK's ``sent_tokenize``.
+    Args:
+        text (str): Text.
+    Returns:
+        list: A list of sentences.
+    """
+    return sent_tokenize(text)

audioarxiv/preprocess/math_equation.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""
+Functions to preprocess math equations.
+"""
+from __future__ import annotations
+import re
+from sympy import srepr
+from sympy.parsing.sympy_parser import parse_expr
+def process_math_equations(text: str) -> str:
+    """Detects LaTeX-style math (``$...$`` and ``$$...$$``) and converts it to a readable format.
+    An expression that SymPy can parse is replaced with ``Math: <srepr of the expression>``;
+    any other expression is replaced with ``Equation: <raw text>``.
+    Args:
+        text (str): Text.
+    Returns:
+        str: Text with the processed math equations.
+    """
+    def replace_math(match: re.Match) -> str:
+        raw_expr = match.group(1)
+        try:
+            # SECURITY NOTE(review): sympy's parse_expr evaluates its input with eval().
+            # If ``text`` can come from an untrusted source (e.g. a downloaded paper),
+            # this call should be replaced by a parser that does not evaluate code.
+            parsed = parse_expr(raw_expr)
+            return f"Math: {srepr(parsed)}"
+        except Exception:  # deliberately broad: any parsing failure falls back to the raw text
+            return f"Equation: {raw_expr}"
+    # First replace block math ($$...$$); DOTALL because display equations often span several lines.
+    text = re.sub(r"\$\$(.+?)\$\$", replace_math, text, flags=re.DOTALL)
+    # Then replace inline math: $...$ only when the delimiters are not directly adjacent
+    # to a word character (letter, digit or underscore), to avoid amounts such as $5.
+    text = re.sub(r"(?<!\w)\$(.+?)\$(?!\w)", replace_math, text)
+    return text

audioarxiv/resources/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""
+Fetches research papers and metadata from arXiv using the arXiv API.
+"""
+from __future__ import annotations
+from .paper import Paper
+__all__ = [
+    'Paper',
+]

audioarxiv/resources/paper.py ADDED Viewed

@@ -0,0 +1,204 @@
+"""
+A class to fetch papers from arXiv.
+"""
+from __future__ import annotations
+import logging
+import re
+import tempfile
+from datetime import datetime
+import arxiv
+import fitz
+logger = logging.getLogger('audioarxiv')
+def validate_paper_arguments(page_size: int,
+                             delay_seconds: float,
+                             num_retries: int) -> dict:
+    """Validate the arguments for Paper, clamping out-of-range values.
+    ``page_size`` is clamped to [1, 2000]; negative ``delay_seconds`` and
+    ``num_retries`` are raised to 0.  Values inside the valid range are
+    returned unchanged.
+    Args:
+        page_size (int): Maximum number of results fetched in a single API request. Smaller pages can
+            be retrieved faster, but may require more round-trips. The API's limit is 2000 results per page.
+        delay_seconds (float): Number of seconds to wait between API requests.
+            `arXiv's Terms of Use <https://arxiv.org/help/api/tou>`_ ask that you "make no
+            more than one request every three seconds."
+        num_retries (int): Number of times to retry a failing API request before raising an Exception.
+    Returns:
+        dict: page_size, delay_seconds, num_retries
+    """
+    # Clamp silently, in the same way validate_audio_arguments treats rate and volume.
+    page_size = max(1, min(2000, page_size))
+    delay_seconds = max(0.0, delay_seconds)
+    num_retries = max(0, num_retries)
+    return {'page_size': page_size,
+            'delay_seconds': delay_seconds,
+            'num_retries': num_retries}
+class Paper:
+    """A class to fetch papers from arXiv.
+    """
+    def __init__(self, page_size: int = 100, delay_seconds: float = 3.0, num_retries: int = 3,
+                 validate_arguments: bool = True):
+        """An arXiv paper.
+        Args:
+            page_size (int, optional): Maximum number of results fetched in a single API request. Smaller pages can
+                be retrieved faster, but may require more round-trips. The API's limit is 2000 results per page.
+                Defaults to 100.
+            delay_seconds (float, optional): Number of seconds to wait between API requests.
+                `arXiv's Terms of Use <https://arxiv.org/help/api/tou>`_ ask that you "make no
+                more than one request every three seconds."
+                Defaults to 3.0.
+            num_retries (int, optional): Number of times to retry a failing API request before raising an Exception.
+                Defaults to 3.
+            validate_arguments (bool, optional): If True, validate the arguments. Defaults to True.
+        """
+        if validate_arguments:
+            arguments = validate_paper_arguments(page_size=page_size,
+                                                 delay_seconds=delay_seconds,
+                                                 num_retries=num_retries)
+            page_size = arguments['page_size']
+            delay_seconds = arguments['delay_seconds']
+            num_retries = arguments['num_retries']
+        self._client = arxiv.Client(page_size=page_size,
+                                    delay_seconds=delay_seconds,
+                                    num_retries=num_retries)
+        # Cache of the sections extracted from the PDF of ``self.paper``.
+        self._sections = []
+        self.paper = None
+    @property
+    def client(self) -> arxiv.Client:
+        """Get the arxiv client.
+        Returns:
+            arxiv.Client: arxiv client.
+        """
+        return self._client
+    @property
+    def title(self) -> str | None:
+        """Title of the paper.
+        Returns:
+            str | None: Title of the paper. None if paper is None.
+        """
+        if self.paper is None:
+            logger.error('paper is None.')
+            return None
+        return self.paper.title
+    @property
+    def abstract(self) -> str | None:
+        """Abstract.
+        Returns:
+            str | None: Abstract. None if paper is None.
+        """
+        if self.paper is None:
+            logger.error('paper is None.')
+            return None
+        return self.paper.summary
+    @property
+    def authors(self) -> list | None:
+        """List of authors.
+        Returns:
+            list | None: List of the author names. None if paper is None.
+        """
+        if self.paper is None:
+            logger.error('paper is None.')
+            return None
+        return [author.name for author in self.paper.authors]
+    @property
+    def published(self) -> datetime | None:
+        """Published date.
+        Returns:
+            datetime | None: Published date. None if paper is None.
+        """
+        if self.paper is None:
+            logger.error('paper is None.')
+            return None
+        return self.paper.published
+    @property
+    def updated(self) -> datetime | None:
+        """Updated date.
+        Returns:
+            datetime | None: Updated date. None if paper is None.
+        """
+        if self.paper is None:
+            logger.error('paper is None.')
+            return None
+        return self.paper.updated
+    def search_by_arxiv_id(self, arxiv_id: str):
+        """Search paper by arXiv ID and make it the current paper.
+        Args:
+            arxiv_id (str): arXiv ID.
+        Raises:
+            StopIteration: If the search yields no result.
+        """
+        # Forget the previous paper first, so that neither it nor its cached sections
+        # are ever reported for the paper requested now.
+        self.paper = None
+        self._sections = []
+        self.paper = next(self.client.results(arxiv.Search(id_list=[arxiv_id])))
+    def download_pdf(self,
+                     dirpath: str = './',
+                     filename: str = '') -> str | None:
+        """Download the PDF.
+        Args:
+            dirpath (str, optional): Path to the directory. Defaults to './'.
+            filename (str, optional): Name of the file. Defaults to ''.
+        Returns:
+            str | None: Path of the output PDF. None if paper is None.
+        """
+        if self.paper is None:
+            logger.error('Paper is None. Cannot download PDF.')
+            return None
+        return self.paper.download_pdf(dirpath=dirpath, filename=filename)
+    @property
+    def sections(self) -> list:
+        """Get the sections of the paper.
+        The PDF is downloaded and parsed on first access; the result is cached
+        until another paper is searched.
+        Returns:
+            list: A list of sections. Each section is a dict with the keys ``'header'``
+                (str, or None for text found before the first header) and ``'content'``
+                (list of str text blocks). Empty if paper is None.
+        """
+        if self._sections:
+            return self._sections
+        if self.paper is None:
+            logger.error('Paper is None. Cannot extract sections.')
+            return self._sections
+        sections = []
+        # Download into a private directory that is removed afterwards, instead of
+        # writing over a temporary file that is still held open.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            pdf_path = self.download_pdf(dirpath=tmp_dir, filename='paper.pdf')
+            with fitz.open(pdf_path) as doc:
+                current_section = {"header": None, "content": []}
+                for page in doc:
+                    blocks = page.get_text("blocks")  # Extract text blocks # type: ignore[attr-defined]
+                    for block in blocks:
+                        text = block[4].strip()
+                        # Detect section headers using common patterns (uppercase, numbered, trailing colon)
+                        if (text.isupper() or re.match(r"^\d+(\.\d+)*\s+\w+", text) or text.endswith(":")):
+                            # Store previous section before switching
+                            if current_section["header"] or current_section["content"]:
+                                sections.append(current_section)
+                            current_section = {"header": text, "content": []}  # New section
+                        else:
+                            current_section["content"].append(text)
+                # Append the last section
+                if current_section["header"] or current_section["content"]:
+                    sections.append(current_section)
+        # Publish the cache only after the whole document was parsed successfully.
+        self._sections = sections
+        return self._sections

audioarxiv/tools/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""
+Command line tools.
+"""

audioarxiv/tools/main.py ADDED Viewed

@@ -0,0 +1,194 @@
+"""
+A command line tool to fetch arXiv papers and read aloud.
+"""
+from __future__ import annotations
+import json
+import logging
+import os
+import signal
+import sys
+import time
+import configargparse
+from platformdirs import user_config_dir
+from ..audio.base import Audio, validate_audio_arguments
+from ..resources.paper import Paper, validate_paper_arguments
+logger = logging.getLogger('audioarxiv')
+def handle_exit(sig_num: int, frame: object):  # noqa: ARG001 # pylint: disable=unused-argument
+    """Signal handler that logs the received signal and ends the process with exit status 0.
+    Args:
+        sig_num (int): Signal number.
+        frame (object): A frame object (unused).
+    """
+    logger.info("\nReceived signal %s. Exiting cleanly.", sig_num)
+    raise SystemExit(0)
+def save_settings(config_path: str, settings: dict):
+    """Write the settings to the configuration file as indented JSON.
+    A failure is logged as an error and not raised.
+    Args:
+        config_path (str): Path to the configuration file.
+        settings (dict): Dictionary of the settings.
+    """
+    try:
+        with open(config_path, mode='w', encoding="utf-8") as config_file:
+            json.dump(settings, config_file, indent=4)
+    except Exception as error:
+        logger.error('Error saving settings: %s', error)
+def initialize_configuration(args: configargparse.Namespace) -> tuple:
+    """Build the effective settings from the defaults, the config file and the command line.
+    Precedence, lowest to highest: built-in defaults, ``config.json`` in the user
+    configuration directory, command line arguments that are not None.  The file is
+    created with the defaults when it is missing and rewritten whenever a command
+    line argument changes a stored value.
+    Args:
+        args (configargparse.Namespace): Arguments. Must provide one attribute for
+            each audio and paper setting.
+    Returns:
+        tuple: settings, config_path
+    """
+    config_dir = user_config_dir('audioarxiv')
+    os.makedirs(config_dir, exist_ok=True)
+    config_file = 'config.json'
+    config_path = os.path.join(config_dir, config_file)
+    validators = {'audio': validate_audio_arguments,
+                  'paper': validate_paper_arguments}
+    def default_settings() -> dict:
+        # Return a fresh copy on every call so the defaults can never be mutated.
+        return {
+            'audio': {
+                'rate': 140,
+                'volume': 0.9,
+                'voice': None,
+                'pause_seconds': 0.1
+            },
+            'paper': {
+                'page_size': 100,
+                'delay_seconds': 3.0,
+                'num_retries': 3
+            }
+        }
+    settings = default_settings()
+    if os.path.exists(config_path):
+        # Load the settings from the config file.
+        try:
+            with open(config_path, encoding="utf-8") as f:
+                loaded_settings = json.load(f)
+            # Merge key by key: a section that lists only some keys keeps the defaults
+            # of the others, and keys unknown to the validators are not passed to them.
+            merged = dict(loaded_settings)
+            for section, defaults in settings.items():
+                stored = loaded_settings.get(section) or {}
+                values = {key: stored.get(key, default) for key, default in defaults.items()}
+                merged[section] = validators[section](**values)
+            settings = merged
+        except Exception as e:
+            logger.error('Error loading settings: %s. Using defaults.', e)
+            # Really fall back to the defaults instead of keeping a half-merged state.
+            settings = default_settings()
+    else:
+        logger.info('Saving default settings to %s...', config_path)
+        for section, validate in validators.items():
+            settings[section] = validate(**settings[section])
+        save_settings(config_path, settings)
+    # Apply the command line overrides, section by section.
+    settings_changed = False
+    for section, validate in validators.items():
+        section_changed = False
+        for prop in list(settings[section]):
+            value = getattr(args, prop)
+            # None means the option was not given on the command line.
+            if value is not None and value != settings[section][prop]:
+                settings[section][prop] = value
+                section_changed = True
+        if section_changed:
+            settings[section] = validate(**settings[section])
+            settings_changed = True
+    # Write the settings to file if there are changes.
+    if settings_changed:
+        logger.info('Saving updated settings to %s...', config_path)
+        save_settings(config_path=config_path, settings=settings)
+    return settings, config_path
+def main():
+    """Command line entry point: fetch an arXiv paper and read it aloud or save it as audio.
+    With ``--list-voices`` the available voices are logged and nothing else is done.
+    With ``--id`` the paper is fetched and either read aloud or, when ``--output`` is
+    given, written to that audio file.  The process exits with status 1 when no paper
+    is found for the given ID.
+    """
+    signal.signal(signal.SIGINT, handle_exit)
+    signal.signal(signal.SIGTERM, handle_exit)
+    parser = configargparse.ArgParser()
+    parser.add_argument('--id', help='arXiv paper ID.')
+    parser.add_argument('--output', type=str, help='Output to audio file if provided.')
+    parser.add_argument('--rate', type=float, help='Number of words per minute between 50 and 500.')
+    parser.add_argument('--volume', type=float, help='Volume between 0 and 1.')
+    parser.add_argument('--voice', type=str, help='Voice.')
+    parser.add_argument('--pause-seconds', type=float, help='Duration of pause between sentences in second.')
+    parser.add_argument('--page-size', type=int, help='Maximum number of results fetched in a single API request.')
+    parser.add_argument('--delay-seconds', type=float, help='Number of seconds to wait between API requests.')
+    parser.add_argument('--num-retries', type=int, help=('Number of times to retry a failing API request before raising '
+                                                         'an Exception.'))
+    parser.add_argument('--list-voices', action='store_true', help='List the available voices.')
+    args = parser.parse_args()
+    if args.list_voices:
+        audio = Audio()
+        audio.list_voices()
+        return
+    # Get the settings
+    settings, config_path = initialize_configuration(args)
+    # The Audio instance.
+    audio = Audio(**settings['audio'])
+    # Load the paper.
+    paper = Paper(**settings['paper'])
+    if args.id is None:
+        return
+    # Print the information
+    logger.info('Configuration file: %s', config_path)
+    logger.info('Audio settings')
+    for key, value in settings['audio'].items():
+        logger.info('%s: %s', key, value)
+    logger.info('Paper settings')
+    for key, value in settings['paper'].items():
+        logger.info('%s: %s', key, value)
+    # Search the paper.
+    logger.info('Searching arxiv: %s...', args.id)
+    try:
+        paper.search_by_arxiv_id(arxiv_id=args.id)
+    except StopIteration:
+        # The search produced no result for this ID.
+        logger.error('No paper found for arXiv ID %s.', args.id)
+        sys.exit(1)
+    # Get the sections
+    sections = paper.sections
+    if args.output is None:
+        for section in sections:
+            # Text found before the first detected header has no header to read.
+            if section['header'] is not None:
+                audio.read_article(section['header'])
+                time.sleep(1)
+            for content in section['content']:
+                audio.read_article(content)
+                time.sleep(1)
+    else:
+        parts = []
+        for section in sections:
+            if section['header'] is not None:
+                parts.append(section['header'])
+            if section['content'] is not None:
+                parts += section['content']
+        article = " ".join(parts)
+        logger.info('Saving audio...')
+        audio.save_article(filename=args.output, article=article)
+        logger.info('Audio is saved to %s.', args.output)

audioarxiv-0.1.0rc46.post1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,114 @@
+Metadata-Version: 2.4
+Name: audioarxiv
+Version: 0.1.0rc46.post1
+Summary: Turn arXiv papers into audio.
+Author-email: "Isaac C. F. Wong" <isaac.cf.wong@gmail.com>
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+License-File: LICENSE
+Requires-Dist: configargparse
+Requires-Dist: arxiv
+Requires-Dist: pyttsx3
+Requires-Dist: pymupdf
+Requires-Dist: sympy
+Requires-Dist: nltk
+Requires-Dist: pandas
+Requires-Dist: platformdirs
+Requires-Dist: pyspark>=3.0.0 ; extra == "spark"
+Requires-Dist: bandit[toml]==1.8.3 ; extra == "test"
+Requires-Dist: black==25.1.0 ; extra == "test"
+Requires-Dist: check-manifest==0.50 ; extra == "test"
+Requires-Dist: flake8-bugbear==24.12.12 ; extra == "test"
+Requires-Dist: flake8-docstrings ; extra == "test"
+Requires-Dist: flake8-formatter_junit_xml ; extra == "test"
+Requires-Dist: flake8 ; extra == "test"
+Requires-Dist: flake8-pyproject ; extra == "test"
+Requires-Dist: pre-commit==4.2.0 ; extra == "test"
+Requires-Dist: pylint==3.3.6 ; extra == "test"
+Requires-Dist: pylint_junit ; extra == "test"
+Requires-Dist: pytest-cov==6.1.1 ; extra == "test"
+Requires-Dist: pytest-mock<3.14.1 ; extra == "test"
+Requires-Dist: pytest-runner ; extra == "test"
+Requires-Dist: pytest==8.3.5 ; extra == "test"
+Requires-Dist: pytest-github-actions-annotate-failures ; extra == "test"
+Requires-Dist: shellcheck-py==0.10.0.1 ; extra == "test"
+Project-URL: Documentation, https://isaac-cf-wong.github.io/audioarxiv
+Project-URL: Source, https://github.com/isaac-cf-wong/audioarxiv
+Project-URL: Tracker, https://github.com/isaac-cf-wong/audioarxiv/issues
+Provides-Extra: spark
+Provides-Extra: test
+# 🎧 audioarxiv
+[![PyPI version](https://badge.fury.io/py/audioarxiv.svg)](https://pypi.org/project/audioarxiv/)
+[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
+[![Build](https://img.shields.io/github/actions/workflow/status/isaac-cf-wong/audioarxiv/CI.yml?branch=main)](https://github.com/isaac-cf-wong/audioarxiv/actions)
+[![Python Version](https://img.shields.io/pypi/pyversions/audioarxiv)](https://pypi.org/project/audioarxiv/)
+[![Security: bandit](https://img.shields.io/badge/security-bandit-yellow.svg)](https://github.com/PyCQA/bandit)
+[![Documentation Status](https://img.shields.io/badge/documentation-online-brightgreen)](https://isaac-cf-wong.github.io/audioarxiv/)
+📚 **Documentation**: [https://isaac-cf-wong.github.io/audioarxiv/](https://isaac-cf-wong.github.io/audioarxiv/)
+**Turn arXiv papers into audio.**
+`audioarxiv` lets you fetch research papers from arXiv and have them read aloud.
+---
+## 🚀 Features
+- 🔍 Search and retrieve papers using the arXiv API
+- 📄 Extract and parse the content from PDF (excluding title/abstract)
+- 🗣️ Convert text to speech with natural voice output
+- 🧠 Great for passive learning while commuting or doing chores
+---
+## 📦 Installation
+Install from [PyPI](https://pypi.org/project/audioarxiv/):
+```bash
+pip install audioarxiv
+```
+Install from [Conda](https://anaconda.org/conda-forge/audioarxiv):
+```bash
+conda install -c conda-forge audioarxiv
+```
+---
+## 🛠 Usage
+```bash
+audioarxiv --id "<arxiv id>"
+```
+### 🎙️ Text-to-Speech Options
+You can customize the `pyttsx3` voice engine by specifying the speaking rate, volume, voice, and pause between sentences.
+```bash
+audioarxiv --id "<arxiv id>" --rate <rate> --volume <volume> --voice "<voice>" --pause-seconds <pause-seconds>
+```
+- `rate`: Number of words per minute. Defaults to 140.
+- `volume`: Volume of the audio. Defaults to 0.9.
+- `voice`: Voice of the audio. Defaults to the pyttsx3 default voice.
+- `pause-seconds`: Number of seconds to pause between sentences.
+The settings are saved, so you only need to provide your preferred settings once.
+## Contributing
+This project welcomes contributions and suggestions. For details, visit the repository's [Contributor License Agreement (CLA)](https://cla.opensource.microsoft.com) and [Code of Conduct](https://opensource.microsoft.com/codeofconduct/) pages.

audioarxiv-0.1.0rc46.post1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+audioarxiv/__init__.py,sha256=XIOQIggB9Lhdm1TolfKniYg7pEHZNnz4WqChpVcPaw0,5807
+audioarxiv/audio/__init__.py,sha256=UZx3AkhC8NZFXwQbW_sU4sQ1uGeEIRXCPDghXvZy8rY,214
+audioarxiv/audio/base.py,sha256=tQOby6-12r5lInbAQChMxsQNSQT7_od4_tZ7G8hsDKE,5984
+audioarxiv/preprocess/__init__.py,sha256=NUCDDLpSwpWTBaPNdLWUVK2FhtwMPsFvXczTE21_UvU,338
+audioarxiv/preprocess/article.py,sha256=d9nV2DEH4mvKsgUpJ3WB256rt8k5O7YvNTBUp5YOUbs,394
+audioarxiv/preprocess/math_equation.py,sha256=ulkeMZFJKxU8BH1QzNyW4BZ-UjWfDDMp1C-cqoRGyls,945
+audioarxiv/resources/__init__.py,sha256=KCZm9Hq0O9oCCtfpyKVDGo_qX-PU2qS8aJe1NgvmR7Q,166
+audioarxiv/resources/paper.py,sha256=s9XT3xuzntR_0_Np29F66muou0q409jocD4sU2dtgDM,7161
+audioarxiv/tools/__init__.py,sha256=7X5vtxzvCY9URWo0p3zvM11J6whGFeDPF7XU0dt1Qcw,28
+audioarxiv/tools/main.py,sha256=sAh7izIdGmfQZiI3tBKn-zLMhQgAQvLGu4OmYnYLsAc,6884
+audioarxiv-0.1.0rc46.post1.dist-info/entry_points.txt,sha256=d_K6uTNuC8-f9XUQ_enFBgssiK2lVV57EHCEloriVY4,57
+audioarxiv-0.1.0rc46.post1.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
+audioarxiv-0.1.0rc46.post1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+audioarxiv-0.1.0rc46.post1.dist-info/METADATA,sha256=OJI4px0ymhXa8WIUf3cse9LusAcXqCKAlQmqfUHJb_o,4445
+audioarxiv-0.1.0rc46.post1.dist-info/RECORD,,

audioarxiv-0.1.0rc46.post1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: flit 3.12.0
+Root-Is-Purelib: true
+Tag: py3-none-any

audioarxiv-0.1.0rc46.post1.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,3 @@
+[console_scripts]
+audioarxiv=audioarxiv.tools.main:main

audioarxiv-0.1.0rc46.post1.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+    MIT License
+    Copyright (c) Microsoft Corporation.
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE