PyPI - oipd - Versions diffs - 0.0.1__tar.gz - Mend

oipd 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

oipd-0.0.1/LICENSE +21 -0
oipd-0.0.1/MANIFEST.in +7 -0
oipd-0.0.1/PKG-INFO +12 -0
oipd-0.0.1/README.md +116 -0
oipd-0.0.1/example_script.py +99 -0
oipd-0.0.1/oipd.egg-info/PKG-INFO +12 -0
oipd-0.0.1/oipd.egg-info/SOURCES.txt +32 -0
oipd-0.0.1/oipd.egg-info/dependency_links.txt +1 -0
oipd-0.0.1/oipd.egg-info/entry_points.txt +2 -0
oipd-0.0.1/oipd.egg-info/top_level.txt +4 -0
oipd-0.0.1/probabilistic/__init__.py +0 -0
oipd-0.0.1/probabilistic/cli/__init__.py +1 -0
oipd-0.0.1/probabilistic/cli/cli.py +48 -0
oipd-0.0.1/probabilistic/cli/csv_runner.py +72 -0
oipd-0.0.1/probabilistic/cli/utils.py +23 -0
oipd-0.0.1/probabilistic/core/__init__.py +3 -0
oipd-0.0.1/probabilistic/core/pdf.py +439 -0
oipd-0.0.1/probabilistic/dashboard/__init__.py +0 -0
oipd-0.0.1/probabilistic/dashboard/interface.py +203 -0
oipd-0.0.1/probabilistic/dashboard/resources/logo.png +0 -0
oipd-0.0.1/probabilistic/dashboard/resources/logo_alt.png +0 -0
oipd-0.0.1/probabilistic/graphics/__init__.py +3 -0
oipd-0.0.1/probabilistic/graphics/matplot.py +113 -0
oipd-0.0.1/probabilistic/io/__init__.py +3 -0
oipd-0.0.1/probabilistic/io/csv_reader.py +46 -0
oipd-0.0.1/probabilistic/io/reader.py +65 -0
oipd-0.0.1/pyproject.toml +29 -0
oipd-0.0.1/setup.cfg +4 -0
oipd-0.0.1/setup.py +10 -0
oipd-0.0.1/tests/__init__.py +0 -0
oipd-0.0.1/tests/cli/__init__.py +0 -0
oipd-0.0.1/tests/cli/test_cli.py +10 -0
oipd-0.0.1/tests/io/__init__.py +0 -0
oipd-0.0.1/tests/io/test_csv_reader.py +22 -0

oipd-0.0.1/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Henry Tian
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

oipd-0.0.1/MANIFEST.in ADDED Viewed

@@ -0,0 +1,7 @@
+include *.py
+recursive-include cli *
+recursive-include core *
+recursive-include dashboard *
+recursive-include dashboard/resources *.png
+recursive-include graphics *
+recursive-include io *

oipd-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,12 @@
+Metadata-Version: 2.2
+Name: oipd
+Version: 0.0.1
+Summary: Generate future price PDFs for publicly traded securities using options data
+Author-email: Jannic Holzer <jannic.holzer@gmail.com>, Henry Tian <tyrneh@gmail.com>
+Project-URL: Homepage, https://github.com/jmholzer/probabilistic-pdfs
+Project-URL: Bug Tracker, https://github.com/jmholzer/probabilistic-pdfs/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: POSIX :: Linux
+Requires-Python: >=3.10
+License-File: LICENSE

oipd-0.0.1/README.md ADDED Viewed

@@ -0,0 +1,116 @@
+![Probabilistic logo](probabilistic/dashboard/resources/logo.png)
+![Python version](https://img.shields.io/badge/python-3.10-blue.svg)
+[![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black)
+This Python project generates forward-looking probability density functions (PDFs) and cumulative distribution functions (CDFs) for the prices of publicly traded securities using options data. The output is visualized with matplotlib, and the project also includes a user-friendly web-based dashboard interface built with Streamlit.
+## Table of Contents
+- [Installation](#installation)
+- [Usage](#usage)
+- [Algorithm Overview](#algorithm-overview)
+- [License](#license)
+## Installation
+1. Clone the repo
+```bash
+git clone https://github.com/jmholzer/probabilistic-pdfs.git
+```
+2. Navigate to the project directory
+```bash
+cd probabilistic-pdfs
+```
+3. Install Python dependencies
+```bash
+pip install -r requirements.txt
+```
+4. Install the project
+```bash
+pip install .
+```
+Please note that this project requires Python 3.10 or later.
+## Usage
+<b>Option 1: To start the web-based dashboard, run the following command:</b>
+```bash
+probabilistic
+```
+This will start a local web server and you should be able to access the dashboard in your web browser at `localhost:8501`.
+The user will need to provide their own options data in a CSV file with the columns 'strike' and 'last_price'. Sample data for SPY can be found in the `data` folder.
+<b>Option 2: To use probabilistic from within python, see `example_script.py` for a demo:</b>
+The user will need to specify 5 arguments:
+1. `input_csv_path`: a string containing the file path of the options data in a csv, with the columns 'strike' and 'last_price'
+2. `current_price`: a number of the underlying asset's current price
+3. `days_forward`: a number of the days between the current date and the strike date
+4. `risk_free_rate`: a number giving the annual risk-free rate in nominal terms (for example `0.03`)
+5. `output_csv_path`: a string containing the file path where the user wishes to save the results (used together with `save_to_csv=True`)
+The output will be a csv file containing 3 columns: price, probability density, cumulative probability
+```
+from probabilistic import cli
+input_csv_path = "data/AAPL_currentdateNov14_callMar15_currentprice18480_CLEAN.csv"
+current_price = 184.8
+days_forward = 123
+risk_free_rate = 0.03
+output_csv_path = "/Users/username/Downloads/results.csv"
+cli.csv_runner.run(input_csv_path, float(current_price), int(days_forward), risk_free_rate, save_to_csv=True, output_csv_path=output_csv_path)
+```
+## Theory Overview
+An option is a financial derivative that gives the holder the right, but not the obligation, to buy or sell an asset at a specified price (strike price) on a certain date in the future. Intuitively, the value of an option depends on the probability that it will be profitable or "in-the-money" at expiration.
+Why? Consider this scenario: You possess an option to sell a stock for $100 tomorrow, and as of the market's close today, the stock's price stands at $10. Intuitively, this option appears to hold significant value due to the high likelihood of its exercise. However, if it were certain that the stock's price would surge to $200 at the opening bell tomorrow, the chance of exercising your option profitably drops to zero. Consequently, the option's value evaporates. This illustrates how the price of an option is linked to the probability of its being in the money—that is, the likelihood that the option can be exercised at a profit. Consequently, by knowing the price of an option, we can work backwards to calculate the consensus probability of its future price.
+To recap, the price of an option reflects the market's collective expectation about the future price of the underlying asset, and is inherently tied to the probability of its outcome (the option being in-the-money) occurring.
+By working backwards, we can solve for the probability of outcomes occurring along a continuum of strike prices, and thus generate a PDF of the market's collective expectation of the future price of the underlying asset.
+For a simplified worked example, see this [excellent blog post](https://reasonabledeviations.com/2020/10/01/option-implied-pdfs/).
+For a complete reading of the financial theory, see [this paper](https://www.bankofengland.co.uk/-/media/boe/files/quarterly-bulletin/2000/recent-developments-in-extracting-information-from-options-markets.pdf?la=en&hash=8D29F2572E08B9F2B541C04102DE181C791DB870).
+## Algorithm Overview
+The process of generating the PDFs and CDFs is as follows:
+1. For an underlying asset, options data along the full range of strike prices are read from a CSV file to create a DataFrame. This gives us a table of strike prices along with the last price[^1] each option sold for
+2. Using the Black-Scholes formula, we convert the option prices observed at each strike into implied volatilities (IV)[^2]
+3. Using B-spline, we fit a curve-of-best-fit onto the discrete observations of IV over the full range of strike prices[^3]. Thus, we have extracted a continuous model from discrete IV observations - this is called the volatility smile
+4. From the volatility smile, we use Black-Scholes to convert IVs back to prices. Thus, we arrive at a continuous curve of options prices along the full range of strike prices
+5. From the continuous price curve, we use numerical differentiation to get the first derivative of prices. Then we numerically differentiate again to get the second derivative of prices. The second derivative of prices, multiplied by a discount factor $e^{r\tau}$ (where $r$ is the risk-free rate and $\tau$ is the time to expiry), results in the probability density function [^4]
+6. Once we have the PDF, we can calculate the CDF
+7. Quartiles (25th, 50th, and 75th percentiles) of each distribution are also derived
+[^1]: We chose to use last price instead of calculating the mid-price given the bid-ask spread. This is because Yahoo Finance, a common source for options chain data, often lacks bid-ask data. See for example [Apple options](https://finance.yahoo.com/quote/AAPL/options/)
+[^2]: We convert from price-space to IV-space, and then back to price-space as described in step 4. See this [blog post](https://reasonabledeviations.com/2020/10/10/option-implied-pdfs-2/) for a breakdown of why we do this double conversion
+[^3]: See [this paper](https://edoc.hu-berlin.de/bitstream/handle/18452/14708/zeng.pdf?sequence=1&isAllowed=y) for more details. In summary, options markets contain noise. Therefore, generating a volatility smile through simple interpolation will result in a noisy smile function. Converting back to price-space will then result in a noisy price curve. Finally, when we numerically differentiate the price curve twice, the noise will be amplified and the resulting PDF will be meaningless. Thus, we need either a parametric or non-parametric model to try to extract the true relationship between IV and strike price from the noisy observations. The paper suggests a 3rd order B-spline as a possible model choice
+[^4]: For a proof of this derivation, see this [blog post](https://reasonabledeviations.com/2020/10/10/option-implied-pdfs-2/)
+## Examples
+An example of the input and output for the sample AAPL options chain data for the expiry date of Mar 15 2024 (taken on Nov 14 2023) included in `data/` is:
+![Probabilistic example input](.meta/images/probabilistic_example_input.png)
+![Probabilistic example output](.meta/images/probabilistic_example_output.png)
+## License
+This project is a preview, it is not currently licensed. Not financial advice.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

oipd-0.0.1/example_script.py ADDED Viewed

@@ -0,0 +1,99 @@
+from probabilistic import cli
+from datetime import datetime
+import matplotlib.pyplot as plt
+# example 1 - NVIDIA
+input_csv_path = "data/nvidia_date20250128_strikedate20250516_price12144.csv"
+current_price = 121.44
+current_date = "2025-01-28"
+strike_date = "2025-05-16"
+# Convert the strings to datetime objects
+current_date_dt = datetime.strptime(current_date, "%Y-%m-%d")
+strike_date_dt = datetime.strptime(strike_date, "%Y-%m-%d")
+# Calculate the difference in days
+days_difference = (strike_date_dt - current_date_dt).days
+# output_csv_path = "/Users/henrytian/Downloads/results.csv"
+df = cli.csv_runner.run(
+    input_csv_path=input_csv_path,
+    current_price=float(current_price),
+    days_forward=int(days_difference),
+    risk_free_rate=0.03,
+    fit_kernel_pdf=True,
+    solver_method="brent",
+)
+# Plot probability density function
+plt.figure(figsize=(8, 5))
+plt.plot(df.Price, df.PDF, label="Implied PDF", color="cyan", alpha=0.7)
+plt.xlabel("Price")
+plt.ylabel("Density")
+plt.legend()
+plt.title("Implied PDF of NVIDIA at 2025-05-16, from perspective of 2025-01-28")
+plt.show()
+# Example 2 - SPY
+input_csv_path = "data/spy_date20250128_strike20250228_price60444.csv"
+current_price = 604.44
+current_date = "2025-01-28"
+strike_date = "2025-02-28"
+# Convert the strings to datetime objects
+current_date_dt = datetime.strptime(current_date, "%Y-%m-%d")
+strike_date_dt = datetime.strptime(strike_date, "%Y-%m-%d")
+# Calculate the difference in days
+days_difference = (strike_date_dt - current_date_dt).days
+# output_csv_path = "/Users/henrytian/Downloads/results.csv"
+df = cli.csv_runner.run(
+    input_csv_path=input_csv_path,
+    current_price=float(current_price),
+    days_forward=int(days_difference),
+    risk_free_rate=0.03,
+    fit_kernel_pdf=True,
+    solver_method="brent",
+)
+# Plot probability density function
+plt.figure(figsize=(8, 5))
+plt.plot(df.Price, df.PDF, label="Implied PDF", color="cyan", alpha=0.7)
+plt.xlabel("Price")
+plt.ylabel("Density")
+plt.legend()
+plt.title("Implied PDF of S&P500 at 2025-02-28, from perspective of 2025-01-28")
+plt.show()
+# --- Example 3 - US Steel --- #
+input_csv_path = "data/ussteel_date20250128_strike20251219_price3629.csv"
+current_price = 36.29
+current_date = "2025-01-28"
+strike_date = "2025-12-19"
+# Convert the strings to datetime objects
+current_date_dt = datetime.strptime(current_date, "%Y-%m-%d")
+strike_date_dt = datetime.strptime(strike_date, "%Y-%m-%d")
+# Calculate the difference in days
+days_difference = (strike_date_dt - current_date_dt).days
+# output_csv_path = "/Users/henrytian/Downloads/results.csv"
+ussteel_pdf = cli.csv_runner.run(
+    input_csv_path=input_csv_path,
+    current_price=float(current_price),
+    days_forward=int(days_difference),
+    risk_free_rate=0.03,
+    fit_kernel_pdf=True,
+    solver_method="newton",
+)
+# Plot probability density function
+plt.figure(figsize=(8, 5))
+plt.plot(
+    ussteel_pdf.Price, ussteel_pdf.PDF, label="Implied PDF", color="cyan", alpha=0.7
+)
+plt.xlabel("Price")
+plt.ylabel("Density")
+plt.legend()
+plt.title(
+    "Probability distribution of US Steel on 2025-12-19, from perspective of 2025-01-28"
+)
+plt.show()

oipd-0.0.1/oipd.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,12 @@
+Metadata-Version: 2.2
+Name: oipd
+Version: 0.0.1
+Summary: Generate future price PDFs for publicly traded securities using options data
+Author-email: Jannic Holzer <jannic.holzer@gmail.com>, Henry Tian <tyrneh@gmail.com>
+Project-URL: Homepage, https://github.com/jmholzer/probabilistic-pdfs
+Project-URL: Bug Tracker, https://github.com/jmholzer/probabilistic-pdfs/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: POSIX :: Linux
+Requires-Python: >=3.10
+License-File: LICENSE

oipd-0.0.1/oipd.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,32 @@
+LICENSE
+MANIFEST.in
+README.md
+example_script.py
+pyproject.toml
+setup.py
+oipd.egg-info/PKG-INFO
+oipd.egg-info/SOURCES.txt
+oipd.egg-info/dependency_links.txt
+oipd.egg-info/entry_points.txt
+oipd.egg-info/top_level.txt
+probabilistic/__init__.py
+probabilistic/cli/__init__.py
+probabilistic/cli/cli.py
+probabilistic/cli/csv_runner.py
+probabilistic/cli/utils.py
+probabilistic/core/__init__.py
+probabilistic/core/pdf.py
+probabilistic/dashboard/__init__.py
+probabilistic/dashboard/interface.py
+probabilistic/dashboard/resources/logo.png
+probabilistic/dashboard/resources/logo_alt.png
+probabilistic/graphics/__init__.py
+probabilistic/graphics/matplot.py
+probabilistic/io/__init__.py
+probabilistic/io/csv_reader.py
+probabilistic/io/reader.py
+tests/__init__.py
+tests/cli/__init__.py
+tests/cli/test_cli.py
+tests/io/__init__.py
+tests/io/test_csv_reader.py

oipd-0.0.1/oipd.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

oipd-0.0.1/oipd.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ probabilistic = probabilistic.cli:main

oipd-0.0.1/oipd.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1,4 @@
+data
+dist
+probabilistic
+tests

oipd-0.0.1/probabilistic/__init__.py ADDED Viewed

File without changes

oipd-0.0.1/probabilistic/cli/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .cli import main

oipd-0.0.1/probabilistic/cli/cli.py ADDED Viewed

@@ -0,0 +1,48 @@
+import pathlib
+import click
+from click_default_group import DefaultGroup
+from . import csv_runner
+from .utils import python_call
+@click.group(
+    cls=DefaultGroup, name="probabilistic", default="run", default_if_no_args=True
+)
+def cli():
+    """Defines a click group for the whole project"""
+    pass
+@cli.command()
+@click.option("--csv", "input_csv_path")
+@click.option("--current-price", "current_price")
+@click.option("--days-forward", "days_forward")
+def calculate(input_csv_path: str, current_price: float, days_forward: int) -> None:
+    """The CLI endpoint for running probabilistic end-to-end
+    Args:
+        input_csv_path: the path to the input CSV file
+        current_price: the current price of the security
+        days_forward: the number of days in the future to estimate the
+            price probability density at
+    Returns:
+        None
+    """
+    if input_csv_path:
+        # TODO: Get rid of this casting in a neat way
+        csv_runner.run(input_csv_path, float(current_price), int(days_forward))
+@cli.command()
+def run() -> None:
+    """The CLI endpoint for running the probabilistic interface"""
+    root_path = pathlib.Path(__file__).parent.parent.resolve()
+    interface_path = root_path / pathlib.Path("dashboard/interface.py")
+    python_call("streamlit", ("run", str(interface_path)))
+def main() -> None:
+    """Invoke the `cli` click group; target of the console-script entry point."""
+    cli()

oipd-0.0.1/probabilistic/cli/csv_runner.py ADDED Viewed

@@ -0,0 +1,72 @@
+from probabilistic.core import calculate_pdf, calculate_cdf, fit_kde
+from probabilistic.io import CSVReader
+import pandas as pd
+from traitlets import Bool
+from typing import Optional
+def run(
+    input_csv_path: str,
+    current_price: float,
+    days_forward: int,
+    risk_free_rate: float,
+    fit_kernel_pdf: Optional[Bool] = False,
+    save_to_csv: Bool = False,
+    output_csv_path: Optional[str] = None,
+    solver_method: Optional[str] = "brent",
+) -> pd.DataFrame:
+    """
+    Runs the probabilistic price distribution estimation using option market data.
+    This function reads option data from a CSV file, calculates an implied probability
+    density function (PDF) based on market prices, and optionally smooths the PDF
+    using Kernel Density Estimation (KDE). It then computes the cumulative distribution
+    function (CDF) and saves or returns the results.
+    Args:
+        input_csv_path (str): Path to the input CSV file containing option market data.
+        current_price (float): The current price of the underlying security.
+        days_forward (int): The number of days in the future for which the probability
+            density is estimated.
+        risk_free_rate (float): the annual risk free rate in nominal terms
+        fit_kernel_pdf (Optional[bool], default=True): Whether to smooth the implied
+            PDF using Kernel Density Estimation (KDE).
+        save_to_csv (bool, default=False): If `True`, saves the output to a CSV file.
+        output_csv_path (Optional[str], default=None): Path to save the output CSV file.
+            Required if `save_to_csv=True`.
+        solver_method (str): which solver to use for IV. Either "newton" or "brent"
+    Returns:
+        - If `save_to_csv` is `True`, saves the results to a CSV file and returns `None`.
+        - If `save_to_csv` is `False`, returns a `pd.DataFrame` containing three columns:
+          `Price`, `PDF`, and `CDF`.
+    """
+    reader = CSVReader()
+    options_data = reader.read(input_csv_path)
+    pdf_point_arrays = calculate_pdf(
+        options_data, current_price, days_forward, risk_free_rate, solver_method
+    )
+    # Fit KDE to normalize PDF if desired
+    if fit_kernel_pdf:
+        pdf_point_arrays = fit_kde(
+            pdf_point_arrays
+        )  # Ensure this returns a tuple of arrays
+    cdf_point_arrays = calculate_cdf(pdf_point_arrays)
+    priceP, densityP = pdf_point_arrays
+    priceC, densityC = cdf_point_arrays
+    # Convert results to DataFrame
+    df = pd.DataFrame({"Price": priceP, "PDF": densityP, "CDF": densityC})
+    # Save or return DataFrame
+    if save_to_csv:
+        if output_csv_path is None:
+            raise ValueError("output_csv_path must be provided when save_to_csv=True")
+        df.to_csv(output_csv_path, index=False)
+        return df
+    else:
+        return df

oipd-0.0.1/probabilistic/cli/utils.py ADDED Viewed

@@ -0,0 +1,23 @@
+from typing import Iterable
+import sys
+import click
+import subprocess
+import shlex
+def python_call(module: str, arguments: Iterable[str], **kwargs):
+    """Run a subprocess command that invokes a Python module.
+    Arguments:
+        module: The module to invoke.
+        arguments: The arguments to pass to the module.
+        **kwargs: Additional keyword arguments to pass to subprocess.run.
+    Raises:
+        subprocess.CalledProcessError: If the subprocess call fails.
+    """
+    command = [sys.executable, "-m", module] + list(arguments)
+    click.echo(" ".join(shlex.quote(cmd) for cmd in command))
+    return_code = subprocess.run(command, **kwargs).returncode
+    if return_code == 1:
+        raise click.exceptions.Exit(code=return_code)

oipd-0.0.1/probabilistic/core/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .pdf import calculate_cdf, calculate_pdf, calculate_quartiles, fit_kde
+__all__ = ["calculate_pdf", "calculate_cdf", "calculate_quartiles", "fit_kde"]