oipd 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oipd-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Henry Tian
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
oipd-0.0.1/MANIFEST.in ADDED
@@ -0,0 +1,7 @@
1
+ include *.py
2
+ recursive-include cli *
3
+ recursive-include core *
4
+ recursive-include dashboard *
5
+ recursive-include dashboard/resources *.png
6
+ recursive-include graphics *
7
+ recursive-include io *
oipd-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.2
2
+ Name: oipd
3
+ Version: 0.0.1
4
+ Summary: Generate future price PDFs for publicly traded securities using options data
5
+ Author-email: Jannic Holzer <jannic.holzer@gmail.com>, Henry Tian <tyrneh@gmail.com>
6
+ Project-URL: Homepage, https://github.com/jmholzer/probabilistic-pdfs
7
+ Project-URL: Bug Tracker, https://github.com/jmholzer/probabilistic-pdfs/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: MacOS
10
+ Classifier: Operating System :: POSIX :: Linux
11
+ Requires-Python: >=3.10
12
+ License-File: LICENSE
oipd-0.0.1/README.md ADDED
@@ -0,0 +1,116 @@
1
+ ![Probabilistic logo](probabilistic/dashboard/resources/logo.png)
2
+
3
+ ![Python version](https://img.shields.io/badge/python-3.10-blue.svg)
4
+ [![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black)
5
+
6
+ This Python project generates forward-looking probability density functions (PDFs) and cumulative distribution functions (CDFs) for the prices of publicly traded securities using options data. The output is visualized with matplotlib, and the project also includes a user-friendly web-based dashboard interface built with Streamlit.
7
+
8
+ ## Table of Contents
9
+
10
+ - [Installation](#installation)
11
+ - [Usage](#usage)
12
+ - [Algorithm Overview](#algorithm-overview)
13
+ - [License](#license)
14
+
15
+ ## Installation
16
+
17
+ 1. Clone the repo
18
+
19
+ ```bash
20
+ git clone https://github.com/jmholzer/probabilistic-pdfs.git
21
+ ```
22
+
23
+ 2. Navigate to the project directory
24
+
25
+ ```bash
26
+ cd probabilistic-pdfs
27
+ ```
28
+
29
+ 3. Install Python dependencies
30
+
31
+ ```bash
32
+ pip install -r requirements.txt
33
+ ```
34
+
35
+ 4. Install the project
36
+
37
+ ```bash
38
+ pip install .
39
+ ```
40
+
41
+ Please note that this project requires Python 3.10 or later.
42
+
43
+ ## Usage
44
+
45
+ <b>Option 1: To start the web-based dashboard, run the following command:</b>
46
+
47
+ ```bash
48
+ probabilistic
49
+ ```
50
+
51
+ This will start a local web server and you should be able to access the dashboard in your web browser at `localhost:8501`.
52
+
53
+ The user will need to provide their own options data in a CSV file with the columns 'strike', and 'last_price'. Sample data for SPY can be found in the `data` folder.
54
+
55
+ <b>Option 2: To use probabilistic from within python, see `example_script.py` for a demo:</b>
56
+
57
+ The user will need to specify 4 arguments:
58
+ 1. `input_csv_path`: a string containing the file path of the options data in a csv, with the columns 'strike' and 'last_price'
59
+ 2. `current_price`: a number of the underlying asset's current price
60
+ 3. `days_forward`: the number of days between the current date and the strike date
61
+ 4. `output_csv_path`: a string containing the file path where the user wishes to save the results
62
+ The output will be a csv file containing 3 columns: price, probability density, cumulative probability
63
+
64
+ ```
65
+ from probabilistic import cli
66
+
67
+ input_csv_path = "data/AAPL_currentdateNov14_callMar15_currentprice18480_CLEAN.csv"
68
+ current_price = 184.8
69
+ days_forward = 123
70
+ output_csv_path = "/Users/username/Downloads/results.csv"
71
+
72
+ cli.csv_runner.run(input_csv_path, float(current_price), int(days_forward), risk_free_rate=0.03, save_to_csv=True, output_csv_path=output_csv_path)
73
+ ```
74
+
75
+ ## Theory Overview
76
+
77
+ An option is a financial derivative that gives the holder the right, but not the obligation, to buy or sell an asset at a specified price (strike price) on a certain date in the future. Intuitively, the value of an option depends on the probability that it will be profitable or "in-the-money" at expiration.
78
+
79
+ Why? Consider this scenario: You possess an option to sell a stock for $100 tomorrow, and as of the market's close today, the stock's price stands at $10. Intuitively, this option appears to hold significant value due to the high likelihood of its exercise. However, if it were certain that the stock's price would surge to $200 at the opening bell tomorrow, the chance of exercising your option profitably drops to zero. Consequently, the option's value evaporates. This illustrates how the price of an option is linked to the probability of its being in the money—that is, the likelihood that the option can be exercised at a profit. Consequently, by knowing the price of an option, we can work backwards to calculate the consensus probability of its future price.
80
+
81
+ To recap, the price of an option reflects the market's collective expectation about the future price of the underlying asset, and is inherently tied to the probability of its outcome (the option being in-the-money) occurring.
82
+ By working backwards, we can solve for the probability of outcomes occurring along a continuum of strike prices, and thus generate a PDF of the market's collective expectation of the future price of the underlying asset.
83
+
84
+ For a simplified worked example, see this [excellent blog post](https://reasonabledeviations.com/2020/10/01/option-implied-pdfs/).
85
+ For a complete reading of the financial theory, see [this paper](https://www.bankofengland.co.uk/-/media/boe/files/quarterly-bulletin/2000/recent-developments-in-extracting-information-from-options-markets.pdf?la=en&hash=8D29F2572E08B9F2B541C04102DE181C791DB870).
86
+
87
+ ## Algorithm Overview
88
+
89
+ The process of generating the PDFs and CDFs is as follows:
90
+
91
+ 1. For an underlying asset, options data along the full range of strike prices are read from a CSV file to create a DataFrame. This gives us a table of strike prices along with the last price[^1] each option sold for
92
+ 2. Using the Black-Scholes formula, we convert the option prices at each strike into implied volatilities (IV)[^2]
93
+ 3. Using B-spline, we fit a curve-of-best-fit onto the discrete observations of IV over the full range of strike prices[^3]. Thus, we have extracted a continuous model from discrete IV observations - this is called the volatility smile
94
+ 4. From the volatility smile, we use Black-Scholes to convert IVs back to prices. Thus, we arrive at a continuous curve of options prices along the full range of strike prices
95
+ 5. From the continuous price curve, we use numerical differentiation to get the first derivative of prices with respect to strike. Then we numerically differentiate again to get the second derivative. The second derivative of prices, multiplied by the factor $e^{r\tau}$ (with $r$ the risk-free rate and $\tau$ the time to expiry), gives the probability density function[^4]
96
+ 6. Once we have the PDF, we can calculate the CDF
97
+ 7. Quartiles (25th, 50th, and 75th percentiles) of each distribution are also derived
98
+
99
+ [^1]: We chose to use last price instead of calculating the mid-price given the bid-ask spread. This is because Yahoo Finance, a common source for options chain data, often lacks bid-ask data. See for example [Apple options](https://finance.yahoo.com/quote/AAPL/options/)
100
+ [^2]: We convert from price-space to IV-space, and then back to price-space as described in step 4. See this [blog post](https://reasonabledeviations.com/2020/10/10/option-implied-pdfs-2/) for a breakdown of why we do this double conversion
101
+ [^3]: See [this paper](https://edoc.hu-berlin.de/bitstream/handle/18452/14708/zeng.pdf?sequence=1&isAllowed=y) for more details. In summary, options markets contain noise. Therefore, generating a volatility smile through simple interpolation will result in a noisy smile function. Converting back to price-space will then result in a noisy price curve. Finally, when we numerically differentiate the price curve twice, the noise will be amplified and the resulting PDF will be meaningless. Thus, we need either a parametric or non-parametric model to try to extract the true relationship between IV and strike price from the noisy observations. The paper suggests a 3rd order B-spline as a possible model choice
102
+ [^4]: For a proof of this derivation, see this [blog post](https://reasonabledeviations.com/2020/10/10/option-implied-pdfs-2/)
103
+
104
+ ## Examples
105
+
106
+ An example of the input and output for the sample AAPL options chain data for the expiry date of Mar 15 2024 (taken on Nov 14 2023) included in `data/` is:
107
+
108
+ ![Probabilistic example input](.meta/images/probabilistic_example_input.png)
109
+
110
+ ![Probabilistic example output](.meta/images/probabilistic_example_output.png)
111
+
112
+ ## License
113
+
114
+ This project is a preview, released under the MIT License (see the `LICENSE` file). Not financial advice.
115
+
116
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,99 @@
1
+ from probabilistic import cli
2
+ from datetime import datetime
3
+ import matplotlib.pyplot as plt
4
+
5
# Example script: estimate and plot the option-implied price PDF for three
# sample option chains shipped in ``data/``.
#
# Each example runs ``cli.csv_runner.run`` on one CSV file and then shows a
# matplotlib figure. ``plt.show()`` is called once per example, so the examples
# are displayed one after another.

DATE_FORMAT = "%Y-%m-%d"


def _days_between(current_date: str, strike_date: str) -> int:
    """Return the whole number of days from ``current_date`` to ``strike_date``.

    Both arguments are ``YYYY-MM-DD`` strings.
    """
    current_date_dt = datetime.strptime(current_date, DATE_FORMAT)
    strike_date_dt = datetime.strptime(strike_date, DATE_FORMAT)
    return (strike_date_dt - current_date_dt).days


def _plot_implied_pdf(
    input_csv_path: str,
    current_price: float,
    current_date: str,
    strike_date: str,
    solver_method: str,
    title: str,
):
    """Compute the implied PDF for one option chain, plot it, and return it.

    Args:
        input_csv_path: path to the options CSV file.
        current_price: current price of the underlying.
        current_date: observation date as ``YYYY-MM-DD``.
        strike_date: expiry date as ``YYYY-MM-DD``.
        solver_method: implied-volatility solver, ``"brent"`` or ``"newton"``.
        title: title of the figure.

    Returns:
        The DataFrame returned by ``cli.csv_runner.run`` (its ``Price`` and
        ``PDF`` columns are the ones plotted).
    """
    result = cli.csv_runner.run(
        input_csv_path=input_csv_path,
        current_price=float(current_price),
        days_forward=int(_days_between(current_date, strike_date)),
        risk_free_rate=0.03,
        fit_kernel_pdf=True,
        solver_method=solver_method,
    )

    # Plot probability density function
    plt.figure(figsize=(8, 5))
    plt.plot(result.Price, result.PDF, label="Implied PDF", color="cyan", alpha=0.7)
    plt.xlabel("Price")
    plt.ylabel("Density")
    plt.legend()
    plt.title(title)
    plt.show()
    return result


# Example 1 - NVIDIA
df = _plot_implied_pdf(
    input_csv_path="data/nvidia_date20250128_strikedate20250516_price12144.csv",
    current_price=121.44,
    current_date="2025-01-28",
    strike_date="2025-05-16",
    solver_method="brent",
    title="Implied PDF of NVIDIA at 2025-05-16, from perspective of 2025-01-28",
)

# Example 2 - SPY
df = _plot_implied_pdf(
    input_csv_path="data/spy_date20250128_strike20250228_price60444.csv",
    current_price=604.44,
    current_date="2025-01-28",
    strike_date="2025-02-28",
    solver_method="brent",
    title="Implied PDF of S&P500 at 2025-02-28, from perspective of 2025-01-28",
)

# Example 3 - US Steel (uses the Newton solver instead of Brent)
ussteel_pdf = _plot_implied_pdf(
    input_csv_path="data/ussteel_date20250128_strike20251219_price3629.csv",
    current_price=36.29,
    current_date="2025-01-28",
    strike_date="2025-12-19",
    solver_method="newton",
    title=(
        "Probability distribution of US Steel on 2025-12-19, from perspective of 2025-01-28"
    ),
)
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.2
2
+ Name: oipd
3
+ Version: 0.0.1
4
+ Summary: Generate future price PDFs for publicly traded securities using options data
5
+ Author-email: Jannic Holzer <jannic.holzer@gmail.com>, Henry Tian <tyrneh@gmail.com>
6
+ Project-URL: Homepage, https://github.com/jmholzer/probabilistic-pdfs
7
+ Project-URL: Bug Tracker, https://github.com/jmholzer/probabilistic-pdfs/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: MacOS
10
+ Classifier: Operating System :: POSIX :: Linux
11
+ Requires-Python: >=3.10
12
+ License-File: LICENSE
@@ -0,0 +1,32 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ example_script.py
5
+ pyproject.toml
6
+ setup.py
7
+ oipd.egg-info/PKG-INFO
8
+ oipd.egg-info/SOURCES.txt
9
+ oipd.egg-info/dependency_links.txt
10
+ oipd.egg-info/entry_points.txt
11
+ oipd.egg-info/top_level.txt
12
+ probabilistic/__init__.py
13
+ probabilistic/cli/__init__.py
14
+ probabilistic/cli/cli.py
15
+ probabilistic/cli/csv_runner.py
16
+ probabilistic/cli/utils.py
17
+ probabilistic/core/__init__.py
18
+ probabilistic/core/pdf.py
19
+ probabilistic/dashboard/__init__.py
20
+ probabilistic/dashboard/interface.py
21
+ probabilistic/dashboard/resources/logo.png
22
+ probabilistic/dashboard/resources/logo_alt.png
23
+ probabilistic/graphics/__init__.py
24
+ probabilistic/graphics/matplot.py
25
+ probabilistic/io/__init__.py
26
+ probabilistic/io/csv_reader.py
27
+ probabilistic/io/reader.py
28
+ tests/__init__.py
29
+ tests/cli/__init__.py
30
+ tests/cli/test_cli.py
31
+ tests/io/__init__.py
32
+ tests/io/test_csv_reader.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ probabilistic = probabilistic.cli:main
@@ -0,0 +1,4 @@
1
+ data
2
+ dist
3
+ probabilistic
4
+ tests
File without changes
@@ -0,0 +1 @@
1
+ from .cli import main
@@ -0,0 +1,48 @@
1
+ import pathlib
2
+
3
+ import click
4
+ from click_default_group import DefaultGroup
5
+
6
+ from . import csv_runner
7
+ from .utils import python_call
8
+
9
+
10
# Root command group. It is configured with default="run" and
# default_if_no_args=True; presumably DefaultGroup then dispatches a bare
# `probabilistic` invocation to the `run` sub-command — confirm against the
# click-default-group documentation.
@click.group(
    cls=DefaultGroup,
    name="probabilistic",
    default="run",
    default_if_no_args=True,
)
def cli():
    """Defines a click group for the whole project"""
16
+
17
+
18
@cli.command()
@click.option("--csv", "input_csv_path", help="Path to the input CSV file.")
@click.option(
    "--current-price",
    "current_price",
    type=float,
    help="Current price of the security.",
)
@click.option(
    "--days-forward",
    "days_forward",
    type=int,
    help="Number of days in the future to estimate the price density at.",
)
@click.option(
    "--risk-free-rate",
    "risk_free_rate",
    type=float,
    help="Annual risk-free rate passed to the PDF calculation.",
)
@click.option(
    "--output-csv",
    "output_csv_path",
    help="If given, save the result to this CSV file instead of printing it.",
)
def calculate(
    input_csv_path: str | None,
    current_price: float | None,
    days_forward: int | None,
    risk_free_rate: float | None = None,
    output_csv_path: str | None = None,
) -> None:
    """The CLI endpoint for running probabilistic end-to-end

    Args:
        input_csv_path: the path to the input CSV file
        current_price: the current price of the security
        days_forward: the number of days in the future to estimate the
            price probability density at
        risk_free_rate: the annual risk free rate; required by
            ``csv_runner.run``
        output_csv_path: optional path of a CSV file to write the result to

    Returns:
        None

    Raises:
        click.UsageError: if ``--csv`` is given but one of the numeric
            options is missing.
    """
    if not input_csv_path:
        # Unchanged from the original: without --csv there is nothing to do.
        return

    # click already converted the values (type=float / type=int); here we only
    # make sure everything csv_runner.run needs was actually supplied.
    needed = {
        "--current-price": current_price,
        "--days-forward": days_forward,
        "--risk-free-rate": risk_free_rate,
    }
    missing = [flag for flag, value in needed.items() if value is None]
    if missing:
        raise click.UsageError(
            f"Missing option(s) required with --csv: {', '.join(missing)}"
        )

    result = csv_runner.run(
        input_csv_path=input_csv_path,
        current_price=current_price,
        days_forward=days_forward,
        risk_free_rate=risk_free_rate,
        save_to_csv=output_csv_path is not None,
        output_csv_path=output_csv_path,
    )
    if output_csv_path is None:
        # Without an output file the table is the only observable result.
        click.echo(result.to_string(index=False))
37
+
38
+
39
@cli.command()
def run() -> None:
    """The CLI endpoint for running the probabilistic interface"""
    # The dashboard script sits in the package directory one level above this
    # module's directory; it is launched as `<python> -m streamlit run <script>`.
    package_root = pathlib.Path(__file__).parent.parent.resolve()
    dashboard_script = package_root / "dashboard" / "interface.py"
    python_call("streamlit", ("run", str(dashboard_script)))
45
+
46
+
47
def main() -> None:
    """Console-script entry point: hand control to the click group."""
    cli()
@@ -0,0 +1,72 @@
1
+ from probabilistic.core import calculate_pdf, calculate_cdf, fit_kde
2
+ from probabilistic.io import CSVReader
3
+ import pandas as pd
4
+ from traitlets import Bool
5
+ from typing import Optional
6
+
7
+
8
def run(
    input_csv_path: str,
    current_price: float,
    days_forward: int,
    risk_free_rate: float,
    fit_kernel_pdf: Optional[bool] = False,
    save_to_csv: bool = False,
    output_csv_path: Optional[str] = None,
    solver_method: Optional[str] = "brent",
) -> pd.DataFrame:
    """
    Runs the probabilistic price distribution estimation using option market data.

    This function reads option data from a CSV file, calculates an implied probability
    density function (PDF) based on market prices, and optionally smooths the PDF
    using Kernel Density Estimation (KDE). It then computes the cumulative distribution
    function (CDF), optionally saves the result, and returns it.

    Args:
        input_csv_path (str): Path to the input CSV file containing option market data.
        current_price (float): The current price of the underlying security.
        days_forward (int): The number of days in the future for which the probability
            density is estimated.
        risk_free_rate (float): the annual risk free rate in nominal terms
        fit_kernel_pdf (Optional[bool], default=False): Whether to smooth the implied
            PDF using Kernel Density Estimation (KDE).
        save_to_csv (bool, default=False): If `True`, also saves the output to a CSV file.
        output_csv_path (Optional[str], default=None): Path to save the output CSV file.
            Required if `save_to_csv=True`.
        solver_method (str): which solver to use for IV. Either "newton" or "brent"

    Returns:
        A `pd.DataFrame` containing three columns: `Price`, `PDF`, and `CDF`.
        The DataFrame is returned whether or not it was also written to
        `output_csv_path`.

    Raises:
        ValueError: if `save_to_csv` is `True` but `output_csv_path` is `None`.
    """
    # Fail fast: check the arguments before reading the file and running the
    # (potentially slow) PDF estimation.
    if save_to_csv and output_csv_path is None:
        raise ValueError("output_csv_path must be provided when save_to_csv=True")

    options_data = CSVReader().read(input_csv_path)
    pdf_point_arrays = calculate_pdf(
        options_data, current_price, days_forward, risk_free_rate, solver_method
    )

    # Fit KDE to normalize PDF if desired; fit_kde's result is unpacked below
    # as a (prices, densities) pair, just like calculate_pdf's.
    if fit_kernel_pdf:
        pdf_point_arrays = fit_kde(pdf_point_arrays)

    prices, pdf_values = pdf_point_arrays
    # The price axis returned by calculate_cdf is discarded, as in the original;
    # presumably it is identical to the PDF's — TODO confirm.
    _, cdf_values = calculate_cdf(pdf_point_arrays)

    df = pd.DataFrame({"Price": prices, "PDF": pdf_values, "CDF": cdf_values})

    if save_to_csv:
        df.to_csv(output_csv_path, index=False)
    return df
@@ -0,0 +1,23 @@
1
+ from typing import Iterable
2
+ import sys
3
+ import click
4
+ import subprocess
5
+ import shlex
6
+
7
+
8
def python_call(module: str, arguments: Iterable[str], **kwargs):
    """Run a subprocess command that invokes a Python module.

    The command ``<sys.executable> -m <module> <arguments...>`` is echoed in
    shell-quoted form and then executed with ``subprocess.run``.

    Arguments:
        module: The module to invoke.
        arguments: The arguments to pass to the module.
        **kwargs: Additional keyword arguments to pass to subprocess.run.

    Raises:
        click.exceptions.Exit: If the subprocess finishes with a non-zero
            return code; that code is used as the exit code.
    """
    command = [sys.executable, "-m", module, *arguments]
    click.echo(shlex.join(command))
    return_code = subprocess.run(command, **kwargs).returncode
    # Any non-zero status signals failure. The original only checked for
    # exactly 1, so e.g. exit status 2 was silently treated as success.
    if return_code != 0:
        raise click.exceptions.Exit(code=return_code)
@@ -0,0 +1,3 @@
1
+ from .pdf import calculate_cdf, calculate_pdf, calculate_quartiles, fit_kde
2
+
3
+ __all__ = ["calculate_pdf", "calculate_cdf", "calculate_quartiles", "fit_kde"]