forecasting-tools 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. forecasting_tools-0.1.0/LICENSE +21 -0
  2. forecasting_tools-0.1.0/PKG-INFO +103 -0
  3. forecasting_tools-0.1.0/README.md +61 -0
  4. forecasting_tools-0.1.0/forecasting_tools/__init__.py +31 -0
  5. forecasting_tools-0.1.0/forecasting_tools/ai_models/__init__.py +0 -0
  6. forecasting_tools-0.1.0/forecasting_tools/ai_models/ai_utils/__init__.py +0 -0
  7. forecasting_tools-0.1.0/forecasting_tools/ai_models/ai_utils/ai_misc.py +130 -0
  8. forecasting_tools-0.1.0/forecasting_tools/ai_models/ai_utils/openai_utils.py +260 -0
  9. forecasting_tools-0.1.0/forecasting_tools/ai_models/ai_utils/response_types.py +19 -0
  10. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/__init__.py +0 -0
  11. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/ai_model.py +72 -0
  12. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/incurs_cost.py +38 -0
  13. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/named_model.py +12 -0
  14. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/outputs_text.py +282 -0
  15. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/priced_per_request.py +15 -0
  16. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/request_limited_model.py +60 -0
  17. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/retryable_model.py +66 -0
  18. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/time_limited_model.py +40 -0
  19. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/token_limited_model.py +72 -0
  20. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/tokens_are_calculatable.py +8 -0
  21. forecasting_tools-0.1.0/forecasting_tools/ai_models/basic_model_interfaces/tokens_incur_cost.py +51 -0
  22. forecasting_tools-0.1.0/forecasting_tools/ai_models/claude35sonnet.py +15 -0
  23. forecasting_tools-0.1.0/forecasting_tools/ai_models/exa_searcher.py +371 -0
  24. forecasting_tools-0.1.0/forecasting_tools/ai_models/gpt4o.py +18 -0
  25. forecasting_tools-0.1.0/forecasting_tools/ai_models/gpt4ovision.py +24 -0
  26. forecasting_tools-0.1.0/forecasting_tools/ai_models/gpto1.py +43 -0
  27. forecasting_tools-0.1.0/forecasting_tools/ai_models/metaculus4o.py +33 -0
  28. forecasting_tools-0.1.0/forecasting_tools/ai_models/model_archetypes/__init__.py +0 -0
  29. forecasting_tools-0.1.0/forecasting_tools/ai_models/model_archetypes/anthropic_text_model.py +153 -0
  30. forecasting_tools-0.1.0/forecasting_tools/ai_models/model_archetypes/openai_text_model.py +148 -0
  31. forecasting_tools-0.1.0/forecasting_tools/ai_models/model_archetypes/openai_vision_model.py +58 -0
  32. forecasting_tools-0.1.0/forecasting_tools/ai_models/model_archetypes/perplexity_text_model.py +96 -0
  33. forecasting_tools-0.1.0/forecasting_tools/ai_models/model_archetypes/traditional_online_llm.py +84 -0
  34. forecasting_tools-0.1.0/forecasting_tools/ai_models/perplexity.py +18 -0
  35. forecasting_tools-0.1.0/forecasting_tools/ai_models/readme.md +102 -0
  36. forecasting_tools-0.1.0/forecasting_tools/ai_models/resource_managers/__init__.py +0 -0
  37. forecasting_tools-0.1.0/forecasting_tools/ai_models/resource_managers/hard_limit_manager.py +92 -0
  38. forecasting_tools-0.1.0/forecasting_tools/ai_models/resource_managers/monetary_cost_manager.py +21 -0
  39. forecasting_tools-0.1.0/forecasting_tools/ai_models/resource_managers/refreshing_bucket_rate_limiter.py +203 -0
  40. forecasting_tools-0.1.0/forecasting_tools/forecasting/__init__.py +0 -0
  41. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_database_manager.py +203 -0
  42. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_reports/__init__.py +0 -0
  43. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_reports/binary_report.py +156 -0
  44. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_reports/date_report.py +11 -0
  45. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_reports/forecast_report.py +220 -0
  46. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_reports/multiple_choice_report.py +11 -0
  47. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_reports/numeric_report.py +117 -0
  48. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_reports/report_organizer.py +91 -0
  49. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_reports/report_section.py +125 -0
  50. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_team/__init__.py +0 -0
  51. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_team/final_decision_agent.py +197 -0
  52. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_team/forecast_team.py +116 -0
  53. forecasting_tools-0.1.0/forecasting_tools/forecasting/forecast_team/research_manager.py +410 -0
  54. forecasting_tools-0.1.0/forecasting_tools/forecasting/llms/__init__.py +0 -0
  55. forecasting_tools-0.1.0/forecasting_tools/forecasting/llms/configured_llms.py +11 -0
  56. forecasting_tools-0.1.0/forecasting_tools/forecasting/llms/smart_searcher.py +297 -0
  57. forecasting_tools-0.1.0/forecasting_tools/forecasting/metaculus_api.py +257 -0
  58. forecasting_tools-0.1.0/forecasting_tools/forecasting/metaculus_question.py +201 -0
  59. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/__init__.py +0 -0
  60. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/base_rate_responder.py +615 -0
  61. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/deduplicator.py +287 -0
  62. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/estimator.py +119 -0
  63. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/general_search_responder.py +42 -0
  64. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/key_factors_searcher.py +412 -0
  65. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/niche_list_researcher.py +542 -0
  66. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/question_responder.py +29 -0
  67. forecasting_tools-0.1.0/forecasting_tools/forecasting/sub_question_responders/question_router.py +73 -0
  68. forecasting_tools-0.1.0/forecasting_tools/forecasting/team_manager.py +111 -0
  69. forecasting_tools-0.1.0/forecasting_tools/front_end/Home.py +80 -0
  70. forecasting_tools-0.1.0/forecasting_tools/front_end/__init__.py +0 -0
  71. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/__init__.py +0 -0
  72. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/base_rate_page.py +127 -0
  73. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/benchmark_page.py +230 -0
  74. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/estimator_page.py +88 -0
  75. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/example_forecast_reports.json +468 -0
  76. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/example_forecasts.py +54 -0
  77. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/forecaster_page.py +235 -0
  78. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/key_factors_page.py +135 -0
  79. forecasting_tools-0.1.0/forecasting_tools/front_end/app_pages/niche_list_researcher_page.py +119 -0
  80. forecasting_tools-0.1.0/forecasting_tools/front_end/benchmarks/2024-08-30_16-46-19__original_bot__score_0.0657.json +39505 -0
  81. forecasting_tools-0.1.0/forecasting_tools/front_end/benchmarks/2024-08-30_17-22-42__research_format_update__score_0.0802.json +39803 -0
  82. forecasting_tools-0.1.0/forecasting_tools/front_end/benchmarks/__init__.py +0 -0
  83. forecasting_tools-0.1.0/forecasting_tools/front_end/helpers/__init__.py +0 -0
  84. forecasting_tools-0.1.0/forecasting_tools/front_end/helpers/app_page.py +49 -0
  85. forecasting_tools-0.1.0/forecasting_tools/front_end/helpers/custom_auth.py +35 -0
  86. forecasting_tools-0.1.0/forecasting_tools/front_end/helpers/general.py +24 -0
  87. forecasting_tools-0.1.0/forecasting_tools/front_end/helpers/report_displayer.py +186 -0
  88. forecasting_tools-0.1.0/forecasting_tools/util/__init__.py +0 -0
  89. forecasting_tools-0.1.0/forecasting_tools/util/async_batching.py +155 -0
  90. forecasting_tools-0.1.0/forecasting_tools/util/coda_utils.py +88 -0
  91. forecasting_tools-0.1.0/forecasting_tools/util/custom_logger.py +124 -0
  92. forecasting_tools-0.1.0/forecasting_tools/util/file_manipulation.py +145 -0
  93. forecasting_tools-0.1.0/forecasting_tools/util/image_reading.py +74 -0
  94. forecasting_tools-0.1.0/forecasting_tools/util/jsonable.py +80 -0
  95. forecasting_tools-0.1.0/forecasting_tools/util/misc.py +36 -0
  96. forecasting_tools-0.1.0/pyproject.toml +46 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 CodexVeritas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.1
2
+ Name: forecasting-tools
3
+ Version: 0.1.0
4
+ Summary: AI forecasting and research tools to help humans reason about and forecast the future
5
+ License: MIT
6
+ Author: Benjamin Wilson
7
+ Author-email: mokoresearch@gmail.com
8
+ Requires-Python: >=3.10,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Dist: aiofiles (>=23.2.1,<24.0.0)
16
+ Requires-Dist: aiohttp (>=3.9.3,<4.0.0)
17
+ Requires-Dist: aiolimiter (>=1.1.0,<2.0.0)
18
+ Requires-Dist: asyncio (>=3.4.3,<4.0.0)
19
+ Requires-Dist: bs4 (>=0.0.1,<0.0.2)
20
+ Requires-Dist: langchain-anthropic (>=0.2.4,<0.3.0)
21
+ Requires-Dist: langchain-community (>=0.3.4,<0.4.0)
22
+ Requires-Dist: langchain-core (>=0.3.15,<0.4.0)
23
+ Requires-Dist: langchain-openai (>=0.2.5,<0.3.0)
24
+ Requires-Dist: nest-asyncio (>=1.5.8,<2.0.0)
25
+ Requires-Dist: numpy (>=1.26.0,<2.0.0)
26
+ Requires-Dist: openai (>=1.51.0,<2.0.0)
27
+ Requires-Dist: pillow (>=10.4.0,<11.0.0)
28
+ Requires-Dist: pipreqs (>=0.4.13,<0.5.0)
29
+ Requires-Dist: pre-commit (>=4.0.1,<5.0.0)
30
+ Requires-Dist: pydantic (>=2.9.2,<3.0.0)
31
+ Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
32
+ Requires-Dist: regex (>=2023.8.8,<2024.0.0)
33
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
34
+ Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
35
+ Requires-Dist: streamlit (>=1.36.0,<2.0.0)
36
+ Requires-Dist: tenacity (>=8.5.0,<9.0.0)
37
+ Requires-Dist: tiktoken (>=0.8.0,<0.9.0)
38
+ Requires-Dist: transformers (>=4.44.1,<5.0.0)
39
+ Requires-Dist: typeguard (>=4.3.0,<5.0.0)
40
+ Description-Content-Type: text/markdown
41
+
42
+ Last Update: Oct 25 2024
43
+
44
+ # Overview
45
+ This repository contains forecasting and research tools built with Python and Streamlit. The project aims to assist users in making predictions, conducting research, and analyzing data related to hard to answer questions (especially those from Metaculus). Find the demo website here: https://mokoresearch.streamlit.app/
46
+
47
+ Here are the key components and features of the project:
48
+ - General Forecaster that integrates with the Metaculus AI benchmarking competition
49
+ - Historical Base Rate Researcher
50
+ - Niche List Researcher
51
+ - Fermi Estimator
52
+ - Key Factors Analysis
53
+
54
+ # Getting Set Up
55
+
56
+ ## Environment Variables
57
+ The environment variables from ```.env.template``` are needed to run. Copy this template as ```.env``` and fill it in. Talk to the project owner to get the correct values for some of the more project specific variables.
58
+
59
+ ## Docker Dev Container
60
+ To get your development environment up and running, you need to have Docker Engine installed and running. Once you do, you can use the VSCode dev container pop-up to automatically set up everything for you
61
+
62
+ ### Install Docker
63
+ For Windows and Mac, you will download Docker Desktop. For Linux, you will download Docker Engine. (NOTE: These instructions might be outdated).
64
+
65
+ First download and setup Docker Engine using the instructions at the link below for your OS:
66
+ * Windows: [windows-install](https://docs.docker.com/desktop/install/windows-install/)
67
+ * Mac: [mac-install](https://docs.docker.com/desktop/install/mac-install/)
68
+ * Linux: [install](https://docs.docker.com/engine/install/)
69
+ * Note: DO NOT install Docker Desktop for Linux, rather, select your Linux distribution on the left sidebar and follow the distribution specific instructions for Docker engine. Docker Desktop runs with a different environment in Linux. (TODO: Check if this restriction still applies)
70
+ * Remember to follow the post-installation steps for Linux: [linux-postinstall](https://docs.docker.com/engine/install/linux-postinstall/)
71
+
72
+
73
+ ### Starting the container
74
+ Once Docker is installed, when you open up the project folder in VSCode, you will see a pop up noting that you have a setup for dev containers, and asking if you would like to open the folder in a container. You will want to click "open in container". This will automatically set up everything you need and bring you into the container. If the docker process times out in the middle of installing python packages you can run the postinstall.sh manually. You also may need to have the VSCode Docker extension and/or devcontainer extension downloaded
75
+
76
+ You may need to reinstall some vscode extensions in the dev environment if you are opening it for the first time, but this should only be for the first time running it.
77
+
78
+ Some extensions are installed automatically (e.g. linting). You may need to reload the window after all of these extensions are installed.
79
+
80
+ ### Managing Docker
81
+ There are many ways to manage Docker containers, but generally if you download the vscode docker extension, you will be able to stop/start/remove all containers and images.
82
+
83
+
84
+ ### Alternatives to Docker
85
+ If you choose not to run docker, use a python virtual environment so these packages don't conflict with local packages. To set this up run
86
+
87
+ ```
88
+ python -m venv .venv
89
+ ```
90
+
91
+ If you use a virtual environment, install python packages and their dependencies to the virtual environment via the command
92
+
93
+ ```
94
+ pip install --require-virtualenv -r requirements.txt
95
+ ```
96
+
97
+ ## Running the Front End
98
+ You can run any front end folder in the front_end directory by executing `streamlit run front_end/[site_file_name]/Home.py`. This will start a development server for you that you can run.
99
+
100
+
101
+ # Testing
102
+ This repository uses pytest and pytest-xdist. xdist spreads out all the tests between multiple threads that are each run on a separate CPU. Currently its setup to create a thread per CPU. Configuration for this is in `pytest.ini`. The tests are gathered afresh from each thread, so any initialization done in imports, globals, or class variables are done for each thread. Additionally, global state is not reset between tests on the same thread. When making tests, assume unknown values for globals and especially class variables (though try to avoid using these at all).
103
+
@@ -0,0 +1,61 @@
1
+ Last Update: Oct 25 2024
2
+
3
+ # Overview
4
+ This repository contains forecasting and research tools built with Python and Streamlit. The project aims to assist users in making predictions, conducting research, and analyzing data related to hard to answer questions (especially those from Metaculus). Find the demo website here: https://mokoresearch.streamlit.app/
5
+
6
+ Here are the key components and features of the project:
7
+ - General Forecaster that integrates with the Metaculus AI benchmarking competition
8
+ - Historical Base Rate Researcher
9
+ - Niche List Researcher
10
+ - Fermi Estimator
11
+ - Key Factors Analysis
12
+
13
+ # Getting Set Up
14
+
15
+ ## Environment Variables
16
+ The environment variables from ```.env.template``` are needed to run. Copy this template as ```.env``` and fill it in. Talk to the project owner to get the correct values for some of the more project specific variables.
17
+
18
+ ## Docker Dev Container
19
+ To get your development environment up and running, you need to have Docker Engine installed and running. Once you do, you can use the VSCode dev container pop-up to automatically set up everything for you
20
+
21
+ ### Install Docker
22
+ For Windows and Mac, you will download Docker Desktop. For Linux, you will download Docker Engine. (NOTE: These instructions might be outdated).
23
+
24
+ First download and setup Docker Engine using the instructions at the link below for your OS:
25
+ * Windows: [windows-install](https://docs.docker.com/desktop/install/windows-install/)
26
+ * Mac: [mac-install](https://docs.docker.com/desktop/install/mac-install/)
27
+ * Linux: [install](https://docs.docker.com/engine/install/)
28
+ * Note: DO NOT install Docker Desktop for Linux, rather, select your Linux distribution on the left sidebar and follow the distribution specific instructions for Docker engine. Docker Desktop runs with a different environment in Linux. (TODO: Check if this restriction still applies)
29
+ * Remember to follow the post-installation steps for Linux: [linux-postinstall](https://docs.docker.com/engine/install/linux-postinstall/)
30
+
31
+
32
+ ### Starting the container
33
+ Once Docker is installed, when you open up the project folder in VSCode, you will see a pop up noting that you have a setup for dev containers, and asking if you would like to open the folder in a container. You will want to click "open in container". This will automatically set up everything you need and bring you into the container. If the docker process times out in the middle of installing python packages you can run the postinstall.sh manually. You also may need to have the VSCode Docker extension and/or devcontainer extension downloaded
34
+
35
+ You may need to reinstall some vscode extensions in the dev environment if you are opening it for the first time, but this should only be for the first time running it.
36
+
37
+ Some extensions are installed automatically (e.g. linting). You may need to reload the window after all of these extensions are installed.
38
+
39
+ ### Managing Docker
40
+ There are many ways to manage Docker containers, but generally if you download the vscode docker extension, you will be able to stop/start/remove all containers and images.
41
+
42
+
43
+ ### Alternatives to Docker
44
+ If you choose not to run docker, use a python virtual environment so these packages don't conflict with local packages. To set this up run
45
+
46
+ ```
47
+ python -m venv .venv
48
+ ```
49
+
50
+ If you use a virtual environment, install python packages and their dependencies to the virtual environment via the command
51
+
52
+ ```
53
+ pip install --require-virtualenv -r requirements.txt
54
+ ```
55
+
56
+ ## Running the Front End
57
+ You can run any front end folder in the front_end directory by executing `streamlit run front_end/[site_file_name]/Home.py`. This will start a development server for you that you can run.
58
+
59
+
60
+ # Testing
61
+ This repository uses pytest and pytest-xdist. xdist spreads out all the tests between multiple threads that are each run on a separate CPU. Currently its setup to create a thread per CPU. Configuration for this is in `pytest.ini`. The tests are gathered afresh from each thread, so any initialization done in imports, globals, or class variables are done for each thread. Additionally, global state is not reset between tests on the same thread. When making tests, assume unknown values for globals and especially class variables (though try to avoid using these at all).
@@ -0,0 +1,31 @@
1
+ from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import (
2
+ MonetaryCostManager as MonetaryCostManager,
3
+ )
4
+ from forecasting_tools.forecasting.forecast_team.forecast_team import (
5
+ ForecastTeam as ForecastTeam,
6
+ )
7
+ from forecasting_tools.forecasting.llms.smart_searcher import (
8
+ SmartSearcher as SmartSearcher,
9
+ )
10
+ from forecasting_tools.forecasting.metaculus_api import (
11
+ MetaculusApi as MetaculusApi,
12
+ )
13
+ from forecasting_tools.forecasting.sub_question_responders.base_rate_responder import (
14
+ BaseRateResponder as BaseRateResponder,
15
+ )
16
+ from forecasting_tools.forecasting.sub_question_responders.estimator import (
17
+ Estimator as Estimator,
18
+ )
19
+ from forecasting_tools.forecasting.sub_question_responders.general_search_responder import (
20
+ GeneralSearchResponder as GeneralSearchResponder,
21
+ )
22
+ from forecasting_tools.forecasting.sub_question_responders.key_factors_searcher import (
23
+ KeyFactorsSearcher as KeyFactorsSearcher,
24
+ )
25
+ from forecasting_tools.forecasting.sub_question_responders.niche_list_researcher import (
26
+ NicheListResearcher as NicheListResearcher,
27
+ )
28
+ from forecasting_tools.forecasting.team_manager import (
29
+ TeamManager as TeamManager,
30
+ )
31
+ from forecasting_tools.front_end.Home import HomePage as HomePage
@@ -0,0 +1,130 @@
1
+ import asyncio
2
+ import logging
3
+ from typing import (
4
+ Any,
5
+ Callable,
6
+ TypeGuard,
7
+ TypeVar,
8
+ Union,
9
+ get_args,
10
+ get_origin,
11
+ )
12
+
13
+ T = TypeVar("T")
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
async def try_function_till_tries_run_out(
    tries: int, function: Callable, *args, **kwargs
) -> Any:
    """
    Await ``function(*args, **kwargs)``, retrying on any exception.

    Sleeps one second between attempts and re-raises the last error once all
    ``tries`` attempts are exhausted.
    """
    attempts_remaining = tries
    while attempts_remaining > 0:
        try:
            return await function(*args, **kwargs)
        except Exception as e:
            attempts_remaining -= 1
            if attempts_remaining == 0:
                # Out of attempts: surface the final failure to the caller.
                raise e
            logger.warning(
                f"Retrying function {function.__name__} due to error: {e}"
            )
            # Brief pause before the next attempt.
            await asyncio.sleep(1)
34
+
35
+
36
def retry_async_function(tries: int) -> Callable:
    """
    Decorator factory: retry the decorated async function up to *tries* times.

    Delegates the retry loop (including the 1 s pause between attempts) to
    try_function_till_tries_run_out.
    """

    def decorator(function: Callable) -> Callable:
        from functools import wraps

        # BUG FIX: without wraps(), the wrapper loses the wrapped function's
        # __name__, which the retry logging relies on for useful messages.
        @wraps(function)
        async def wrapper(*args, **kwargs) -> Any:
            return await try_function_till_tries_run_out(
                tries, function, *args, **kwargs
            )

        return wrapper

    return decorator
46
+
47
+
48
+ def validate_complex_type(value: T, expected_type: type[T]) -> TypeGuard[T]:
49
+ # NOTE: Consider using typeguard.check_type instead of this function
50
+ origin = get_origin(expected_type)
51
+ args = get_args(expected_type)
52
+
53
+ if origin is None:
54
+ # Base case: expected_type is not a generic alias (like int, str, etc.)
55
+ return isinstance(value, expected_type)
56
+
57
+ if origin is Union:
58
+ # Special handling for Union types (e.g., Union[int, str])
59
+ return any(validate_complex_type(value, arg) for arg in args)
60
+
61
+ if origin is tuple:
62
+ # Special handling for tuple types
63
+ if not isinstance(value, tuple) or len(value) != len(args):
64
+ return False
65
+ return all(validate_complex_type(v, t) for v, t in zip(value, args))
66
+
67
+ if origin is list:
68
+ # Special handling for list types
69
+ if not isinstance(value, list):
70
+ return False
71
+ return all(validate_complex_type(v, args[0]) for v in value)
72
+
73
+ if origin is dict:
74
+ # Special handling for dict types
75
+ if not isinstance(value, dict):
76
+ return False
77
+ key_type, value_type = args
78
+ return all(
79
+ validate_complex_type(k, key_type)
80
+ and validate_complex_type(v, value_type)
81
+ for k, v in value.items()
82
+ )
83
+
84
+ # Fallback for other types
85
+ return isinstance(value, expected_type)
86
+
87
+
88
def clean_indents(text: str) -> str:
    """
    Cleans indents from the text, optimized for prompts.
    Note: this is not the same as textwrap.dedent (see the test for this
    function for examples).
    """
    lines = text.split("\n")

    # The reference indent is the larger of the first two lines' indents
    # (just the first line's when the text is a single line).
    if len(lines) > 1:
        reference_indent = max(
            find_indent_level_of_string(lines[0]),
            find_indent_level_of_string(lines[1]),
        )
    else:
        reference_indent = find_indent_level_of_string(lines[0])

    cleaned_lines = []
    for line in lines:
        if find_indent_level_of_string(line) >= reference_indent:
            # Strip exactly the reference indent, keeping deeper nesting.
            cleaned_lines.append(line[reference_indent:])
        else:
            # Shallower lines lose all leading whitespace.
            cleaned_lines.append(line.lstrip())

    return "\n".join(cleaned_lines)
116
+
117
+
118
def find_indent_level_of_string(string: str) -> int:
    """Return the number of leading whitespace characters in *string*."""
    stripped = string.lstrip()
    return len(string) - len(stripped)
120
+
121
+
122
def strip_code_block_markdown(string: str) -> str:
    """
    Remove a surrounding markdown code fence from *string* if present.
    Handles ```json, ```python, and bare ``` fences; other text is returned
    stripped of outer whitespace but otherwise unchanged.
    """
    text = string.strip()
    has_closing_fence = text.endswith("```")
    if has_closing_fence and text.startswith("```json"):
        inner = text[len("```json"):-3]
    elif has_closing_fence and text.startswith("```python"):
        inner = text[len("```python"):-3]
    elif has_closing_fence and text.startswith("```"):
        inner = text[3:-3]
    else:
        return text
    return inner.strip()
@@ -0,0 +1,260 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger(__name__)
4
+ import base64
5
+ import math
6
+ import re
7
+ from io import BytesIO
8
+ from typing import Literal
9
+ from urllib import request
10
+
11
+ import tiktoken
12
+ from openai.types.chat import (
13
+ ChatCompletionContentPartImageParam,
14
+ ChatCompletionContentPartTextParam,
15
+ ChatCompletionMessageParam,
16
+ ChatCompletionSystemMessageParam,
17
+ ChatCompletionUserMessageParam,
18
+ )
19
+ from openai.types.chat.chat_completion_content_part_image_param import ImageURL
20
+ from PIL import Image
21
+ from pydantic import BaseModel
22
+ from tiktoken import Encoding
23
+
24
+
25
class VisionMessageData(BaseModel):
    """The pieces needed to build a single GPT-vision user message."""

    prompt: str  # text shown alongside the image
    b64_image: str  # base64-encoded image payload
    image_resolution: Literal["auto", "low", "high"]  # OpenAI "detail" setting

    def __str__(self) -> str:
        # Only a short preview of the (potentially huge) base64 image is shown.
        image_preview = self.b64_image[:10]
        return f"Prompt: {self.prompt}, Resolution: {self.image_resolution}, Image: {image_preview}..."
32
+
33
+
34
class OpenAiUtils:
    """
    Static helpers for OpenAI chat completions: building message lists from
    prompts/images and estimating how many tokens those messages consume.
    """

    @staticmethod
    def text_to_tokens_direct(text_to_tokenize: str, model: str) -> int:
        """Return the number of tokens *text_to_tokenize* encodes to for *model*."""
        encoding = OpenAiUtils.__get_encoding_for_model(model)
        return len(encoding.encode(text_to_tokenize))

    @staticmethod
    def __get_encoding_for_model(model: str) -> Encoding:
        # tiktoken raises KeyError for model names it does not know about;
        # fall back to the o200k_base encoding in that case.
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            logger.warning(
                "Warning: model not found. Using o200k_base encoding."
            )
            encoding = tiktoken.get_encoding("o200k_base")
        return encoding

    @staticmethod
    def messages_to_tokens(
        messages: list[ChatCompletionMessageParam], model: str
    ) -> int:
        """
        Count the tokens a list of chat messages will consume for *model*.

        Based on the OpenAI cookbook token-counting guide:
        https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
        See here for other types of tokenization:
        https://github.com/shobrook/openlimit/blob/master/openlimit/utilities/token_counters.py
        See here for gpt-vision code (an unpublished push to the examples):
        https://github.com/openai/openai-cookbook/pull/881/commits/555f5bb8b6d09d83fcc7a892562f97d7d1f085c7
        """
        encoding = OpenAiUtils.__get_encoding_for_model(model)
        num_tokens = 0
        for message in messages:
            num_tokens += OpenAiUtils.__message_to_tokens(
                message, model, encoding
            )
        # Every reply is primed with <|start|>assistant<|message|>.
        num_tokens += 3
        return num_tokens

    @staticmethod
    def __message_to_tokens(
        message: ChatCompletionMessageParam, model: str, encoding: Encoding
    ) -> int:
        # Image messages are priced per tile rather than purely per text
        # token, so they get a separate code path.
        if OpenAiUtils.__determine_if_message_is_an_image_message(message):
            return OpenAiUtils.__turn_image_message_into_tokens(
                message, model, encoding
            )
        return OpenAiUtils.__turn_regular_message_into_tokens(
            message, encoding
        )

    @staticmethod
    def __determine_if_message_is_an_image_message(
        message: ChatCompletionMessageParam,
    ) -> bool:
        # A message counts as an image message when its content is a list of
        # parts and at least one part is an "image_url" part. Plain-string or
        # malformed content lands in the except branch and is treated as a
        # regular message.
        try:
            content = message["content"]  # type: ignore
            content_types = [part["type"] for part in content]  # type: ignore
            return "image_url" in content_types
        except Exception:
            return False

    @classmethod
    def __turn_regular_message_into_tokens(
        cls, message: ChatCompletionMessageParam, encoding: Encoding
    ) -> int:
        # NOTE: These hardcoded values might change for future models, but
        # they apply to all models past gpt-3.5 as of Oct 15 2024.
        tokens_per_message = 3
        tokens_per_name = 1
        num_tokens = tokens_per_message
        for key, value in message.items():
            if isinstance(value, list):
                # Multi-part content (e.g. gpt-4v): count each text part.
                # BUG FIX: the previous version asserted the value was a str
                # before reaching this branch, so list content crashed and
                # this path was unreachable.
                for item in value:
                    if isinstance(item, dict) and item.get("type") in ["text"]:
                        num_tokens += len(
                            encoding.encode(item.get("text", ""))
                        )
            else:
                assert isinstance(value, str)
                num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
        return num_tokens

    @staticmethod
    def __turn_image_message_into_tokens(
        message: ChatCompletionMessageParam, model: str, encoding: Encoding
    ) -> int:
        # Tile-based image pricing has only been verified for these models.
        if model not in ["gpt-4-vision-preview", "gpt-4o"]:
            raise NotImplementedError(
                f"num_tokens_from_messages() is not implemented for model {model}"
            )

        num_tokens: int = 0
        for value in message.values():
            if isinstance(value, list):
                for item in value:
                    item: dict
                    num_tokens += len(encoding.encode(item["type"]))
                    if item["type"] == "text":
                        num_tokens += len(encoding.encode(item["text"]))
                    elif item["type"] == "image_url":
                        num_tokens += OpenAiUtils.__calculate_tokens_of_image(item["image_url"]["url"], item["image_url"]["detail"])  # type: ignore
            elif isinstance(value, str):
                num_tokens += len(encoding.encode(value))

        return num_tokens

    @staticmethod
    def __get_image_dimensions(image_url_or_b64: str) -> tuple[int, int]:
        """Return (width, height) of an image given as an http(s) URL or a base64 data URL."""
        # Regex to check if the image is a URL or a base64 string.
        url_regex = r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*)"  # NOSONAR
        base_64_regex = r"data:image\/\w+;base64,"
        if re.match(url_regex, image_url_or_b64):
            response = request.urlopen(image_url_or_b64)
            image = Image.open(response)
            return image.size
        elif re.match(base_64_regex, image_url_or_b64):
            # Drop the data-URL prefix before decoding the raw base64 bytes.
            image_url_or_b64 = re.sub(
                r"data:image\/\w+;base64,", "", image_url_or_b64
            )
            image = Image.open(BytesIO(base64.b64decode(image_url_or_b64)))
            return image.size
        else:
            raise ValueError("Image must be a URL or base64 string.")

    @staticmethod
    def __calculate_tokens_of_image(image_url_or_b64: str, detail: str) -> int:
        """
        Token cost of one image per OpenAI's tile-based pricing.

        NOTE(review): "auto" detail is not handled here and raises ValueError —
        callers appear to be expected to resolve it to low/high first; confirm.
        """
        # Constants
        LOW_DETAIL_COST = 85
        HIGH_DETAIL_COST_PER_TILE = 170
        ADDITIONAL_COST = 85

        if detail == "low":
            # Low detail images have a fixed cost.
            return LOW_DETAIL_COST
        elif detail == "high":
            # Calculate token cost for high detail images.
            width, height = OpenAiUtils.__get_image_dimensions(
                image_url_or_b64
            )
            # Resize to fit within a 2048 x 2048 square if needed.
            if max(width, height) > 2048:
                ratio = 2048 / max(width, height)
                width = int(width * ratio)
                height = int(height * ratio)

            # Further scale down to 768px on the shortest side.
            if min(width, height) > 768:
                ratio = 768 / min(width, height)
                width = int(width * ratio)
                height = int(height * ratio)

            # Cost is per 512px tile, plus a fixed base cost.
            num_squares = math.ceil(width / 512) * math.ceil(height / 512)
            total_cost = (
                num_squares * HIGH_DETAIL_COST_PER_TILE + ADDITIONAL_COST
            )
            return total_cost
        else:
            # Invalid detail_option
            raise ValueError("Invalid detail_option. Use 'low' or 'high'.")

    @staticmethod
    def put_single_user_message_in_list_using_prompt(
        user_prompt: str,
    ) -> list[ChatCompletionMessageParam]:
        """Wrap *user_prompt* as a one-element list containing a user message."""
        return [
            ChatCompletionUserMessageParam(role="user", content=user_prompt)
        ]

    @staticmethod
    def put_single_image_message_in_list_using_gpt_vision_input(
        vision_data: VisionMessageData,
    ) -> list[ChatCompletionMessageParam]:
        """Build a one-element message list carrying a text part and a PNG data-URL image part."""
        prompt: str = vision_data.prompt
        base64_image: str = vision_data.b64_image
        resolution: str = vision_data.image_resolution
        return [
            ChatCompletionUserMessageParam(
                role="user",
                content=[
                    ChatCompletionContentPartTextParam(
                        type="text", text=prompt
                    ),
                    ChatCompletionContentPartImageParam(
                        type="image_url",
                        image_url=ImageURL(
                            url=f"data:image/png;base64,{base64_image}",
                            detail=resolution,
                        ),
                    ),
                ],
            )
        ]

    @staticmethod
    def create_system_and_user_message_from_prompt(
        user_prompt: str, system_prompt: str
    ) -> list[ChatCompletionMessageParam]:
        """Build a [system, user] message pair from the two prompts."""
        return [
            ChatCompletionSystemMessageParam(
                role="system", content=system_prompt
            ),
            ChatCompletionUserMessageParam(role="user", content=user_prompt),
        ]

    @classmethod
    def create_system_and_image_message_from_prompt(
        cls, vision_message_data: VisionMessageData, system_prompt: str
    ) -> list[ChatCompletionMessageParam]:
        """Build a [system, image-user] message pair from vision data and a system prompt."""
        image_message_as_list = (
            cls.put_single_image_message_in_list_using_gpt_vision_input(
                vision_message_data
            )
        )
        image_message = image_message_as_list[0]
        return [
            ChatCompletionSystemMessageParam(
                role="system", content=system_prompt
            ),
            image_message,
        ]
@@ -0,0 +1,19 @@
1
+ from typing import Any
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
class ModelResponse(BaseModel):
    """Base wrapper for any value returned by an AI model call."""

    data: Any  # payload; subclasses narrow this type
8
+
9
+
10
class TextTokenResponse(ModelResponse):
    """A text completion plus the token accounting reported for the call."""

    data: str  # the generated text
    prompt_tokens_used: int
    completion_tokens_used: int
    total_tokens_used: int
    model: str  # the model that produced the response
16
+
17
+
18
class TextTokenCostResponse(TextTokenResponse):
    """A text response that additionally reports the monetary cost of the call."""

    cost: float  # estimated cost of the request