PickYourLLM 0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pickyourllm-0.3.dist-info/METADATA +72 -0
- pickyourllm-0.3.dist-info/RECORD +8 -0
- pickyourllm-0.3.dist-info/WHEEL +5 -0
- pickyourllm-0.3.dist-info/entry_points.txt +2 -0
- pickyourllm-0.3.dist-info/licenses/LICENSE +201 -0
- pickyourllm-0.3.dist-info/top_level.txt +1 -0
- run/__init__.py +1 -0
- run/run.py +483 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: PickYourLLM
|
|
3
|
+
Version: 0.3
|
|
4
|
+
Summary: Pick Your LLM: Intelligent, Use-Case Aware LLM Model advisor for Optimal Performance and Cost
|
|
5
|
+
Home-page: https://github.com/AmadeusITGroup/PickYourLLM
|
|
6
|
+
Author: Ilias, Eoin
|
|
7
|
+
Author-email: ilias.driouich@amadeus.com;eoin.thomas@amadeus.com
|
|
8
|
+
Requires-Python: >=3.6
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pandas>=1.1.0
|
|
12
|
+
Requires-Dist: numpy>=1.19.0
|
|
13
|
+
Dynamic: author
|
|
14
|
+
Dynamic: author-email
|
|
15
|
+
Dynamic: description
|
|
16
|
+
Dynamic: description-content-type
|
|
17
|
+
Dynamic: home-page
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
Dynamic: requires-dist
|
|
20
|
+
Dynamic: requires-python
|
|
21
|
+
Dynamic: summary
|
|
22
|
+
|
|
23
|
+
## PickYourLLM Framework
|
|
24
|
+
|
|
25
|
+
#### - This framework helps you automatically select the most suitable Large Language Model (LLM) for a given business or technical use case.
|
|
26
|
+
|
|
27
|
+
#### - It analyzes use case requirements (e.g., cost, latency, reasoning quality, context window, provider constraints), matches them against available LLMs, and ranks the best candidates based on weighted scoring.
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Features
|
|
31
|
+
|
|
32
|
+
- **Use Case–Driven Selection:** Takes a natural-language description of a use case and extracts structured constraints and priorities.
|
|
33
|
+
- **Constraint Extraction:** Uses advanced LLMs to normalize requirements into a standardized schema (provider, latency, cost, openness, tool calling, languages, etc.).
|
|
34
|
+
- **Model Matching:** Filters candidate LLMs based on hard constraints such as provider restrictions, deployment type, language support, context window, and cost thresholds.
|
|
35
|
+
- **Weighted Recommendation Engine:** Scores models using weighted dimensions such as cost, latency, reasoning, quality, throughput, tool-calling capability, and openness.
|
|
36
|
+
- **Transparent Ranking:** Produces ranked recommendations with clear rationales explaining why each model was selected.
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## How It Works
|
|
40
|
+
|
|
41
|
+
The pipeline runs in sequential steps:
|
|
42
|
+
|
|
43
|
+
- **Use Case Selection**
|
|
44
|
+
Choose from predefined scenarios (customer assistant, travel agent assistant, multilingual chatbot, internal copilot, etc.) or provide your own description.
|
|
45
|
+
|
|
46
|
+
- **Requirement Extraction (LLM Agent)**
|
|
47
|
+
The use case is parsed into structured metadata, including:
|
|
48
|
+
Provider constraints
|
|
49
|
+
Deployment preferences
|
|
50
|
+
Latency and cost requirements
|
|
51
|
+
Language support
|
|
52
|
+
Reasoning / quality expectations
|
|
53
|
+
Tool-calling or multimodal needs
|
|
54
|
+
Priority weights across decision criteria
|
|
55
|
+
|
|
56
|
+
- **Model Filtering**
|
|
57
|
+
Candidate LLMs from the model catalog are filtered according to the extracted hard constraints.
|
|
58
|
+
|
|
59
|
+
- **Scoring & Ranking**
|
|
60
|
+
The remaining models are scored using a weighted recommendation engine across the most relevant dimensions for the use case.
|
|
61
|
+
|
|
62
|
+
- **Export**
|
|
63
|
+
Ranked recommendations are exported to CSV, along with the extracted use case metadata in JSON format for inspection.
|
|
64
|
+
|
|
65
|
+
## Usage
|
|
66
|
+
|
|
67
|
+
To use the tool, follow these steps:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install PickYourLLM
|
|
71
|
+
|
|
72
|
+
PickYourLLM
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
pickyourllm-0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2
|
+
run/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
3
|
+
run/run.py,sha256=JQ760dTE29nvDJ4LKpHAF3BeADMyBPGzSrKqd0YNFcY,19073
|
|
4
|
+
pickyourllm-0.3.dist-info/METADATA,sha256=qkG1xuR88i4xfOClICTe6kVOs77LwxPahaNbIlnLqUM,2889
|
|
5
|
+
pickyourllm-0.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
pickyourllm-0.3.dist-info/entry_points.txt,sha256=vXWc6ReWAhw0-Ot6peeYlAcfclaMZNOjwnVPxaJR0GQ,45
|
|
7
|
+
pickyourllm-0.3.dist-info/top_level.txt,sha256=tQBPJqhSsNYOwSN0MsGjPCMH_yRYw3TZ2ZdJ0EXH_rk,4
|
|
8
|
+
pickyourllm-0.3.dist-info/RECORD,,
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or
|
|
95
|
+
Derivative Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and
|
|
117
|
+
do not modify the License. You may add Your own attribution
|
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
120
|
+
that such additional attribution notices cannot be construed
|
|
121
|
+
as modifying the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for reasonable and customary use in describing the
|
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
168
|
+
or other liability obligations and/or rights consistent with this
|
|
169
|
+
License. However, in accepting such obligations, You may act only
|
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
174
|
+
of your accepting any such warranty or additional liability.
|
|
175
|
+
|
|
176
|
+
END OF TERMS AND CONDITIONS
|
|
177
|
+
|
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
179
|
+
|
|
180
|
+
To apply the Apache License to your work, attach the following
|
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
182
|
+
replaced with your own identifying information. (Don't include
|
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
184
|
+
comment syntax for the file format. We also recommend that a
|
|
185
|
+
file or class name and description of purpose be included on the
|
|
186
|
+
same "printed page" as the copyright notice for easier
|
|
187
|
+
identification within third-party archives.
|
|
188
|
+
|
|
189
|
+
Copyright [yyyy] [name of copyright owner]
|
|
190
|
+
|
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
|
+
you may not use this file except in compliance with the License.
|
|
193
|
+
You may obtain a copy of the License at
|
|
194
|
+
|
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
196
|
+
|
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
200
|
+
See the License for the specific language governing permissions and
|
|
201
|
+
limitations under the License.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
run
|
run/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
run/run.py
ADDED
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
warnings.filterwarnings("ignore")
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
from typing import Dict, Any, List
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
from openai import AzureOpenAI
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main():
|
|
14
|
+
# ----------------------------
|
|
15
|
+
# 1) Load environment
|
|
16
|
+
# ----------------------------
|
|
17
|
+
envfilepath = str(input("\nEnter the path to your env file: ").strip()) or ".env"
|
|
18
|
+
_ = load_dotenv(envfilepath)
|
|
19
|
+
|
|
20
|
+
openai_api_key = os.getenv("API_KEY")
|
|
21
|
+
openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION")
|
|
22
|
+
azure_openai_endpoint = os.getenv("AZURE_ENDPOINT")
|
|
23
|
+
gpt_model_deployment_name = os.getenv("GPT_DEPLOYMENT_NAME", "gpt-4o")
|
|
24
|
+
|
|
25
|
+
if not openai_api_key or not openai_api_version or not azure_openai_endpoint:
|
|
26
|
+
raise ValueError(
|
|
27
|
+
"Missing Azure OpenAI environment variables. "
|
|
28
|
+
"Please check API_KEY, AZURE_OPENAI_API_VERSION, and AZURE_ENDPOINT."
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
client = AzureOpenAI(
|
|
32
|
+
api_version=openai_api_version,
|
|
33
|
+
azure_endpoint=azure_openai_endpoint,
|
|
34
|
+
api_key=openai_api_key,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
def gpt_oneshot(prompt_sys: str, prompt_usr: str, temperature: float = 0.0) -> str:
    """Send a single system + user exchange to the chat deployment.

    Newlines in both prompts are replaced by spaces before the request is
    made. The request goes through the enclosing scope's ``client`` and
    targets ``gpt_model_deployment_name``.

    Args:
        prompt_sys: Text of the system message.
        prompt_usr: Text of the user message.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text content of the first returned choice, or ``""`` when the
        service returns no text content.
    """
    messages = [
        {"role": "system", "content": prompt_sys.replace("\n", " ")},
        {"role": "user", "content": prompt_usr.replace("\n", " ")},
    ]

    response = client.chat.completions.create(
        model=gpt_model_deployment_name,
        temperature=temperature,
        messages=messages,
    )

    # The SDK types message.content as optional. Returning "" keeps the
    # declared ``str`` contract, so code that parses the reply sees a
    # decode error instead of a TypeError on None.
    content = response.choices[0].message.content
    return content if content is not None else ""
|
|
50
|
+
|
|
51
|
+
# ----------------------------
|
|
52
|
+
# 2) Example use cases
|
|
53
|
+
# ----------------------------
|
|
54
|
+
use_cases = [
|
|
55
|
+
"I want to develop a digital assistant for my customer (travel agency) that will assist travel agents in finding the cheapest price for travelers. The customer would like that the assistant is built only using OpenAI models with a SLO response time <5 sec. Also, we would like the solution to be as cost effective as possible.",
|
|
56
|
+
"I want to develop a digital assistant for my customer (hotel chain) that will assist travelers by recommending activities to do at their destination. I would like that my assistant use open-source models and have the best performance in reasoning. No need for a fast solution as long as it is accurate and can handle multiple concurrent calls.",
|
|
57
|
+
"I need an internal customer-support copilot for airline operations. It must support English and French, have strong tool-calling ability, moderate cost, and low hallucination risk. Latency matters but accuracy matters more.",
|
|
58
|
+
"I want a multilingual chatbot for e-commerce customer support. It should support Arabic, French, and English, be low cost, and offer acceptable response speed. Open-source models are preferred but not mandatory.",
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
print("\nAvailable Use Cases:")
|
|
62
|
+
for i, uc in enumerate(use_cases):
|
|
63
|
+
print(f"[{i}] {uc}")
|
|
64
|
+
|
|
65
|
+
usecase = str(input("\nEnter the use case description, choose one above or enter a new one: ")).strip()
|
|
66
|
+
use_case_description = usecase
|
|
67
|
+
print(f"\nSelected use case:\n{use_case_description}\n")
|
|
68
|
+
|
|
69
|
+
# ----------------------------
|
|
70
|
+
# 3) Prompt for structured extraction
|
|
71
|
+
# ----------------------------
|
|
72
|
+
extraction_prompt = """
|
|
73
|
+
You are an expert metadata-extraction and model-selection agent.
|
|
74
|
+
Given a free-form LLM use case description, extract and return exactly the following JSON.
|
|
75
|
+
If a value cannot be determined, use null for strings/numbers/arrays or false for booleans.
|
|
76
|
+
Return only valid JSON and no explanations.
|
|
77
|
+
|
|
78
|
+
{
|
|
79
|
+
"constraints": {
|
|
80
|
+
"provider": <[string]|null>,
|
|
81
|
+
"deployment_type": <string|null>,
|
|
82
|
+
"open_weights": <boolean|null>,
|
|
83
|
+
"max_latency_sec": <float|null>,
|
|
84
|
+
"max_input_cost_per_1m_tokens": <float|null>,
|
|
85
|
+
"max_output_cost_per_1m_tokens": <float|null>,
|
|
86
|
+
"min_context_window": <int|null>,
|
|
87
|
+
"required_languages": <[string]|null>,
|
|
88
|
+
"must_support_tool_calling": <boolean|null>,
|
|
89
|
+
"must_support_multimodality": <boolean|null>,
|
|
90
|
+
"min_reasoning_score": <float|null>,
|
|
91
|
+
"min_quality_score": <float|null>,
|
|
92
|
+
"min_throughput_rps": <float|null>
|
|
93
|
+
},
|
|
94
|
+
"priorities": {
|
|
95
|
+
"cost": <float>,
|
|
96
|
+
"latency": <float>,
|
|
97
|
+
"reasoning": <float>,
|
|
98
|
+
"context_window": <float>,
|
|
99
|
+
"quality": <float>,
|
|
100
|
+
"tool_calling": <float>,
|
|
101
|
+
"throughput": <float>,
|
|
102
|
+
"openness": <float>
|
|
103
|
+
},
|
|
104
|
+
"use_case_profile": {
|
|
105
|
+
"category": <string>,
|
|
106
|
+
"primary_goal": <string>,
|
|
107
|
+
"risk_level": <string>,
|
|
108
|
+
"notes": <string|null>
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
Rules:
|
|
113
|
+
1. priorities must sum to 1.0
|
|
114
|
+
2. provider must be a list like ["openai"], ["anthropic"], ["mistral"], ["meta"], or null
|
|
115
|
+
3. deployment_type must be one of:
|
|
116
|
+
["api", "self-hosted", "either", null]
|
|
117
|
+
4. risk_level must be one of:
|
|
118
|
+
["Low", "Medium", "High"]
|
|
119
|
+
5. category can be one of:
|
|
120
|
+
["chatbot", "assistant", "rag", "agent", "classification", "summarization",
|
|
121
|
+
"recommendation", "search", "copilot", "other"]
|
|
122
|
+
6. required_languages should be ISO-639-3 codes when possible, e.g. ["eng","fra","ara"]
|
|
123
|
+
7. If the user says "open-source models" or equivalent, set open_weights=true
|
|
124
|
+
8. If the user says "only OpenAI models", set provider=["openai"]
|
|
125
|
+
9. If latency is important but no exact threshold is provided, reflect that in priorities instead of inventing a hard threshold
|
|
126
|
+
10. If accuracy/reasoning is emphasized, increase reasoning and quality weights
|
|
127
|
+
11. If cost-effectiveness is emphasized, increase cost weight
|
|
128
|
+
12. If concurrency or scale is emphasized, increase throughput weight
|
|
129
|
+
13. If tool use / agent / API calling is implied, increase tool_calling weight and set must_support_tool_calling=true when clearly required
|
|
130
|
+
|
|
131
|
+
Return JSON only.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
role = "You are an expert metadata-extraction and LLM selection agent."
|
|
135
|
+
task = extraction_prompt + "\nHere is the use case description:\n" + use_case_description
|
|
136
|
+
extracted = gpt_oneshot(role, task, temperature=0.0)
|
|
137
|
+
|
|
138
|
+
try:
|
|
139
|
+
params = json.loads(extracted)
|
|
140
|
+
except json.JSONDecodeError as e:
|
|
141
|
+
raise ValueError(f"Model did not return valid JSON:\n{extracted}") from e
|
|
142
|
+
|
|
143
|
+
print("\nExtracted constraints:")
|
|
144
|
+
print(json.dumps(params["constraints"], indent=2))
|
|
145
|
+
|
|
146
|
+
print("\nExtracted priorities:")
|
|
147
|
+
print(json.dumps(params["priorities"], indent=2))
|
|
148
|
+
|
|
149
|
+
print("\nUse case profile:")
|
|
150
|
+
print(json.dumps(params["use_case_profile"], indent=2))
|
|
151
|
+
|
|
152
|
+
# ----------------------------
|
|
153
|
+
# 4) Read model catalog CSV
|
|
154
|
+
# ----------------------------
|
|
155
|
+
model_catalog_path = str(input("\nEnter path to the LLM model catalog CSV: ").strip())
|
|
156
|
+
if not os.path.exists(model_catalog_path):
|
|
157
|
+
raise FileNotFoundError(f"CSV not found: {model_catalog_path}")
|
|
158
|
+
|
|
159
|
+
df = pd.read_csv(model_catalog_path)
|
|
160
|
+
|
|
161
|
+
# Expected columns in CSV
|
|
162
|
+
expected_columns = [
|
|
163
|
+
"model_name",
|
|
164
|
+
"provider",
|
|
165
|
+
"deployment_type",
|
|
166
|
+
"open_weights",
|
|
167
|
+
"input_cost_per_1m_tokens",
|
|
168
|
+
"output_cost_per_1m_tokens",
|
|
169
|
+
"avg_latency_sec",
|
|
170
|
+
"context_window",
|
|
171
|
+
"reasoning_score",
|
|
172
|
+
"quality_score",
|
|
173
|
+
"tool_calling",
|
|
174
|
+
"multimodal",
|
|
175
|
+
"throughput_rps",
|
|
176
|
+
"languages",
|
|
177
|
+
"benchmark_source",
|
|
178
|
+
"release_date",
|
|
179
|
+
]
|
|
180
|
+
|
|
181
|
+
missing_cols = [c for c in expected_columns if c not in df.columns]
|
|
182
|
+
if missing_cols:
|
|
183
|
+
raise ValueError(
|
|
184
|
+
"The model catalog CSV is missing the following required columns:\n"
|
|
185
|
+
+ "\n".join(missing_cols)
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
# ----------------------------
|
|
189
|
+
# 5) Helpers
|
|
190
|
+
# ----------------------------
|
|
191
|
+
def parse_languages(value: Any) -> List[str]:
    """Turn a catalogue "languages" cell into a list of lower-cased codes.

    Accepted inputs:
      * a list/tuple of codes,
      * a JSON array string such as '["eng","fra"]',
      * a Python-repr style list string such as "['eng', 'fra']",
      * a comma-separated string such as "eng,fra",
      * a missing value (None / NaN) or blank string.

    Args:
        value: Raw cell value.

    Returns:
        The language codes, stripped and lower-cased. Missing or blank
        input yields an empty list.
    """
    # Handle sequences before pd.isna(): on a list pd.isna() returns an
    # array, and using that array in an ``if`` raises for multi-element input.
    if isinstance(value, (list, tuple)):
        return [str(item).strip().lower() for item in value]

    if pd.isna(value):
        return []

    text = str(value).strip()
    if not text:
        return []

    if text.startswith("[") and text.endswith("]"):
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Not strict JSON (e.g. single-quoted Python repr): drop the
            # brackets and fall back to comma splitting below.
            text = text[1:-1]
        else:
            return [str(item).strip().lower() for item in parsed]

    # Strip surrounding quotes so "'eng'" and '"eng"' both become "eng".
    tokens = (tok.strip().strip("'\"").strip().lower() for tok in text.split(","))
    return [tok for tok in tokens if tok]
|
|
207
|
+
|
|
208
|
+
def language_match_count(required: set, candidate_langs: List[str]) -> int:
    """Count how many required languages a candidate model covers.

    Both sides are reduced to their base language code (the part before
    the first "-", lower-cased and stripped) before comparison, so a
    regional tag such as "fr-CA" matches "fr" regardless of which side
    carries the region.

    Args:
        required: Language codes the use case needs.
        candidate_langs: Language codes the model supports.

    Returns:
        The number of distinct base codes present on both sides.
    """

    def base_code(tag: Any) -> str:
        return str(tag).strip().lower().split("-")[0]

    wanted = {base_code(tag) for tag in required}
    offered = {base_code(tag) for tag in candidate_langs}
    return len(wanted & offered)
|
|
211
|
+
|
|
212
|
+
def normalize_series(series: pd.Series, higher_is_better: bool = True) -> pd.Series:
    """Min-max scale a column to the [0, 1] range.

    Values are coerced to numbers first; anything non-numeric counts as
    missing. Missing entries always score 0.0 so they can neither earn
    the top score nor turn a downstream weighted sum into NaN.

    Args:
        series: Raw column values.
        higher_is_better: When False the scale is inverted so the
            smallest value scores 1.0.

    Returns:
        A float Series on the same index as ``series``:
          * all 0.0 when no value is numeric,
          * 1.0 for every numeric entry when they are all equal,
          * otherwise the (optionally inverted) min-max scaled values.
    """
    values = pd.to_numeric(series, errors="coerce")
    present = values.notna()

    if not present.any():
        return pd.Series(0.0, index=series.index, dtype=float)

    lowest = values.min()
    highest = values.max()

    # A constant column carries no ranking information: every model with
    # a value gets full marks, models without one get none.
    if lowest == highest:
        return present.astype(float)

    scaled = (values - lowest) / (highest - lowest)
    if not higher_is_better:
        scaled = 1 - scaled
    return scaled.fillna(0.0)
|
|
225
|
+
|
|
226
|
+
def hard_filter_models(df_in: pd.DataFrame, req: Dict[str, Any]) -> pd.DataFrame:
    """Reduce the model catalogue to the rows compatible with ``req``.

    Filters run in a fixed order:

    1. Strict filters (may leave zero rows): provider, deployment type,
       open weights.
    2. Best-effort filters: numeric thresholds, then tool-calling and
       multimodality. Each one is applied only if at least one row
       survives it; otherwise that constraint is ignored.
    3. Languages: keep the rows covering the largest number of required
       languages, recorded in a ``lang_match_count`` column.

    A constraint that is absent from ``req`` or set to None is treated as
    "no constraint". ``df_in`` is not modified.

    Args:
        df_in: Model catalogue.
        req: Extracted constraints keyed by constraint name.

    Returns:
        The filtered catalogue with a fresh 0..n-1 index.
    """

    def keep_if_any(frame: pd.DataFrame, mask: pd.Series) -> pd.DataFrame:
        # Best-effort narrowing: never let one threshold empty the frame.
        subset = frame[mask]
        return frame if subset.empty else subset

    df_filtered = df_in.copy()

    # Normalize booleans: "true"/"false" text (any case) becomes a real
    # bool; anything else is left as it was.
    for col in ("open_weights", "tool_calling", "multimodal"):
        as_bool = df_filtered[col].astype(str).str.lower().map(
            {"true": True, "false": False}
        )
        df_filtered[col] = as_bool.fillna(df_filtered[col])

    # Provider (strict). Accept a bare string as well as a list; an empty
    # list means "no provider constraint" rather than "match nothing".
    providers = req.get("provider")
    if isinstance(providers, str):
        providers = [providers]
    if providers:
        allowed = {str(p).strip().lower() for p in providers}
        catalogue_provider = df_filtered["provider"].astype(str).str.strip().str.lower()
        df_filtered = df_filtered[catalogue_provider.isin(allowed)]

    # Deployment type (strict), unless unspecified or "either".
    deployment = req.get("deployment_type")
    if deployment is not None:
        deployment = str(deployment).strip().lower()
        if deployment and deployment != "either":
            df_filtered = df_filtered[
                df_filtered["deployment_type"].astype(str).str.lower() == deployment
            ]

    # Open weights (strict).
    if req.get("open_weights") is True:
        df_filtered = df_filtered[df_filtered["open_weights"].eq(True)]

    # Numeric thresholds (best effort): (constraint key, column, is_ceiling).
    bounds = (
        ("max_latency_sec", "avg_latency_sec", True),
        ("max_input_cost_per_1m_tokens", "input_cost_per_1m_tokens", True),
        ("max_output_cost_per_1m_tokens", "output_cost_per_1m_tokens", True),
        ("min_context_window", "context_window", False),
        ("min_reasoning_score", "reasoning_score", False),
        ("min_quality_score", "quality_score", False),
        ("min_throughput_rps", "throughput_rps", False),
    )
    for key, column, is_ceiling in bounds:
        limit = req.get(key)
        if limit is None:
            continue
        # Coerce so a stray non-numeric cell fails the test instead of
        # raising; such rows compare as NaN and are excluded.
        observed = pd.to_numeric(df_filtered[column], errors="coerce")
        mask = observed <= limit if is_ceiling else observed >= limit
        df_filtered = keep_if_any(df_filtered, mask)

    # Capability flags (best effort).
    capability_flags = (
        ("must_support_tool_calling", "tool_calling"),
        ("must_support_multimodality", "multimodal"),
    )
    for key, column in capability_flags:
        if req.get(key) is True:
            df_filtered = keep_if_any(df_filtered, df_filtered[column].eq(True))

    # Languages: keep best matching subset.
    languages = req.get("required_languages")
    if isinstance(languages, str):
        languages = [languages]
    if languages:
        required = {str(code).strip().lower() for code in languages}
        # assign() builds a new frame, so nothing is written onto a
        # filtered slice of the catalogue.
        df_filtered = df_filtered.assign(
            lang_match_count=df_filtered["languages"].apply(
                lambda cell: language_match_count(required, parse_languages(cell))
            )
        )
        best = df_filtered["lang_match_count"].max()
        if pd.notna(best):
            df_filtered = df_filtered[df_filtered["lang_match_count"] == best]

    return df_filtered.reset_index(drop=True)
|
|
315
|
+
|
|
316
|
+
def compute_weighted_score(df_in: pd.DataFrame, priorities: Dict[str, float]) -> pd.DataFrame:
    """Attach per-dimension ``score_*`` columns and a weighted ``global_score``.

    Works on a copy of ``df_in``; the input frame is not modified.

    Args:
        df_in: Model catalog. The cost, latency, reasoning, context-window,
            quality, throughput, ``tool_calling`` and ``open_weights``
            columns are read.
        priorities: Weight for each of the keys ``cost``, ``latency``,
            ``reasoning``, ``context_window``, ``quality``, ``tool_calling``,
            ``throughput`` and ``openness``. A missing key raises ``KeyError``.

    Returns:
        The copied frame with the added score columns.
    """
    result = df_in.copy()

    # Input and output prices are normalised separately and then averaged
    # into one cost score (cheaper is better).
    # NOTE(review): normalize_series is defined elsewhere in this file;
    # presumably it rescales to a common range - confirm there.
    for score_col, price_col in (
        ("score_cost_input", "input_cost_per_1m_tokens"),
        ("score_cost_output", "output_cost_per_1m_tokens"),
    ):
        result[score_col] = normalize_series(result[price_col], higher_is_better=False)
    result["score_cost"] = (result["score_cost_input"] + result["score_cost_output"]) / 2

    # Remaining numeric metrics: (score column, source column, higher_is_better).
    numeric_metrics = (
        ("score_latency", "avg_latency_sec", False),
        ("score_reasoning", "reasoning_score", True),
        ("score_context_window", "context_window", True),
        ("score_quality", "quality_score", True),
        ("score_throughput", "throughput_rps", True),
    )
    for score_col, source_col, prefer_high in numeric_metrics:
        result[score_col] = normalize_series(result[source_col], higher_is_better=prefer_high)

    # Flag columns: the text "true"/"false" (any casing, so real booleans
    # work too) maps to 1.0/0.0; every other value scores 0.0.
    flag_values = {"true": 1.0, "false": 0.0}
    for score_col, flag_col in (
        ("score_tool_calling", "tool_calling"),
        ("score_openness", "open_weights"),
    ):
        as_text = result[flag_col].astype(str).str.lower()
        result[score_col] = as_text.map(flag_values).fillna(0.0)

    # Weighted sum, accumulated left to right in this fixed order.
    weighted_terms = (
        ("cost", "score_cost"),
        ("latency", "score_latency"),
        ("reasoning", "score_reasoning"),
        ("context_window", "score_context_window"),
        ("quality", "score_quality"),
        ("tool_calling", "score_tool_calling"),
        ("throughput", "score_throughput"),
        ("openness", "score_openness"),
    )
    total = None
    for weight_key, score_col in weighted_terms:
        term = priorities[weight_key] * result[score_col]
        total = term if total is None else total + term
    result["global_score"] = total

    return result
|
|
366
|
+
|
|
367
|
+
def build_explanation(row: pd.Series, priorities: Dict[str, float]) -> str:
    """Summarise the model attributes behind a recommendation.

    Up to three of the highest-weighted priority dimensions are selected and
    the matching raw values of ``row`` are rendered as ``label=value``
    fragments joined with ``" | "``.

    Args:
        row: One model record. Only the columns of the selected dimensions
            are read; anything supporting ``row[column]`` works.
        priorities: Weight per dimension name. A missing or ``None`` weight
            is treated as 0.0 instead of raising.

    Returns:
        The rationale string. Dimensions without a positive weight are left
        out, since a zero weight means the attribute did not drive the
        ranking. If no dimension has a positive weight, the first three
        dimensions in declaration order are reported so the result is never
        empty.
    """
    # dimension -> (format string, row columns feeding it). Declaration order
    # doubles as the tie-break order because sorted() is stable.
    fragments = {
        "cost": (
            "cost input/output={}/{}",
            ("input_cost_per_1m_tokens", "output_cost_per_1m_tokens"),
        ),
        "latency": ("latency={} sec", ("avg_latency_sec",)),
        "reasoning": ("reasoning_score={}", ("reasoning_score",)),
        "context_window": ("context_window={}", ("context_window",)),
        "quality": ("quality_score={}", ("quality_score",)),
        "tool_calling": ("tool_calling={}", ("tool_calling",)),
        "throughput": ("throughput_rps={}", ("throughput_rps",)),
        "openness": ("open_weights={}", ("open_weights",)),
    }

    def weight_of(dim: str) -> float:
        # Absent or null weights must not break sorting; count them as zero.
        value = priorities.get(dim)
        return 0.0 if value is None else value

    top_dims = sorted(fragments, key=weight_of, reverse=True)[:3]

    # Do not cite dimensions the caller gave no weight to, unless that would
    # leave nothing to report.
    weighted_dims = [dim for dim in top_dims if weight_of(dim) > 0]
    chosen = weighted_dims or top_dims

    reasons = []
    for dim in chosen:
        template, columns = fragments[dim]
        reasons.append(template.format(*(row[col] for col in columns)))

    return " | ".join(reasons)
|
|
405
|
+
|
|
406
|
+
# ----------------------------
|
|
407
|
+
# 6) Filter + score
|
|
408
|
+
# ----------------------------
|
|
409
|
+
filtered = hard_filter_models(df, params["constraints"])
|
|
410
|
+
|
|
411
|
+
if filtered.empty:
|
|
412
|
+
print("\nNo exact match found after hard filtering. Falling back to full catalog scoring.\n")
|
|
413
|
+
filtered = df.copy()
|
|
414
|
+
|
|
415
|
+
scored = compute_weighted_score(filtered, params["priorities"])
|
|
416
|
+
scored["recommendation_rationale"] = scored.apply(
|
|
417
|
+
lambda row: build_explanation(row, params["priorities"]), axis=1
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
ranked = scored.sort_values(
|
|
421
|
+
by=["global_score", "reasoning_score", "quality_score"],
|
|
422
|
+
ascending=[False, False, False]
|
|
423
|
+
).reset_index(drop=True)
|
|
424
|
+
|
|
425
|
+
# ----------------------------
|
|
426
|
+
# 7) Save results
|
|
427
|
+
# ----------------------------
|
|
428
|
+
output_path = str(input("\nEnter output folder path: ").strip())
|
|
429
|
+
os.makedirs(output_path, exist_ok=True)
|
|
430
|
+
|
|
431
|
+
output_cols = [
|
|
432
|
+
"model_name",
|
|
433
|
+
"provider",
|
|
434
|
+
"deployment_type",
|
|
435
|
+
"open_weights",
|
|
436
|
+
"input_cost_per_1m_tokens",
|
|
437
|
+
"output_cost_per_1m_tokens",
|
|
438
|
+
"avg_latency_sec",
|
|
439
|
+
"context_window",
|
|
440
|
+
"reasoning_score",
|
|
441
|
+
"quality_score",
|
|
442
|
+
"tool_calling",
|
|
443
|
+
"multimodal",
|
|
444
|
+
"throughput_rps",
|
|
445
|
+
"languages",
|
|
446
|
+
"benchmark_source",
|
|
447
|
+
"release_date",
|
|
448
|
+
"global_score",
|
|
449
|
+
"recommendation_rationale",
|
|
450
|
+
]
|
|
451
|
+
|
|
452
|
+
ranked[output_cols].to_csv(
|
|
453
|
+
os.path.join(output_path, "LLMAdvisor_BenchmarkResults.csv"),
|
|
454
|
+
index=False
|
|
455
|
+
)
|
|
456
|
+
|
|
457
|
+
# Save extracted request metadata as well
|
|
458
|
+
with open(os.path.join(output_path, "LLMAdvisor_ExtractedUseCase.json"), "w", encoding="utf-8") as f:
|
|
459
|
+
json.dump(params, f, indent=2, ensure_ascii=False)
|
|
460
|
+
|
|
461
|
+
# Display top results
|
|
462
|
+
print("\nTop 5 recommended models:\n")
|
|
463
|
+
print(
|
|
464
|
+
ranked[
|
|
465
|
+
[
|
|
466
|
+
"model_name",
|
|
467
|
+
"provider",
|
|
468
|
+
"global_score",
|
|
469
|
+
"input_cost_per_1m_tokens",
|
|
470
|
+
"output_cost_per_1m_tokens",
|
|
471
|
+
"avg_latency_sec",
|
|
472
|
+
"reasoning_score",
|
|
473
|
+
"quality_score",
|
|
474
|
+
"recommendation_rationale",
|
|
475
|
+
]
|
|
476
|
+
].head(5).to_string(index=False)
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
print(f"\nResults saved to: {output_path}")
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|