nvidia-livecodebench 25.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. core_evals/livecodebench/__init__.py +1 -0
  2. core_evals/livecodebench/framework.yml +233 -0
  3. core_evals/livecodebench/framework_entrypoint.py +28 -0
  4. core_evals/livecodebench/output.py +51 -0
  5. livecodebench/__init__.py +0 -0
  6. livecodebench/benchmarks/__init__.py +31 -0
  7. livecodebench/benchmarks/code_execution.py +85 -0
  8. livecodebench/benchmarks/code_generation.py +160 -0
  9. livecodebench/benchmarks/test_output_prediction.py +90 -0
  10. livecodebench/benchmarks/utils.py +50 -0
  11. livecodebench/evaluation/__init__.py +24 -0
  12. livecodebench/evaluation/compute_code_execution_metrics.py +73 -0
  13. livecodebench/evaluation/compute_code_generation_metrics.py +278 -0
  14. livecodebench/evaluation/compute_scores.py +172 -0
  15. livecodebench/evaluation/compute_test_output_prediction_metrics.py +125 -0
  16. livecodebench/evaluation/metric.py +28 -0
  17. livecodebench/evaluation/old_results_check.py +91 -0
  18. livecodebench/evaluation/pass_k_utils.py +84 -0
  19. livecodebench/evaluation/testing_util.py +574 -0
  20. livecodebench/evaluation/utils_execute.py +285 -0
  21. livecodebench/lm_styles.py +581 -0
  22. livecodebench/prompts/__init__.py +22 -0
  23. livecodebench/prompts/code_execution.py +201 -0
  24. livecodebench/prompts/code_generation.py +372 -0
  25. livecodebench/prompts/few_shot_examples/generation/func.json +12 -0
  26. livecodebench/prompts/few_shot_examples/generation/stdin.json +10 -0
  27. livecodebench/prompts/self_repair.py +370 -0
  28. livecodebench/prompts/test_output_prediction.py +327 -0
  29. livecodebench/runner/__init__.py +0 -0
  30. livecodebench/runner/base_runner.py +188 -0
  31. livecodebench/runner/claude3_runner.py +70 -0
  32. livecodebench/runner/claude_runner.py +69 -0
  33. livecodebench/runner/cohere_runner.py +71 -0
  34. livecodebench/runner/custom_evaluator.py +132 -0
  35. livecodebench/runner/deepseek_runner.py +87 -0
  36. livecodebench/runner/gemini_runner.py +111 -0
  37. livecodebench/runner/generic_oai_server_runner.py +104 -0
  38. livecodebench/runner/main.py +255 -0
  39. livecodebench/runner/mistral_runner.py +71 -0
  40. livecodebench/runner/oai_runner.py +93 -0
  41. livecodebench/runner/parser.py +174 -0
  42. livecodebench/runner/runner_utils.py +62 -0
  43. livecodebench/runner/scenario_router.py +239 -0
  44. livecodebench/runner/vllm_runner.py +82 -0
  45. livecodebench/utils/__init__.py +0 -0
  46. livecodebench/utils/extraction_utils.py +82 -0
  47. livecodebench/utils/multiprocess.py +250 -0
  48. livecodebench/utils/path_utils.py +58 -0
  49. livecodebench/utils/scenarios.py +26 -0
  50. livecodebench/utils/seed_generator.py +44 -0
  51. nvidia_livecodebench-25.8.dist-info/METADATA +518 -0
  52. nvidia_livecodebench-25.8.dist-info/RECORD +55 -0
  53. nvidia_livecodebench-25.8.dist-info/WHEEL +4 -0
  54. nvidia_livecodebench-25.8.dist-info/entry_points.txt +4 -0
  55. nvidia_livecodebench-25.8.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,201 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ # Original Copyright 2025 LiveCodeBench
17
+ # For the original license and copyright information, see the LICENSE file in this repository.
18
+
19
+ import json
20
+
21
+ from livecodebench.lm_styles import LMStyle
22
+ from livecodebench.benchmarks import CodeExecutionProblem
23
+
24
+
25
+ def make_cot_output_prompt(s):
26
+ code, input = s
27
+ return f"""You are given a Python function and an assertion containing an input to the function. Complete the assertion with a literal (no unsimplified expressions, no function calls) containing the output when executing the provided code on the given input, even if the function is incorrect or incomplete. Do NOT output any extra information. Execute the program step by step before arriving at an answer, and provide the full assertion with the correct output in [ANSWER] and [/ANSWER] tags, following the examples.
28
+
29
+ [PYTHON]
30
+ def performOperation(s):
31
+ s = s + s
32
+ return "b" + s + "a"
33
+ assert performOperation(s = "hi") == ??
34
+ [/PYTHON]
35
+ [THOUGHT]
36
+ Let's execute the code step by step:
37
+
38
+ 1. The function performOperation is defined, which takes a single argument s.
39
+ 2. The function is called with the argument "hi", so within the function, s is initially "hi".
40
+ 3. Inside the function, s is concatenated with itself, so s becomes "hihi".
41
+ 4. The function then returns a new string that starts with "b", followed by the value of s (which is now "hihi"), and ends with "a".
42
+ 5. The return value of the function is therefore "bhihia".
43
+ [/THOUGHT]
44
+ [ANSWER]
45
+ assert performOperation(s = "hi") == "bhihia"
46
+ [/ANSWER]
47
+
48
+ [PYTHON]
49
+ {code}
50
+ assert {input} == ??
51
+ [/PYTHON]
52
+ [THOUGHT]
53
+ """
54
+
55
+
56
def make_direct_output_prompt(s):
    """Build the direct (no chain-of-thought) output-prediction prompt.

    Args:
        s: A ``(code, input)`` pair — the program source and the assertion
           call expression whose result the model must predict.

    Returns:
        A prompt string with two worked [ANSWER] examples followed by the
        target program, ending inside an open [ANSWER] tag so the model
        completes the assertion directly.
    """
    # NOTE: `input` shadows the builtin of the same name; kept as-is so the
    # code stays byte-identical (local to this function only).
    code, input = s
    return f"""You are given a Python function and an assertion containing an input to the function. Complete the assertion with a literal (no unsimplified expressions, no function calls) containing the output when executing the provided code on the given input, even if the function is incorrect or incomplete. Do NOT output any extra information. Provide the full assertion with the correct output in [ANSWER] and [/ANSWER] tags, following the examples.

[PYTHON]
def repeatNumber(number : int) -> int:
    return number
assert repeatNumber(number = 17) == ??
[/PYTHON]
[ANSWER]
assert repeatNumber(number = 17) == 17
[/ANSWER]

[PYTHON]
def addCharacterA(string : str) -> str:
    return string + "a"
assert addCharacterA(string = "x9j") == ??
[/PYTHON]
[ANSWER]
assert addCharacterA(string = "x9j") == "x9ja"
[/ANSWER]

[PYTHON]
{code}
assert {input} == ??
[/PYTHON]
[ANSWER]
"""
84
+
85
+
86
def format_prompt_execution(question, LanguageModelStyle):
    """Format a code-execution prompt without chain-of-thought reasoning."""
    return format_prompt_execution_base(question, LanguageModelStyle, cot=False)
88
+
89
+
90
def format_prompt_execution_cot(question, LanguageModelStyle):
    """Format a code-execution prompt with chain-of-thought reasoning enabled."""
    return format_prompt_execution_base(question, LanguageModelStyle, cot=True)
92
+
93
+
94
def format_prompt_execution_base(
    question: CodeExecutionProblem, LanguageModelStyle: LMStyle, cot: bool
) -> str:
    """Build the code-execution prompt in the format expected by a model family.

    Args:
        question: The execution problem; only its ``code`` and ``input``
            fields are read here.
        LanguageModelStyle: Which model family's prompt convention to use.
        cot: When True, use the chain-of-thought prompt template.

    Returns:
        Depending on the style: a plain prompt string, an OpenAI-style list
        of chat messages, a ``(system_message, messages)`` pair (Claude 3),
        or a tokenizer-rendered chat template string (LLaMa3 / DracarysLlama).

    Raises:
        NotImplementedError: For styles with no prompt format defined here.
    """
    system_message = "You are an expert at Python programming, code execution, test case generation, and fuzzing."
    payload = (question.code, question.input)
    prompt = make_cot_output_prompt(payload) if cot else make_direct_output_prompt(payload)

    def as_chat(system, user):
        # OpenAI-style chat message list shared by several branches below.
        return [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]

    # Styles that consume the raw prompt string unchanged.
    plain_prompt_styles = (
        LMStyle.Claude,
        LMStyle.Gemini,
        LMStyle.StarCoderInstruct,
        LMStyle.DeepSeekCodeInstruct,
        LMStyle.CodeLLaMaInstruct,
        LMStyle.MagiCoder,
        LMStyle.WizardCoder,
        LMStyle.Phind,
        LMStyle.OC,
        LMStyle.DracarysQwen,
    )

    if LanguageModelStyle in (
        LMStyle.OpenAIChat,
        LMStyle.GenericOAIServer,
        LMStyle.MistralWeb,
    ):
        return as_chat(system_message, prompt)

    if LanguageModelStyle == LMStyle.Claude3:
        # Claude 3 takes the system message separately from the message list.
        return system_message, [{"role": "user", "content": prompt}]

    if LanguageModelStyle in plain_prompt_styles:
        return prompt

    if LanguageModelStyle in (LMStyle.LLaMa3, LMStyle.DracarysLlama):
        from transformers import AutoTokenizer

        if LanguageModelStyle == LMStyle.LLaMa3:
            model_name, padding_side = "meta-llama/Meta-Llama-3-8B-Instruct", "left"
        else:
            model_name, padding_side = (
                "abacusai/Dracarys-Llama-3.1-70B-Instruct",
                "right",
            )
        tokenizer = AutoTokenizer.from_pretrained(
            model_name, padding_side=padding_side, use_fast=False
        )
        return tokenizer.apply_chat_template(
            as_chat(system_message, prompt),
            tokenize=False,
            add_generation_prompt=True,
            truncation=False,
            padding=False,
        )

    raise NotImplementedError(
        f"LanguageModelStyle {LanguageModelStyle} not implemented"
    )
@@ -0,0 +1,372 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ # Original Copyright 2025 LiveCodeBench
17
+ # For the original license and copyright information, see the LICENSE file in this repository.
18
+
19
+ import json
20
+ from pathlib import Path
21
+
22
+ try:
23
+ from anthropic import HUMAN_PROMPT, AI_PROMPT
24
+ except ImportError:
25
+ HUMAN_PROMPT = None
26
+ AI_PROMPT = None
27
+
28
+ from livecodebench.lm_styles import LMStyle
29
+ from livecodebench.benchmarks.code_generation import CodeGenerationProblem
30
+
31
+
32
class PromptConstants:
    """Shared system-message and formatting strings for generation prompts.

    All values are plain constants; the ``f`` prefixes on the originals were
    unnecessary (no placeholders) and have been dropped. The string contents
    are unchanged byte-for-byte.
    """

    # Default system message for chat-style APIs.
    SYSTEM_MESSAGE_GENERIC = "You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests."

    # Gemini variant adds guardrails against `exit` calls and stray code blocks.
    SYSTEM_MESSAGE_GEMINI = "You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. Do NOT use system calls like `exit` in the generated program. Ensure that the first code block contains the solution."

    SYSTEM_MESSAGE_GEMINITHINK = "You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests."

    SYSTEM_MESSAGE_DEEPSEEK = "You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you answer questions related to computer science."

    # CodeQwen uses raw ChatML markers instead of an API-level system role.
    SYSTEM_MESSAGE_CODEQWEN = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user"
    )

    # Instructions for functional-style problems (starter code provided).
    FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."

    # Instructions for stdin/stdout-style problems (no starter code).
    FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows. Ensure that when the python program runs, it reads the inputs, runs the algorithm and writes output to STDOUT."
48
+
49
+
50
def get_generic_question_template_answer(question: CodeGenerationProblem):
    """Render the generic '### Question / ### Format / ### Answer' user prompt."""
    parts = [f"### Question:\n{question.question_content}\n\n"]
    if question.starter_code:
        # Functional problem: show the starter code the model must complete.
        parts.append(
            f"### Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        )
        parts.append(f"```python\n{question.starter_code}\n```\n\n")
    else:
        # stdin/stdout problem: show an empty code scaffold instead.
        parts.append(f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n")
        parts.append("```python\n# YOUR CODE HERE\n```\n\n")
    parts.append("### Answer: (use the provided format with backticks)\n\n")
    return "".join(parts)
62
+
63
+
64
def get_oaireason_question_template_answer(question: CodeGenerationProblem):
    """Render the user prompt variant used for OpenAI reasoning models.

    Differs from the generic template only in the no-starter-code branch,
    which asks for an explicit `main()` entry point.
    """
    parts = [f"### Question:\n{question.question_content}\n\n"]
    if question.starter_code:
        parts.append(
            f"### Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        )
        parts.append(f"```python\n{question.starter_code}\n```\n\n")
    else:
        # NOTE(review): "orchastrates" is a typo in the original prompt text;
        # preserved byte-for-byte because changing it would alter model inputs.
        parts.append("### Format: Implement a function called `main()` which orchastrates the solution by reading inputs from stdin and writing the answer to stdout. Feel free to use additional functions as necessary. Next do NOT forget to call `main` function at the end of the program otherwise you will not be awarded any points.\n")
        parts.append("```python\n# YOUR CODE HERE\n```\n\n")
    parts.append("### Answer: (use the provided format with backticks)\n\n")
    return "".join(parts)
76
+
77
+
78
def get_geminithinking_question_template_answer(question: CodeGenerationProblem):
    """Render the user prompt for Gemini thinking models.

    Currently identical in content to the generic template; kept as a
    separate function so the Gemini variant can diverge independently.
    """
    parts = [f"### Question:\n{question.question_content}\n\n"]
    if question.starter_code:
        parts.append(
            f"### Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        )
        parts.append(f"```python\n{question.starter_code}\n```\n\n")
    else:
        parts.append(f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n")
        parts.append("```python\n# YOUR CODE HERE\n```\n\n")
    parts.append("### Answer: (use the provided format with backticks)\n\n")
    return "".join(parts)
90
+
91
+
92
def get_deepseekcode_question_template_answer(question: CodeGenerationProblem):
    """Render the '### Instruction / ### Response' prompt used by DeepSeek Coder."""
    parts = [
        "### Instruction: You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n",
        f"Question:\n{question.question_content}\n\n",
    ]
    if question.starter_code:
        parts.append(
            f"### Instruction: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        )
        parts.append(f"```python\n{question.starter_code}\n```\n\n")
    else:
        parts.append(
            f"### Instruction: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
        )
        parts.append("```python\n# YOUR CODE HERE\n```\n\n")
    parts.append("### Response:\n\n")
    return "".join(parts)
107
+
108
+
109
def get_qwen_question_template_answer(question: CodeGenerationProblem):
    """Render the Qwen 1.5 chat prompt via the model's own chat template.

    Builds the user message, wraps it with the generic system message, and
    applies the Qwen tokenizer's chat template (tokenize=False) so the
    returned value is the fully formatted prompt string.
    """
    from transformers import AutoTokenizer

    # NOTE(review): hard-coded absolute path — only works on a machine with
    # this local model checkout; presumably a Hub id was intended. Confirm
    # before relying on this code path.
    tokenizer = AutoTokenizer.from_pretrained(
        "/abacus/models/Qwen1.5-72B-Chat/", padding_side="left", use_fast=False
    )
    prompt = "You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
    prompt += f"Question:\n{question.question_content}\n\n"
    if question.starter_code:
        prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        prompt += f"```python\n{question.starter_code}\n```\n\n"
    else:
        prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n"
        prompt += f"```python\n# YOUR CODE HERE\n```\n\n"

    messages = [
        {"role": "system", "content": PromptConstants.SYSTEM_MESSAGE_GENERIC},
        {"role": "user", "content": prompt},
    ]

    # Render the chat messages to a single string with the generation
    # prompt appended, without tokenizing.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        truncation=False,
        padding=False,
    )
    return prompt
137
+
138
+
139
def get_codeqwen_question_template_answer(question: CodeGenerationProblem):
    """Render the CodeQwen prompt with raw ChatML end/start-of-turn markers."""
    parts = [
        "You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n",
        f"Question: {question.question_content}\n\n",
    ]
    if question.starter_code:
        parts.append(f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n")
        parts.append(f"```python\n{question.starter_code}\n```\n\n<|im_end|>\n")
    else:
        parts.append(f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n")
        parts.append("```python\n# YOUR CODE HERE\n```\n\n<|im_end|>\n")
    # Open the assistant turn so the model continues from here.
    parts.append("<|im_start|>assistant\n")
    return "".join(parts)
150
+
151
+
152
# Load the few-shot example pools used by base-model prompting.
# The examples live alongside this module under few_shot_examples/generation/,
# so resolve them relative to this file rather than re-deriving the package
# root and re-appending 'livecodebench/prompts' (the original round-trip was
# equivalent but broke whenever the install layout differed).
current_dir = Path(__file__).absolute().parent

# Kept for backward compatibility with any external code that imported it.
project_root = current_dir.parent.parent

# Paths to the function-style and stdin-style few-shot example files.
func_path = current_dir / 'few_shot_examples' / 'generation' / 'func.json'
stdin_path = current_dir / 'few_shot_examples' / 'generation' / 'stdin.json'

# `func`: examples for problems with starter code; `stdin`: stdin/stdout problems.
with open(func_path) as f:
    func = json.load(f)

with open(stdin_path) as f:
    stdin = json.load(f)
168
+
169
+
170
def get_base_model_question_template_answer(question: CodeGenerationProblem):
    """Render a one-shot completion prompt for base (non-chat) models.

    Emits one few-shot example (functional or stdin style, matching the
    question) followed by the question itself with an empty answer section,
    so the model completes the answer.
    """
    examples_json = func if question.starter_code else stdin

    def render(example):
        # One '### Question [/ ### Starter Code] / ### Answer' section.
        pieces = ["### Question\n", example["question"], "\n\n"]
        if question.starter_code:
            pieces += ["### Starter Code\n", example["sample_code"], "\n\n"]
        pieces += ["### Answer\n\n", example["answer"]]
        if example["answer"]:
            # Only the worked example (non-empty answer) gets a trailing gap.
            pieces.append("\n\n")
        return "".join(pieces)

    return render(examples_json[0]) + render(
        {
            "question": question.question_content,
            "sample_code": question.starter_code,
            "answer": "",
        }
    )
201
+
202
+
203
def format_prompt_generation(
    question: CodeGenerationProblem, LanguageModelStyle: LMStyle
) -> str:
    """Build the code-generation prompt in the format a model family expects.

    Args:
        question: The generation problem to render.
        LanguageModelStyle: Which model family's prompt convention to use.

    Returns:
        Depending on the style: a chat-message list, a plain prompt string,
        or a ``(system, messages)`` pair (Claude 3). The ``-> str`` annotation
        under-describes this; kept as-is to preserve the signature.

    Raises:
        NotImplementedError: For styles with no generation prompt defined.
    """
    # Chat APIs with a separate system role.
    if LanguageModelStyle in [LMStyle.OpenAIChat, LMStyle.DeepSeekAPI]:
        chat_messages = [
            {
                "role": "system",
                "content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
            },
        ]
        chat_messages += [
            {
                "role": "user",
                "content": get_generic_question_template_answer(question),
            },
        ]
        return chat_messages
    # Reasoning models take no system role; fold it into the user message.
    elif LanguageModelStyle == LMStyle.OpenAIReasonPreview:
        chat_messages = [
            {
                "role": "user",
                "content": PromptConstants.SYSTEM_MESSAGE_GENERIC
                + "\n\n"
                + get_generic_question_template_answer(question),
            },
        ]
        return chat_messages
    elif LanguageModelStyle == LMStyle.OpenAIReason:
        chat_messages = [
            {
                "role": "user",
                "content": PromptConstants.SYSTEM_MESSAGE_GENERIC
                + "\n\n"
                + get_oaireason_question_template_answer(question),
            },
        ]
        return chat_messages

    # LLaMa3: render the chat messages through the model's own chat template.
    if LanguageModelStyle == LMStyle.LLaMa3:
        chat_messages = [
            {
                "role": "system",
                "content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
            },
        ]
        chat_messages += [
            {
                "role": "user",
                "content": get_generic_question_template_answer(question),
            },
        ]
        from transformers import AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(
            "meta-llama/Meta-Llama-3-8B-Instruct", padding_side="left", use_fast=False
        )
        return tokenizer.apply_chat_template(
            chat_messages,
            tokenize=False,
            add_generation_prompt=True,
            truncation=False,
            padding=False,
        )

    # Legacy Anthropic completion API (HUMAN_PROMPT/AI_PROMPT markers).
    # NOTE(review): if the `anthropic` package is absent these markers are
    # None (see the guarded import at the top of the file) and this branch
    # would render "None" literally — confirm the package is installed
    # before using this style.
    if LanguageModelStyle == LMStyle.Claude:
        prompt = f"{HUMAN_PROMPT}\n"
        prompt += f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n\n"
        prompt += f"{get_generic_question_template_answer(question).rstrip()}\n"
        prompt += f"{AI_PROMPT}"
        return prompt

    # Claude 3 messages API: system string returned separately.
    if LanguageModelStyle == LMStyle.Claude3:
        system = PromptConstants.SYSTEM_MESSAGE_GENERIC
        prompt = [
            {
                "role": "user",
                "content": get_generic_question_template_answer(question).rstrip(),
            }
        ]
        return system, prompt

    if LanguageModelStyle == LMStyle.Gemini:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_GEMINI}\n"
        prompt += f"{get_generic_question_template_answer(question)}"
        return prompt

    if LanguageModelStyle == LMStyle.GeminiThinking:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_GEMINITHINK}\n"
        prompt += f"{get_geminithinking_question_template_answer(question)}"
        return prompt

    if LanguageModelStyle == LMStyle.MistralWeb:
        chat_messages = [
            {
                "role": "system",
                "content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
            },
            {
                "role": "user",
                "content": get_generic_question_template_answer(question),
            },
        ]
        return chat_messages

    if LanguageModelStyle == LMStyle.DeepSeekCodeInstruct:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_DEEPSEEK}\n\n"
        prompt += f"{get_deepseekcode_question_template_answer(question)}"
        return prompt

    if LanguageModelStyle == LMStyle.CodeQwenInstruct:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_CODEQWEN}\n\n"
        prompt += f"{get_codeqwen_question_template_answer(question)}"
        return prompt

    # Base (non-chat) models: one-shot completion prompt.
    if LanguageModelStyle == LMStyle.GenericBase:
        prompt = get_base_model_question_template_answer(question)
        return prompt

    # Generic OpenAI-compatible servers: single user message, no system role.
    if LanguageModelStyle == LMStyle.GenericOAIServer:
        prompt = [
            {
                "role": "user",
                "content": get_generic_question_template_answer(question).rstrip(),
            }
        ]
        return prompt

    raise NotImplementedError(
        f"LanguageModelStyle {LanguageModelStyle} not implemented"
    )
333
+
334
+
335
def test():
    """Write one example prompt pair (no starter code / with starter code)
    per LMStyle under logs/example_prompts/generation for manual inspection."""
    import pathlib

    base_dir = "logs/example_prompts/generation"
    pathlib.Path(base_dir).mkdir(parents=True, exist_ok=True)

    def dump(prompt, path):
        # Chat-style prompts are lists of message dicts; writing them raises
        # TypeError, so fall back to JSON serialization.
        with open(path, "w") as f:
            try:
                f.write(prompt)
            except TypeError:
                f.write(json.dumps(prompt))

    for lmstyle in LMStyle:
        problem = CodeGenerationProblem(
            "title",
            "question-content",
            "leetcode",
            "question_id",
            "contest_id",
            "contest_date",
            "",
            "easy",
            "[]",
            "[]",
            "{}",
        )
        dump(format_prompt_generation(problem, lmstyle), f"{base_dir}/{lmstyle}_1.txt")

        problem.starter_code = "starter code"
        dump(format_prompt_generation(problem, lmstyle), f"{base_dir}/{lmstyle}_2.txt")
369
+
370
+
371
# Allow running this module directly to regenerate the example prompt files.
if __name__ == "__main__":
    test()
@@ -0,0 +1,12 @@
1
+ [
2
+ {
3
+ "question": "You are given a 0-indexed array of positive integers nums. Find the number of triplets (i, j, k) that meet the following conditions:\n\n0 <= i < j < k < nums.length\nnums[i], nums[j], and nums[k] are pairwise distinct.\n\t\nIn other words, nums[i] != nums[j], nums[i] != nums[k], and nums[j] != nums[k].\n\n\n\nReturn the number of triplets that meet the conditions.\n \nExample 1:\n\nInput: nums = [4,4,2,4,3]\nOutput: 3\nExplanation: The following triplets meet the conditions:\n- (0, 2, 4) because 4 != 2 != 3\n- (1, 2, 4) because 4 != 2 != 3\n- (2, 3, 4) because 2 != 4 != 3\nSince there are 3 triplets, we return 3.\nNote that (2, 0, 4) is not a valid triplet because 2 > 0.\n\nExample 2:\n\nInput: nums = [1,1,1,1,1]\nOutput: 0\nExplanation: No triplets meet the conditions so we return 0.\n\n \nConstraints:\n\n3 <= nums.length <= 100\n1 <= nums[i] <= 1000\n\n",
4
+ "sample_code": "class Solution:\n def unequalTriplets(self, nums: List[int]) -> int:\n ",
5
+ "answer": "class Solution:\n def unequalTriplets(self, a: List[int]) -> int:\n ans = 0\n n = len(a)\n for i in range(n):\n for j in range(i + 1, n):\n for k in range(j + 1, n):\n ans += len({a[i], a[j], a[k]}) == 3\n return ans"
6
+ },
7
+ {
8
+ "question": "You are given two strings s and t consisting of only lowercase English letters.\nReturn the minimum number of characters that need to be appended to the end of s so that t becomes a subsequence of s.\nA subsequence is a string that can be derived from another string by deleting some or no characters without changing the order of the remaining characters.\n \nExample 1:\n\nInput: s = \"coaching\", t = \"coding\"\nOutput: 4\nExplanation: Append the characters \"ding\" to the end of s so that s = \"coachingding\".\nNow, t is a subsequence of s (\"coachingding\").\nIt can be shown that appending any 3 characters to the end of s will never make t a subsequence.\n\nExample 2:\n\nInput: s = \"abcde\", t = \"a\"\nOutput: 0\nExplanation: t is already a subsequence of s (\"abcde\").\n\nExample 3:\n\nInput: s = \"z\", t = \"abcde\"\nOutput: 5\nExplanation: Append the characters \"abcde\" to the end of s so that s = \"zabcde\".\nNow, t is a subsequence of s (\"zabcde\").\nIt can be shown that appending any 4 characters to the end of s will never make t a subsequence.\n\n \nConstraints:\n\n1 <= s.length, t.length <= 10^5\ns and t consist only of lowercase English letters.\n\n",
9
+ "sample_code": "class Solution:\n def appendCharacters(self, s: str, t: str) -> int:\n ",
10
+ "answer": "class Solution:\n def appendCharacters(self, s: str, t: str) -> int:\n i = 0\n for char in s:\n if i < len(t) and char == t[i]:\n i += 1\n return len(t) - i"
11
+ }
12
+ ]
@@ -0,0 +1,10 @@
1
+ [
2
+ {
3
+ "question": "You have $n$ gifts and you want to give all of them to children. Of course, you don't want to offend anyone, so all gifts should be equal between each other. The $i$-th gift consists of $a_i$ candies and $b_i$ oranges.\n\nDuring one move, you can choose some gift $1 \\le i \\le n$ and do one of the following operations:\n\n eat exactly one candy from this gift (decrease $a_i$ by one); eat exactly one orange from this gift (decrease $b_i$ by one); eat exactly one candy and exactly one orange from this gift (decrease both $a_i$ and $b_i$ by one). \n\nOf course, you can not eat a candy or orange if it's not present in the gift (so neither $a_i$ nor $b_i$ can become less than zero).\n\nAs said above, all gifts should be equal. This means that after some sequence of moves the following two conditions should be satisfied: $a_1 = a_2 = \\dots = a_n$ and $b_1 = b_2 = \\dots = b_n$ (and $a_i$ equals $b_i$ is not necessary).\n\nYour task is to find the minimum number of moves required to equalize all the given gifts.\n\nYou have to answer $t$ independent test cases.\n\n\n-----Input-----\n\nThe first line of the input contains one integer $t$ ($1 \\le t \\le 1000$) \u2014 the number of test cases. Then $t$ test cases follow.\n\nThe first line of the test case contains one integer $n$ ($1 \\le n \\le 50$) \u2014 the number of gifts. The second line of the test case contains $n$ integers $a_1, a_2, \\dots, a_n$ ($1 \\le a_i \\le 10^9$), where $a_i$ is the number of candies in the $i$-th gift. 
The third line of the test case contains $n$ integers $b_1, b_2, \\dots, b_n$ ($1 \\le b_i \\le 10^9$), where $b_i$ is the number of oranges in the $i$-th gift.\n\n\n-----Output-----\n\nFor each test case, print one integer: the minimum number of moves required to equalize all the given gifts.\n\n\n-----Example-----\nInput\n5\n3\n3 5 6\n3 2 3\n5\n1 2 3 4 5\n5 4 3 2 1\n3\n1 1 1\n2 2 2\n6\n1 1000000000 1000000000 1000000000 1000000000 1000000000\n1 1 1 1 1 1\n3\n10 12 8\n7 5 4\n\nOutput\n6\n16\n0\n4999999995\n7\n\n\n\n-----Note-----\n\nIn the first test case of the example, we can perform the following sequence of moves:\n\n choose the first gift and eat one orange from it, so $a = [3, 5, 6]$ and $b = [2, 2, 3]$; choose the second gift and eat one candy from it, so $a = [3, 4, 6]$ and $b = [2, 2, 3]$; choose the second gift and eat one candy from it, so $a = [3, 3, 6]$ and $b = [2, 2, 3]$; choose the third gift and eat one candy and one orange from it, so $a = [3, 3, 5]$ and $b = [2, 2, 2]$; choose the third gift and eat one candy from it, so $a = [3, 3, 4]$ and $b = [2, 2, 2]$; choose the third gift and eat one candy from it, so $a = [3, 3, 3]$ and $b = [2, 2, 2]$.",
4
+ "answer": "def minimum_moves(t, test_cases):\n for _ in range(t):\n n = test_cases[_][0]\n candies = test_cases[_][1]\n oranges = test_cases[_][2]\n min_candies = min(candies)\n min_oranges = min(oranges)\n ans = 0\n for i in range(n):\n ans += max(candies[i] - min_candies, oranges[i] - min_oranges)\n print(ans)\n\n\ndef main():\n t = int(input())\n test_cases = []\n for _ in range(t):\n n = int(input())\n candies = list(map(int, input().split()))\n oranges = list(map(int, input().split()))\n test_cases.append((n, candies, oranges))\n minimum_moves(t, test_cases)\n\n\nmain()\n"
5
+ },
6
+ {
7
+ "question": "Let's call a string a phone number if it has length 11 and fits the pattern \"8xxxxxxxxxx\", where each \"x\" is replaced by a digit.\n\nFor example, \"80123456789\" and \"80000000000\" are phone numbers, while \"8012345678\" and \"79000000000\" are not.\n\nYou have n cards with digits, and you want to use them to make as many phone numbers as possible. Each card must be used in at most one phone number, and you don't have to use all cards. The phone numbers do not necessarily have to be distinct.\n\nInput\n\nThe first line contains an integer n \u2014 the number of cards with digits that you have (1 \u2264 n \u2264 100).\n\nThe second line contains a string of n digits (characters \"0\", \"1\", ..., \"9\") s_1, s_2, \u2026, s_n. The string will not contain any other characters, such as leading or trailing spaces.\n\nOutput\n\nIf at least one phone number can be made from these cards, output the maximum number of phone numbers that can be made. Otherwise, output 0.\n\nExamples\n\nInput\n\n11\n00000000008\n\n\nOutput\n\n1\n\n\nInput\n\n22\n0011223344556677889988\n\n\nOutput\n\n2\n\n\nInput\n\n11\n31415926535\n\n\nOutput\n\n0\n\nNote\n\nIn the first example, one phone number, \"8000000000\", can be made from these cards.\n\nIn the second example, you can make two phone numbers from the cards, for example, \"80123456789\" and \"80123456789\".\n\nIn the third example you can't make any phone number from the given cards.",
8
+ "answer": "def count_phone_numbers(num_cards, card_digits):\n count_eights = card_digits.count(\"8\")\n max_phone_numbers = num_cards // 11\n max_possible = min(count_eights, max_phone_numbers)\n return max_possible\n\ndef main():\n num_cards = int(input())\n card_digits = input().strip()\n max_possible = count_phone_numbers(num_cards, card_digits)\n print(max_possible)\n\nmain()"
9
+ }
10
+ ]