janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- janus/__init__.py +1 -1
- janus/__main__.py +1 -1
- janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
- janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
- janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
- janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
- janus/_tests/test_cli.py +3 -2
- janus/cli/aggregate.py +135 -0
- janus/cli/cli.py +117 -0
- janus/cli/constants.py +49 -0
- janus/cli/database.py +289 -0
- janus/cli/diagram.py +207 -0
- janus/cli/document.py +183 -0
- janus/cli/embedding.py +122 -0
- janus/cli/llm.py +191 -0
- janus/cli/partition.py +134 -0
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +147 -0
- janus/cli/translate.py +192 -0
- janus/converter/__init__.py +1 -1
- janus/converter/_tests/test_translate.py +7 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +444 -153
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +27 -16
- janus/converter/evaluate.py +143 -144
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +3 -59
- janus/embedding/collections.py +1 -1
- janus/language/alc/_tests/alc.asm +3779 -0
- janus/language/binary/_tests/hello.bin +0 -0
- janus/language/block.py +78 -14
- janus/language/file.py +1 -1
- janus/language/mumps/_tests/mumps.m +235 -0
- janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
- janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
- janus/language/treesitter/_tests/languages/matlab.m +225 -0
- janus/llm/models_info.py +9 -1
- janus/metrics/_tests/asm_test_file.asm +10 -0
- janus/metrics/_tests/mumps_test_file.m +6 -0
- janus/metrics/_tests/test_treesitter_metrics.py +1 -1
- janus/metrics/metric.py +47 -124
- janus/metrics/prompts/clarity.txt +8 -0
- janus/metrics/prompts/completeness.txt +16 -0
- janus/metrics/prompts/faithfulness.txt +10 -0
- janus/metrics/prompts/hallucination.txt +16 -0
- janus/metrics/prompts/quality.txt +8 -0
- janus/metrics/prompts/readability.txt +16 -0
- janus/metrics/prompts/usefulness.txt +16 -0
- janus/parsers/code_parser.py +4 -4
- janus/parsers/doc_parser.py +12 -9
- janus/parsers/parser.py +7 -0
- janus/parsers/partition_parser.py +6 -4
- janus/parsers/reqs_parser.py +11 -8
- janus/parsers/uml.py +5 -4
- janus/prompts/prompt.py +2 -2
- janus/prompts/templates/README.md +30 -0
- janus/prompts/templates/basic_aggregation/human.txt +6 -0
- janus/prompts/templates/basic_aggregation/system.txt +1 -0
- janus/prompts/templates/basic_refinement/human.txt +14 -0
- janus/prompts/templates/basic_refinement/system.txt +1 -0
- janus/prompts/templates/diagram/human.txt +9 -0
- janus/prompts/templates/diagram/system.txt +1 -0
- janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
- janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
- janus/prompts/templates/document/human.txt +10 -0
- janus/prompts/templates/document/system.txt +1 -0
- janus/prompts/templates/document_cloze/human.txt +11 -0
- janus/prompts/templates/document_cloze/system.txt +1 -0
- janus/prompts/templates/document_cloze/variables.json +4 -0
- janus/prompts/templates/document_cloze/variables_asm.json +4 -0
- janus/prompts/templates/document_inline/human.txt +13 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
- janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
- janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
- janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
- janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
- janus/prompts/templates/multidocument/human.txt +15 -0
- janus/prompts/templates/multidocument/system.txt +1 -0
- janus/prompts/templates/partition/human.txt +22 -0
- janus/prompts/templates/partition/system.txt +1 -0
- janus/prompts/templates/partition/variables.json +4 -0
- janus/prompts/templates/pseudocode/human.txt +7 -0
- janus/prompts/templates/pseudocode/system.txt +7 -0
- janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
- janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
- janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
- janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
- janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
- janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
- janus/prompts/templates/refinement/hallucination/human.txt +13 -0
- janus/prompts/templates/refinement/hallucination/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/human.txt +15 -0
- janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
- janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/system.txt +1 -0
- janus/prompts/templates/refinement/revision/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/revision/system.txt +1 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
- janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
- janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
- janus/prompts/templates/requirements/human.txt +13 -0
- janus/prompts/templates/requirements/system.txt +2 -0
- janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
- janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
- janus/prompts/templates/simple/human.txt +16 -0
- janus/prompts/templates/simple/system.txt +3 -0
- janus/refiners/format.py +49 -0
- janus/refiners/refiner.py +113 -4
- janus/utils/enums.py +127 -112
- janus/utils/logger.py +2 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
- janus_llm-4.4.5.dist-info/RECORD +210 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
- janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
- janus/cli.py +0 -1488
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- janus_llm-4.3.1.dist-info/RECORD +0 -115
- janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/llm/models_info.py
CHANGED
```diff
@@ -50,6 +50,7 @@ except ImportError:
 ModelType = TypeVar(
     "ModelType",
     AzureChatOpenAI,
+    ChatOpenAI,
     HuggingFaceTextGenInference,
     Bedrock,
     BedrockChat,
@@ -247,6 +248,7 @@ def load_model(model_id) -> JanusModel:
         token_limit = model_config["token_limit"]
         input_token_cost = model_config["model_cost"]["input"]
         output_token_cost = model_config["model_cost"]["output"]
+        input_token_proportion = model_config["input_token_proportion"]
 
     elif model_id in DEFAULT_MODELS:
         model_id = model_id
@@ -257,6 +259,7 @@ def load_model(model_id) -> JanusModel:
         token_limit = 0
         input_token_cost = 0.0
         output_token_cost = 0.0
+        input_token_proportion = 0.4
         if model_long_id in TOKEN_LIMITS:
             token_limit = TOKEN_LIMITS[model_long_id]
         if model_long_id in COST_PER_1K_TOKENS:
@@ -286,7 +289,6 @@ def load_model(model_id) -> JanusModel:
     elif model_type_name == "OpenAI":
         model_args.update(
             openai_api_key=str(os.getenv("OPENAI_API_KEY")),
-            openai_organization=str(os.getenv("OPENAI_ORG_ID")),
         )
         # log.warning("Do NOT use this model in sensitive environments!")
         # log.warning("If you would like to cancel, please press Ctrl+C.")
@@ -310,15 +312,20 @@ def load_model(model_id) -> JanusModel:
 
     class JanusModel(model_type):
         model_id: str
+        # model_name is for LangChain compatibility
+        # It searches for `self.model_name` when counting tokens
+        model_name: str
         short_model_id: str
         model_type_name: str
         token_limit: int
+        input_token_proportion: float
         input_token_cost: float
         output_token_cost: float
         prompt_engine: type[PromptEngine]
 
     model_args.update(
         model_id=MODEL_ID_TO_LONG_ID[model_id],
+        model_name=model_id,  # This is for LangChain compatibility
         short_model_id=model_id,
     )
@@ -327,6 +334,7 @@ def load_model(model_id) -> JanusModel:
         token_limit=token_limit,
         input_token_cost=input_token_cost,
         output_token_cost=output_token_cost,
+        input_token_proportion=input_token_proportion,
         prompt_engine=prompt_engine,
         **model_args,
     )
```
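The new `input_token_proportion` field caps how much of a model's context window janus will fill with input, defaulting to 0.4 for unconfigured models. A minimal sketch of the arithmetic this implies, using only values visible in the diff; the helper name is hypothetical, not from the package:

```python
# Hypothetical helper illustrating the budget implied by the new field,
# assuming only the token_limit field and the 0.4 default shown in the diff.
def input_budget(token_limit: int, input_token_proportion: float = 0.4) -> int:
    """Tokens reserved for input; the rest is left for the model's output."""
    return int(token_limit * input_token_proportion)

print(input_budget(8192))  # 3276 tokens of input for an 8192-token model
```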
janus/metrics/metric.py
CHANGED
```diff
@@ -1,12 +1,13 @@
 import inspect
 import json
-from pathlib import Path
 from typing import Callable, Optional
 
 import click
 import typer
 from typing_extensions import Annotated
 
+from janus.cli.constants import CONVERTERS
+from janus.converter.converter import Converter
 from janus.llm import load_model
 from janus.llm.model_callbacks import COST_PER_1K_TOKENS
 from janus.metrics.cli import evaluate
@@ -70,31 +71,6 @@ def metric(
             help="Reference file or string to use as reference/baseline.",
         ),
     ] = None,
-    json_file_name: Annotated[
-        Optional[str],
-        typer.Option(
-            "--json",
-            "-j",
-            help="Json file to extract pairs from \
-            (if set ignores --target and --reference)",
-        ),
-    ] = None,
-    target_key: Annotated[
-        str,
-        typer.Option(
-            "--target-key",
-            "-tk",
-            help="json key to extract list of target strings",
-        ),
-    ] = "target",
-    reference_key: Annotated[
-        str,
-        typer.Option(
-            "--reference-key",
-            "-rk",
-            help="json key to extract list of reference strings",
-        ),
-    ] = "reference",
     file_pairing_method: Annotated[
         str,
         typer.Option(
@@ -123,6 +99,14 @@ def metric(
             is_flag=True,
         ),
     ] = False,
+    use_janus_inputs: Annotated[
+        bool,
+        typer.Option(
+            "-j",
+            "--use-janus-inputs",
+            help="present if janus output files should be evaluated",
+        ),
+    ] = False,
     use_strings: Annotated[
         bool,
         typer.Option(
@@ -137,25 +121,23 @@
     ):
         out = []
         llm = load_model(llm_name)
-        if …
-        with open( …
-            …
-            pairs[model_key][k] = (model_dict[target_key][k], ref[k])
-        elif target is not None and reference is not None:
+        if use_janus_inputs:
+            with open(target, "r") as f:
+                target_obj = json.load(f)
+            with open(reference, "r") as f:
+                reference_obj = json.load(f)
+            converter_cls = CONVERTERS.get(
+                target_obj["metadata"].get("converter_name", "Converter"),
+                Converter,
+            )
+            out = converter_cls.eval_obj_reference(
+                target=target_obj,
+                reference=reference_obj,
+                metric_func=function,
+                *args,
+                **kwargs,
+            )
+        else:
             if use_strings:
                 target_contents = target
                 reference_contents = reference
@@ -175,25 +157,6 @@
                 token_limit=llm.token_limit,
                 model_cost=COST_PER_1K_TOKENS[llm.model_id],
             )
-        else:
-            raise ValueError(
-                "Error, specify json or target and reference files/strings"
-            )
-        if isinstance(pairs, dict):
-            out = {}
-            for k in pairs:
-                out[k] = apply_function_pairs(
-                    pairs[k],
-                    function,
-                    progress,
-                    language,
-                    llm,
-                    llm.token_limit,
-                    COST_PER_1K_TOKENS[llm.model_id],
-                    *args,
-                    **kwargs,
-                )
-        else:
         out = apply_function_pairs(
             pairs,
             function,
@@ -205,17 +168,15 @@
             *args,
             **kwargs,
         )
-        out_file = Path(out_file)
-        out_file.parent.mkdir(parents=True, exist_ok=True)
         with open(out_file, "w") as f:
+            log.info(f"Writing output to {out_file}")
             json.dump(out, f)
-        log.info(f"Saved results to file: {out_file}")
 
         sig1 = inspect.signature(function)
         sig2 = inspect.signature(func)
         func.__signature__ = sig2.replace(
             parameters=tuple(
-                list(sig2.parameters.values())[: …
+                list(sig2.parameters.values())[:9]
                 + list(sig1.parameters.values())[2:-1]
             )
         )
@@ -241,23 +202,14 @@
             "--target", "-t", help="Target file or string to evaluate."
         ),
     ] = None,
-    …
-    …
+    use_janus_inputs: Annotated[
+        bool,
         typer.Option(
-            "--json",
             "-j",
-            …
-            …
+            "--use-janus-inputs",
+            help="whether to use a janus output file as input",
         ),
-    ] = …
-    target_key: Annotated[
-        str,
-        typer.Option(
-            "--target-key",
-            "-tk",
-            help="json key to extract list of target strings",
-        ),
-    ] = "target",
+    ] = False,
     splitting_method: Annotated[
         str,
         typer.Option(
@@ -298,25 +250,17 @@
     **kwargs,
 ):
     llm = load_model(llm_name)
-    if …
-    with open( …
-        …
-        if target_key not in model_dict:
-            continue
-        if model_key not in strings:
-            strings[model_key] = {}
-        for k in model_dict[target_key]:
-            strings[model_key][k] = model_dict[target_key][k]
-        # strings += list(json_obj[key][target_key].values())
-    elif target is not None:
+    if use_janus_inputs:
+        with open(target, "r") as f:
+            target_obj = json.load(f)
+        converter_cls = CONVERTERS.get(
+            target_obj["metadata"].get("converter_name", "Converter"),
+            Converter,
+        )
+        out = converter_cls.eval_obj(
+            target=target_obj, metric_func=function, *args, **kwargs
+        )
+    else:
         if use_strings:
             target_contents = target
         else:
@@ -332,25 +276,6 @@
             token_limit=llm.token_limit,
             model_cost=COST_PER_1K_TOKENS[llm.model_id],
         )
-    else:
-        raise ValueError(
-            "Error: must specify either json file or target file/string"
-        )
-    if isinstance(strings, dict):
-        out = {}
-        for k in strings:
-            out[k] = apply_function_strings(
-                strings[k],
-                function,
-                progress,
-                language,
-                llm,
-                llm.token_limit,
-                COST_PER_1K_TOKENS[llm.model_id],
-                *args,
-                **kwargs,
-            )
-    else:
     out = apply_function_strings(
         strings,
         function,
@@ -362,17 +287,15 @@
         *args,
         **kwargs,
     )
-    out_file = Path(out_file)
-    out_file.parent.mkdir(parents=True, exist_ok=True)
     with open(out_file, "w") as f:
+        log.info(f"Writing output to {out_file}")
        json.dump(out, f)
-    log.info(f"Saved results to file: {out_file}")
 
     sig1 = inspect.signature(function)
     sig2 = inspect.signature(func)
     func.__signature__ = sig2.replace(
         parameters=tuple(
-            list(sig2.parameters.values())[: …
+            list(sig2.parameters.values())[:7]
            + list(sig1.parameters.values())[1:-1]
         )
     )
```
(Removed lines marked `…` were truncated in the source view and are left as gaps.)
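The removed `--json`/`--target-key`/`--reference-key` plumbing is replaced by a single boolean `-j`/`--use-janus-inputs` flag: instead of pulling string pairs out of an arbitrary JSON file, the metric now hands a janus output object to the converter class recorded in its metadata. A minimal sketch of that dispatch, assuming a janus output file with the `metadata.converter_name` layout the diff reads; the file name is illustrative:

```python
import json

from janus.cli.constants import CONVERTERS
from janus.converter.converter import Converter

# "translated.json" is an illustrative janus output file name.
with open("translated.json", "r") as f:
    target_obj = json.load(f)

# Unknown or missing converter names fall back to the base Converter class,
# exactly as in the diff above.
converter_cls = CONVERTERS.get(
    target_obj["metadata"].get("converter_name", "Converter"), Converter
)
```

This moves the pairing and extraction logic out of the metric decorator and into the converter classes themselves.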
janus/metrics/prompts/clarity.txt
ADDED
```diff
@@ -0,0 +1,8 @@
+Based on the following target written in the {language} programming language, how would you rate the code clarity of the target on a scale of integers from 1 to 10? Higher is better.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/completeness.txt
ADDED
```diff
@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+
+Rubric:
+Does the comment address all capabilities of the relevant source code?
+
+10 - All essential functionality is documented.
+6-9 - Most essential functionality is documented.
+2-5 - Little essential functionality is documented.
+1 - No essential functionality is documented.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/faithfulness.txt
ADDED
```diff
@@ -0,0 +1,10 @@
+Based on the following target and reference written in the {language} programming language, how would you rate the faithfulness of the target to the original reference on a scale of integers from 1 to 10? Higher is better.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+Reference: the reference code that we are judging the target against
+{format_instructions}
+
+Target: {target}
+Reference: {reference}
```
janus/metrics/prompts/hallucination.txt
ADDED
```diff
@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+
+Rubric:
+Does the comment provide true information?
+
+10 - The comment provides only true information.
+6-9 - The comment provides mostly true information.
+2-5 - The comment provides mostly untrue information.
+1 - The comment is completely untrue.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/quality.txt
ADDED
```diff
@@ -0,0 +1,8 @@
+Based on the following target written in the {language} programming language, how would you rate the code quality of the target on a scale of integers from 1 to 10? Higher is better.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/readability.txt
ADDED
```diff
@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+
+Rubric:
+Is the comment clear to read?
+
+10 - The comment is well-written.
+6-9 - The comment has few problems.
+2-5 - The comment has many problems.
+1 - The comment is unreadable.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
janus/metrics/prompts/usefulness.txt
ADDED
```diff
@@ -0,0 +1,16 @@
+Use the following rubric to evaluate the target written in the {language} programming language:
+
+Rubric:
+Is the comment useful?
+
+10 - The comment helps an expert programmer understand the code better.
+6-9 - The comment helps an average programmer understand the code better.
+2-5 - The comment documents only trivial functionality.
+1 - The comment is not useful at any level.
+
+Think through your answer before selecting a rating with the following format:
+
+Target: the target code
+{format_instructions}
+
+Target: {target}
```
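All seven rubric prompts share the same placeholders: `{language}`, `{format_instructions}`, and `{target}` (faithfulness adds `{reference}`). A minimal sketch of how such a template could be rendered with LangChain's `PromptTemplate`; janus's actual prompt-engine wiring may differ, and the filled-in values are illustrative:

```python
from langchain_core.prompts import PromptTemplate

# The clarity prompt from above, reduced to its placeholder structure.
template = PromptTemplate.from_template(
    "Based on the following target written in the {language} programming "
    "language, how would you rate the code clarity of the target on a scale "
    "of integers from 1 to 10? Higher is better.\n\n"
    "{format_instructions}\n\nTarget: {target}"
)
prompt = template.format(
    language="python",
    # In janus this would come from an output parser's get_format_instructions().
    format_instructions="Respond with a single integer.",
    target="def add(a, b):\n    return a + b",
)
print(prompt)
```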
janus/parsers/code_parser.py
CHANGED
```diff
@@ -1,9 +1,8 @@
 import re
 
-from langchain_core.exceptions import OutputParserException
 from langchain_core.messages import BaseMessage
 
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -18,8 +17,9 @@ class CodeParser(JanusParser):
         pattern = rf"```[^\S\r\n]*(?:{self.language}[^\S\r\n]*)?\n?(.*?)\n*```"
         code = re.search(pattern, text, re.DOTALL)
         if code is None:
-            raise …
-                …
+            raise JanusParserException(
+                text,
+                "Code not find code between triple square brackets",
             )
         return str(code.group(1))
 
```
(Removed lines marked `…` were truncated in the source view and are left as gaps.)
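The fence-matching pattern shown in the context lines can be exercised on its own. A short sketch, with `self.language` replaced by a literal for illustration and a made-up model response:

```python
import re

language = "python"
# The exact pattern from the diff context above.
pattern = rf"```[^\S\r\n]*(?:{language}[^\S\r\n]*)?\n?(.*?)\n*```"

response = "Sure, here is the code:\n```python\nprint('hi')\n```"
match = re.search(pattern, response, re.DOTALL)
print(match.group(1))  # print('hi')
```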
janus/parsers/doc_parser.py
CHANGED
```diff
@@ -8,7 +8,7 @@ from langchain_core.messages import BaseMessage
 from langchain_core.pydantic_v1 import BaseModel, Field
 
 from janus.language.block import CodeBlock
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -86,7 +86,7 @@ class MultiDocumentationParser(JanusParser, PydanticOutputParser):
         return str(self.__class__.name)
 
 
-class MadlibsDocumentationParser(JanusParser):
+class ClozeDocumentationParser(JanusParser):
     expected_keys: set[str]
 
     def __init__(self):
@@ -107,11 +107,12 @@ class MadlibsDocumentationParser(JanusParser):
             obj = parse_json_markdown(text)
         except json.JSONDecodeError as e:
             log.debug(f"Invalid JSON object. Output:\n{text}")
-            raise …
+            raise JanusParserException(text, f"Got invalid JSON object. Error: {e}")
 
         if not isinstance(obj, dict):
-            raise …
-                …
+            raise JanusParserException(
+                text,
+                f"Got invalid return object. Expected a dictionary, but got {type(obj)}",
             )
 
         seen_keys = set(obj.keys())
@@ -122,9 +123,10 @@ class MadlibsDocumentationParser(JanusParser):
         if invalid_keys:
             log.debug(f"Invalid keys: {invalid_keys}")
             log.debug(f"Missing keys: {missing_keys}")
-            raise …
+            raise JanusParserException(
+                text,
                 f"Got invalid return object. Missing the following expected "
-                f"keys: {missing_keys}"
+                f"keys: {missing_keys}",
             )
 
         for key in invalid_keys:
@@ -132,9 +134,10 @@ class MadlibsDocumentationParser(JanusParser):
 
         for value in obj.values():
             if not isinstance(value, str):
-                raise …
+                raise JanusParserException(
+                    text,
                     f"Got invalid return object. Expected all string values,"
-                    f' but got type "{type(value)}"'
+                    f' but got type "{type(value)}"',
                 )
 
         return json.dumps(obj)
```
(Removed lines marked `…` were truncated in the source view and are left as gaps; the old class name is recovered from the hunk headers.)
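From the checks above, the renamed `ClozeDocumentationParser` (formerly `MadlibsDocumentationParser`) accepts a JSON object whose keys match its `expected_keys` and whose values are all strings. A sketch of a payload that would pass those checks; the key names are placeholders, not janus's real comment markers:

```python
import json

# Placeholder keys; in janus the expected keys come from the comment
# markers inserted into the source being documented.
obj = {"comment_1": "Initializes the record buffer.", "comment_2": "Flushes output."}

assert isinstance(obj, dict)
assert all(isinstance(v, str) for v in obj.values())
print(json.dumps(obj))
```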
janus/parsers/parser.py
CHANGED
```diff
@@ -1,4 +1,5 @@
 from langchain.schema.output_parser import BaseOutputParser
+from langchain_core.exceptions import OutputParserException
 from langchain_core.messages import BaseMessage
 from langchain_core.output_parsers import StrOutputParser
 
@@ -49,3 +50,9 @@ class GenericParser(JanusParser, StrOutputParser):
 
     def get_format_instructions(self) -> str:
         return "Output should be a string"
+
+
+class JanusParserException(OutputParserException):
+    def __init__(self, unparsed_output, *args, **kwargs):
+        self.unparsed_output = unparsed_output
+        super().__init__(*args, **kwargs)
```
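`JanusParserException` exists so that callers keep the raw LLM response after a parse failure, as the parser diffs above all switch to it. A minimal sketch of catching it, assuming the `CodeParser` constructor takes the `language` attribute shown in its diff; the surrounding wiring is illustrative:

```python
from janus.parsers.code_parser import CodeParser
from janus.parsers.parser import JanusParserException

parser = CodeParser(language="python")
try:
    code = parser.parse("The model forgot to emit a code fence.")
except JanusParserException as e:
    # The unparsed model output survives the failure, so a refiner can feed
    # it back into a reflection prompt instead of discarding the attempt.
    raw = e.unparsed_output
```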
janus/parsers/partition_parser.py
CHANGED
```diff
@@ -9,7 +9,7 @@ from langchain_core.messages import BaseMessage
 from langchain_core.pydantic_v1 import BaseModel, Field
 
 from janus.language.block import CodeBlock
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -97,6 +97,7 @@ class PartitionParser(JanusParser, PydanticOutputParser):
     def parse(self, text: str | BaseMessage) -> str:
         if isinstance(text, BaseMessage):
             text = str(text.content)
+        original_text = text
 
         # Strip everything outside the JSON object
         begin, end = text.find("["), text.rfind("]")
@@ -122,7 +123,7 @@ class PartitionParser(JanusParser, PydanticOutputParser):
                 + ", ".join(invalid_splits)
             )
             log.warning(err_msg)
-            raise …
+            raise JanusParserException(original_text, err_msg)
 
         # Map line IDs to indices (so they can be sorted and lines indexed)
         index_to_line_id = {0: "START", None: "END"}
@@ -160,9 +161,10 @@ class PartitionParser(JanusParser, PydanticOutputParser):
                 "Oversized chunks:\n"
                 + "\n#############\n".join(chunk for _, chunk, _ in data)
             )
-            raise …
+            raise JanusParserException(
+                original_text,
                 f"The following segments are too long and must be "
-                f"further subdivided:\n{problem_points}"
+                f"further subdivided:\n{problem_points}",
             )
 
         return "\n<JANUS_PARTITION>\n".join(chunks)
```
(Removed lines marked `…` were truncated in the source view and are left as gaps.)
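The parser joins accepted chunks with a `<JANUS_PARTITION>` sentinel line, so downstream consumers can split on the same token. The sentinel comes straight from the diff; the sample text is made up:

```python
partitioned = "first chunk\n<JANUS_PARTITION>\nsecond chunk"
chunks = partitioned.split("\n<JANUS_PARTITION>\n")
print(chunks)  # ['first chunk', 'second chunk']
```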
janus/parsers/reqs_parser.py
CHANGED
```diff
@@ -2,10 +2,9 @@ import json
 import re
 
 from langchain.output_parsers.json import parse_json_markdown
-from langchain_core.exceptions import OutputParserException
 from langchain_core.messages import BaseMessage
 
-from janus.parsers.parser import JanusParser
+from janus.parsers.parser import JanusParser, JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -20,6 +19,7 @@ class RequirementsParser(JanusParser):
     def parse(self, text: str | BaseMessage) -> str:
         if isinstance(text, BaseMessage):
             text = str(text.content)
+        original_text = text
 
         # TODO: This is an incorrect implementation (lstrip and rstrip take character
         # lists and strip any instances of those characters, not the full str)
@@ -30,15 +30,18 @@ class RequirementsParser(JanusParser):
             obj = parse_json_markdown(text)
         except json.JSONDecodeError as e:
             log.debug(f"Invalid JSON object. Output:\n{text}")
-            raise …
+            raise JanusParserException(
+                original_text, f"Got invalid JSON object. Error: {e}"
+            )
 
         if not isinstance(obj, dict):
-            raise …
-                …
+            raise JanusParserException(
+                original_text,
+                f"Got invalid return object. Expected a dictionary, but got {type(obj)}",
             )
         return json.dumps(obj)
 
-    def parse_combined_output(self, text: str):
+    def parse_combined_output(self, text: str) -> str:
         """Parse the output text from the LLM when multiple inputs are combined.
 
         Arguments:
@@ -49,10 +52,10 @@ class RequirementsParser(JanusParser):
         """
         json_strings = re.findall(r"\{.*?\}", text)
         output_list = list()
-        for …
+        for _, json_string in enumerate(json_strings, 1):
             json_dict = json.loads(json_string)
             output_list.append(json_dict["requirements"])
-        return output_list
+        return json.dumps(output_list)
 
     def get_format_instructions(self) -> str:
         """Get the format instructions for the parser.
```
(Removed lines marked `…` were truncated in the source view and are left as gaps.)
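`parse_combined_output` now returns a JSON string rather than a Python list, keeping it consistent with `parse`. Its behavior, reproduced standalone with an illustrative input:

```python
import json
import re

# Two requirements objects embedded in surrounding LLM text.
text = '{"requirements": ["R1"]} then {"requirements": ["R2", "R3"]}'
output_list = [
    json.loads(s)["requirements"] for s in re.findall(r"\{.*?\}", text)
]
print(json.dumps(output_list))  # [["R1"], ["R2", "R3"]]
```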