janus-llm 4.3.5__py3-none-any.whl → 4.5.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- janus/__init__.py +1 -1
- janus/cli/aggregate.py +2 -2
- janus/cli/cli.py +6 -0
- janus/cli/constants.py +6 -0
- janus/cli/diagram.py +36 -7
- janus/cli/document.py +10 -1
- janus/cli/llm.py +7 -3
- janus/cli/partition.py +10 -1
- janus/cli/pipeline.py +126 -0
- janus/cli/self_eval.py +10 -3
- janus/cli/translate.py +10 -1
- janus/converter/__init__.py +2 -0
- janus/converter/_tests/test_translate.py +6 -5
- janus/converter/chain.py +100 -0
- janus/converter/converter.py +467 -90
- janus/converter/diagram.py +12 -8
- janus/converter/document.py +17 -7
- janus/converter/evaluate.py +174 -147
- janus/converter/partition.py +6 -11
- janus/converter/passthrough.py +29 -0
- janus/converter/pool.py +74 -0
- janus/converter/requirements.py +7 -40
- janus/converter/translate.py +2 -58
- janus/language/_tests/test_combine.py +1 -0
- janus/language/block.py +115 -5
- janus/llm/model_callbacks.py +6 -0
- janus/llm/models_info.py +19 -0
- janus/metrics/_tests/test_reading.py +48 -4
- janus/metrics/_tests/test_rouge_score.py +5 -11
- janus/metrics/metric.py +47 -124
- janus/metrics/reading.py +48 -28
- janus/metrics/rouge_score.py +21 -34
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/eval_parsers/incose_parser.py +3 -3
- janus/parsers/reqs_parser.py +3 -3
- janus/prompts/templates/cyclic/human.txt +16 -0
- janus/prompts/templates/cyclic/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +1 -1
- janus/prompts/templates/extract_variables/human.txt +5 -0
- janus/prompts/templates/extract_variables/system.txt +1 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA +14 -15
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/RECORD +46 -40
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/WHEEL +1 -1
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/LICENSE +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/entry_points.txt +0 -0
janus/metrics/metric.py
CHANGED
@@ -1,12 +1,13 @@
 import inspect
 import json
-from pathlib import Path
 from typing import Callable, Optional

 import click
 import typer
 from typing_extensions import Annotated

+from janus.cli.constants import CONVERTERS
+from janus.converter.converter import Converter
 from janus.llm import load_model
 from janus.llm.model_callbacks import COST_PER_1K_TOKENS
 from janus.metrics.cli import evaluate
@@ -70,31 +71,6 @@ def metric(
             help="Reference file or string to use as reference/baseline.",
         ),
     ] = None,
-    json_file_name: Annotated[
-        Optional[str],
-        typer.Option(
-            "--json",
-            "-j",
-            help="Json file to extract pairs from \
-            (if set ignores --target and --reference)",
-        ),
-    ] = None,
-    target_key: Annotated[
-        str,
-        typer.Option(
-            "--target-key",
-            "-tk",
-            help="json key to extract list of target strings",
-        ),
-    ] = "target",
-    reference_key: Annotated[
-        str,
-        typer.Option(
-            "--reference-key",
-            "-rk",
-            help="json key to extract list of reference strings",
-        ),
-    ] = "reference",
     file_pairing_method: Annotated[
         str,
         typer.Option(
@@ -123,6 +99,14 @@ def metric(
             is_flag=True,
         ),
     ] = False,
+    use_janus_inputs: Annotated[
+        bool,
+        typer.Option(
+            "-j",
+            "--use-janus-inputs",
+            help="present if janus output files should be evaluated",
+        ),
+    ] = False,
     use_strings: Annotated[
         bool,
         typer.Option(
@@ -137,25 +121,23 @@ def metric(
 ):
     out = []
     llm = load_model(llm_name)
-    if …
-        with open( …
-    [… 15 removed lines truncated in the rendered diff …]
-            pairs[model_key][k] = (model_dict[target_key][k], ref[k])
-    elif target is not None and reference is not None:
+    if use_janus_inputs:
+        with open(target, "r") as f:
+            target_obj = json.load(f)
+        with open(reference, "r") as f:
+            reference_obj = json.load(f)
+        converter_cls = CONVERTERS.get(
+            target_obj["metadata"].get("converter_name", "Converter"),
+            Converter,
+        )
+        out = converter_cls.eval_obj_reference(
+            target=target_obj,
+            reference=reference_obj,
+            metric_func=function,
+            *args,
+            **kwargs,
+        )
+    else:
         if use_strings:
             target_contents = target
             reference_contents = reference
@@ -175,25 +157,6 @@ def metric(
             token_limit=llm.token_limit,
             model_cost=COST_PER_1K_TOKENS[llm.model_id],
         )
-    else:
-        raise ValueError(
-            "Error, specify json or target and reference files/strings"
-        )
-    if isinstance(pairs, dict):
-        out = {}
-        for k in pairs:
-            out[k] = apply_function_pairs(
-                pairs[k],
-                function,
-                progress,
-                language,
-                llm,
-                llm.token_limit,
-                COST_PER_1K_TOKENS[llm.model_id],
-                *args,
-                **kwargs,
-            )
-    else:
         out = apply_function_pairs(
             pairs,
             function,
@@ -205,17 +168,15 @@ def metric(
             *args,
             **kwargs,
         )
-    out_file = Path(out_file)
-    out_file.parent.mkdir(parents=True, exist_ok=True)
     with open(out_file, "w") as f:
+        log.info(f"Writing output to {out_file}")
         json.dump(out, f)
-    log.info(f"Saved results to file: {out_file}")

     sig1 = inspect.signature(function)
     sig2 = inspect.signature(func)
     func.__signature__ = sig2.replace(
         parameters=tuple(
-            list(sig2.parameters.values())[: …
+            list(sig2.parameters.values())[:9]
             + list(sig1.parameters.values())[2:-1]
         )
     )
@@ -241,23 +202,14 @@ def metric(
             "--target", "-t", help="Target file or string to evaluate."
         ),
     ] = None,
-    [… 2 removed lines truncated in the rendered diff …]
+    use_janus_inputs: Annotated[
+        bool,
         typer.Option(
-            "--json",
             "-j",
-    [… 2 removed lines truncated in the rendered diff …]
+            "--use-janus-inputs",
+            help="whether to use a janus output file as input",
         ),
-    ] = …
-    target_key: Annotated[
-        str,
-        typer.Option(
-            "--target-key",
-            "-tk",
-            help="json key to extract list of target strings",
-        ),
-    ] = "target",
+    ] = False,
     splitting_method: Annotated[
         str,
         typer.Option(
@@ -298,25 +250,17 @@ def metric(
     **kwargs,
 ):
     llm = load_model(llm_name)
-    if …
-        with open( …
-    [… 9 removed lines truncated in the rendered diff …]
-            if target_key not in model_dict:
-                continue
-            if model_key not in strings:
-                strings[model_key] = {}
-            for k in model_dict[target_key]:
-                strings[model_key][k] = model_dict[target_key][k]
-            # strings += list(json_obj[key][target_key].values())
-    elif target is not None:
+    if use_janus_inputs:
+        with open(target, "r") as f:
+            target_obj = json.load(f)
+        converter_cls = CONVERTERS.get(
+            target_obj["metadata"].get("converter_name", "Converter"),
+            Converter,
+        )
+        out = converter_cls.eval_obj(
+            target=target_obj, metric_func=function, *args, **kwargs
+        )
+    else:
         if use_strings:
             target_contents = target
         else:
@@ -332,25 +276,6 @@ def metric(
             token_limit=llm.token_limit,
             model_cost=COST_PER_1K_TOKENS[llm.model_id],
         )
-    else:
-        raise ValueError(
-            "Error: must specify either json file or target file/string"
-        )
-    if isinstance(strings, dict):
-        out = {}
-        for k in strings:
-            out[k] = apply_function_strings(
-                strings[k],
-                function,
-                progress,
-                language,
-                llm,
-                llm.token_limit,
-                COST_PER_1K_TOKENS[llm.model_id],
-                *args,
-                **kwargs,
-            )
-    else:
         out = apply_function_strings(
             strings,
             function,
@@ -362,17 +287,15 @@ def metric(
             *args,
             **kwargs,
         )
-    out_file = Path(out_file)
-    out_file.parent.mkdir(parents=True, exist_ok=True)
     with open(out_file, "w") as f:
+        log.info(f"Writing output to {out_file}")
         json.dump(out, f)
-    log.info(f"Saved results to file: {out_file}")

     sig1 = inspect.signature(function)
     sig2 = inspect.signature(func)
     func.__signature__ = sig2.replace(
         parameters=tuple(
-            list(sig2.parameters.values())[: …
+            list(sig2.parameters.values())[:7]
             + list(sig1.parameters.values())[1:-1]
         )
     )
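The rewritten `metric` decorator replaces the old `--json`/`--target-key` plumbing with a single `-j/--use-janus-inputs` flag that dispatches on the converter recorded in a janus output file's metadata. A minimal sketch of that dispatch path, mirroring the calls visible in the diff; the file name `translated.json` and the `exact_match` metric are hypothetical stand-ins:

```python
import json

from janus.cli.constants import CONVERTERS
from janus.converter.converter import Converter


def exact_match(target: str, reference: str, **kwargs) -> float:
    # Toy metric function, for illustration only.
    return float(target == reference)


# "translated.json" stands in for a real janus output file.
with open("translated.json", "r") as f:
    target_obj = json.load(f)

# Resolve the converter class named in the file's metadata,
# falling back to the base Converter when none is recorded.
converter_cls = CONVERTERS.get(
    target_obj["metadata"].get("converter_name", "Converter"), Converter
)
out = converter_cls.eval_obj(target=target_obj, metric_func=exact_match)
```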
janus/metrics/reading.py
CHANGED
@@ -1,8 +1,7 @@
 import re

-import nltk
-import readability
 from nltk.tokenize import TweetTokenizer
+from textstat import textstat

 from janus.metrics.metric import metric

@@ -48,23 +47,9 @@ def _repeat_text(text):
     return repeated_text


-def get_readability(target: str) -> readability.Readability:
-    """Create a Readability object from an input string
-
-    Arguments:
-        target: The target text.
-
-    Returns:
-        py-readability-metrics Readability object for that text
-    """
-    nltk.download("punkt", quiet=True)
-    target = _repeat_text(target)
-    return readability.Readability(target)
-
-
 @metric(use_reference=False, help="The Flesch Readability score")
 def flesch(target: str, **kwargs) -> float:
-    """Calculate the Flesch Score using …
+    """Calculate the Flesch Score using textstat.

     Arguments:
         target: The target text.
@@ -74,12 +59,13 @@ def flesch(target: str, **kwargs) -> float:
     """
     if not target.strip():  # Check if the target text is blank
         return None
-    [… 1 removed line truncated in the rendered diff …]
+    target = _repeat_text(target)
+    return textstat.flesch_reading_ease(target)


 @metric(use_reference=False, help="The Flesch Grade Level Readability score")
 def flesch_grade(target: str, **kwargs) -> float:
-    """Calculate the Flesch Score using …
+    """Calculate the Flesch Score using textstat.

     Arguments:
         target: The target text.
@@ -89,12 +75,13 @@ def flesch_grade(target: str, **kwargs) -> float:
     """
     if not target.strip():  # Check if the target text is blank
         return None
-    [… 1 removed line truncated in the rendered diff …]
+    target = _repeat_text(target)
+    return textstat.flesch_kincaid_grade(target)


 @metric(use_reference=False, help="The Gunning-Fog Readability score")
 def gunning_fog(target: str, **kwargs) -> float:
-    """Calculate the Gunning-Fog Score using …
+    """Calculate the Gunning-Fog Score using textstat.

     Arguments:
         target: The target text.
@@ -104,20 +91,53 @@ def gunning_fog(target: str, **kwargs) -> float:
     """
     if not target.strip():  # Check if the target text is blank
         return None
-    [… 1 removed line truncated in the rendered diff …]
+    target = _repeat_text(target)
+    return textstat.gunning_fog(target)


-@metric(use_reference=False, help="The …
-def …
-    """Calculate the …
+@metric(use_reference=False, help="The Dale-Chall Readability score")
+def dale_chall(target: str, **kwargs) -> float:
+    """Calculate the Dale-Chall Readability Score using textstat.

     Arguments:
         target: The target text.

     Returns:
-        The …
+        The Dale-Chall score.
     """
     if not target.strip():  # Check if the target text is blank
         return None
-    [… 1 removed line truncated in the rendered diff …]
-    return …
+    target = _repeat_text(target)
+    return textstat.dale_chall_readability_score_v2(target)
+
+
+@metric(use_reference=False, help="The Automated Readability Index")
+def automated_readability(target: str, **kwargs) -> float:
+    """Calculate the Automated Readability Index using textstat.
+
+    Arguments:
+        target: The target text.
+
+    Returns:
+        The Automated Readability score.
+    """
+    if not target.strip():  # Check if the target text is blank
+        return None
+    target = _repeat_text(target)
+    return textstat.automated_readability_index(target)
+
+
+@metric(use_reference=False, help="The Coleman-Liau Index")
+def coleman_liau(target: str, **kwargs) -> float:
+    """Calculate the Coleman-Liau Index using textstat.
+
+    Arguments:
+        target: The target text.
+
+    Returns:
+        The Coleman-Liau Index.
+    """
+    if not target.strip():  # Check if the target text is blank
+        return None
+    target = _repeat_text(target)
+    return textstat.coleman_liau_index(target)
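All readability metrics now delegate to textstat instead of py-readability-metrics, which also drops the nltk `punkt` download; `_repeat_text` still pads short inputs before scoring. The calls reduce to direct textstat functions, as in this short sketch (the sample sentence is made up):

```python
from textstat import textstat

text = "The system shall log every failed login attempt."
print(textstat.flesch_reading_ease(text))            # higher = easier to read
print(textstat.flesch_kincaid_grade(text))           # U.S. school grade level
print(textstat.gunning_fog(text))
print(textstat.dale_chall_readability_score_v2(text))
print(textstat.coleman_liau_index(text))
```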
janus/metrics/rouge_score.py
CHANGED
@@ -1,7 +1,6 @@
 import click
-import nltk
 import typer
-from …
+from rouge_score import rouge_scorer
 from typing_extensions import Annotated

 from janus.metrics.metric import metric

@@ -18,9 +17,9 @@ def rouge(
         "-g",
         help=(
             "The granularity of the ROUGE score. `n` refers to "
-            "ROUGE-N, `l` refers to ROUGE-L …
+            "ROUGE-N, `l` refers to ROUGE-L."
         ),
-        click_type=click.Choice(["n", "l" …
+        click_type=click.Choice(["n", "l"]),
     ),
 ] = "n",
 n_gram: Annotated[
@@ -52,7 +51,7 @@ def rouge(
         target: The target text.
         reference: The reference text.
         granularity: The granularity of the ROUGE score. `n` refers to ROUGE-N, `l`
-            refers to ROUGE-L …
+            refers to ROUGE-L.
         n_gram: The n-gram overlap calculated for ROUGE-N. Can be an integer.
         score_type: Whether to use the F-score, precision, or recall. For example, `f`
             refers to the F-score, `p` refers to precision, and `r` refers to recall.
@@ -60,37 +59,25 @@ def rouge(
     Returns:
         The ROUGE score.
     """
-    nltk.download("punkt", quiet=True)
-
     if granularity.lower() == "n":
-        metric_name = "rouge …
-        metric_name_output = f"rouge-{n_gram}"
-        max_n = n_gram
+        metric_name = f"rouge{n_gram}"
     elif granularity.lower() == "l":
-        metric_name = " …
-        metric_name_output = "rouge-l"
-        max_n = 4
-    elif granularity.lower() == "w":
-        metric_name = "rouge-w"
-        metric_name_output = "rouge-w"
-        max_n = 4
+        metric_name = "rougeL"
     else:
-        raise ValueError("Invalid granularity. Must be one of `n …
-
-    if score_type.lower() not in ["f", "p", "r"]:
-        raise ValueError("Invalid score type. Must be one of `f`, `p`, or `r`.")
+        raise ValueError("Invalid granularity. Must be one of `n` or `l`")

-    evaluator = …
-    [… 2 removed lines truncated in the rendered diff …]
-        limit_length=False,
-        length_limit=1_000,
-        length_limit_type="words",
-        apply_avg=False,
-        apply_best=False,
-        alpha=0.5,  # Default F1_score
-        weight_factor=1.2,
-        stemming=True,
+    evaluator = rouge_scorer.RougeScorer(
+        [metric_name],
+        use_stemmer=True,
     )
-    scores = evaluator. …
-
+    scores = evaluator.score(target, reference)
+    scores_fpr = scores[metric_name]
+    if score_type.lower() == "f":
+        score = scores_fpr.fmeasure
+    elif score_type.lower() == "p":
+        score = scores_fpr.precision
+    elif score_type.lower() == "r":
+        score = scores_fpr.recall
+    else:
+        raise ValueError("Invalid score type. Must be one of `f`, `p`, or `r`.")
+    return score
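The metric now uses Google's rouge-score package in place of py-rouge: granularity maps onto scorer keys (`rouge1`, `rouge2`, ..., `rougeL`), and each result is a named tuple exposing precision, recall, and F-measure. A small usage sketch with made-up strings:

```python
from rouge_score import rouge_scorer

# Request ROUGE-2 and ROUGE-L in one pass, with stemming as in the diff.
scorer = rouge_scorer.RougeScorer(["rouge2", "rougeL"], use_stemmer=True)
scores = scorer.score(
    target="the cat sat on the mat",             # reference text
    prediction="a cat was sitting on the mat",   # candidate text
)
print(scores["rouge2"].fmeasure)
print(scores["rougeL"].recall)
```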
janus/parsers/_tests/test_code_parser.py
CHANGED
@@ -25,7 +25,7 @@ class TestCodeParser(unittest.TestCase):
     def test_get_format_instructions(self):
         self.assertEqual(
             self.parser.get_format_instructions(),
-            "Output must contain text contained within triple …
+            "Output must contain text contained within triple backticks (```)",
         )

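The test pins the parser's format instructions to triple backticks. A rough sketch of the kind of extraction `CodeParser.parse` performs; the regex here is an assumption for illustration, not the parser's actual pattern:

```python
import re

text = "Here is the code:\n```python\nprint('hi')\n```"
# Skip an optional language tag after the opening fence, capture until the
# closing fence (hypothetical pattern).
match = re.search(r"```[^\n]*\n(.*?)```", text, re.DOTALL)
if match:
    print(match.group(1))  # -> print('hi')
```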
janus/parsers/code_parser.py
CHANGED
@@ -19,9 +19,9 @@ class CodeParser(JanusParser):
         if code is None:
             raise JanusParserException(
                 text,
-                "Code not find code between triple …
+                "Code not find code between triple backticks",
             )
         return str(code.group(1))

     def get_format_instructions(self) -> str:
-        return "Output must contain text contained within triple …
+        return "Output must contain text contained within triple backticks (```)"
janus/parsers/eval_parsers/incose_parser.py
CHANGED
@@ -70,7 +70,6 @@ class IncoseParser(JanusParser, PydanticOutputParser):

         obj = json.loads(text)

-        # For some reason requirements objects are in a double list?
         reqs = obj["requirements"]

         # Generate a unique ID for each requirement (ensure they are unique)
@@ -91,10 +90,11 @@ class IncoseParser(JanusParser, PydanticOutputParser):

         # Strip everything outside the JSON object
         begin, end = text.find("["), text.rfind("]")
-        [… 1 removed line truncated in the rendered diff …]
+        end += 1 if end != -1 else 0
+        text = text[begin:end]

         try:
-            out: RequirementList = super().parse(text)
+            out: RequirementList = super(IncoseParser, self).parse(text)
         except json.JSONDecodeError as e:
             log.debug(f"Invalid JSON object. Output:\n{text}")
             raise OutputParserException(f"Got invalid JSON object. Error: {e}")
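The bracket-stripping fix matters because `str.rfind` returns the index of `]` itself while Python slices exclude their end index, so without the `end += 1` bump the closing bracket was cut off and the JSON parse was guaranteed to fail. A toy demonstration (input string fabricated):

```python
text = 'LLM preamble [{"requirement_id": "ab12cd34"}] trailing chatter'
begin, end = text.find("["), text.rfind("]")
end += 1 if end != -1 else 0  # include the closing bracket in the slice
print(text[begin:end])        # -> [{"requirement_id": "ab12cd34"}]
```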
janus/parsers/reqs_parser.py
CHANGED
@@ -41,7 +41,7 @@ class RequirementsParser(JanusParser):
         )
         return json.dumps(obj)

-    def parse_combined_output(self, text: str):
+    def parse_combined_output(self, text: str) -> str:
         """Parse the output text from the LLM when multiple inputs are combined.

         Arguments:
@@ -52,10 +52,10 @@ class RequirementsParser(JanusParser):
         """
         json_strings = re.findall(r"\{.*?\}", text)
         output_list = list()
-        for …
+        for _, json_string in enumerate(json_strings, 1):
             json_dict = json.loads(json_string)
             output_list.append(json_dict["requirements"])
-        return output_list
+        return json.dumps(output_list)

     def get_format_instructions(self) -> str:
         """Get the format instructions for the parser.
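`parse_combined_output` now returns a JSON string instead of a raw Python list, matching the new `-> str` annotation and keeping its output type consistent with the rest of the parser. A toy run of the same logic (input fabricated):

```python
import json
import re

text = '{"requirements": ["R1", "R2"]} {"requirements": ["R3"]}'
# Pull out each non-nested JSON object, then collect its requirements list.
json_strings = re.findall(r"\{.*?\}", text)
output_list = [json.loads(s)["requirements"] for s in json_strings]
print(json.dumps(output_list))  # -> [["R1", "R2"], ["R3"]]
```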
janus/prompts/templates/cyclic/human.txt
ADDED
@@ -0,0 +1,16 @@
+You are tasked with generating code in the {TARGET_LANGUAGE} language given a list of requirements.
+
+
+1. Read all requirements.
+2. Write code that addresses all requirments, ensuring that all mentioned conditions are met.
+3. Adhere to the coding conventions and best practices of the {TARGET_LANGUAGE} language.
+4. Ensure the code is correct, well-structured, and includes comments for readability.
+5. The code you provide should be succienct, concise, and runable.
+
+Here are the requirements for the code:
+
+```
+{SOURCE_CODE}
+```
+
+Don't forget to include your final code between triple backticks!
janus/prompts/templates/cyclic/system.txt
ADDED
@@ -0,0 +1 @@
+Your task is to generate code based on the provided requirements. The code should be written in the {TARGET_LANGUAGE} language. Make sure the code adheres to best practices, is efficient, and well-documented.
janus/prompts/templates/eval_prompts/incose/human.txt
CHANGED
@@ -17,7 +17,7 @@ C9 - Conforming: Individual needs and requirements should conform to an approved …

 For each and every requirement below, you must indicate whether they "pass" or "fail" each of the above criteria. Briefly explain your reasoning before providing each pass/fail.

-Your response should be formatted as a list of JSON objects, with each object corresponding to one requirement. Each object should include 10 keys: `requirement_id`, `C1`, `C2`, ..., `C9`. `requirement_id` should have a string value that holds the 8-character UUID associated with the requirement. The other four values should each be a JSON object with two keys: `reasoning` (a clear explanation of why the criterion is passed or failed) and a `score` (the literal string "pass" or "fail").
+Your response should be formatted as a list of JSON objects, with each object corresponding to one requirement. Each object should include 10 keys: `requirement_id`, `C1`, `C2`, ..., `C9`. `requirement_id` should have a string value that holds the 8-character UUID associated with the requirement. The other four values should each be a JSON object with two keys: `reasoning` (a clear explanation of why the criterion is passed or failed) and a `score` (the literal string "pass" or "fail"). You should also include the requirement itself as a string value for the key `requirement`.

 Be discerning in your evaluation; only very high-quality requirements should pass all criteria. Be a hard grader. If a requirement fails a criterion, be thorough and detailed in your explanation of why.
janus/prompts/templates/extract_variables/system.txt
ADDED
@@ -0,0 +1 @@
+You are a senior software engineer named John and tasked with creating intermediate products of {SOURCE_LANGUAGE} code.
{janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: janus-llm
-Version: 4.3.5
+Version: 4.5.4
 Summary: A transcoding library using LLMs.
 License: Apache 2.0
 Author: Michael Doyle
@@ -24,22 +24,21 @@ Requires-Dist: langchain-community (>=0.2.0,<0.3.0)
 Requires-Dist: langchain-core (>=0.2.0,<0.3.0)
 Requires-Dist: langchain-openai (>=0.1.8,<0.2.0)
 Requires-Dist: langchain-unstructured (>=0.1.2,<0.2.0)
-Requires-Dist: nltk (>=3.8.1,<4.0.0)
 Requires-Dist: numpy (>=1.24.3,<2.0.0)
 Requires-Dist: openai (>=1.14.0,<2.0.0)
 Requires-Dist: pi-heif (>=0.20.0,<0.21.0)
-Requires-Dist: py-readability-metrics (>=1.4.5,<2.0.0)
-Requires-Dist: py-rouge (>=1.1,<2.0)
 Requires-Dist: pybind11 (>=2.13.6,<3.0.0)
 Requires-Dist: pytesseract (>=0.3.13,<0.4.0)
 Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
+Requires-Dist: rouge-score (>=0.1.2,<0.2.0)
 Requires-Dist: sacrebleu (>=2.4.1,<3.0.0)
 Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
 Requires-Dist: sentence-transformers (>=2.6.1,<3.0.0) ; extra == "hf-local" or extra == "all"
 Requires-Dist: setuptools (>=75.6.0,<76.0.0)
 Requires-Dist: tesseract (>=0.1.3,<0.2.0)
 Requires-Dist: text-generation (>=0.6.0,<0.7.0)
+Requires-Dist: textstat (>=0.7.5,<0.8.0)
 Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
 Requires-Dist: transformers (>=4.31.0,<5.0.0)
 Requires-Dist: tree-sitter (>=0.21.0,<0.22.0)
@@ -53,7 +52,7 @@ Description-Content-Type: text/markdown

 <p align="center">
-  <img src="assets/icons/logo_horizontal.png">
+  <img src="https://raw.githubusercontent.com/janus-llm/janus-llm/public/assets/icons/logo_horizontal.png">
 </p>
 <p align="center">
   <a href="https://github.com/janus-llm/janus-llm/actions/workflows/pages.yml" target="_blank">
@@ -78,16 +77,12 @@ Description-Content-Type: text/markdown
 Janus (`janus-llm`) uses LLMs to aid in the modernization of legacy IT systems. The repository can currently do the following:

 1. Chunk code of over 100 programming languages to fit within different model context windows and add to a [Chroma](https://trychroma.com) vector database.
-2. Translate from one programming language to another on a file-by-file basis using an LLM
-3. Translate from a binary file to a programming language using Ghidra decompilation.
-4. …
+2. Translate from one programming language to another on a file-by-file basis using an LLM.
+3. Translate from a binary file to a programming language using [Ghidra](https://github.com/NationalSecurityAgency/ghidra) decompilation.
+4. Generate requirements, UML diagrams, code comments, and summaries from source code.
+5. Evaluate the products that you generate.
+6. Do 1-5 with a CLI tool (`janus`).

-## Roadmap
-
-### Priorities
-
-1. Scripts interacting with Chroma Vector DB for RAG translation and understanding.
-2. Evaluation of outputs in CLI using LLM self-evaluation or static analysis.

 ## Installation

@@ -111,10 +106,14 @@ export PATH=$PATH:$HOME/.local/bin
 poetry install
 ```

+### Documentation
+
+See [the documentation](https://janus-llm.github.io/janus-llm) for more information on how to use the package.
+
 ### Contributing

 See our [contributing pages](https://janus-llm.github.io/janus-llm/contributing.html)

 ### Copyright
-Copyright © …
+Copyright ©2025 The MITRE Corporation. ALL RIGHTS RESERVED. Approved for Public Release; Distribution Unlimited. Public Release Case Number 23-4084.