janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/__main__.py +1 -1
- janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
- janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
- janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
- janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
- janus/_tests/test_cli.py +3 -2
- janus/cli/aggregate.py +135 -0
- janus/cli/cli.py +117 -0
- janus/cli/constants.py +49 -0
- janus/cli/database.py +289 -0
- janus/cli/diagram.py +207 -0
- janus/cli/document.py +183 -0
- janus/cli/embedding.py +122 -0
- janus/cli/llm.py +191 -0
- janus/cli/partition.py +134 -0
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +147 -0
- janus/cli/translate.py +192 -0
- janus/converter/__init__.py +1 -1
- janus/converter/_tests/test_translate.py +7 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +444 -153
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +27 -16
- janus/converter/evaluate.py +143 -144
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +3 -59
- janus/embedding/collections.py +1 -1
- janus/language/alc/_tests/alc.asm +3779 -0
- janus/language/binary/_tests/hello.bin +0 -0
- janus/language/block.py +78 -14
- janus/language/file.py +1 -1
- janus/language/mumps/_tests/mumps.m +235 -0
- janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
- janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
- janus/language/treesitter/_tests/languages/matlab.m +225 -0
- janus/llm/models_info.py +9 -1
- janus/metrics/_tests/asm_test_file.asm +10 -0
- janus/metrics/_tests/mumps_test_file.m +6 -0
- janus/metrics/_tests/test_treesitter_metrics.py +1 -1
- janus/metrics/metric.py +47 -124
- janus/metrics/prompts/clarity.txt +8 -0
- janus/metrics/prompts/completeness.txt +16 -0
- janus/metrics/prompts/faithfulness.txt +10 -0
- janus/metrics/prompts/hallucination.txt +16 -0
- janus/metrics/prompts/quality.txt +8 -0
- janus/metrics/prompts/readability.txt +16 -0
- janus/metrics/prompts/usefulness.txt +16 -0
- janus/parsers/code_parser.py +4 -4
- janus/parsers/doc_parser.py +12 -9
- janus/parsers/parser.py +7 -0
- janus/parsers/partition_parser.py +6 -4
- janus/parsers/reqs_parser.py +11 -8
- janus/parsers/uml.py +5 -4
- janus/prompts/prompt.py +2 -2
- janus/prompts/templates/README.md +30 -0
- janus/prompts/templates/basic_aggregation/human.txt +6 -0
- janus/prompts/templates/basic_aggregation/system.txt +1 -0
- janus/prompts/templates/basic_refinement/human.txt +14 -0
- janus/prompts/templates/basic_refinement/system.txt +1 -0
- janus/prompts/templates/diagram/human.txt +9 -0
- janus/prompts/templates/diagram/system.txt +1 -0
- janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
- janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
- janus/prompts/templates/document/human.txt +10 -0
- janus/prompts/templates/document/system.txt +1 -0
- janus/prompts/templates/document_cloze/human.txt +11 -0
- janus/prompts/templates/document_cloze/system.txt +1 -0
- janus/prompts/templates/document_cloze/variables.json +4 -0
- janus/prompts/templates/document_cloze/variables_asm.json +4 -0
- janus/prompts/templates/document_inline/human.txt +13 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
- janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
- janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
- janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
- janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
- janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
- janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
- janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
- janus/prompts/templates/multidocument/human.txt +15 -0
- janus/prompts/templates/multidocument/system.txt +1 -0
- janus/prompts/templates/partition/human.txt +22 -0
- janus/prompts/templates/partition/system.txt +1 -0
- janus/prompts/templates/partition/variables.json +4 -0
- janus/prompts/templates/pseudocode/human.txt +7 -0
- janus/prompts/templates/pseudocode/system.txt +7 -0
- janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
- janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
- janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
- janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
- janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
- janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
- janus/prompts/templates/refinement/hallucination/human.txt +13 -0
- janus/prompts/templates/refinement/hallucination/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/human.txt +15 -0
- janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
- janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
- janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/reflection/system.txt +1 -0
- janus/prompts/templates/refinement/revision/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
- janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
- janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
- janus/prompts/templates/refinement/revision/system.txt +1 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
- janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
- janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
- janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
- janus/prompts/templates/requirements/human.txt +13 -0
- janus/prompts/templates/requirements/system.txt +2 -0
- janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
- janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
- janus/prompts/templates/simple/human.txt +16 -0
- janus/prompts/templates/simple/system.txt +3 -0
- janus/refiners/format.py +49 -0
- janus/refiners/refiner.py +113 -4
- janus/utils/enums.py +127 -112
- janus/utils/logger.py +2 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
- janus_llm-4.4.5.dist-info/RECORD +210 -0
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
- janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
- janus/cli.py +0 -1488
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- janus_llm-4.3.1.dist-info/RECORD +0 -115
- janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
- {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/cli/self_eval.py
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
|
12
|
+
|
13
|
+
def llm_self_eval(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be evaluated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option("--output", "-o", help="The directory to store the evaluations in."),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    evaluation_type: Annotated[
        str,
        typer.Option(
            "--evaluation-type",
            "-e",
            help="Type of output to evaluate.",
            click_type=click.Choice(["incose", "comments"]),
        ),
    ] = "incose",
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit literal concatenation instead of a backslash continuation,
            # so no source indentation leaks into the rendered help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    eval_items_per_request: Annotated[
        Optional[int],
        typer.Option(
            "--eval-items-per-request",
            "-rc",
            help="The maximum number of evaluation items per request",
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
):
    """Evaluate source files with an LLM-backed evaluator."""
    # Per-option documentation lives in the `help=` strings above.
    #
    # Raises:
    #     ValueError: if `evaluation_type` is not "incose" or "comments". The CLI
    #         already restricts this via click.Choice; the check covers direct
    #         Python calls, which previously hit an unbound `evaluator` variable.
    #     KeyError: if a name in `refiner_types` is not a key of REFINERS.

    # Imported lazily, as in the original, so importing this module stays cheap.
    from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator

    # The evaluation type selects which evaluator class is instantiated.
    evaluator_classes = {
        "incose": RequirementEvaluator,
        "comments": InlineCommentEvaluator,
    }
    try:
        evaluator_cls = evaluator_classes[evaluation_type]
    except KeyError:
        raise ValueError(
            f"Unsupported evaluation type: {evaluation_type!r}; "
            f"expected one of {sorted(evaluator_classes)}"
        ) from None

    # Map the refiner names given on the command line to their REFINERS entries.
    resolved_refiners = [REFINERS[r] for r in refiner_types]

    evaluator = evaluator_cls(
        eval_items_per_request=eval_items_per_request,
        model=llm_name,
        model_arguments=dict(temperature=temperature),
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        splitter_type=splitter_type,
        refiner_types=resolved_refiners,
    )
    evaluator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|
janus/cli/translate.py
ADDED
@@ -0,0 +1,192 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import click
|
5
|
+
import typer
|
6
|
+
from typing_extensions import Annotated
|
7
|
+
|
8
|
+
from janus.cli.constants import REFINERS
|
9
|
+
from janus.language.naive.registry import CUSTOM_SPLITTERS
|
10
|
+
from janus.utils.enums import LANGUAGES
|
11
|
+
from janus.utils.logger import create_logger
|
12
|
+
|
13
|
+
log = create_logger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
def translate(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    source_lang: Annotated[
        str,
        typer.Option(
            "--source-language",
            "-s",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    target_lang: Annotated[
        str,
        typer.Option(
            "--target-language",
            "-t",
            # Each fragment ends with a space so the concatenated help text does
            # not fuse words ("plaintextresults", "`java-10`,text.").
            help="The desired output language to translate the source code to. The "
            "format can follow a 'language-version' syntax. Use 'text' to get plaintext "
            "results as returned by the LLM. Examples: `python-3.10`, `mumps`, `java-10`, "
            "text.",
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    skip_context: Annotated[
        bool,
        typer.Option(
            "--skip-context",
            help="Prompts will include any context information associated with source"
            " code blocks, unless this option is specified",
        ),
    ] = False,
    temp: Annotated[
        float,
        typer.Option("--temperature", "-T", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    prompt_template: Annotated[
        str,
        typer.Option(
            "--prompt-template",
            "-p",
            help="Name of the Janus prompt template directory or "
            "path to a directory containing those template files.",
        ),
    ] = "simple",
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit literal concatenation instead of a backslash continuation,
            # so no source indentation leaks into the rendered help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
    use_janus_inputs: Annotated[
        bool,
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if translator should use janus files as inputs",
        ),
    ] = False,
):
    """Translate source files from one language to another using an LLM."""
    # Per-option documentation lives in the `help=` strings above.
    #
    # Raises:
    #     ValueError: if source and target language match (case-insensitively)
    #         and the input and output directories are the same location.
    #     KeyError: if a name in `refiner_types` is not a key of REFINERS.
    #
    # NOTE(review): `skip_context` is accepted but never forwarded to the
    # Translator in this function - confirm whether that is intentional.

    # Imported lazily, as in the original, so importing this module stays cheap.
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.translate import Translator

    # Map the refiner names given on the command line to their REFINERS entries.
    resolved_refiners = [REFINERS[r] for r in refiner_types]

    # "language-version" splits into exactly two parts; any other shape (no dash,
    # or more than one dash) is treated as a bare language name with no version.
    try:
        target_language, target_version = target_lang.split("-")
    except ValueError:
        target_language = target_lang
        target_version = None

    # Make sure the output would not overwrite the input. Paths are resolved so
    # that different spellings of the same directory (e.g. "a/../a" vs "a") are
    # still recognised as identical.
    same_language = source_lang.lower() == target_language.lower()
    same_location = Path(input_dir).resolve() == Path(output_dir).resolve()
    if same_language and same_location:
        log.error("Output files would overwrite input! Aborting...")
        raise ValueError(
            "Output files would overwrite input: source and target language are "
            f"both {source_lang!r} and the input and output directories are the same "
            f"({input_dir})"
        )

    translator = Translator(
        model=llm_name,
        model_arguments=dict(temperature=temp),
        source_language=source_lang,
        target_language=target_language,
        target_version=target_version,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        prompt_templates=prompt_template,
        db_path=db_loc,
        db_config=get_collections_config(),
        splitter_type=splitter_type,
        refiner_types=resolved_refiners,
        retriever_type=retriever_type,
        use_janus_inputs=use_janus_inputs,
    )
    translator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|
janus/converter/__init__.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from janus.converter.converter import Converter
|
2
2
|
from janus.converter.diagram import DiagramGenerator
|
3
|
-
from janus.converter.document import
|
3
|
+
from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
|
4
4
|
from janus.converter.evaluate import Evaluator
|
5
5
|
from janus.converter.partition import Partitioner
|
6
6
|
from janus.converter.requirements import RequirementsDocumenter
|
@@ -11,6 +11,7 @@ from janus.converter.diagram import DiagramGenerator
|
|
11
11
|
from janus.converter.requirements import RequirementsDocumenter
|
12
12
|
from janus.converter.translate import Translator
|
13
13
|
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
14
|
+
from janus.refiners.format import CodeFormatRefiner
|
14
15
|
|
15
16
|
|
16
17
|
class MockCollection(VectorStore):
|
@@ -50,6 +51,7 @@ class TestTranslator(unittest.TestCase):
|
|
50
51
|
target_language="python",
|
51
52
|
target_version="3.10",
|
52
53
|
splitter_type="ast-flex",
|
54
|
+
refiner_types=[CodeFormatRefiner],
|
53
55
|
)
|
54
56
|
self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
|
55
57
|
self.TEST_FILE_EMBEDDING_COUNT = 14
|
@@ -57,14 +59,14 @@ class TestTranslator(unittest.TestCase):
|
|
57
59
|
self.req_translator = RequirementsDocumenter(
|
58
60
|
model="gpt-4o-mini",
|
59
61
|
source_language="fortran",
|
60
|
-
|
62
|
+
prompt_templates="requirements",
|
61
63
|
)
|
62
64
|
|
63
65
|
@pytest.mark.translate
|
64
66
|
def test_translate(self):
|
65
67
|
"""Test translate method."""
|
66
68
|
# Delete a file if it's already there
|
67
|
-
python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.
|
69
|
+
python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.json"
|
68
70
|
python_file.unlink(missing_ok=True)
|
69
71
|
python_file.parent.rmdir() if python_file.parent.is_dir() else None
|
70
72
|
self.translator.translate(self.test_file.parent, self.test_file.parent / "python")
|
@@ -80,7 +82,7 @@ class TestTranslator(unittest.TestCase):
|
|
80
82
|
self.assertRaises(
|
81
83
|
ValueError, self.translator.set_source_language, "scribbledy-doop"
|
82
84
|
)
|
83
|
-
self.translator.
|
85
|
+
self.translator.set_prompts(["pish posh"])
|
84
86
|
self.assertRaises(ValueError, self.translator._load_parameters)
|
85
87
|
|
86
88
|
|
@@ -147,10 +149,10 @@ def test_language_combinations(
|
|
147
149
|
translator.set_model("gpt-4o")
|
148
150
|
translator.set_source_language(source_language)
|
149
151
|
translator.set_target_language(expected_target_language, expected_target_version)
|
150
|
-
translator.
|
152
|
+
translator.set_prompts(prompt_template)
|
151
153
|
translator._load_parameters()
|
152
154
|
assert translator._target_language == expected_target_language # nosec
|
153
155
|
assert translator._target_version == expected_target_version # nosec
|
154
156
|
assert translator._splitter.language == source_language # nosec
|
155
157
|
assert translator._splitter.model.model_name == "gpt-4o" # nosec
|
156
|
-
assert translator.
|
158
|
+
assert translator._prompt_template_names == [prompt_template] # nosec
|
janus/converter/chain.py
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
from janus.converter.converter import Converter
|
4
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
5
|
+
from janus.utils.logger import create_logger
|
6
|
+
|
7
|
+
log = create_logger(__name__)
|
8
|
+
|
9
|
+
|
10
|
+
class ConverterChain(Converter):
    """
    Class for representing multiple converters chained together

    The first converter consumes the original input; each later converter
    consumes the output of the one before it.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Build a chain from one or more converters.

        Arguments:
            *args: The converters to chain, in execution order.
            **kwargs: Forwarded to `Converter.__init__`. The source language,
                target language/version and `use_janus_inputs` entries are
                overwritten with values taken from the first/last converter.

        Raises:
            ValueError: If no converters are given, or if any positional
                argument is not a `Converter` instance.
        """
        if len(args) == 0:
            raise ValueError("Error: Converter chain must be passed at least 1 converter")
        for converter in args:
            if not isinstance(converter, Converter):
                raise ValueError(f"Error: unrecognized type: {type(converter)}")
        self._converters = args
        # The chain reads what the first converter reads and emits what the
        # last converter emits.
        kwargs.update(
            source_language=self._converters[0].source_language,
            target_language=self._converters[-1]._target_language,
            target_version=self._converters[-1]._target_version,
            use_janus_inputs=self._converters[0]._use_janus_inputs,
        )
        super().__init__(**kwargs)

    def _log_chain_failure(self, step: int) -> None:
        """Log that the converter at index `step` failed to translate."""
        log.info(
            f"Error: chain failed to translate at step {step}: "
            f"{self._converters[step].__class__.__name__}"
        )

    def _run_converters(
        self, translated_code_block, name: str, failure_path: Path | None = None
    ):
        """Feed the first converter's output through the remaining converters.

        Arguments:
            translated_code_block: The output of the first converter
            name: The name of the file
            failure_path: The path to write the failure file to

        Returns:
            The output of the last converter that ran. If a converter fails
            (its output is not marked translated), the chain stops there and
            that failed block is returned.
        """
        # `i` indexes the converter that produced the current block; `converter`
        # is the next one in the chain (index i + 1).
        for i, converter in enumerate(self._converters[1:]):
            if not translated_code_block.translated:
                # Return instead of break: falling through to the final check
                # would log a second failure that blames the last converter.
                self._log_chain_failure(i)
                return translated_code_block
            if converter._use_janus_inputs:
                # The next converter wants the previous converter's output object.
                janus_obj = self._converters[i]._get_output_obj(translated_code_block)
                translated_code_block = converter.translate_janus_obj(
                    janus_obj, name, failure_path
                )
            else:
                translated_code_block = converter.translate_block(
                    translated_code_block.to_codeblock(), name, failure_path
                )

        # Only reached when every intermediate step succeeded, so an
        # untranslated block here is the last converter's failure.
        if not translated_code_block.translated:
            self._log_chain_failure(len(self._converters) - 1)

        return translated_code_block

    def translate_file(
        self, file: Path, failure_path: Path | None = None
    ) -> TranslatedCodeBlock:
        """Translate a file using the chain of converters

        Arguments:
            file: The file to translate
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        first_output = self._converters[0].translate_file(file, failure_path)
        return self._run_converters(first_output, file.name, failure_path)

    def translate_text(
        self, text: str, name: str, failure_path: Path | None = None
    ) -> TranslatedCodeBlock:
        """Translate a text using the chain of converters

        Arguments:
            text: The text to translate
            name: The name of the file
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        first_output = self._converters[0].translate_text(text, name, failure_path)
        return self._run_converters(first_output, name, failure_path)

    def translate_block(
        self,
        input_block: CodeBlock | list[CodeBlock],
        name: str,
        failure_path: Path | None = None,
    ) -> TranslatedCodeBlock:
        """Translate a block of code using the chain of converters

        Arguments:
            input_block: The block of code to translate
            name: The name of the file
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        first_output = self._converters[0].translate_block(
            input_block, name, failure_path
        )
        return self._run_converters(first_output, name, failure_path)

    @staticmethod
    def _sum_previous_generations(generations, attr: str, key: str):
        """Total one metric over a block's previous generations.

        Each entry is either a `TranslatedCodeBlock` (the metric is read from
        attribute `attr`) or an already-serialized output object (the metric
        is read from `entry["metadata"][key]`).
        """
        return sum(
            getattr(g, attr) if isinstance(g, TranslatedCodeBlock) else g["metadata"][key]
            for g in generations
        )

    def _get_output_obj(
        self, block: TranslatedCodeBlock | list, combine_children: bool = True
    ) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
        """Build the output object for the chain's final block.

        Extends the base-class output object with an "intermediate_outputs"
        list, adds the cost/time/request/token figures of the block's previous
        generations to the metadata, and sets "input" from the first previous
        generation when there is one.

        Arguments:
            block: The final translated block of the chain
            combine_children: Forwarded to the base-class implementation

        Returns:
            The output object for the whole chain
        """
        output_obj = super()._get_output_obj(block, combine_children)
        generations = block.previous_generations

        # Entry i of previous_generations is serialized with converter i;
        # entries that are not TranslatedCodeBlocks are passed through as-is.
        intermediate_outputs = [
            (
                self._converters[i]._get_output_obj(generation)
                if isinstance(generation, TranslatedCodeBlock)
                else generation
            )
            for i, generation in enumerate(generations)
        ]
        intermediate_outputs.append(self._converters[-1]._get_output_obj(block))
        output_obj["intermediate_outputs"] = intermediate_outputs

        # Metadata key -> name of the matching TranslatedCodeBlock attribute.
        metric_attributes = {
            "cost": "cost",
            "processing_time": "processing_time",
            "num_requests": "total_num_requests",
            "input_tokens": "total_request_input_tokens",
            "output_tokens": "total_request_output_tokens",
        }
        metadata = output_obj["metadata"]
        for key, attr in metric_attributes.items():
            metadata[key] += self._sum_previous_generations(generations, attr, key)
        output_obj["metadata"] = metadata

        # Report the first generation's input as the chain's input.
        if len(generations) > 0:
            first = generations[0]
            output_obj["input"] = (
                (first.original.text or "")
                if isinstance(first, TranslatedCodeBlock)
                else first["input"]
            )
        return output_obj
|