janus-llm 4.3.5__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/cli/aggregate.py +2 -2
- janus/cli/cli.py +6 -0
- janus/cli/constants.py +6 -0
- janus/cli/diagram.py +36 -7
- janus/cli/document.py +10 -1
- janus/cli/llm.py +7 -3
- janus/cli/partition.py +10 -1
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +1 -3
- janus/cli/translate.py +10 -1
- janus/converter/_tests/test_translate.py +5 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +333 -78
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +7 -3
- janus/converter/evaluate.py +140 -148
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +2 -58
- janus/language/block.py +31 -2
- janus/metrics/metric.py +47 -124
- janus/parsers/reqs_parser.py +3 -3
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/METADATA +12 -12
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/RECORD +28 -28
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
|
|
5
5
|
from janus.converter.translate import Translator
|
6
6
|
from janus.metrics import * # noqa: F403
|
7
7
|
|
8
|
-
__version__ = "4.
|
8
|
+
__version__ = "4.4.5"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/cli/aggregate.py
CHANGED
@@ -33,7 +33,7 @@ def aggregate(
|
|
33
33
|
output_dir: Annotated[
|
34
34
|
Path,
|
35
35
|
typer.Option(
|
36
|
-
"--output
|
36
|
+
"--output", "-o", help="The directory to store the translated code in."
|
37
37
|
),
|
38
38
|
],
|
39
39
|
llm_name: Annotated[
|
@@ -130,6 +130,6 @@ def aggregate(
|
|
130
130
|
db_path=db_loc,
|
131
131
|
db_config=collections_config,
|
132
132
|
splitter_type=splitter_type,
|
133
|
-
|
133
|
+
prompt_templates="basic_aggregation",
|
134
134
|
)
|
135
135
|
aggregator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|
janus/cli/cli.py
CHANGED
@@ -10,6 +10,7 @@ from janus.cli.document import document
|
|
10
10
|
from janus.cli.embedding import embedding
|
11
11
|
from janus.cli.llm import llm
|
12
12
|
from janus.cli.partition import partition
|
13
|
+
from janus.cli.pipeline import pipeline
|
13
14
|
from janus.cli.self_eval import llm_self_eval
|
14
15
|
from janus.cli.translate import translate
|
15
16
|
from janus.metrics.cli import evaluate
|
@@ -101,6 +102,11 @@ translate = app.command(
|
|
101
102
|
no_args_is_help=True,
|
102
103
|
)(translate)
|
103
104
|
|
105
|
+
pipeline = app.command(
|
106
|
+
help="Run a janus pipeline",
|
107
|
+
no_args_is_help=True,
|
108
|
+
)(pipeline)
|
109
|
+
|
104
110
|
app.add_typer(db, name="db")
|
105
111
|
app.add_typer(llm, name="llm")
|
106
112
|
app.add_typer(evaluate, name="evaluate")
|
janus/cli/constants.py
CHANGED
@@ -33,6 +33,12 @@ REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
|
|
33
33
|
)
|
34
34
|
REFINERS = {r.__name__: r for r in REFINER_TYPES}
|
35
35
|
|
36
|
+
CONVERTER_TYPES = get_subclasses(janus.converter.converter.Converter).union(
|
37
|
+
{janus.converter.converter.Converter}
|
38
|
+
)
|
39
|
+
|
40
|
+
CONVERTERS = {c.__name__: c for c in CONVERTER_TYPES}
|
41
|
+
|
36
42
|
|
37
43
|
def get_collections_config():
|
38
44
|
if collections_config_file.exists():
|
janus/cli/diagram.py
CHANGED
@@ -32,7 +32,7 @@ def diagram(
|
|
32
32
|
output_dir: Annotated[
|
33
33
|
Path,
|
34
34
|
typer.Option(
|
35
|
-
"--output
|
35
|
+
"--output", "-o", help="The directory to store the translated code in."
|
36
36
|
),
|
37
37
|
],
|
38
38
|
llm_name: Annotated[
|
@@ -112,7 +112,7 @@ def diagram(
|
|
112
112
|
refinement chain",
|
113
113
|
click_type=click.Choice(list(REFINERS.keys())),
|
114
114
|
),
|
115
|
-
] = ["
|
115
|
+
] = ["CodeFormatRefiner"],
|
116
116
|
retriever_type: Annotated[
|
117
117
|
str,
|
118
118
|
typer.Option(
|
@@ -122,6 +122,24 @@ def diagram(
|
|
122
122
|
click_type=click.Choice(["active_usings", "language_docs"]),
|
123
123
|
),
|
124
124
|
] = None,
|
125
|
+
extract_variables: Annotated[
|
126
|
+
bool,
|
127
|
+
typer.Option(
|
128
|
+
"-ev",
|
129
|
+
"--extract-variables",
|
130
|
+
help="Present when diagram generator should \
|
131
|
+
extract variables before producing diagram",
|
132
|
+
),
|
133
|
+
] = False,
|
134
|
+
use_janus_inputs: Annotated[
|
135
|
+
bool,
|
136
|
+
typer.Option(
|
137
|
+
"-j",
|
138
|
+
"--use-janus-inputs",
|
139
|
+
help="Present when diagram generator should be\
|
140
|
+
be using janus files as inputs",
|
141
|
+
),
|
142
|
+
] = False,
|
125
143
|
):
|
126
144
|
from janus.cli.constants import db_loc, get_collections_config
|
127
145
|
from janus.converter.diagram import DiagramGenerator
|
@@ -141,6 +159,8 @@ def diagram(
|
|
141
159
|
retriever_type=retriever_type,
|
142
160
|
diagram_type=diagram_type,
|
143
161
|
add_documentation=add_documentation,
|
162
|
+
extract_variables=extract_variables,
|
163
|
+
use_janus_inputs=use_janus_inputs,
|
144
164
|
)
|
145
165
|
diagram_generator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|
146
166
|
|
@@ -170,9 +190,18 @@ def render(
|
|
170
190
|
if not output_file.parent.exists():
|
171
191
|
output_file.parent.mkdir()
|
172
192
|
|
173
|
-
|
174
|
-
|
193
|
+
def _render(obj, ind=0):
    """Render every diagram text found under ``obj["outputs"]``.

    Each string output is written to a sibling of ``output_file`` whose stem
    is suffixed with a running index, passed to the PlantUML jar, and then
    the temporary text file is removed. Nested dict outputs are rendered
    recursively and share the same running index.

    Arguments:
        obj: A dict with an ``"outputs"`` list of strings and/or nested dicts
            of the same form.
        ind: The first index to use for the files produced by this call.

    Returns:
        The next unused index after everything under ``obj`` was rendered.
    """
    # The jar location does not depend on the output being rendered.
    jar_path = homedir / ".janus/lib/plantuml.jar"
    for o in obj["outputs"]:
        if isinstance(o, dict):
            # The recursive call returns the next free index (an absolute
            # position, not a count), so it replaces the current index.
            # Adding it instead would skip indices after nested outputs.
            ind = _render(o, ind)
        else:
            outfile_new = output_file.with_stem(f"{output_file.stem}_{ind}")
            # Stored text carries literal "\n" sequences; turn them into
            # real newlines before handing the file to PlantUML.
            text = o.replace("\\n", "\n").strip()
            outfile_new.write_text(text)
            subprocess.run(["java", "-jar", jar_path, outfile_new])  # nosec
            outfile_new.unlink()
            ind += 1
    return ind
|
175
206
|
|
176
|
-
|
177
|
-
subprocess.run(["java", "-jar", jar_path, output_file]) # nosec
|
178
|
-
output_file.unlink()
|
207
|
+
_render(data)
|
janus/cli/document.py
CHANGED
@@ -32,7 +32,7 @@ def document(
|
|
32
32
|
output_dir: Annotated[
|
33
33
|
Path,
|
34
34
|
typer.Option(
|
35
|
-
"--output
|
35
|
+
"--output", "-o", help="The directory to store the translated code in."
|
36
36
|
),
|
37
37
|
],
|
38
38
|
llm_name: Annotated[
|
@@ -142,6 +142,14 @@ def document(
|
|
142
142
|
"If unspecificed, model's default max will be used.",
|
143
143
|
),
|
144
144
|
] = None,
|
145
|
+
use_janus_inputs: Annotated[
|
146
|
+
bool,
|
147
|
+
typer.Option(
|
148
|
+
"-j",
|
149
|
+
"--use-janus-inputs",
|
150
|
+
help="Present if converter should use janus files as inputs",
|
151
|
+
),
|
152
|
+
] = False,
|
145
153
|
):
|
146
154
|
from janus.cli.constants import db_loc, get_collections_config
|
147
155
|
from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
|
@@ -161,6 +169,7 @@ def document(
|
|
161
169
|
splitter_type=splitter_type,
|
162
170
|
refiner_types=refiner_types,
|
163
171
|
retriever_type=retriever_type,
|
172
|
+
use_janus_inputs=use_janus_inputs,
|
164
173
|
)
|
165
174
|
if doc_mode == "cloze":
|
166
175
|
documenter = ClozeDocumenter(comments_per_request=comments_per_request, **kwargs)
|
janus/cli/llm.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import click
|
2
2
|
import typer
|
3
|
+
from rich import print
|
3
4
|
from typing_extensions import Annotated
|
4
5
|
|
5
6
|
from janus.llm.models_info import MODEL_TYPE_CONSTRUCTORS
|
@@ -45,7 +46,10 @@ def llm_add(
|
|
45
46
|
if model_type == "HuggingFace":
|
46
47
|
url = typer.prompt("Enter the model's URL")
|
47
48
|
max_tokens = typer.prompt(
|
48
|
-
"Enter the model's
|
49
|
+
"Enter the model's token limit", default=65536, type=int
|
50
|
+
)
|
51
|
+
max_tokens = typer.prompt(
|
52
|
+
"Enter the model's max output tokens", default=8192, type=int
|
49
53
|
)
|
50
54
|
in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
|
51
55
|
out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
|
@@ -61,6 +65,7 @@ def llm_add(
|
|
61
65
|
)
|
62
66
|
cfg = {
|
63
67
|
"model_type": model_type,
|
68
|
+
"model_id": "gpt-4o", # This is a placeholder to use the Azure PromptEngine
|
64
69
|
"model_args": params,
|
65
70
|
"token_limit": max_tokens,
|
66
71
|
"model_cost": {"input": in_cost, "output": out_cost},
|
@@ -172,8 +177,7 @@ def llm_ls(
|
|
172
177
|
):
|
173
178
|
import json
|
174
179
|
|
175
|
-
from janus.
|
176
|
-
from janus.llm.models_info import MODEL_TYPES
|
180
|
+
from janus.llm.models_info import MODEL_CONFIG_DIR, MODEL_TYPES
|
177
181
|
|
178
182
|
print("\n[green]User-configured models[/green]:")
|
179
183
|
for model_cfg in MODEL_CONFIG_DIR.glob("*.json"):
|
janus/cli/partition.py
CHANGED
@@ -31,7 +31,7 @@ def partition(
|
|
31
31
|
output_dir: Annotated[
|
32
32
|
Path,
|
33
33
|
typer.Option(
|
34
|
-
"--output
|
34
|
+
"--output", "-o", help="The directory to store the partitioned code in."
|
35
35
|
),
|
36
36
|
],
|
37
37
|
llm_name: Annotated[
|
@@ -106,6 +106,14 @@ def partition(
|
|
106
106
|
help="The limit on the number of tokens per partition.",
|
107
107
|
),
|
108
108
|
] = 8192,
|
109
|
+
use_janus_inputs: Annotated[
|
110
|
+
bool,
|
111
|
+
typer.Option(
|
112
|
+
"-j",
|
113
|
+
"--use-janus-inputs",
|
114
|
+
help="Present if converter should use janus inputs",
|
115
|
+
),
|
116
|
+
] = False,
|
109
117
|
):
|
110
118
|
from janus.converter.partition import Partitioner
|
111
119
|
|
@@ -120,6 +128,7 @@ def partition(
|
|
120
128
|
splitter_type=splitter_type,
|
121
129
|
refiner_types=refiner_types,
|
122
130
|
partition_token_limit=partition_token_limit,
|
131
|
+
use_janus_inputs=use_janus_inputs,
|
123
132
|
)
|
124
133
|
partitioner = Partitioner(**kwargs)
|
125
134
|
partitioner.translate(input_dir, output_dir, failure_dir, overwrite)
|
janus/cli/pipeline.py
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
import json
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
import click
|
6
|
+
import typer
|
7
|
+
from typing_extensions import Annotated
|
8
|
+
|
9
|
+
from janus.cli.constants import CONVERTERS
|
10
|
+
from janus.converter.chain import ConverterChain
|
11
|
+
from janus.utils.enums import LANGUAGES
|
12
|
+
|
13
|
+
|
14
|
+
def instiantiate(x):
    """Recursively turn a JSON-style spec into converter objects.

    A dict containing a ``"type"`` key is treated as a converter spec: its
    optional ``"args"`` list and ``"kwargs"`` dict are instantiated
    recursively and then passed to the class registered under that name in
    ``CONVERTERS``. Other dicts and lists are rebuilt with their values
    instantiated; any other value is returned unchanged.

    The input is never modified, so the same parsed spec can be instantiated
    more than once without earlier results leaking into it.

    Arguments:
        x: The spec (dict, list, or plain value) to instantiate.

    Returns:
        The instantiated object, or a new container of instantiated values.

    Raises:
        ValueError: If a spec names a type that is not in ``CONVERTERS``.
    """
    if isinstance(x, list):
        return [instiantiate(a) for a in x]
    if not isinstance(x, dict):
        return x
    if "type" not in x:
        return {k: instiantiate(v) for k, v in x.items()}

    # Build fresh containers instead of overwriting x["args"]/x["kwargs"],
    # which would replace the caller's JSON data with live objects.
    args = [instiantiate(a) for a in x.get("args", [])]
    kwargs = {k: instiantiate(v) for k, v in x.get("kwargs", {}).items()}
    if x["type"] not in CONVERTERS:
        raise ValueError(f"Error: {x['type']} is not a Converter")
    return CONVERTERS[x["type"]](*args, **kwargs)
|
32
|
+
|
33
|
+
|
34
|
+
def instiantiate_pipeline(
|
35
|
+
pipeline: list[dict],
|
36
|
+
language: str = "text",
|
37
|
+
model: str = "gpt-4o",
|
38
|
+
use_janus_inputs: None | bool = None,
|
39
|
+
):
|
40
|
+
if "kwargs" not in pipeline[0]:
|
41
|
+
pipeline[0]["kwargs"] = {}
|
42
|
+
pipeline[0]["kwargs"].update(source_language=language, model=model)
|
43
|
+
if use_janus_inputs is not None:
|
44
|
+
pipeline[0]["kwargs"].update(janus_inputs=use_janus_inputs)
|
45
|
+
print(pipeline[0])
|
46
|
+
converters = [instiantiate(pipeline[0])]
|
47
|
+
for p in pipeline[1:]:
|
48
|
+
p["kwargs"].update(source_language=converters[-1].target_language, model=model)
|
49
|
+
converters.append(instiantiate(p))
|
50
|
+
return ConverterChain(*converters)
|
51
|
+
|
52
|
+
|
53
|
+
def pipeline(
    pipeline_file: Annotated[
        Path, typer.Option("-p", "--pipeline", help="Name of pipeline file to use")
    ],
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during documentation",
        ),
    ] = None,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    use_janus_inputs: Annotated[
        Optional[bool],
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if converter chain should use janus input files",
        ),
    ] = None,
):
    """Load a pipeline description from JSON and run it over a directory.

    The JSON file is parsed, turned into a converter chain with
    ``instiantiate_pipeline``, and the chain's ``translate`` method is called
    with the input, output and failure directories.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(pipeline_file, "r", encoding="utf-8") as f:
        json_obj = json.load(f)
    # Named `chain` so the local does not shadow this function's own name.
    chain = instiantiate_pipeline(
        json_obj, language=language, model=llm_name, use_janus_inputs=use_janus_inputs
    )
    chain.translate(
        input_directory=input_dir,
        output_directory=output_dir,
        failure_directory=failure_dir,
        overwrite=overwrite,
    )
|
janus/cli/self_eval.py
CHANGED
@@ -31,9 +31,7 @@ def llm_self_eval(
|
|
31
31
|
],
|
32
32
|
output_dir: Annotated[
|
33
33
|
Path,
|
34
|
-
typer.Option(
|
35
|
-
"--output-dir", "-o", help="The directory to store the evaluations in."
|
36
|
-
),
|
34
|
+
typer.Option("--output", "-o", help="The directory to store the evaluations in."),
|
37
35
|
],
|
38
36
|
failure_dir: Annotated[
|
39
37
|
Optional[Path],
|
janus/cli/translate.py
CHANGED
@@ -148,6 +148,14 @@ def translate(
|
|
148
148
|
"If unspecificed, model's default max will be used.",
|
149
149
|
),
|
150
150
|
] = None,
|
151
|
+
use_janus_inputs: Annotated[
|
152
|
+
bool,
|
153
|
+
typer.Option(
|
154
|
+
"-j",
|
155
|
+
"--use-janus-inputs",
|
156
|
+
help="Prsent if translator should use janus files as inputs",
|
157
|
+
),
|
158
|
+
] = False,
|
151
159
|
):
|
152
160
|
from janus.cli.constants import db_loc, get_collections_config
|
153
161
|
from janus.converter.translate import Translator
|
@@ -173,11 +181,12 @@ def translate(
|
|
173
181
|
target_version=target_version,
|
174
182
|
max_prompts=max_prompts,
|
175
183
|
max_tokens=max_tokens,
|
176
|
-
|
184
|
+
prompt_templates=prompt_template,
|
177
185
|
db_path=db_loc,
|
178
186
|
db_config=collections_config,
|
179
187
|
splitter_type=splitter_type,
|
180
188
|
refiner_types=refiner_types,
|
181
189
|
retriever_type=retriever_type,
|
190
|
+
use_janus_inputs=use_janus_inputs,
|
182
191
|
)
|
183
192
|
translator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
|
@@ -59,14 +59,14 @@ class TestTranslator(unittest.TestCase):
|
|
59
59
|
self.req_translator = RequirementsDocumenter(
|
60
60
|
model="gpt-4o-mini",
|
61
61
|
source_language="fortran",
|
62
|
-
|
62
|
+
prompt_templates="requirements",
|
63
63
|
)
|
64
64
|
|
65
65
|
@pytest.mark.translate
|
66
66
|
def test_translate(self):
|
67
67
|
"""Test translate method."""
|
68
68
|
# Delete a file if it's already there
|
69
|
-
python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.
|
69
|
+
python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.json"
|
70
70
|
python_file.unlink(missing_ok=True)
|
71
71
|
python_file.parent.rmdir() if python_file.parent.is_dir() else None
|
72
72
|
self.translator.translate(self.test_file.parent, self.test_file.parent / "python")
|
@@ -82,7 +82,7 @@ class TestTranslator(unittest.TestCase):
|
|
82
82
|
self.assertRaises(
|
83
83
|
ValueError, self.translator.set_source_language, "scribbledy-doop"
|
84
84
|
)
|
85
|
-
self.translator.
|
85
|
+
self.translator.set_prompts(["pish posh"])
|
86
86
|
self.assertRaises(ValueError, self.translator._load_parameters)
|
87
87
|
|
88
88
|
|
@@ -149,10 +149,10 @@ def test_language_combinations(
|
|
149
149
|
translator.set_model("gpt-4o")
|
150
150
|
translator.set_source_language(source_language)
|
151
151
|
translator.set_target_language(expected_target_language, expected_target_version)
|
152
|
-
translator.
|
152
|
+
translator.set_prompts(prompt_template)
|
153
153
|
translator._load_parameters()
|
154
154
|
assert translator._target_language == expected_target_language # nosec
|
155
155
|
assert translator._target_version == expected_target_version # nosec
|
156
156
|
assert translator._splitter.language == source_language # nosec
|
157
157
|
assert translator._splitter.model.model_name == "gpt-4o" # nosec
|
158
|
-
assert translator.
|
158
|
+
assert translator._prompt_template_names == [prompt_template] # nosec
|
janus/converter/chain.py
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
from janus.converter.converter import Converter
|
4
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
5
|
+
from janus.utils.logger import create_logger
|
6
|
+
|
7
|
+
log = create_logger(__name__)
|
8
|
+
|
9
|
+
|
10
|
+
class ConverterChain(Converter):
    """
    Class for representing multiple converters chained together

    The first converter consumes the original input; each later converter
    consumes the output of the one before it.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Create a chain from one or more converters.

        Arguments:
            *args: The converters to run, in order.
            **kwargs: Extra keyword arguments forwarded to ``Converter``.
                The source language, target language/version and
                ``use_janus_inputs`` entries are always overridden with the
                values taken from the first/last converter.

        Raises:
            ValueError: If no converter is given, or an argument is not a
                ``Converter``.
        """
        if len(args) == 0:
            raise ValueError("Error: Converter chain must be passed at least 1 converter")
        for converter in args:
            if not isinstance(converter, Converter):
                raise ValueError(f"Error: unrecognized type: {type(converter)}")
        self._converters = args
        # The chain as a whole reads what the first converter reads and
        # produces what the last converter produces.
        kwargs.update(
            source_language=self._converters[0].source_language,
            target_language=self._converters[-1]._target_language,
            target_version=self._converters[-1]._target_version,
            use_janus_inputs=self._converters[0]._use_janus_inputs,
        )
        super().__init__(**kwargs)

    def _run_converters(
        self, translated_code_block, name: str, failure_path: Path | None = None
    ):
        """Feed the first converter's output through the remaining converters.

        Arguments:
            translated_code_block: The output of the first converter.
            name: The name of the file being processed.
            failure_path: The path to write the failure file to.

        Returns:
            The block produced by the last converter that ran. If a step
            failed, this is that step's untranslated block.
        """
        for i, converter in enumerate(self._converters[1:]):
            # `converter` is self._converters[i + 1]; the block in hand was
            # produced by self._converters[i].
            if not translated_code_block.translated:
                log.info(
                    f"Error: chain failed to translate at step {i}: "
                    f"{self._converters[i].__class__.__name__}"
                )
                break
            if converter._use_janus_inputs:
                janus_obj = self._converters[i]._get_output_obj(translated_code_block)
                translated_code_block = converter.translate_janus_obj(
                    janus_obj, name, failure_path
                )
            else:
                translated_code_block = converter.translate_block(
                    translated_code_block.to_codeblock(), name, failure_path
                )
        else:
            # Only reached when no earlier step broke out of the loop, so a
            # failure here really belongs to the final converter. Previously
            # an earlier failure was logged a second time against the last
            # step.
            if not translated_code_block.translated:
                log.info(
                    f"Error: chain failed to translate at step {len(self._converters)-1}: "
                    f"{self._converters[-1].__class__.__name__}"
                )

        return translated_code_block

    def translate_file(
        self, file: Path, failure_path: Path | None = None
    ) -> TranslatedCodeBlock:
        """Translate a file using the chain of converters

        Arguments:
            file: The file to translate
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        filename = file.name
        translated_code_block = self._converters[0].translate_file(file, failure_path)
        translated_code_block = self._run_converters(
            translated_code_block, filename, failure_path
        )
        return translated_code_block

    def translate_text(
        self, text: str, name: str, failure_path: Path | None = None
    ) -> TranslatedCodeBlock:
        """Translate a text using the chain of converters

        Arguments:
            text: The text to translate
            name: The name of the file
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        translated_code_block = self._converters[0].translate_text(
            text, name, failure_path
        )
        translated_code_block = self._run_converters(
            translated_code_block, name, failure_path
        )
        return translated_code_block

    def translate_block(
        self,
        input_block: CodeBlock | list[CodeBlock],
        name: str,
        failure_path: Path | None = None,
    ) -> TranslatedCodeBlock:
        """Translate a block of code using the chain of converters

        Arguments:
            input_block: The block of code to translate
            name: The name of the file
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        translated_code_block = self._converters[0].translate_block(
            input_block, name, failure_path
        )
        translated_code_block = self._run_converters(
            translated_code_block, name, failure_path
        )
        return translated_code_block

    def _get_output_obj(
        self, block: TranslatedCodeBlock | list, combine_children: bool = True
    ) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
        """Build the output object for ``block``, including earlier steps.

        On top of the base class's output object this adds an
        ``"intermediate_outputs"`` list (one entry per previous generation
        plus the final block), adds the previous generations' cost, time,
        request and token counts to the metadata, and sets ``"input"`` to the
        input of the first previous generation when there is one.

        Arguments:
            block: The final block produced by the chain.
            combine_children: Forwarded to the base implementation.

        Returns:
            The output object for the whole chain.
        """
        output_obj = super()._get_output_obj(block, combine_children)
        generations = block.previous_generations

        # Presumably generation i was produced by converter i -- confirm
        # against how previous_generations is populated.
        intermediate_outputs = [
            (
                self._converters[i]._get_output_obj(generation)
                if isinstance(generation, TranslatedCodeBlock)
                else generation
            )
            for i, generation in enumerate(generations)
        ]
        intermediate_outputs.append(self._converters[-1]._get_output_obj(block))
        output_obj["intermediate_outputs"] = intermediate_outputs

        # Metadata key -> attribute holding the same figure on a
        # TranslatedCodeBlock. Generations that are already plain output
        # objects carry the figure under their own "metadata" dict.
        stat_attributes = (
            ("cost", "cost"),
            ("processing_time", "processing_time"),
            ("num_requests", "total_num_requests"),
            ("input_tokens", "total_request_input_tokens"),
            ("output_tokens", "total_request_output_tokens"),
        )
        metadata = output_obj["metadata"]
        for key, attribute in stat_attributes:
            metadata[key] += sum(
                (
                    getattr(generation, attribute)
                    if isinstance(generation, TranslatedCodeBlock)
                    else generation["metadata"][key]
                )
                for generation in generations
            )
        output_obj["metadata"] = metadata

        if len(generations) > 0:
            # Report the input of the very first step as the chain's input.
            first = generations[0]
            output_obj["input"] = (
                (first.original.text or "")
                if isinstance(first, TranslatedCodeBlock)
                else first["input"]
            )
        return output_obj
|