janus-llm 4.3.5__py3-none-any.whl → 4.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/cli/aggregate.py +2 -2
- janus/cli/cli.py +6 -0
- janus/cli/constants.py +6 -0
- janus/cli/diagram.py +36 -7
- janus/cli/document.py +10 -1
- janus/cli/llm.py +7 -3
- janus/cli/partition.py +10 -1
- janus/cli/pipeline.py +126 -0
- janus/cli/self_eval.py +10 -3
- janus/cli/translate.py +10 -1
- janus/converter/__init__.py +2 -0
- janus/converter/_tests/test_translate.py +6 -5
- janus/converter/chain.py +100 -0
- janus/converter/converter.py +467 -90
- janus/converter/diagram.py +12 -8
- janus/converter/document.py +17 -7
- janus/converter/evaluate.py +174 -147
- janus/converter/partition.py +6 -11
- janus/converter/passthrough.py +29 -0
- janus/converter/pool.py +74 -0
- janus/converter/requirements.py +7 -40
- janus/converter/translate.py +2 -58
- janus/language/_tests/test_combine.py +1 -0
- janus/language/block.py +115 -5
- janus/llm/model_callbacks.py +6 -0
- janus/llm/models_info.py +19 -0
- janus/metrics/_tests/test_reading.py +48 -4
- janus/metrics/_tests/test_rouge_score.py +5 -11
- janus/metrics/metric.py +47 -124
- janus/metrics/reading.py +48 -28
- janus/metrics/rouge_score.py +21 -34
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/eval_parsers/incose_parser.py +3 -3
- janus/parsers/reqs_parser.py +3 -3
- janus/prompts/templates/cyclic/human.txt +16 -0
- janus/prompts/templates/cyclic/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +1 -1
- janus/prompts/templates/extract_variables/human.txt +5 -0
- janus/prompts/templates/extract_variables/system.txt +1 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA +14 -15
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/RECORD +46 -40
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/WHEEL +1 -1
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/LICENSE +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/entry_points.txt +0 -0
janus/converter/diagram.py
CHANGED
@@ -12,8 +12,10 @@ class DiagramGenerator(Documenter):
|
|
12
12
|
|
13
13
|
def __init__(
|
14
14
|
self,
|
15
|
-
diagram_type="Activity",
|
16
|
-
add_documentation=False,
|
15
|
+
diagram_type: str = "Activity",
|
16
|
+
add_documentation: bool = False,
|
17
|
+
extract_variables: bool = False,
|
18
|
+
output_type: str = "diagram",
|
17
19
|
**kwargs,
|
18
20
|
) -> None:
|
19
21
|
"""Initialize the DiagramGenerator class
|
@@ -27,25 +29,27 @@ class DiagramGenerator(Documenter):
|
|
27
29
|
self._add_documentation = add_documentation
|
28
30
|
self._documenter = Documenter(**kwargs)
|
29
31
|
|
32
|
+
kwargs.update(dict(output_type=output_type))
|
30
33
|
super().__init__(**kwargs)
|
31
|
-
|
32
|
-
|
34
|
+
prompts = []
|
35
|
+
if extract_variables:
|
36
|
+
prompts.append("extract_variables")
|
37
|
+
prompts += ["diagram_with_documentation" if add_documentation else "diagram"]
|
38
|
+
self.set_prompts(prompts)
|
33
39
|
self._parser = UMLSyntaxParser(language="plantuml")
|
34
40
|
|
35
41
|
self._load_parameters()
|
36
42
|
|
37
|
-
def _load_prompt(self):
|
38
|
-
super()._load_prompt()
|
39
|
-
self._prompt = self._prompt.partial(DIAGRAM_TYPE=self._diagram_type)
|
40
|
-
|
41
43
|
def _input_runnable(self) -> Runnable:
    """Build the parallel runnable that assembles the prompt inputs.

    The mapping always carries ``SOURCE_CODE`` (the parser's
    ``parse_input``), ``context`` (the retriever) and ``DIAGRAM_TYPE`` (a
    callable that ignores its input and returns the configured diagram
    type). ``DOCUMENTATION`` (the wrapped documenter's chain) is included
    only when documentation was requested at construction time.

    Returns:
        A ``RunnableParallel`` over the entries described above.
    """
    branches = {"SOURCE_CODE": self._parser.parse_input}
    if self._add_documentation:
        # Only touch the documenter's chain when it is actually needed.
        branches["DOCUMENTATION"] = self._documenter.chain
    branches["context"] = self._retriever
    branches["DIAGRAM_TYPE"] = lambda x: self._diagram_type
    return RunnableParallel(**branches)
|
janus/converter/document.py
CHANGED
@@ -15,11 +15,15 @@ log = create_logger(__name__)
|
|
15
15
|
|
16
16
|
class Documenter(Converter):
|
17
17
|
def __init__(
|
18
|
-
self,
|
18
|
+
self,
|
19
|
+
source_language: str = "fortran",
|
20
|
+
drop_comments: bool = True,
|
21
|
+
output_type: str = "documentation",
|
22
|
+
**kwargs,
|
19
23
|
):
|
20
|
-
kwargs.update(source_language=source_language)
|
24
|
+
kwargs.update(source_language=source_language, output_type=output_type)
|
21
25
|
super().__init__(**kwargs)
|
22
|
-
self.
|
26
|
+
self.set_prompts("document")
|
23
27
|
|
24
28
|
if drop_comments:
|
25
29
|
comment_node_type = LANGUAGES[source_language].get(
|
@@ -31,27 +35,33 @@ class Documenter(Converter):
|
|
31
35
|
|
32
36
|
|
33
37
|
class MultiDocumenter(Documenter):
    """Documenter configured with the ``multidocument`` prompt.

    Output is combined with a ``JsonCombiner`` and parsed with a
    ``MultiDocumentationParser``.
    """

    def __init__(self, output_type: str = "multidocumentation", **kwargs):
        """Initialize the MultiDocumenter class

        Arguments:
            output_type: Output type forwarded to the parent constructor.
            **kwargs: Remaining keyword arguments, forwarded to ``Documenter``.
        """
        kwargs["output_type"] = output_type
        super().__init__(**kwargs)

        self.set_prompts("multidocument")
        self._combiner = JsonCombiner()
        self._parser = MultiDocumentationParser()

        # Re-run parameter loading now that prompt, combiner and parser are set.
        self._load_parameters()
|
46
|
+
|
40
47
|
|
41
48
|
class ClozeDocumenter(Documenter):
|
42
49
|
def __init__(
    self,
    comments_per_request: int | None = None,
    output_type: str = "cloze_comments",
    **kwargs,
) -> None:
    """Initialize the ClozeDocumenter class

    Arguments:
        comments_per_request: Stored on the instance as
            ``comments_per_request``; ``None`` by default.
        output_type: Output type forwarded to the parent constructor.
        **kwargs: Remaining keyword arguments for the parent constructor.
            Any ``drop_comments`` value supplied here is overridden with
            ``False``.
    """
    # Comments are always kept for this converter, whatever the caller passed.
    kwargs["drop_comments"] = False
    kwargs["output_type"] = output_type
    super().__init__(**kwargs)

    self.set_prompts("document_cloze")
    self._combiner = JsonCombiner()
    self._parser = ClozeDocumentationParser()

    self.comments_per_request = comments_per_request

    self._load_parameters()
|
64
|
+
|
55
65
|
def _add_translation(self, block: TranslatedCodeBlock):
|
56
66
|
if block.translated:
|
57
67
|
return
|
janus/converter/evaluate.py
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
import json
|
2
2
|
import re
|
3
|
-
from
|
3
|
+
from pathlib import Path
|
4
4
|
|
5
5
|
from langchain_core.runnables import Runnable, RunnableLambda, RunnableParallel
|
6
6
|
|
7
7
|
from janus.converter.converter import Converter
|
8
|
-
from janus.language.block import TranslatedCodeBlock
|
8
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
9
9
|
from janus.language.combine import JsonCombiner
|
10
10
|
from janus.parsers.eval_parsers.incose_parser import IncoseParser
|
11
11
|
from janus.parsers.eval_parsers.inline_comment_parser import InlineCommentParser
|
12
|
-
from janus.parsers.parser import JanusParserException
|
13
12
|
from janus.utils.logger import create_logger
|
14
13
|
|
15
14
|
log = create_logger(__name__)
|
@@ -35,6 +34,7 @@ class Evaluator(Converter):
|
|
35
34
|
model_arguments: Additional arguments to pass to the LLM constructor.
|
36
35
|
max_prompts: The maximum number of prompts to try before giving up.
|
37
36
|
"""
|
37
|
+
kwargs.update(use_janus_inputs=True)
|
38
38
|
super().__init__(**kwargs)
|
39
39
|
self._combiner = JsonCombiner()
|
40
40
|
self._load_parameters()
|
@@ -50,7 +50,13 @@ class RequirementEvaluator(Evaluator):
|
|
50
50
|
|
51
51
|
"""
|
52
52
|
|
53
|
-
def __init__(
|
53
|
+
def __init__(
|
54
|
+
self,
|
55
|
+
eval_items_per_request: int | None = None,
|
56
|
+
input_types: str | set[str] = set(["requirements"]),
|
57
|
+
output_type: str = "requirements_eval",
|
58
|
+
**kwargs,
|
59
|
+
) -> None:
|
54
60
|
"""Initialize the Evaluator class
|
55
61
|
|
56
62
|
Arguments:
|
@@ -59,10 +65,11 @@ class RequirementEvaluator(Evaluator):
|
|
59
65
|
model_arguments: Additional arguments to pass to the LLM constructor.
|
60
66
|
max_prompts: The maximum number of prompts to try before giving up.
|
61
67
|
"""
|
68
|
+
kwargs.update(input_types=input_types, output_type=output_type)
|
62
69
|
super().__init__(**kwargs)
|
63
70
|
self.eval_items_per_request = eval_items_per_request
|
64
71
|
self._parser = IncoseParser()
|
65
|
-
self.
|
72
|
+
self.set_prompts("eval_prompts/incose")
|
66
73
|
|
67
74
|
def _input_runnable(self) -> Runnable:
|
68
75
|
def _get_code(json_text: str) -> str:
|
@@ -77,76 +84,67 @@ class RequirementEvaluator(Evaluator):
|
|
77
84
|
context=self._retriever,
|
78
85
|
)
|
79
86
|
|
80
|
-
def
|
81
|
-
if
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
87
|
+
def translate_block(self, input_block: CodeBlock, failure_path: Path | None = None):
    """Evaluate the requirements stored in ``input_block`` against their code.

    ``input_block.text`` is parsed as JSON holding the requirements. The code
    they are evaluated against is taken from the most recent entry of
    ``input_block.previous_generations`` (its ``"input"`` key when that entry
    is a dict, otherwise its ``original.text``). The requirements are sent to
    the parent ``translate_block`` in a single request, or - when
    ``self.eval_items_per_request`` is set and the list is at least that
    long - in batches whose JSON results are merged into one object.

    Arguments:
        input_block: Block whose text is the JSON-encoded requirements.
        failure_path: Forwarded unchanged to the parent ``translate_block``.

    Returns:
        An empty list when there are no requirements to evaluate. Otherwise
        the block returned by the parent call (single request), or a new
        ``TranslatedCodeBlock`` whose children are the per-batch results.

    Raises:
        ValueError: If ``input_block`` has no previous generations.
    """
    if not input_block.previous_generations:
        raise ValueError(
            "Error: Evaluating requirements without previous generations"
        )
    last_generation = input_block.previous_generations[-1]
    if isinstance(last_generation, dict):
        input_str = last_generation["input"]
    else:
        input_str = last_generation.original.text

    requirements = json.loads(input_block.text)
    # The requirements are often a list of lists. Test for emptiness before
    # indexing: an empty list must reach the "skipping" branch below rather
    # than raise IndexError on ``requirements[0]``.
    if requirements and isinstance(requirements[0], list):
        requirements = requirements[0]
    if not requirements:
        log.debug(f"[{input_block.name}] Skipping empty output")
        return []

    # Zero-argument super() is not usable inside the nested helper, so bind
    # the parent implementation once here.
    parent_translate = super().translate_block
    batch_size = self.eval_items_per_request

    def evaluate(batch):
        # One request: the given requirements paired with the full code.
        obj_str = json.dumps(dict(requirements=batch, code=input_str))
        temp_block = self._split_text(obj_str, input_block.name)
        return parent_translate(temp_block, failure_path)

    if not batch_size or len(requirements) < batch_size:
        translated_block = evaluate(requirements)
        translated_block.original = input_block
        translated_block.previous_generations = input_block.previous_generations
        return translated_block

    translated_blocks = []
    translate_obj = {}
    for i in range(0, len(requirements), batch_size):
        batch_block = evaluate(requirements[i : i + batch_size])
        translated_blocks.append(batch_block)
        # Later batches overwrite earlier ones on key collisions.
        translate_obj.update(json.loads(batch_block.text))
    translated_str = json.dumps(translate_obj)

    translated_block = TranslatedCodeBlock(
        input_block,
        self._target_language,
        self,
        self._output_type,
        self._output_label,
    )
    translated_block.text = translated_str
    translated_block.children = translated_blocks
    translated_block.tokens = self._llm.get_num_tokens(translated_str)
    translated_block.translated = True
    return translated_block
|
150
148
|
|
151
149
|
|
152
150
|
class InlineCommentEvaluator(Evaluator):
|
@@ -156,7 +154,13 @@ class InlineCommentEvaluator(Evaluator):
|
|
156
154
|
with an associated prompt.
|
157
155
|
"""
|
158
156
|
|
159
|
-
def __init__(
|
157
|
+
def __init__(
|
158
|
+
self,
|
159
|
+
eval_items_per_request: int | None = None,
|
160
|
+
input_types: str | set[str] = set(["cloze_comments"]),
|
161
|
+
output_type: str = "cloze_comments_eval",
|
162
|
+
**kwargs,
|
163
|
+
) -> None:
|
160
164
|
"""Initialize the Evaluator class
|
161
165
|
|
162
166
|
Arguments:
|
@@ -165,84 +169,107 @@ class InlineCommentEvaluator(Evaluator):
|
|
165
169
|
model_arguments: Additional arguments to pass to the LLM constructor.
|
166
170
|
max_prompts: The maximum number of prompts to try before giving up.
|
167
171
|
"""
|
172
|
+
kwargs.update(input_types=input_types, output_type=output_type)
|
168
173
|
super().__init__(**kwargs)
|
169
174
|
self._combiner = JsonCombiner()
|
170
|
-
self._load_parameters()
|
171
175
|
self._parser = InlineCommentParser()
|
172
|
-
self.
|
176
|
+
self.set_prompts("eval_prompts/inline_comments")
|
173
177
|
self.eval_items_per_request = eval_items_per_request
|
178
|
+
self._load_parameters()
|
174
179
|
|
175
|
-
def
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
180
|
+
def _process_comments(self, input_str: str, generated_comments: dict[str, str]):
    """Append generated comments to their placeholder tags in ``input_str``.

    Every ``<BLOCK_COMMENT id>``, ``<INLINE_COMMENT id>`` and ``<MODULE id>``
    tag (``id`` being eight word characters) is followed by the comment
    stored under ``id`` in ``generated_comments``, with newlines in the
    comment escaped as a literal ``\\n``. ``<MODULE id>`` tags are rewritten
    as ``<BLOCK_COMMENT id>``. IDs without a generated comment receive the
    text ``[comment missing]``. Lines consisting of a ``<JANUS_PARTITION>``
    marker are then removed, and leading/trailing newlines are stripped.

    The substitution is a single pass over the text, so a tag whose ID occurs
    several times gets the comment appended exactly once per occurrence, and
    tag-like text inside an inserted comment is not processed again.

    Arguments:
        input_str: Source text containing comment placeholder tags.
        generated_comments: Mapping from comment ID to comment text.

    Returns:
        A ``(processed_text, missing_comments)`` tuple, where
        ``missing_comments`` counts tag occurrences whose ID is absent from
        ``generated_comments``.
    """
    missing_comments = 0

    def fill_tag(match: re.Match) -> str:
        nonlocal missing_comments
        tag_kind, comment_id = match.group(1), match.group(2)
        if comment_id not in generated_comments:
            missing_comments += 1
        comment = generated_comments.get(comment_id, "[comment missing]")
        comment = comment.replace("\n", "\\n")
        # Module placeholders are emitted as block comments.
        if tag_kind == "MODULE":
            tag_kind = "BLOCK_COMMENT"
        return f"<{tag_kind} {comment_id}> {comment}"

    input_str = re.sub(
        r"<(BLOCK_COMMENT|INLINE_COMMENT|MODULE) (\w{8})>", fill_tag, input_str
    )
    processed_str = re.sub(r"\s*<JANUS_PARTITION>\s*\n", "\n", input_str)
    return processed_str.strip("\n"), missing_comments
|
203
|
+
|
204
|
+
def translate_block(self, input_block: CodeBlock, failure_path: Path | None = None):
    """Evaluate the generated comments stored in ``input_block``.

    ``input_block.text`` is parsed as JSON mapping comment IDs to comment
    text. The commented source is taken from the most recent entry of
    ``input_block.previous_generations`` (its ``"input"`` key when that entry
    is a dict, otherwise its ``original.text``) and the comments are merged
    into it with ``_process_comments``. The result is sent to the parent
    ``translate_block`` in one request, or - when
    ``self.eval_items_per_request`` is set and at least that many comment
    placeholders are present - once per group of comments, with all
    placeholders outside the current group stripped from the text.

    Arguments:
        input_block: Block whose text is the JSON-encoded comments.
        failure_path: Forwarded unchanged to the parent ``translate_block``.

    Returns:
        An empty list when the processed input contains no comment
        placeholders. Otherwise the block returned by the parent call
        (single request), or a new ``TranslatedCodeBlock`` whose children are
        the per-group results and whose text is their merged JSON.

    Raises:
        ValueError: If ``input_block`` has no previous generations.
    """
    comment_pattern = r"<(?:INLINE|BLOCK)_COMMENT \w{8}>.*$"
    # Compiled once; reused for finding and for stripping placeholders.
    placeholder = re.compile(comment_pattern, flags=re.MULTILINE)

    if len(input_block.previous_generations) == 0:
        raise ValueError(
            "Error: cannot evaluate block, no previous generations found"
        )
    last_generation = input_block.previous_generations[-1]
    if isinstance(last_generation, dict):
        input_str = last_generation["input"]
    else:
        input_str = last_generation.original.text

    generated_comments = json.loads(input_block.text)
    processed_input, missing_comments = self._process_comments(
        input_str, generated_comments
    )
    if missing_comments:
        log.info(f"[{input_block.name}] Warning: missing {missing_comments} comments")

    comments = list(placeholder.finditer(processed_input))
    if not comments:
        log.info(f"[{input_block.name}] Skipping commentless block")
        return []

    # Any falsy batch size (None or 0) means "no batching", matching
    # RequirementEvaluator; a step of 0 would make range() raise ValueError.
    batch_size = self.eval_items_per_request
    if not batch_size or len(comments) < batch_size:
        temp_block = self._split_text(processed_input, input_block.name)
        translated_block = super().translate_block(temp_block, failure_path)
        translated_block.original = input_block
        translated_block.previous_generations = input_block.previous_generations
        return translated_block

    comment_group_indices = list(range(0, len(comments), batch_size))
    log.debug(
        f"[{input_block.name}]"
        f" Block contains more than {self.eval_items_per_request}"
        f" comments, splitting {len(comments)} comments into"
        f" {len(comment_group_indices)} groups"
    )
    translated_blocks = []
    translate_obj = {}
    for comment_ind in comment_group_indices:
        working_comments = comments[comment_ind : comment_ind + batch_size]
        start_idx = working_comments[0].start()
        end_idx = working_comments[-1].end()
        prefix = processed_input[:start_idx]
        keeper = processed_input[start_idx:end_idx]
        suffix = processed_input[end_idx:]

        # Strip all comment placeholders outside of the section of interest
        prefix = placeholder.sub("", prefix)
        suffix = placeholder.sub("", suffix)
        temp_block = self._split_text(prefix + keeper + suffix, input_block.name)
        group_block = super().translate_block(temp_block, failure_path)
        translated_blocks.append(group_block)
        # Later groups overwrite earlier ones on key collisions.
        translate_obj.update(json.loads(group_block.text))
    translated_str = json.dumps(translate_obj)

    translated_block = TranslatedCodeBlock(
        input_block,
        self._target_language,
        self,
        self._output_type,
        self._output_label,
    )
    translated_block.children = translated_blocks
    translated_block.text = translated_str
    translated_block.tokens = self._llm.get_num_tokens(translated_str)
    translated_block.translated = True
    return translated_block
|
janus/converter/partition.py
CHANGED
@@ -1,7 +1,4 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
|
3
1
|
from janus.converter.converter import Converter
|
4
|
-
from janus.language.block import TranslatedCodeBlock
|
5
2
|
from janus.parsers.partition_parser import PartitionParser
|
6
3
|
from janus.utils.logger import create_logger
|
7
4
|
|
@@ -9,19 +6,17 @@ log = create_logger(__name__)
|
|
9
6
|
|
10
7
|
|
11
8
|
class Partitioner(Converter):
    """Converter configured with the ``partition`` prompt and a ``PartitionParser``.

    The target language is set to the source language, and the target suffix
    to the first source suffix.
    """

    def __init__(
        self, partition_token_limit: int, output_type: str = "partition", **kwargs
    ):
        """Initialize the Partitioner class

        Arguments:
            partition_token_limit: Passed to ``PartitionParser`` as its
                ``token_limit``.
            output_type: Output type forwarded to the parent constructor.
            **kwargs: Remaining keyword arguments, forwarded to ``Converter``.
        """
        kwargs["output_type"] = output_type
        super().__init__(**kwargs)

        self.set_prompts("partition")
        # The parser is handed self._llm, so the model is loaded first
        # (presumably _load_model populates it - confirm in Converter).
        self._load_model()
        self._parser = PartitionParser(
            model=self._llm,
            token_limit=partition_token_limit,
        )

        self._target_language = self._source_language
        self._target_suffix = self._source_suffixes[0]
        self._load_parameters()
|
23
|
-
|
24
|
-
def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
|
25
|
-
output_str = self._parser.parse_combined_output(block.complete_text)
|
26
|
-
out_path.parent.mkdir(parents=True, exist_ok=True)
|
27
|
-
out_path.write_text(output_str, encoding="utf-8")
|
janus/converter/passthrough.py
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
from janus.converter.converter import Converter
|
4
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
5
|
+
|
6
|
+
|
7
|
+
class ConverterPassthrough(Converter):
    """Converter whose "translation" is a copy of each block's original text."""

    def __init__(self, **kwargs) -> None:
        """Forward every keyword argument to ``Converter.__init__``."""
        super().__init__(**kwargs)

    def translate_block(
        self, input_block: CodeBlock, failure_path: Path | None = None
    ) -> TranslatedCodeBlock:
        """Pass ``input_block`` through, re-attaching the previous generation's input.

        The converter's output label and type are set to those of the
        incoming block before delegating to the parent implementation. On the
        result, ``original`` is replaced by the input of the most recent
        previous generation and that generation is dropped from
        ``previous_generations``.

        Arguments:
            input_block: The block to pass through.
            failure_path: Forwarded unchanged to the parent ``translate_block``.

        Returns:
            The block produced by the parent ``translate_block``, adjusted as
            described above.
        """
        self._output_label = input_block.block_label
        self._output_type = input_block.block_type
        passed = super().translate_block(input_block, failure_path)

        # NOTE(review): previous_generations is indexed without a guard; an
        # empty list raises IndexError here - confirm callers never pass one.
        history = input_block.previous_generations
        latest = history[-1]
        if isinstance(latest, dict):
            # Dict generations carry their input as text; rebuild a block.
            passed.original = self._split_text(latest["input"], passed.name)
        else:
            passed.original = latest.original
        passed.previous_generations = history[:-1]
        return passed

    def _add_translation(self, block: TranslatedCodeBlock) -> None:
        """Mark ``block`` as translated using its original's text and token count."""
        source = block.original
        block.text = source.text
        block.tokens = source.tokens
        block.translated = True
|
janus/converter/pool.py
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
from janus.converter.converter import Converter
|
4
|
+
from janus.language.block import BlockCollection, CodeBlock, TranslatedCodeBlock
|
5
|
+
|
6
|
+
|
7
|
+
class ConverterPool(Converter):
    """Converter that runs several converters on the same input and pools their blocks."""

    def __init__(self, *args, **kwargs):
        """Initialize the ConverterPool class

        Arguments:
            *args: The converters to run; each must be a ``Converter``.
            **kwargs: Keyword arguments for ``Converter.__init__``. When
                ``source_language`` or ``model`` is present it is also
                applied to every pooled converter.

        Raises:
            ValueError: If no converter is given, or a positional argument is
                not a ``Converter`` instance.
        """
        if not args:
            raise ValueError("Error: Converter pool must be passed at least 1 converter")
        for converter in args:
            if not isinstance(converter, Converter):
                raise ValueError(f"Error: unrecognized type: {type(converter)}")
        self._converters = args
        if "source_language" in kwargs:
            for c in self._converters:
                c.set_source_language(kwargs["source_language"])
        if "model" in kwargs:
            for c in self._converters:
                c.set_model(kwargs["model"])
        super().__init__(**kwargs)

    def translate_blocks(
        self, input_blocks: CodeBlock | BlockCollection, failure_path: Path | None = None
    ):
        """Run every pooled converter on ``input_blocks`` and collect the results.

        Each resulting block is combined with the combiner of the converter
        that produced it.

        Arguments:
            input_blocks: The block or collection handed to every converter.
            failure_path: Forwarded to each converter's ``translate_blocks``.

        Returns:
            A ``BlockCollection`` holding all converters' output blocks, in
            converter order, with the previous generations of ``input_blocks``.
        """
        output_blocks = []
        for c in self._converters:
            # failure_path was previously accepted but never passed on.
            collection = c.translate_blocks(input_blocks, failure_path)
            for b in collection.blocks:
                c._combiner.combine(b)
            output_blocks += collection.blocks
        return BlockCollection(output_blocks, input_blocks.previous_generations)

    def _get_output_obj(
        self,
        block: TranslatedCodeBlock | BlockCollection | dict,
        combine_children: bool = True,
        include_previous_outputs: bool = True,
    ) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
        """Build the output object for a pooled result.

        Arguments:
            block: The collection produced by ``translate_blocks``, or a dict
                taken from a block's previous generations.
            combine_children: Passed along when serialising previous
                generations.
            include_previous_outputs: Whether dict-valued previous generations
                are added under ``intermediate_outputs``.

        Returns:
            A dict with ``input``, ``metadata`` and ``outputs`` keys, plus
            ``intermediate_outputs`` when there are any. A dict argument is
            returned unchanged.
        """
        # Dict generations reach this method through the recursion below and
        # have none of the block attributes read further down; returning them
        # as-is avoids an AttributeError on ``.blocks``.
        # NOTE(review): assumes such dicts are already-serialised output
        # objects - confirm against Converter._get_output_obj.
        if isinstance(block, dict):
            return block

        outputs = []
        for b in block.blocks:
            # Serialise each block with the converter that produced it.
            for c in self._converters:
                if c == b.converter:
                    outputs.append(c._get_output_obj(b, c._combine_output, False))
                    break

        def _get_input(item):
            if isinstance(item, BlockCollection):
                return self._combine_inputs([_get_input(b) for b in item.blocks])
            return item.original.text or ""

        out = dict(
            input=_get_input(block),
            metadata=dict(
                cost=block.total_cost,
                processing_time=block.total_processing_time,
                num_requests=block.total_num_requests,
                input_tokens=block.total_request_input_tokens,
                output_tokens=block.total_request_output_tokens,
                converter_name=self.__class__.__name__,
                type=block.block_type,
                label=block.block_label,
            ),
            outputs=outputs,
        )
        if include_previous_outputs and len(block.previous_generations) > 0:
            intermediate_outputs = [
                self._get_output_obj(g, combine_children, False)
                for g in block.previous_generations
                if isinstance(g, dict)
            ]
            if len(intermediate_outputs) > 0:
                out["intermediate_outputs"] = intermediate_outputs
        return out
|