janus-llm 4.3.5__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/cli/aggregate.py +2 -2
- janus/cli/cli.py +6 -0
- janus/cli/constants.py +6 -0
- janus/cli/diagram.py +36 -7
- janus/cli/document.py +10 -1
- janus/cli/llm.py +7 -3
- janus/cli/partition.py +10 -1
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +1 -3
- janus/cli/translate.py +10 -1
- janus/converter/_tests/test_translate.py +5 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +333 -78
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +7 -3
- janus/converter/evaluate.py +140 -148
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +2 -58
- janus/language/block.py +31 -2
- janus/metrics/metric.py +47 -124
- janus/parsers/reqs_parser.py +3 -3
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/METADATA +12 -12
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/RECORD +28 -28
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/entry_points.txt +0 -0
janus/converter/document.py
CHANGED
@@ -19,7 +19,7 @@ class Documenter(Converter):
|
|
19
19
|
):
|
20
20
|
kwargs.update(source_language=source_language)
|
21
21
|
super().__init__(**kwargs)
|
22
|
-
self.
|
22
|
+
self.set_prompts("document")
|
23
23
|
|
24
24
|
if drop_comments:
|
25
25
|
comment_node_type = LANGUAGES[source_language].get(
|
@@ -33,10 +33,12 @@ class Documenter(Converter):
|
|
33
33
|
class MultiDocumenter(Documenter):
|
34
34
|
def __init__(self, **kwargs):
|
35
35
|
super().__init__(**kwargs)
|
36
|
-
self.
|
36
|
+
self.set_prompts("multidocument")
|
37
37
|
self._combiner = JsonCombiner()
|
38
38
|
self._parser = MultiDocumentationParser()
|
39
39
|
|
40
|
+
self._load_parameters()
|
41
|
+
|
40
42
|
|
41
43
|
class ClozeDocumenter(Documenter):
|
42
44
|
def __init__(
|
@@ -46,12 +48,14 @@ class ClozeDocumenter(Documenter):
|
|
46
48
|
) -> None:
|
47
49
|
kwargs.update(drop_comments=False)
|
48
50
|
super().__init__(**kwargs)
|
49
|
-
self.
|
51
|
+
self.set_prompts("document_cloze")
|
50
52
|
self._combiner = JsonCombiner()
|
51
53
|
self._parser = ClozeDocumentationParser()
|
52
54
|
|
53
55
|
self.comments_per_request = comments_per_request
|
54
56
|
|
57
|
+
self._load_parameters()
|
58
|
+
|
55
59
|
def _add_translation(self, block: TranslatedCodeBlock):
|
56
60
|
if block.translated:
|
57
61
|
return
|
janus/converter/evaluate.py
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
import json
|
2
2
|
import re
|
3
3
|
from copy import deepcopy
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Any
|
4
6
|
|
5
7
|
from langchain_core.runnables import Runnable, RunnableLambda, RunnableParallel
|
6
8
|
|
7
9
|
from janus.converter.converter import Converter
|
8
|
-
from janus.language.block import TranslatedCodeBlock
|
9
10
|
from janus.language.combine import JsonCombiner
|
10
11
|
from janus.parsers.eval_parsers.incose_parser import IncoseParser
|
11
12
|
from janus.parsers.eval_parsers.inline_comment_parser import InlineCommentParser
|
12
|
-
from janus.parsers.parser import JanusParserException
|
13
13
|
from janus.utils.logger import create_logger
|
14
14
|
|
15
15
|
log = create_logger(__name__)
|
@@ -35,6 +35,7 @@ class Evaluator(Converter):
|
|
35
35
|
model_arguments: Additional arguments to pass to the LLM constructor.
|
36
36
|
max_prompts: The maximum number of prompts to try before giving up.
|
37
37
|
"""
|
38
|
+
kwargs.update(use_janus_inputs=True)
|
38
39
|
super().__init__(**kwargs)
|
39
40
|
self._combiner = JsonCombiner()
|
40
41
|
self._load_parameters()
|
@@ -62,7 +63,7 @@ class RequirementEvaluator(Evaluator):
|
|
62
63
|
super().__init__(**kwargs)
|
63
64
|
self.eval_items_per_request = eval_items_per_request
|
64
65
|
self._parser = IncoseParser()
|
65
|
-
self.
|
66
|
+
self.set_prompts("eval_prompts/incose")
|
66
67
|
|
67
68
|
def _input_runnable(self) -> Runnable:
|
68
69
|
def _get_code(json_text: str) -> str:
|
@@ -77,76 +78,55 @@ class RequirementEvaluator(Evaluator):
|
|
77
78
|
context=self._retriever,
|
78
79
|
)
|
79
80
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
finally:
|
130
|
-
# Update metadata to include for all runs
|
131
|
-
block.num_requests += working_block.num_requests
|
132
|
-
block.cost += working_block.cost
|
133
|
-
block.processing_time += working_block.processing_time
|
134
|
-
block.request_input_tokens += working_block.request_input_tokens
|
135
|
-
block.request_output_tokens += working_block.request_output_tokens
|
136
|
-
|
137
|
-
# Update the output text to merge this section's output in
|
138
|
-
obj.update(json.loads(working_block.text))
|
139
|
-
# intermediate result of block,
|
140
|
-
# will be overwritten if file completes successfully
|
141
|
-
block.text = json.dumps(obj)
|
142
|
-
|
143
|
-
block.text = json.dumps(obj)
|
144
|
-
block.tokens = self._llm.get_num_tokens(block.text)
|
145
|
-
block.translated = True
|
146
|
-
|
147
|
-
log.debug(
|
148
|
-
f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
|
149
|
-
)
|
81
|
+
def translate_janus_obj(self, obj: Any, name: str, failure_path: Path | None = None):
|
82
|
+
results = []
|
83
|
+
for o in obj["outputs"]:
|
84
|
+
if isinstance(o, dict):
|
85
|
+
results += self.translate_janus_obj(o, name, failure_path)
|
86
|
+
elif isinstance(o, str):
|
87
|
+
temp_obj = deepcopy(obj)
|
88
|
+
requirements = json.loads(o)
|
89
|
+
if not requirements:
|
90
|
+
log.debug(f"[{name}] Skipping empty output")
|
91
|
+
continue
|
92
|
+
if (
|
93
|
+
not self.eval_items_per_request
|
94
|
+
or len(requirements) < self.eval_items_per_request
|
95
|
+
):
|
96
|
+
obj_str = json.dumps(
|
97
|
+
dict(
|
98
|
+
requirements=requirements,
|
99
|
+
code=obj["input"],
|
100
|
+
)
|
101
|
+
)
|
102
|
+
temp_obj["outputs"] = [obj_str]
|
103
|
+
temp_block = self._janus_object_to_codeblock(temp_obj, name)
|
104
|
+
translated_block = self.translate_block(temp_block, failure_path)
|
105
|
+
translated_block.previous_generations[-1] = obj
|
106
|
+
translated_block.original = self._janus_object_to_codeblock(obj, name)
|
107
|
+
results.append(translated_block)
|
108
|
+
else:
|
109
|
+
for i in range(0, len(requirements), self.eval_items_per_request):
|
110
|
+
working_requirements = requirements[
|
111
|
+
i : i + self.eval_items_per_request
|
112
|
+
]
|
113
|
+
obj_str = json.dumps(
|
114
|
+
dict(
|
115
|
+
requirements=working_requirements,
|
116
|
+
code=obj["input"],
|
117
|
+
)
|
118
|
+
)
|
119
|
+
temp_obj["outputs"] = [obj_str]
|
120
|
+
temp_block = self._janus_object_to_codeblock(temp_obj, name)
|
121
|
+
translated_block = self.translate_block(temp_block, failure_path)
|
122
|
+
translated_block.previous_generations[-1] = obj
|
123
|
+
translated_block.original = self._janus_object_to_codeblock(
|
124
|
+
obj, name
|
125
|
+
)
|
126
|
+
results.append(translated_block)
|
127
|
+
else:
|
128
|
+
raise ValueError(f"Error: unable to find janus object: {type(o)}")
|
129
|
+
return results
|
150
130
|
|
151
131
|
|
152
132
|
class InlineCommentEvaluator(Evaluator):
|
@@ -167,82 +147,94 @@ class InlineCommentEvaluator(Evaluator):
|
|
167
147
|
"""
|
168
148
|
super().__init__(**kwargs)
|
169
149
|
self._combiner = JsonCombiner()
|
170
|
-
self._load_parameters()
|
171
150
|
self._parser = InlineCommentParser()
|
172
|
-
self.
|
151
|
+
self.set_prompts("eval_prompts/inline_comments")
|
173
152
|
self.eval_items_per_request = eval_items_per_request
|
153
|
+
self._load_parameters()
|
174
154
|
|
175
|
-
def
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
155
|
+
def _process_comments(self, input_str: str, generated_comments: dict[str, str]):
|
156
|
+
comment_patterns = [
|
157
|
+
(r"<BLOCK_COMMENT (\w{8})>", "<BLOCK_COMMENT {}>", "<BLOCK_COMMENT {}>"),
|
158
|
+
(r"<INLINE_COMMENT (\w{8})>", "<INLINE_COMMENT {}>", "<INLINE_COMMENT {}>"),
|
159
|
+
(r"<MODULE (\w{8})>", "<MODULE {}>", "<BLOCK_COMMENT {}>"),
|
160
|
+
]
|
161
|
+
missing_comments = 0
|
162
|
+
for pattern, find_template, repl_template in comment_patterns:
|
163
|
+
matches = re.findall(pattern, input_str)
|
164
|
+
|
165
|
+
for comment_id in matches:
|
166
|
+
find_tag = find_template.format(comment_id)
|
167
|
+
repl_tag = repl_template.format(comment_id)
|
168
|
+
|
169
|
+
if comment_id not in generated_comments:
|
170
|
+
missing_comments += 1
|
171
|
+
comment = generated_comments.get(comment_id, "[comment missing]")
|
172
|
+
comment = comment.replace("\n", "\\n")
|
173
|
+
|
174
|
+
# Replace the tag in the code with the comment appended.
|
175
|
+
input_str = input_str.replace(find_tag, f"{repl_tag} {comment}")
|
176
|
+
processed_str = re.sub(r"\s*<JANUS_PARTITION>\s*\n", "\n", input_str)
|
177
|
+
return processed_str.strip("\n"), missing_comments
|
178
|
+
|
179
|
+
def translate_janus_obj(self, obj: Any, name: str, failure_path: Path | None = None):
|
186
180
|
comment_pattern = r"<(?:INLINE|BLOCK)_COMMENT \w{8}>.*$"
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
|
248
|
-
)
|
181
|
+
results = []
|
182
|
+
input_str = obj["input"]
|
183
|
+
for o in obj["outputs"]:
|
184
|
+
if isinstance(o, dict):
|
185
|
+
results += self.translate_janus_obj(o, name, failure_path)
|
186
|
+
elif isinstance(o, str):
|
187
|
+
temp_obj = deepcopy(obj)
|
188
|
+
generated_comments = json.loads(o)
|
189
|
+
processed_input, missing_comments = self._process_comments(
|
190
|
+
input_str, generated_comments
|
191
|
+
)
|
192
|
+
if missing_comments:
|
193
|
+
log.info(f"[{name}] Warning: missing {missing_comments} comments")
|
194
|
+
comments = list(
|
195
|
+
re.finditer(comment_pattern, processed_input, flags=re.MULTILINE)
|
196
|
+
)
|
197
|
+
if not comments:
|
198
|
+
log.info(f"[{name}] Skipping commentless block")
|
199
|
+
continue
|
200
|
+
if (
|
201
|
+
self.eval_items_per_request is None
|
202
|
+
or len(comments) < self.eval_items_per_request
|
203
|
+
):
|
204
|
+
temp_obj["outputs"] = [processed_input]
|
205
|
+
temp_block = self._janus_object_to_codeblock(temp_obj, name)
|
206
|
+
translated_block = self.translate_block(temp_block, failure_path)
|
207
|
+
translated_block.previous_generations[-1] = obj
|
208
|
+
translated_block.original = self._janus_object_to_codeblock(obj, name)
|
209
|
+
results.append(translated_block)
|
210
|
+
continue
|
211
|
+
comment_group_indices = list(
|
212
|
+
range(0, len(comments), self.eval_items_per_request)
|
213
|
+
)
|
214
|
+
log.debug(
|
215
|
+
f"[{name}] Block contains more than {self.eval_items_per_request}"
|
216
|
+
f" comments, splitting {len(comments)} comments into"
|
217
|
+
f" {len(comment_group_indices)} groups"
|
218
|
+
)
|
219
|
+
for comment_ind in comment_group_indices:
|
220
|
+
working_comments = comments[
|
221
|
+
comment_ind : comment_ind + self.eval_items_per_request
|
222
|
+
]
|
223
|
+
start_idx = working_comments[0].start()
|
224
|
+
end_idx = working_comments[-1].end()
|
225
|
+
prefix = processed_input[:start_idx]
|
226
|
+
keeper = processed_input[start_idx:end_idx]
|
227
|
+
suffix = processed_input[end_idx:]
|
228
|
+
|
229
|
+
# Strip all comment placeholders outside of the section of interest
|
230
|
+
prefix = re.sub(comment_pattern, "", prefix, flags=re.MULTILINE)
|
231
|
+
suffix = re.sub(comment_pattern, "", suffix, flags=re.MULTILINE)
|
232
|
+
temp_obj["outputs"] = [prefix + keeper + suffix]
|
233
|
+
temp_block = self._janus_object_to_codeblock(temp_obj, name)
|
234
|
+
translated_block = self.translate_block(temp_block, failure_path)
|
235
|
+
translated_block.previous_generations[-1] = obj
|
236
|
+
translated_block.original = self._janus_object_to_codeblock(obj, name)
|
237
|
+
results.append(translated_block)
|
238
|
+
else:
|
239
|
+
raise ValueError(f"Error: unrecognized janus object type: {type(o)}")
|
240
|
+
return results
|
janus/converter/partition.py
CHANGED
@@ -1,7 +1,4 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
|
3
1
|
from janus.converter.converter import Converter
|
4
|
-
from janus.language.block import TranslatedCodeBlock
|
5
2
|
from janus.parsers.partition_parser import PartitionParser
|
6
3
|
from janus.utils.logger import create_logger
|
7
4
|
|
@@ -11,17 +8,12 @@ log = create_logger(__name__)
|
|
11
8
|
class Partitioner(Converter):
|
12
9
|
def __init__(self, partition_token_limit: int, **kwargs):
|
13
10
|
super().__init__(**kwargs)
|
14
|
-
self.
|
11
|
+
self.set_prompts("partition")
|
15
12
|
self._load_model()
|
16
13
|
self._parser = PartitionParser(
|
17
14
|
token_limit=partition_token_limit,
|
18
15
|
model=self._llm,
|
19
16
|
)
|
20
17
|
self._target_language = self._source_language
|
21
|
-
self._target_suffix = self.
|
18
|
+
self._target_suffix = self._source_suffixes[0]
|
22
19
|
self._load_parameters()
|
23
|
-
|
24
|
-
def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
|
25
|
-
output_str = self._parser.parse_combined_output(block.complete_text)
|
26
|
-
out_path.parent.mkdir(parents=True, exist_ok=True)
|
27
|
-
out_path.write_text(output_str, encoding="utf-8")
|
janus/converter/requirements.py
CHANGED
@@ -1,8 +1,4 @@
|
|
1
|
-
import json
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
1
|
from janus.converter.document import Documenter
|
5
|
-
from janus.language.block import TranslatedCodeBlock
|
6
2
|
from janus.language.combine import ChunkCombiner
|
7
3
|
from janus.parsers.reqs_parser import RequirementsParser
|
8
4
|
from janus.utils.logger import create_logger
|
@@ -16,41 +12,9 @@ class RequirementsDocumenter(Documenter):
|
|
16
12
|
A class that translates code from one programming language to its requirements.
|
17
13
|
"""
|
18
14
|
|
19
|
-
def __init__(self, **kwargs):
|
20
|
-
super().__init__(**kwargs)
|
21
|
-
self.
|
15
|
+
def __init__(self, combine_output: bool = False, **kwargs):
|
16
|
+
super().__init__(combine_output=combine_output, **kwargs)
|
17
|
+
self.set_prompts("requirements")
|
22
18
|
self._combiner = ChunkCombiner()
|
23
19
|
self._parser = RequirementsParser()
|
24
|
-
|
25
|
-
@staticmethod
|
26
|
-
def get_prompt_replacements(block) -> dict[str, str]:
|
27
|
-
prompt_replacements: dict[str, str] = {"SOURCE_CODE": block.original.text}
|
28
|
-
return prompt_replacements
|
29
|
-
|
30
|
-
def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
|
31
|
-
"""Save a file to disk.
|
32
|
-
|
33
|
-
Arguments:
|
34
|
-
block: The `CodeBlock` to save to a file.
|
35
|
-
"""
|
36
|
-
output_list = list()
|
37
|
-
# For each chunk of code, get generation metadata, the text of the code,
|
38
|
-
# and the LLM generated requirements
|
39
|
-
blocks = [block for block in block.children] if len(block.children) else [block]
|
40
|
-
for block in blocks:
|
41
|
-
code = block.original.text
|
42
|
-
requirements = self._parser.parse_combined_output(block.complete_text)
|
43
|
-
metadata = dict(
|
44
|
-
retries=block.total_retries,
|
45
|
-
cost=block.total_cost,
|
46
|
-
processing_time=block.processing_time,
|
47
|
-
)
|
48
|
-
# Put them all in a top level 'output' key
|
49
|
-
output_list.append(
|
50
|
-
dict(metadata=metadata, code=code, requirements=requirements)
|
51
|
-
)
|
52
|
-
obj = dict(
|
53
|
-
output=output_list,
|
54
|
-
)
|
55
|
-
out_path.parent.mkdir(parents=True, exist_ok=True)
|
56
|
-
out_path.write_text(json.dumps(obj, indent=2), encoding="utf-8")
|
20
|
+
self._load_parameters()
|
janus/converter/translate.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1
1
|
from janus.converter.converter import Converter, run_if_changed
|
2
|
-
from janus.llm.models_info import MODEL_PROMPT_ENGINES
|
3
2
|
from janus.parsers.code_parser import CodeParser
|
4
|
-
from janus.prompts.prompt import SAME_OUTPUT
|
5
|
-
from janus.utils.enums import LANGUAGES
|
6
3
|
from janus.utils.logger import create_logger
|
7
4
|
|
8
5
|
log = create_logger(__name__)
|
@@ -29,13 +26,11 @@ class Translator(Converter):
|
|
29
26
|
max_prompts: The maximum number of prompts to try before giving up.
|
30
27
|
max_tokens: The maximum number of tokens the model will take in.
|
31
28
|
If unspecified, model's default max will be used.
|
32
|
-
|
33
|
-
(see janus/prompts/templates) or
|
29
|
+
prompt_templates: name of prompt template directories
|
30
|
+
(see janus/prompts/templates) or paths to directories.
|
34
31
|
"""
|
35
32
|
super().__init__(**kwargs)
|
36
33
|
|
37
|
-
self._target_version: str | None
|
38
|
-
|
39
34
|
self.set_target_language(
|
40
35
|
target_language=target_language,
|
41
36
|
target_version=target_version,
|
@@ -47,57 +42,6 @@ class Translator(Converter):
|
|
47
42
|
self._load_parser()
|
48
43
|
super()._load_parameters()
|
49
44
|
|
50
|
-
def set_target_language(
|
51
|
-
self, target_language: str, target_version: str | None
|
52
|
-
) -> None:
|
53
|
-
"""Validate and set the target language.
|
54
|
-
|
55
|
-
The affected objects will not be updated until translate() is called.
|
56
|
-
|
57
|
-
Arguments:
|
58
|
-
target_language: The target programming language.
|
59
|
-
target_version: The target version of the target programming language.
|
60
|
-
"""
|
61
|
-
target_language = target_language.lower()
|
62
|
-
if target_language not in LANGUAGES:
|
63
|
-
raise ValueError(
|
64
|
-
f"Invalid target language: {target_language}. "
|
65
|
-
"Valid target languages are found in `janus.utils.enums.LANGUAGES`."
|
66
|
-
)
|
67
|
-
self._target_language = target_language
|
68
|
-
self._target_version = target_version
|
69
|
-
# Taking the first suffix as the default for output files
|
70
|
-
self._target_suffix = f".{LANGUAGES[target_language]['suffixes'][0]}"
|
71
|
-
|
72
|
-
@run_if_changed(
|
73
|
-
"_prompt_template_name",
|
74
|
-
"_source_language",
|
75
|
-
"_target_language",
|
76
|
-
"_target_version",
|
77
|
-
"_model_name",
|
78
|
-
)
|
79
|
-
def _load_prompt(self) -> None:
|
80
|
-
"""Load the prompt according to this instance's attributes.
|
81
|
-
|
82
|
-
If the relevant fields have not been changed since the last time this
|
83
|
-
method was called, nothing happens.
|
84
|
-
"""
|
85
|
-
if self._prompt_template_name in SAME_OUTPUT:
|
86
|
-
if self._target_language != self._source_language:
|
87
|
-
raise ValueError(
|
88
|
-
f"Prompt template ({self._prompt_template_name}) suggests "
|
89
|
-
f"source and target languages should match, but do not "
|
90
|
-
f"({self._source_language} != {self._target_language})"
|
91
|
-
)
|
92
|
-
|
93
|
-
prompt_engine = MODEL_PROMPT_ENGINES[self._llm.short_model_id](
|
94
|
-
source_language=self._source_language,
|
95
|
-
target_language=self._target_language,
|
96
|
-
target_version=self._target_version,
|
97
|
-
prompt_template=self._prompt_template_name,
|
98
|
-
)
|
99
|
-
self._prompt = prompt_engine.prompt
|
100
|
-
|
101
45
|
@run_if_changed("_target_language")
|
102
46
|
def _load_parser(self) -> None:
|
103
47
|
"""Load the parser according to this instance's attributes.
|
janus/language/block.py
CHANGED
@@ -46,6 +46,7 @@ class CodeBlock:
|
|
46
46
|
embedding_id: Optional[str] = None,
|
47
47
|
affixes: Tuple[str, str] = ("", ""),
|
48
48
|
context_tags: dict[str, str] = {},
|
49
|
+
previous_generations: list["TranslatedCodeBlock"] = [],
|
49
50
|
) -> None:
|
50
51
|
self.id: Hashable = id
|
51
52
|
self.name: Optional[str] = name
|
@@ -65,6 +66,7 @@ class CodeBlock:
|
|
65
66
|
self.complete = True
|
66
67
|
self.omit_prefix = True
|
67
68
|
self.omit_suffix = False
|
69
|
+
self.previous_generations = previous_generations
|
68
70
|
|
69
71
|
if self.children:
|
70
72
|
self.children[0].omit_prefix = False
|
@@ -210,15 +212,16 @@ class TranslatedCodeBlock(CodeBlock):
|
|
210
212
|
TranslatedCodeBlock(child, language) for child in original.children
|
211
213
|
],
|
212
214
|
affixes=original.affixes,
|
215
|
+
previous_generations=original.previous_generations,
|
213
216
|
)
|
214
217
|
self.original = original
|
215
218
|
|
216
219
|
self.complete = original.complete
|
217
220
|
self.translated = False
|
218
|
-
self.cost = 0
|
221
|
+
self.cost = 0
|
219
222
|
self.num_requests = 0
|
220
223
|
self.tokens = 0
|
221
|
-
self.processing_time = 0
|
224
|
+
self.processing_time = 0
|
222
225
|
|
223
226
|
self.request_input_tokens = 0
|
224
227
|
self.request_output_tokens = 0
|
@@ -297,3 +300,29 @@ class TranslatedCodeBlock(CodeBlock):
|
|
297
300
|
if self.original.total_tokens
|
298
301
|
else 0
|
299
302
|
)
|
303
|
+
|
304
|
+
def to_codeblock(self) -> CodeBlock:
|
305
|
+
return CodeBlock(
|
306
|
+
id=self.id,
|
307
|
+
name=self.name,
|
308
|
+
node_type=self.node_type,
|
309
|
+
language=self.language,
|
310
|
+
text=self.text,
|
311
|
+
start_point=self.start_point,
|
312
|
+
end_point=self.end_point,
|
313
|
+
start_byte=self.start_byte,
|
314
|
+
end_byte=self.end_byte,
|
315
|
+
embedding_id=self.embedding_id,
|
316
|
+
tokens=self.tokens,
|
317
|
+
children=[child.to_codeblock() for child in self.children],
|
318
|
+
affixes=self.affixes,
|
319
|
+
previous_generations=self.previous_generations + [self],
|
320
|
+
)
|
321
|
+
|
322
|
+
def __iadd__(self, other):
|
323
|
+
self.cost += other.cost
|
324
|
+
self.num_requests += other.num_requests
|
325
|
+
self.processing_time += other.processing_time
|
326
|
+
self.request_input_tokens += other.request_input_tokens
|
327
|
+
self.request_output_tokens += other.request_output_tokens
|
328
|
+
return self
|