janus-llm 4.3.5__py3-none-any.whl → 4.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. janus/__init__.py +1 -1
  2. janus/cli/aggregate.py +2 -2
  3. janus/cli/cli.py +6 -0
  4. janus/cli/constants.py +6 -0
  5. janus/cli/diagram.py +36 -7
  6. janus/cli/document.py +10 -1
  7. janus/cli/llm.py +7 -3
  8. janus/cli/partition.py +10 -1
  9. janus/cli/pipeline.py +126 -0
  10. janus/cli/self_eval.py +10 -3
  11. janus/cli/translate.py +10 -1
  12. janus/converter/__init__.py +2 -0
  13. janus/converter/_tests/test_translate.py +6 -5
  14. janus/converter/chain.py +100 -0
  15. janus/converter/converter.py +467 -90
  16. janus/converter/diagram.py +12 -8
  17. janus/converter/document.py +17 -7
  18. janus/converter/evaluate.py +174 -147
  19. janus/converter/partition.py +6 -11
  20. janus/converter/passthrough.py +29 -0
  21. janus/converter/pool.py +74 -0
  22. janus/converter/requirements.py +7 -40
  23. janus/converter/translate.py +2 -58
  24. janus/language/_tests/test_combine.py +1 -0
  25. janus/language/block.py +115 -5
  26. janus/llm/model_callbacks.py +6 -0
  27. janus/llm/models_info.py +19 -0
  28. janus/metrics/_tests/test_reading.py +48 -4
  29. janus/metrics/_tests/test_rouge_score.py +5 -11
  30. janus/metrics/metric.py +47 -124
  31. janus/metrics/reading.py +48 -28
  32. janus/metrics/rouge_score.py +21 -34
  33. janus/parsers/_tests/test_code_parser.py +1 -1
  34. janus/parsers/code_parser.py +2 -2
  35. janus/parsers/eval_parsers/incose_parser.py +3 -3
  36. janus/parsers/reqs_parser.py +3 -3
  37. janus/prompts/templates/cyclic/human.txt +16 -0
  38. janus/prompts/templates/cyclic/system.txt +1 -0
  39. janus/prompts/templates/eval_prompts/incose/human.txt +1 -1
  40. janus/prompts/templates/extract_variables/human.txt +5 -0
  41. janus/prompts/templates/extract_variables/system.txt +1 -0
  42. {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA +14 -15
  43. {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/RECORD +46 -40
  44. {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/WHEEL +1 -1
  45. janus/metrics/_tests/test_llm.py +0 -90
  46. janus/metrics/llm_metrics.py +0 -202
  47. {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/LICENSE +0 -0
  48. {janus_llm-4.3.5.dist-info → janus_llm-4.5.4.dist-info}/entry_points.txt +0 -0
janus/converter/diagram.py
@@ -12,8 +12,10 @@ class DiagramGenerator(Documenter):
 
     def __init__(
         self,
-        diagram_type="Activity",
-        add_documentation=False,
+        diagram_type: str = "Activity",
+        add_documentation: bool = False,
+        extract_variables: bool = False,
+        output_type: str = "diagram",
         **kwargs,
     ) -> None:
         """Initialize the DiagramGenerator class
@@ -27,25 +29,27 @@ class DiagramGenerator(Documenter):
         self._add_documentation = add_documentation
         self._documenter = Documenter(**kwargs)
 
+        kwargs.update(dict(output_type=output_type))
         super().__init__(**kwargs)
-
-        self.set_prompt("diagram_with_documentation" if add_documentation else "diagram")
+        prompts = []
+        if extract_variables:
+            prompts.append("extract_variables")
+        prompts += ["diagram_with_documentation" if add_documentation else "diagram"]
+        self.set_prompts(prompts)
         self._parser = UMLSyntaxParser(language="plantuml")
 
         self._load_parameters()
 
-    def _load_prompt(self):
-        super()._load_prompt()
-        self._prompt = self._prompt.partial(DIAGRAM_TYPE=self._diagram_type)
-
     def _input_runnable(self) -> Runnable:
         if self._add_documentation:
             return RunnableParallel(
                 SOURCE_CODE=self._parser.parse_input,
                 DOCUMENTATION=self._documenter.chain,
                 context=self._retriever,
+                DIAGRAM_TYPE=lambda x: self._diagram_type,
             )
         return RunnableParallel(
             SOURCE_CODE=self._parser.parse_input,
             context=self._retriever,
+            DIAGRAM_TYPE=lambda x: self._diagram_type,
         )
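
Net effect: DIAGRAM_TYPE is now injected at runtime through the input runnable rather than bound into the prompt with partial, and a variable-extraction pass can be chained ahead of diagram generation. A minimal construction sketch; the keyword names come from the new __init__ signature above, while the values are illustrative:

    from janus.converter.diagram import DiagramGenerator

    # Illustrative values; keyword names are taken from the hunk above.
    generator = DiagramGenerator(
        diagram_type="Sequence",   # forwarded to the prompt as DIAGRAM_TYPE
        add_documentation=True,    # also runs the Documenter chain
        extract_variables=True,    # new: prepends the extract_variables prompt
        output_type="diagram",
    )
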
janus/converter/document.py
@@ -15,11 +15,15 @@ log = create_logger(__name__)
 
 class Documenter(Converter):
     def __init__(
-        self, source_language: str = "fortran", drop_comments: bool = True, **kwargs
+        self,
+        source_language: str = "fortran",
+        drop_comments: bool = True,
+        output_type: str = "documentation",
+        **kwargs,
     ):
-        kwargs.update(source_language=source_language)
+        kwargs.update(source_language=source_language, output_type=output_type)
         super().__init__(**kwargs)
-        self.set_prompt("document")
+        self.set_prompts("document")
 
         if drop_comments:
             comment_node_type = LANGUAGES[source_language].get(
@@ -31,27 +35,33 @@ class Documenter(Converter):
 
 
 class MultiDocumenter(Documenter):
-    def __init__(self, **kwargs):
+    def __init__(self, output_type: str = "multidocumentation", **kwargs):
+        kwargs.update(output_type=output_type)
         super().__init__(**kwargs)
-        self.set_prompt("multidocument")
+        self.set_prompts("multidocument")
         self._combiner = JsonCombiner()
         self._parser = MultiDocumentationParser()
 
+        self._load_parameters()
+
 
 class ClozeDocumenter(Documenter):
     def __init__(
         self,
         comments_per_request: int | None = None,
+        output_type: str = "cloze_comments",
         **kwargs,
     ) -> None:
-        kwargs.update(drop_comments=False)
+        kwargs.update(drop_comments=False, output_type=output_type)
         super().__init__(**kwargs)
-        self.set_prompt("document_cloze")
+        self.set_prompts("document_cloze")
         self._combiner = JsonCombiner()
         self._parser = ClozeDocumentationParser()
 
         self.comments_per_request = comments_per_request
 
+        self._load_parameters()
+
     def _add_translation(self, block: TranslatedCodeBlock):
         if block.translated:
             return
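
Each documenter now stamps its results with a distinct output_type ("documentation", "multidocumentation", "cloze_comments"), which is what the new evaluators match against via input_types. A construction sketch with illustrative arguments:

    from janus.converter.document import ClozeDocumenter

    documenter = ClozeDocumenter(
        comments_per_request=10,  # illustrative batch size
        # output_type defaults to "cloze_comments", the type that
        # InlineCommentEvaluator accepts by default (see below)
    )
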
janus/converter/evaluate.py
@@ -1,15 +1,14 @@
 import json
 import re
-from copy import deepcopy
+from pathlib import Path
 
 from langchain_core.runnables import Runnable, RunnableLambda, RunnableParallel
 
 from janus.converter.converter import Converter
-from janus.language.block import TranslatedCodeBlock
+from janus.language.block import CodeBlock, TranslatedCodeBlock
 from janus.language.combine import JsonCombiner
 from janus.parsers.eval_parsers.incose_parser import IncoseParser
 from janus.parsers.eval_parsers.inline_comment_parser import InlineCommentParser
-from janus.parsers.parser import JanusParserException
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
@@ -35,6 +34,7 @@ class Evaluator(Converter):
             model_arguments: Additional arguments to pass to the LLM constructor.
             max_prompts: The maximum number of prompts to try before giving up.
         """
+        kwargs.update(use_janus_inputs=True)
         super().__init__(**kwargs)
         self._combiner = JsonCombiner()
         self._load_parameters()
@@ -50,7 +50,13 @@ class RequirementEvaluator(Evaluator):
 
     """
 
-    def __init__(self, eval_items_per_request: int | None = None, **kwargs) -> None:
+    def __init__(
+        self,
+        eval_items_per_request: int | None = None,
+        input_types: str | set[str] = set(["requirements"]),
+        output_type: str = "requirements_eval",
+        **kwargs,
+    ) -> None:
         """Initialize the Evaluator class
 
         Arguments:
@@ -59,10 +65,11 @@ class RequirementEvaluator(Evaluator):
             model_arguments: Additional arguments to pass to the LLM constructor.
             max_prompts: The maximum number of prompts to try before giving up.
         """
+        kwargs.update(input_types=input_types, output_type=output_type)
         super().__init__(**kwargs)
         self.eval_items_per_request = eval_items_per_request
         self._parser = IncoseParser()
-        self.set_prompt("eval_prompts/incose")
+        self.set_prompts("eval_prompts/incose")
 
     def _input_runnable(self) -> Runnable:
         def _get_code(json_text: str) -> str:
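
Evaluators now run on janus outputs (use_janus_inputs=True in the base class above) and declare which prior generation types they accept. A construction sketch; the argument value is illustrative:

    from janus.converter.evaluate import RequirementEvaluator

    evaluator = RequirementEvaluator(
        eval_items_per_request=5,  # None sends all requirements in one request
        # defaults: input_types={"requirements"}, output_type="requirements_eval"
    )
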
@@ -77,76 +84,67 @@ class RequirementEvaluator(Evaluator):
                 context=self._retriever,
             )
 
-    def _add_translation(self, block: TranslatedCodeBlock):
-        if block.translated:
-            return
-
-        if block.original.text is None:
-            block.translated = True
-            return
-
-        if self.eval_items_per_request is None:
-            return super()._add_translation(block)
-
-        input_obj = json.loads(block.original.text)
-        requirements = input_obj.get("requirements", [])
-
+    def translate_block(self, input_block: CodeBlock, failure_path: Path | None = None):
+        if len(input_block.previous_generations) == 0:
+            raise ValueError(
+                "Error: Evaluating requirements without previous generations"
+            )
+        if isinstance(input_block.previous_generations[-1], dict):
+            input_str = input_block.previous_generations[-1]["input"]
+        else:
+            input_str = input_block.previous_generations[-1].original.text
+        requirements = json.loads(input_block.text)
+        # The requirements are often a list of lists
+        if isinstance(requirements[0], list):
+            requirements = requirements[0]
         if not requirements:
-            log.debug(f"[{block.name}] Skipping empty block")
-            block.translated = True
-            block.text = None
-            block.complete = True
-            return
-
-        # For some reason requirements objects are in nested lists?
-        while isinstance(requirements[0], list):
-            requirements = [r for lst in requirements for r in lst]
-
-        if len(requirements) <= self.eval_items_per_request:
-            input_obj["requirements"] = requirements
-            block.original.text = json.dumps(input_obj)
-            return super()._add_translation(block)
-
-        block.processing_time = 0
-        block.cost = 0
-        obj = {}
-        for i in range(0, len(requirements), self.eval_items_per_request):
-            # Build a new TranslatedBlock using the new working text
-            working_requirements = requirements[i : i + self.eval_items_per_request]
-            working_copy = deepcopy(block.original)
-            working_obj = json.loads(working_copy.text)  # type: ignore
-            working_obj["requirements"] = working_requirements
-            working_copy.text = json.dumps(working_obj)
-            working_block = TranslatedCodeBlock(working_copy, self._target_language)
-
-            # Run the LLM on the working text
-            try:
-                super()._add_translation(working_block)
-            except JanusParserException as e:
-                block.text += "\n==============\n" + working_block.text
-                block.tokens = self._llm.get_num_tokens(block.text)
-                raise e
-            finally:
-                # Update metadata to include for all runs
-                block.num_requests += working_block.num_requests
-                block.cost += working_block.cost
-                block.processing_time += working_block.processing_time
-                block.request_input_tokens += working_block.request_input_tokens
-                block.request_output_tokens += working_block.request_output_tokens
-
-            # Update the output text to merge this section's output in
-            obj.update(json.loads(working_block.text))
-            # intermediate result of block,
-            # will be overwritten if file completes successfully
-            block.text = json.dumps(obj)
-
-        block.text = json.dumps(obj)
-        block.tokens = self._llm.get_num_tokens(block.text)
-        block.translated = True
-
-        log.debug(
-            f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
-        )
+            log.debug(f"[{input_block.name}] Skipping empty output")
+            return []
+        if (
+            not self.eval_items_per_request
+            or len(requirements) < self.eval_items_per_request
+        ):
+            obj_str = json.dumps(
+                dict(
+                    requirements=requirements,
+                    code=input_str,
+                )
+            )
+            temp_block = self._split_text(obj_str, input_block.name)
+            translated_block = super().translate_block(temp_block, failure_path)
+            translated_block.original = input_block
+            translated_block.previous_generations = input_block.previous_generations
+            return translated_block
+        else:
+            translated_blocks = []
+            translated_str: str
+            translate_obj = {}
+            for i in range(0, len(requirements), self.eval_items_per_request):
+                working_requirements = requirements[i : i + self.eval_items_per_request]
+                obj_str = json.dumps(
+                    dict(
+                        requirements=working_requirements,
+                        code=input_str,
+                    )
+                )
+                temp_block = self._split_text(obj_str, input_block.name)
+                translated_block = super().translate_block(temp_block, failure_path)
+                translated_blocks.append(translated_block)
+                translate_obj.update(json.loads(translated_block.text))
+                translated_str = json.dumps(translate_obj)
+
+            translated_block = TranslatedCodeBlock(
+                input_block,
+                self._target_language,
+                self,
+                self._output_type,
+                self._output_label,
+            )
+            translated_block.text = translated_str
+            translated_block.children = translated_blocks
+            translated_block.tokens = self._llm.get_num_tokens(translated_str)
+            translated_block.translated = True
+            return translated_block
 
 
 class InlineCommentEvaluator(Evaluator):
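
The rewritten translate_block batches requirements into groups of eval_items_per_request, evaluates each group separately, and merges the per-group JSON objects into one result whose children are the per-batch blocks. The batch-and-merge core in isolation, as a runnable janus-free sketch (all names here are illustrative):

    import json

    def merge_batched_evals(requirements, code, items_per_request, evaluate):
        # `evaluate` stands in for the LLM round trip: a JSON string of
        # {"requirements": [...], "code": ...} in, a JSON object string out.
        merged = {}
        for i in range(0, len(requirements), items_per_request):
            batch = requirements[i : i + items_per_request]
            obj_str = json.dumps(dict(requirements=batch, code=code))
            merged.update(json.loads(evaluate(obj_str)))
        return json.dumps(merged)

    def fake_eval(obj_str):
        # Toy round trip: score each requirement by its length.
        return json.dumps({r: len(r) for r in json.loads(obj_str)["requirements"]})

    print(merge_batched_evals(["R1", "R2!", "R3"], "x = 1", 2, fake_eval))
    # {"R1": 2, "R2!": 3, "R3": 2}
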
@@ -156,7 +154,13 @@ class InlineCommentEvaluator(Evaluator):
     with an associated prompt.
     """
 
-    def __init__(self, eval_items_per_request: int | None = None, **kwargs) -> None:
+    def __init__(
+        self,
+        eval_items_per_request: int | None = None,
+        input_types: str | set[str] = set(["cloze_comments"]),
+        output_type: str = "cloze_comments_eval",
+        **kwargs,
+    ) -> None:
         """Initialize the Evaluator class
 
         Arguments:
@@ -165,84 +169,107 @@ class InlineCommentEvaluator(Evaluator):
             model_arguments: Additional arguments to pass to the LLM constructor.
             max_prompts: The maximum number of prompts to try before giving up.
         """
+        kwargs.update(input_types=input_types, output_type=output_type)
         super().__init__(**kwargs)
         self._combiner = JsonCombiner()
-        self._load_parameters()
         self._parser = InlineCommentParser()
-        self.set_prompt("eval_prompts/inline_comments")
+        self.set_prompts("eval_prompts/inline_comments")
         self.eval_items_per_request = eval_items_per_request
+        self._load_parameters()
 
-    def _add_translation(self, block: TranslatedCodeBlock):
-        if block.translated:
-            return
-
-        if block.original.text is None:
-            block.translated = True
-            return
-
-        if self.eval_items_per_request is None:
-            return super()._add_translation(block)
-
+    def _process_comments(self, input_str: str, generated_comments: dict[str, str]):
+        comment_patterns = [
+            (r"<BLOCK_COMMENT (\w{8})>", "<BLOCK_COMMENT {}>", "<BLOCK_COMMENT {}>"),
+            (r"<INLINE_COMMENT (\w{8})>", "<INLINE_COMMENT {}>", "<INLINE_COMMENT {}>"),
+            (r"<MODULE (\w{8})>", "<MODULE {}>", "<BLOCK_COMMENT {}>"),
+        ]
+        missing_comments = 0
+        for pattern, find_template, repl_template in comment_patterns:
+            matches = re.findall(pattern, input_str)
+
+            for comment_id in matches:
+                find_tag = find_template.format(comment_id)
+                repl_tag = repl_template.format(comment_id)
+
+                if comment_id not in generated_comments:
+                    missing_comments += 1
+                comment = generated_comments.get(comment_id, "[comment missing]")
+                comment = comment.replace("\n", "\\n")
+
+                # Replace the tag in the code with the comment appended.
+                input_str = input_str.replace(find_tag, f"{repl_tag} {comment}")
+        processed_str = re.sub(r"\s*<JANUS_PARTITION>\s*\n", "\n", input_str)
+        return processed_str.strip("\n"), missing_comments
+
+    def translate_block(self, input_block: CodeBlock, failure_path: Path | None = None):
         comment_pattern = r"<(?:INLINE|BLOCK)_COMMENT \w{8}>.*$"
-        comments = list(
-            re.finditer(comment_pattern, block.original.text, flags=re.MULTILINE)
+        if len(input_block.previous_generations) == 0:
+            raise ValueError(
+                "Error: cannot evaluate block, no previous generations found"
+            )
+        if isinstance(input_block.previous_generations[-1], dict):
+            input_str = input_block.previous_generations[-1]["input"]
+        else:
+            input_str = input_block.previous_generations[-1].original.text
+        generated_comments = json.loads(input_block.text)
+        processed_input, missing_comments = self._process_comments(
+            input_str, generated_comments
         )
-
+        if missing_comments:
+            log.info(f"[{input_block.name}] Warning: missing {missing_comments} comments")
+        comments = list(re.finditer(comment_pattern, processed_input, flags=re.MULTILINE))
         if not comments:
-            log.info(f"[{block.name}] Skipping commentless block")
-            block.translated = True
-            block.text = None
-            block.complete = True
-            return
-
-        if len(comments) <= self.eval_items_per_request:
-            return super()._add_translation(block)
-
-        comment_group_indices = list(range(0, len(comments), self.eval_items_per_request))
-        log.debug(
-            f"[{block.name}] Block contains more than {self.eval_items_per_request}"
-            f" comments, splitting {len(comments)} comments into"
-            f" {len(comment_group_indices)} groups"
-        )
-
-        block.processing_time = 0
-        block.cost = 0
-        block.retries = 0
-        obj = {}
-        for i in range(0, len(comments), self.eval_items_per_request):
-            # Split the text into the section containing comments of interest,
-            # all the text prior to those comments, and all the text after them
-            working_comments = comments[i : i + self.eval_items_per_request]
-            start_idx = working_comments[0].start()
-            end_idx = working_comments[-1].end()
-            prefix = block.original.text[:start_idx]
-            keeper = block.original.text[start_idx:end_idx]
-            suffix = block.original.text[end_idx:]
-
-            # Strip all comment placeholders outside of the section of interest
-            prefix = re.sub(comment_pattern, "", prefix, flags=re.MULTILINE)
-            suffix = re.sub(comment_pattern, "", suffix, flags=re.MULTILINE)
-
-            # Build a new TranslatedBlock using the new working text
-            working_copy = deepcopy(block.original)
-            working_copy.text = prefix + keeper + suffix
-            working_block = TranslatedCodeBlock(working_copy, self._target_language)
-
-            # Run the LLM on the working text
-            super()._add_translation(working_block)
-
-            # Update metadata to include for all runs
-            block.retries += working_block.retries
-            block.cost += working_block.cost
-            block.processing_time += working_block.processing_time
-
-            # Update the output text to merge this section's output in
-            obj.update(json.loads(working_block.text))
-
-        block.text = json.dumps(obj)
-        block.tokens = self._llm.get_num_tokens(block.text)
-        block.translated = True
-
-        log.debug(
-            f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
-        )
+            log.info(f"[{input_block.name}] Skipping commentless block")
+            return []
+        if (
+            self.eval_items_per_request is None
+            or len(comments) < self.eval_items_per_request
+        ):
+            temp_block = self._split_text(processed_input, input_block.name)
+            translated_block = super().translate_block(temp_block, failure_path)
+            translated_block.original = input_block
+            translated_block.previous_generations = input_block.previous_generations
+            return translated_block
+        else:
+            comment_group_indices = list(
+                range(0, len(comments), self.eval_items_per_request)
+            )
+            log.debug(
+                f"[{input_block.name}]"
+                f" Block contains more than {self.eval_items_per_request}"
+                f" comments, splitting {len(comments)} comments into"
+                f" {len(comment_group_indices)} groups"
+            )
+            translated_blocks = []
+            translated_str: str
+            translate_obj = {}
+            for comment_ind in comment_group_indices:
+                working_comments = comments[
+                    comment_ind : comment_ind + self.eval_items_per_request
+                ]
+                start_idx = working_comments[0].start()
+                end_idx = working_comments[-1].end()
+                prefix = processed_input[:start_idx]
+                keeper = processed_input[start_idx:end_idx]
+                suffix = processed_input[end_idx:]
+
+                # Strip all comment placeholders outside of the section of interest
+                prefix = re.sub(comment_pattern, "", prefix, flags=re.MULTILINE)
+                suffix = re.sub(comment_pattern, "", suffix, flags=re.MULTILINE)
+                temp_block = self._split_text(prefix + keeper + suffix, input_block.name)
+                translated_block = super().translate_block(temp_block, failure_path)
+                translated_blocks.append(translated_block)
+                translate_obj.update(json.loads(translated_block.text))
+                translated_str = json.dumps(translate_obj)
+            translated_block = TranslatedCodeBlock(
+                input_block,
+                self._target_language,
+                self,
+                self._output_type,
+                self._output_label,
+            )
+            translated_block.children = translated_blocks
+            translated_block.text = translated_str
+            translated_block.tokens = self._llm.get_num_tokens(translated_str)
+            translated_block.translated = True
+            return translated_block
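
_process_comments splices the generated comments back into the source by tag ID before the evaluation prompt runs, counting any IDs the model failed to produce. The same substitution as a self-contained sketch (the regexes are copied from the hunk above; the driver lines are illustrative):

    import re

    def splice_comments(input_str: str, generated: dict[str, str]):
        patterns = [
            (r"<BLOCK_COMMENT (\w{8})>", "<BLOCK_COMMENT {}>", "<BLOCK_COMMENT {}>"),
            (r"<INLINE_COMMENT (\w{8})>", "<INLINE_COMMENT {}>", "<INLINE_COMMENT {}>"),
            (r"<MODULE (\w{8})>", "<MODULE {}>", "<BLOCK_COMMENT {}>"),  # MODULE -> block
        ]
        missing = 0
        for pattern, find_tpl, repl_tpl in patterns:
            for comment_id in re.findall(pattern, input_str):
                if comment_id not in generated:
                    missing += 1
                comment = generated.get(comment_id, "[comment missing]")
                comment = comment.replace("\n", "\\n")
                # Append the generated comment after its placeholder tag.
                find_tag = find_tpl.format(comment_id)
                repl_tag = repl_tpl.format(comment_id)
                input_str = input_str.replace(find_tag, f"{repl_tag} {comment}")
        return input_str, missing

    print(splice_comments("i += 1  <INLINE_COMMENT deadbeef>", {"deadbeef": "bump i"}))
    # ('i += 1  <INLINE_COMMENT deadbeef> bump i', 0)
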
janus/converter/partition.py
@@ -1,7 +1,4 @@
-from pathlib import Path
-
 from janus.converter.converter import Converter
-from janus.language.block import TranslatedCodeBlock
 from janus.parsers.partition_parser import PartitionParser
 from janus.utils.logger import create_logger
 
@@ -9,19 +6,17 @@ log = create_logger(__name__)
 
 
 class Partitioner(Converter):
-    def __init__(self, partition_token_limit: int, **kwargs):
+    def __init__(
+        self, partition_token_limit: int, output_type: str = "partition", **kwargs
+    ):
+        kwargs.update(output_type=output_type)
         super().__init__(**kwargs)
-        self.set_prompt("partition")
+        self.set_prompts("partition")
         self._load_model()
         self._parser = PartitionParser(
             token_limit=partition_token_limit,
             model=self._llm,
         )
         self._target_language = self._source_language
-        self._target_suffix = self._source_suffix
+        self._target_suffix = self._source_suffixes[0]
         self._load_parameters()
-
-    def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
-        output_str = self._parser.parse_combined_output(block.complete_text)
-        out_path.parent.mkdir(parents=True, exist_ok=True)
-        out_path.write_text(output_str, encoding="utf-8")
janus/converter/passthrough.py (new file)
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+from janus.converter.converter import Converter
+from janus.language.block import CodeBlock, TranslatedCodeBlock
+
+
+class ConverterPassthrough(Converter):
+    def __init__(self, **kwargs) -> None:
+        super().__init__(**kwargs)
+
+    def translate_block(
+        self, input_block: CodeBlock, failure_path: Path | None = None
+    ) -> TranslatedCodeBlock:
+        self._output_label = input_block.block_label
+        self._output_type = input_block.block_type
+        res = super().translate_block(input_block, failure_path)
+        if isinstance(input_block.previous_generations[-1], dict):
+            res.original = self._split_text(
+                input_block.previous_generations[-1]["input"], res.name
+            )
+        else:
+            res.original = input_block.previous_generations[-1].original
+        res.previous_generations = input_block.previous_generations[:-1]
+        return res
+
+    def _add_translation(self, block: TranslatedCodeBlock) -> None:
+        block.text = block.original.text
+        block.tokens = block.original.tokens
+        block.translated = True
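
ConverterPassthrough re-emits an earlier generation without calling the LLM: _add_translation copies the original text verbatim, and translate_block re-parents the result onto the generation before the one consumed. A construction sketch (the argument is illustrative):

    from janus.converter.passthrough import ConverterPassthrough

    # Re-emit saved janus outputs (e.g. to recombine or re-serialize them)
    # without paying for another model call.
    passthrough = ConverterPassthrough(source_language="fortran")
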
janus/converter/pool.py (new file)
@@ -0,0 +1,74 @@
+from pathlib import Path
+
+from janus.converter.converter import Converter
+from janus.language.block import BlockCollection, CodeBlock, TranslatedCodeBlock
+
+
+class ConverterPool(Converter):
+    def __init__(self, *args, **kwargs):
+        if len(args) == 0:
+            raise ValueError("Error: Converter chain must be passed at least 1 converter")
+        for converter in args:
+            if not isinstance(converter, Converter):
+                raise ValueError(f"Error: unrecognized type: {type(converter)}")
+        self._converters = args
+        if "source_language" in kwargs:
+            for c in self._converters:
+                c.set_source_language(kwargs["source_language"])
+        if "model" in kwargs:
+            for c in self._converters:
+                c.set_model(kwargs["model"])
+        super().__init__(**kwargs)
+
+    def translate_blocks(
+        self, input_blocks: CodeBlock | BlockCollection, failure_path: Path | None = None
+    ):
+        output_blocks = []
+        for c in self._converters:
+            collection = c.translate_blocks(input_blocks)
+            for b in collection.blocks:
+                c._combiner.combine(b)
+            output_blocks += collection.blocks
+        return BlockCollection(output_blocks, input_blocks.previous_generations)
+
+    def _get_output_obj(
+        self,
+        block: TranslatedCodeBlock | BlockCollection | dict,
+        combine_children: bool = True,
+        include_previous_outputs: bool = True,
+    ) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
+        outputs = []
+        for b in block.blocks:
+            for c in self._converters:
+                if c == b.converter:
+                    outputs.append(c._get_output_obj(b, c._combine_output, False))
+                    break
+
+        def _get_input(block):
+            if isinstance(block, BlockCollection):
+                return self._combine_inputs([_get_input(b) for b in block.blocks])
+            return block.original.text or ""
+
+        out = dict(
+            input=_get_input(block),
+            metadata=dict(
+                cost=block.total_cost,
+                processing_time=block.total_processing_time,
+                num_requests=block.total_num_requests,
+                input_tokens=block.total_request_input_tokens,
+                output_tokens=block.total_request_output_tokens,
+                converter_name=self.__class__.__name__,
+                type=block.block_type,
+                label=block.block_label,
+            ),
+            outputs=outputs,
+        )
+        if include_previous_outputs and len(block.previous_generations) > 0:
+            intermediate_outputs = [
+                self._get_output_obj(g, combine_children, False)
+                for g in block.previous_generations
+                if isinstance(g, dict)
+            ]
+            if len(intermediate_outputs) > 0:
+                out["intermediate_outputs"] = intermediate_outputs
+        return out
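
ConverterPool fans one input out to several converters and returns all results in a single BlockCollection, so one pass can produce, for example, documentation and a diagram together. A usage sketch under the same caveats as above; the pool forwards source_language and model to each member, as shown in its __init__:

    from janus.converter.diagram import DiagramGenerator
    from janus.converter.document import Documenter
    from janus.converter.pool import ConverterPool

    pool = ConverterPool(
        Documenter(),
        DiagramGenerator(diagram_type="Activity"),
        source_language="fortran",  # pushed to every member converter
        model="gpt-4o",             # illustrative model name
    )
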