janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. janus/__init__.py +1 -1
  2. janus/__main__.py +1 -1
  3. janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
  4. janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
  5. janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
  6. janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
  7. janus/_tests/test_cli.py +3 -2
  8. janus/cli/aggregate.py +135 -0
  9. janus/cli/cli.py +117 -0
  10. janus/cli/constants.py +49 -0
  11. janus/cli/database.py +289 -0
  12. janus/cli/diagram.py +207 -0
  13. janus/cli/document.py +183 -0
  14. janus/cli/embedding.py +122 -0
  15. janus/cli/llm.py +191 -0
  16. janus/cli/partition.py +134 -0
  17. janus/cli/pipeline.py +123 -0
  18. janus/cli/self_eval.py +147 -0
  19. janus/cli/translate.py +192 -0
  20. janus/converter/__init__.py +1 -1
  21. janus/converter/_tests/test_translate.py +7 -5
  22. janus/converter/chain.py +180 -0
  23. janus/converter/converter.py +444 -153
  24. janus/converter/diagram.py +8 -6
  25. janus/converter/document.py +27 -16
  26. janus/converter/evaluate.py +143 -144
  27. janus/converter/partition.py +2 -10
  28. janus/converter/requirements.py +4 -40
  29. janus/converter/translate.py +3 -59
  30. janus/embedding/collections.py +1 -1
  31. janus/language/alc/_tests/alc.asm +3779 -0
  32. janus/language/binary/_tests/hello.bin +0 -0
  33. janus/language/block.py +78 -14
  34. janus/language/file.py +1 -1
  35. janus/language/mumps/_tests/mumps.m +235 -0
  36. janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
  37. janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
  38. janus/language/treesitter/_tests/languages/matlab.m +225 -0
  39. janus/llm/models_info.py +9 -1
  40. janus/metrics/_tests/asm_test_file.asm +10 -0
  41. janus/metrics/_tests/mumps_test_file.m +6 -0
  42. janus/metrics/_tests/test_treesitter_metrics.py +1 -1
  43. janus/metrics/metric.py +47 -124
  44. janus/metrics/prompts/clarity.txt +8 -0
  45. janus/metrics/prompts/completeness.txt +16 -0
  46. janus/metrics/prompts/faithfulness.txt +10 -0
  47. janus/metrics/prompts/hallucination.txt +16 -0
  48. janus/metrics/prompts/quality.txt +8 -0
  49. janus/metrics/prompts/readability.txt +16 -0
  50. janus/metrics/prompts/usefulness.txt +16 -0
  51. janus/parsers/code_parser.py +4 -4
  52. janus/parsers/doc_parser.py +12 -9
  53. janus/parsers/parser.py +7 -0
  54. janus/parsers/partition_parser.py +6 -4
  55. janus/parsers/reqs_parser.py +11 -8
  56. janus/parsers/uml.py +5 -4
  57. janus/prompts/prompt.py +2 -2
  58. janus/prompts/templates/README.md +30 -0
  59. janus/prompts/templates/basic_aggregation/human.txt +6 -0
  60. janus/prompts/templates/basic_aggregation/system.txt +1 -0
  61. janus/prompts/templates/basic_refinement/human.txt +14 -0
  62. janus/prompts/templates/basic_refinement/system.txt +1 -0
  63. janus/prompts/templates/diagram/human.txt +9 -0
  64. janus/prompts/templates/diagram/system.txt +1 -0
  65. janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
  66. janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
  67. janus/prompts/templates/document/human.txt +10 -0
  68. janus/prompts/templates/document/system.txt +1 -0
  69. janus/prompts/templates/document_cloze/human.txt +11 -0
  70. janus/prompts/templates/document_cloze/system.txt +1 -0
  71. janus/prompts/templates/document_cloze/variables.json +4 -0
  72. janus/prompts/templates/document_cloze/variables_asm.json +4 -0
  73. janus/prompts/templates/document_inline/human.txt +13 -0
  74. janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
  75. janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
  76. janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
  77. janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
  78. janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
  79. janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
  80. janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
  81. janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
  82. janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
  83. janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
  84. janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
  85. janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
  86. janus/prompts/templates/multidocument/human.txt +15 -0
  87. janus/prompts/templates/multidocument/system.txt +1 -0
  88. janus/prompts/templates/partition/human.txt +22 -0
  89. janus/prompts/templates/partition/system.txt +1 -0
  90. janus/prompts/templates/partition/variables.json +4 -0
  91. janus/prompts/templates/pseudocode/human.txt +7 -0
  92. janus/prompts/templates/pseudocode/system.txt +7 -0
  93. janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
  94. janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
  95. janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
  96. janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
  97. janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
  98. janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
  99. janus/prompts/templates/refinement/hallucination/human.txt +13 -0
  100. janus/prompts/templates/refinement/hallucination/system.txt +1 -0
  101. janus/prompts/templates/refinement/reflection/human.txt +15 -0
  102. janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
  103. janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
  104. janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
  105. janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
  106. janus/prompts/templates/refinement/reflection/system.txt +1 -0
  107. janus/prompts/templates/refinement/revision/human.txt +16 -0
  108. janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
  109. janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
  110. janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
  111. janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
  112. janus/prompts/templates/refinement/revision/system.txt +1 -0
  113. janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
  114. janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
  115. janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
  116. janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
  117. janus/prompts/templates/requirements/human.txt +13 -0
  118. janus/prompts/templates/requirements/system.txt +2 -0
  119. janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
  120. janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
  121. janus/prompts/templates/simple/human.txt +16 -0
  122. janus/prompts/templates/simple/system.txt +3 -0
  123. janus/refiners/format.py +49 -0
  124. janus/refiners/refiner.py +113 -4
  125. janus/utils/enums.py +127 -112
  126. janus/utils/logger.py +2 -0
  127. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
  128. janus_llm-4.4.5.dist-info/RECORD +210 -0
  129. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
  130. janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
  131. janus/cli.py +0 -1488
  132. janus/metrics/_tests/test_llm.py +0 -90
  133. janus/metrics/llm_metrics.py +0 -202
  134. janus_llm-4.3.1.dist-info/RECORD +0 -115
  135. janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
  136. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
@@ -14,6 +14,7 @@ class DiagramGenerator(Documenter):
14
14
  self,
15
15
  diagram_type="Activity",
16
16
  add_documentation=False,
17
+ extract_variables=False,
17
18
  **kwargs,
18
19
  ) -> None:
19
20
  """Initialize the DiagramGenerator class
@@ -28,24 +29,25 @@ class DiagramGenerator(Documenter):
28
29
  self._documenter = Documenter(**kwargs)
29
30
 
30
31
  super().__init__(**kwargs)
31
-
32
- self.set_prompt("diagram_with_documentation" if add_documentation else "diagram")
32
+ prompts = []
33
+ if extract_variables:
34
+ prompts.append("extract_variables")
35
+ prompts += ["diagram_with_documentation" if add_documentation else "diagram"]
36
+ self.set_prompts(prompts)
33
37
  self._parser = UMLSyntaxParser(language="plantuml")
34
38
 
35
39
  self._load_parameters()
36
40
 
37
- def _load_prompt(self):
38
- super()._load_prompt()
39
- self._prompt = self._prompt.partial(DIAGRAM_TYPE=self._diagram_type)
40
-
41
41
  def _input_runnable(self) -> Runnable:
42
42
  if self._add_documentation:
43
43
  return RunnableParallel(
44
44
  SOURCE_CODE=self._parser.parse_input,
45
45
  DOCUMENTATION=self._documenter.chain,
46
46
  context=self._retriever,
47
+ DIAGRAM_TYPE=lambda x: self._diagram_type,
47
48
  )
48
49
  return RunnableParallel(
49
50
  SOURCE_CODE=self._parser.parse_input,
50
51
  context=self._retriever,
52
+ DIAGRAM_TYPE=lambda x: self._diagram_type,
51
53
  )
@@ -5,10 +5,8 @@ from copy import deepcopy
5
5
  from janus.converter.converter import Converter
6
6
  from janus.language.block import TranslatedCodeBlock
7
7
  from janus.language.combine import JsonCombiner
8
- from janus.parsers.doc_parser import (
9
- MadlibsDocumentationParser,
10
- MultiDocumentationParser,
11
- )
8
+ from janus.parsers.doc_parser import ClozeDocumentationParser, MultiDocumentationParser
9
+ from janus.parsers.parser import JanusParserException
12
10
  from janus.utils.enums import LANGUAGES
13
11
  from janus.utils.logger import create_logger
14
12
 
@@ -21,7 +19,7 @@ class Documenter(Converter):
21
19
  ):
22
20
  kwargs.update(source_language=source_language)
23
21
  super().__init__(**kwargs)
24
- self.set_prompt("document")
22
+ self.set_prompts("document")
25
23
 
26
24
  if drop_comments:
27
25
  comment_node_type = LANGUAGES[source_language].get(
@@ -35,12 +33,14 @@ class Documenter(Converter):
35
33
  class MultiDocumenter(Documenter):
36
34
  def __init__(self, **kwargs):
37
35
  super().__init__(**kwargs)
38
- self.set_prompt("multidocument")
36
+ self.set_prompts("multidocument")
39
37
  self._combiner = JsonCombiner()
40
38
  self._parser = MultiDocumentationParser()
41
39
 
40
+ self._load_parameters()
41
+
42
42
 
43
- class MadLibsDocumenter(Documenter):
43
+ class ClozeDocumenter(Documenter):
44
44
  def __init__(
45
45
  self,
46
46
  comments_per_request: int | None = None,
@@ -48,12 +48,14 @@ class MadLibsDocumenter(Documenter):
48
48
  ) -> None:
49
49
  kwargs.update(drop_comments=False)
50
50
  super().__init__(**kwargs)
51
- self.set_prompt("document_madlibs")
51
+ self.set_prompts("document_cloze")
52
52
  self._combiner = JsonCombiner()
53
- self._parser = MadlibsDocumentationParser()
53
+ self._parser = ClozeDocumentationParser()
54
54
 
55
55
  self.comments_per_request = comments_per_request
56
56
 
57
+ self._load_parameters()
58
+
57
59
  def _add_translation(self, block: TranslatedCodeBlock):
58
60
  if block.translated:
59
61
  return
@@ -92,7 +94,6 @@ class MadLibsDocumenter(Documenter):
92
94
 
93
95
  block.processing_time = 0
94
96
  block.cost = 0
95
- block.retries = 0
96
97
  obj = {}
97
98
  for i in range(0, len(comments), self.comments_per_request):
98
99
  # Split the text into the section containing comments of interest,
@@ -114,16 +115,26 @@ class MadLibsDocumenter(Documenter):
114
115
  working_block = TranslatedCodeBlock(working_copy, self._target_language)
115
116
 
116
117
  # Run the LLM on the working text
117
- super()._add_translation(working_block)
118
-
119
- # Update metadata to include for all runs
120
- block.retries += working_block.retries
121
- block.cost += working_block.cost
122
- block.processing_time += working_block.processing_time
118
+ try:
119
+ super()._add_translation(working_block)
120
+ except JanusParserException as e:
121
+ block.text += "\n===============\n" + working_block.text
122
+ block.tokens = self._llm.get_num_tokens(block.text)
123
+ raise e
124
+ finally:
125
+ # Update metadata to include for all runs
126
+ block.num_requests += working_block.num_requests
127
+ block.cost += working_block.cost
128
+ block.processing_time += working_block.processing_time
129
+ block.request_input_tokens += working_block.request_input_tokens
130
+ block.request_output_tokens += working_block.request_output_tokens
123
131
 
124
132
  # Update the output text to merge this section's output in
125
133
  out_text = self._parser.parse(working_block.text)
126
134
  obj.update(json.loads(out_text))
135
+ # Set intermediate text, will be overwritten if file
136
+ # successfully completes
137
+ block.text = json.dumps(obj)
127
138
 
128
139
  self._parser.parse_input(block.original)
129
140
  block.text = self._parser.parse(json.dumps(obj))
@@ -1,11 +1,12 @@
1
1
  import json
2
2
  import re
3
3
  from copy import deepcopy
4
+ from pathlib import Path
5
+ from typing import Any
4
6
 
5
7
  from langchain_core.runnables import Runnable, RunnableLambda, RunnableParallel
6
8
 
7
9
  from janus.converter.converter import Converter
8
- from janus.language.block import TranslatedCodeBlock
9
10
  from janus.language.combine import JsonCombiner
10
11
  from janus.parsers.eval_parsers.incose_parser import IncoseParser
11
12
  from janus.parsers.eval_parsers.inline_comment_parser import InlineCommentParser
@@ -30,11 +31,11 @@ class Evaluator(Converter):
30
31
 
31
32
  Arguments:
32
33
  model: The LLM to use for translation. If an OpenAI model, the
33
- `OPENAI_API_KEY` environment variable must be set and the
34
- `OPENAI_ORG_ID` environment variable should be set if needed.
34
+ `OPENAI_API_KEY` environment variable must be set.
35
35
  model_arguments: Additional arguments to pass to the LLM constructor.
36
36
  max_prompts: The maximum number of prompts to try before giving up.
37
37
  """
38
+ kwargs.update(use_janus_inputs=True)
38
39
  super().__init__(**kwargs)
39
40
  self._combiner = JsonCombiner()
40
41
  self._load_parameters()
@@ -55,15 +56,14 @@ class RequirementEvaluator(Evaluator):
55
56
 
56
57
  Arguments:
57
58
  model: The LLM to use for translation. If an OpenAI model, the
58
- `OPENAI_API_KEY` environment variable must be set and the
59
- `OPENAI_ORG_ID` environment variable should be set if needed.
59
+ `OPENAI_API_KEY` environment variable must be set.
60
60
  model_arguments: Additional arguments to pass to the LLM constructor.
61
61
  max_prompts: The maximum number of prompts to try before giving up.
62
62
  """
63
63
  super().__init__(**kwargs)
64
64
  self.eval_items_per_request = eval_items_per_request
65
65
  self._parser = IncoseParser()
66
- self.set_prompt("eval_prompts/incose")
66
+ self.set_prompts("eval_prompts/incose")
67
67
 
68
68
  def _input_runnable(self) -> Runnable:
69
69
  def _get_code(json_text: str) -> str:
@@ -78,67 +78,55 @@ class RequirementEvaluator(Evaluator):
78
78
  context=self._retriever,
79
79
  )
80
80
 
81
- def _add_translation(self, block: TranslatedCodeBlock):
82
- if block.translated:
83
- return
84
-
85
- if block.original.text is None:
86
- block.translated = True
87
- return
88
-
89
- if self.eval_items_per_request is None:
90
- return super()._add_translation(block)
91
-
92
- input_obj = json.loads(block.original.text)
93
- requirements = input_obj.get("requirements", [])
94
-
95
- if not requirements:
96
- log.debug(f"[{block.name}] Skipping empty block")
97
- block.translated = True
98
- block.text = None
99
- block.complete = True
100
- return
101
-
102
- # For some reason requirements objects are in nested lists?
103
- while isinstance(requirements[0], list):
104
- requirements = [r for lst in requirements for r in lst]
105
-
106
- if len(requirements) <= self.eval_items_per_request:
107
- input_obj["requirements"] = requirements
108
- block.original.text = json.dumps(input_obj)
109
- return super()._add_translation(block)
110
-
111
- block.processing_time = 0
112
- block.cost = 0
113
- block.retries = 0
114
- obj = {}
115
- for i in range(0, len(requirements), self.eval_items_per_request):
116
- # Build a new TranslatedBlock using the new working text
117
- working_requirements = requirements[i : i + self.eval_items_per_request]
118
- working_copy = deepcopy(block.original)
119
- working_obj = json.loads(working_copy.text) # type: ignore
120
- working_obj["requirements"] = working_requirements
121
- working_copy.text = json.dumps(working_obj)
122
- working_block = TranslatedCodeBlock(working_copy, self._target_language)
123
-
124
- # Run the LLM on the working text
125
- super()._add_translation(working_block)
126
-
127
- # Update metadata to include for all runs
128
- block.retries += working_block.retries
129
- block.cost += working_block.cost
130
- block.processing_time += working_block.processing_time
131
-
132
- # Update the output text to merge this section's output in
133
- obj.update(json.loads(working_block.text))
134
-
135
- block.text = json.dumps(obj)
136
- block.tokens = self._llm.get_num_tokens(block.text)
137
- block.translated = True
138
-
139
- log.debug(
140
- f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
141
- )
81
+ def translate_janus_obj(self, obj: Any, name: str, failure_path: Path | None = None):
82
+ results = []
83
+ for o in obj["outputs"]:
84
+ if isinstance(o, dict):
85
+ results += self.translate_janus_obj(o, name, failure_path)
86
+ elif isinstance(o, str):
87
+ temp_obj = deepcopy(obj)
88
+ requirements = json.loads(o)
89
+ if not requirements:
90
+ log.debug(f"[{name}] Skipping empty output")
91
+ continue
92
+ if (
93
+ not self.eval_items_per_request
94
+ or len(requirements) < self.eval_items_per_request
95
+ ):
96
+ obj_str = json.dumps(
97
+ dict(
98
+ requirements=requirements,
99
+ code=obj["input"],
100
+ )
101
+ )
102
+ temp_obj["outputs"] = [obj_str]
103
+ temp_block = self._janus_object_to_codeblock(temp_obj, name)
104
+ translated_block = self.translate_block(temp_block, failure_path)
105
+ translated_block.previous_generations[-1] = obj
106
+ translated_block.original = self._janus_object_to_codeblock(obj, name)
107
+ results.append(translated_block)
108
+ else:
109
+ for i in range(0, len(requirements), self.eval_items_per_request):
110
+ working_requirements = requirements[
111
+ i : i + self.eval_items_per_request
112
+ ]
113
+ obj_str = json.dumps(
114
+ dict(
115
+ requirements=working_requirements,
116
+ code=obj["input"],
117
+ )
118
+ )
119
+ temp_obj["outputs"] = [obj_str]
120
+ temp_block = self._janus_object_to_codeblock(temp_obj, name)
121
+ translated_block = self.translate_block(temp_block, failure_path)
122
+ translated_block.previous_generations[-1] = obj
123
+ translated_block.original = self._janus_object_to_codeblock(
124
+ obj, name
125
+ )
126
+ results.append(translated_block)
127
+ else:
128
+ raise ValueError(f"Error: unable to find janus object: {type(o)}")
129
+ return results
142
130
 
143
131
 
144
132
  class InlineCommentEvaluator(Evaluator):
@@ -153,89 +141,100 @@ class InlineCommentEvaluator(Evaluator):
153
141
 
154
142
  Arguments:
155
143
  model: The LLM to use for translation. If an OpenAI model, the
156
- `OPENAI_API_KEY` environment variable must be set and the
157
- `OPENAI_ORG_ID` environment variable should be set if needed.
144
+ `OPENAI_API_KEY` environment variable must be set.
158
145
  model_arguments: Additional arguments to pass to the LLM constructor.
159
146
  max_prompts: The maximum number of prompts to try before giving up.
160
147
  """
161
148
  super().__init__(**kwargs)
162
149
  self._combiner = JsonCombiner()
163
- self._load_parameters()
164
150
  self._parser = InlineCommentParser()
165
- self.set_prompt("eval_prompts/inline_comments")
151
+ self.set_prompts("eval_prompts/inline_comments")
166
152
  self.eval_items_per_request = eval_items_per_request
153
+ self._load_parameters()
167
154
 
168
- def _add_translation(self, block: TranslatedCodeBlock):
169
- if block.translated:
170
- return
171
-
172
- if block.original.text is None:
173
- block.translated = True
174
- return
175
-
176
- if self.eval_items_per_request is None:
177
- return super()._add_translation(block)
178
-
155
+ def _process_comments(self, input_str: str, generated_comments: dict[str, str]):
156
+ comment_patterns = [
157
+ (r"<BLOCK_COMMENT (\w{8})>", "<BLOCK_COMMENT {}>", "<BLOCK_COMMENT {}>"),
158
+ (r"<INLINE_COMMENT (\w{8})>", "<INLINE_COMMENT {}>", "<INLINE_COMMENT {}>"),
159
+ (r"<MODULE (\w{8})>", "<MODULE {}>", "<BLOCK_COMMENT {}>"),
160
+ ]
161
+ missing_comments = 0
162
+ for pattern, find_template, repl_template in comment_patterns:
163
+ matches = re.findall(pattern, input_str)
164
+
165
+ for comment_id in matches:
166
+ find_tag = find_template.format(comment_id)
167
+ repl_tag = repl_template.format(comment_id)
168
+
169
+ if comment_id not in generated_comments:
170
+ missing_comments += 1
171
+ comment = generated_comments.get(comment_id, "[comment missing]")
172
+ comment = comment.replace("\n", "\\n")
173
+
174
+ # Replace the tag in the code with the comment appended.
175
+ input_str = input_str.replace(find_tag, f"{repl_tag} {comment}")
176
+ processed_str = re.sub(r"\s*<JANUS_PARTITION>\s*\n", "\n", input_str)
177
+ return processed_str.strip("\n"), missing_comments
178
+
179
+ def translate_janus_obj(self, obj: Any, name: str, failure_path: Path | None = None):
179
180
  comment_pattern = r"<(?:INLINE|BLOCK)_COMMENT \w{8}>.*$"
180
- comments = list(
181
- re.finditer(comment_pattern, block.original.text, flags=re.MULTILINE)
182
- )
183
-
184
- if not comments:
185
- log.info(f"[{block.name}] Skipping commentless block")
186
- block.translated = True
187
- block.text = None
188
- block.complete = True
189
- return
190
-
191
- if len(comments) <= self.eval_items_per_request:
192
- return super()._add_translation(block)
193
-
194
- comment_group_indices = list(range(0, len(comments), self.eval_items_per_request))
195
- log.debug(
196
- f"[{block.name}] Block contains more than {self.eval_items_per_request}"
197
- f" comments, splitting {len(comments)} comments into"
198
- f" {len(comment_group_indices)} groups"
199
- )
200
-
201
- block.processing_time = 0
202
- block.cost = 0
203
- block.retries = 0
204
- obj = {}
205
- for i in range(0, len(comments), self.eval_items_per_request):
206
- # Split the text into the section containing comments of interest,
207
- # all the text prior to those comments, and all the text after them
208
- working_comments = comments[i : i + self.eval_items_per_request]
209
- start_idx = working_comments[0].start()
210
- end_idx = working_comments[-1].end()
211
- prefix = block.original.text[:start_idx]
212
- keeper = block.original.text[start_idx:end_idx]
213
- suffix = block.original.text[end_idx:]
214
-
215
- # Strip all comment placeholders outside of the section of interest
216
- prefix = re.sub(comment_pattern, "", prefix, flags=re.MULTILINE)
217
- suffix = re.sub(comment_pattern, "", suffix, flags=re.MULTILINE)
218
-
219
- # Build a new TranslatedBlock using the new working text
220
- working_copy = deepcopy(block.original)
221
- working_copy.text = prefix + keeper + suffix
222
- working_block = TranslatedCodeBlock(working_copy, self._target_language)
223
-
224
- # Run the LLM on the working text
225
- super()._add_translation(working_block)
226
-
227
- # Update metadata to include for all runs
228
- block.retries += working_block.retries
229
- block.cost += working_block.cost
230
- block.processing_time += working_block.processing_time
231
-
232
- # Update the output text to merge this section's output in
233
- obj.update(json.loads(working_block.text))
234
-
235
- block.text = json.dumps(obj)
236
- block.tokens = self._llm.get_num_tokens(block.text)
237
- block.translated = True
238
-
239
- log.debug(
240
- f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
241
- )
181
+ results = []
182
+ input_str = obj["input"]
183
+ for o in obj["outputs"]:
184
+ if isinstance(o, dict):
185
+ results += self.translate_janus_obj(o, name, failure_path)
186
+ elif isinstance(o, str):
187
+ temp_obj = deepcopy(obj)
188
+ generated_comments = json.loads(o)
189
+ processed_input, missing_comments = self._process_comments(
190
+ input_str, generated_comments
191
+ )
192
+ if missing_comments:
193
+ log.info(f"[{name}] Warning: missing {missing_comments} comments")
194
+ comments = list(
195
+ re.finditer(comment_pattern, processed_input, flags=re.MULTILINE)
196
+ )
197
+ if not comments:
198
+ log.info(f"[{name}] Skipping commentless block")
199
+ continue
200
+ if (
201
+ self.eval_items_per_request is None
202
+ or len(comments) < self.eval_items_per_request
203
+ ):
204
+ temp_obj["outputs"] = [processed_input]
205
+ temp_block = self._janus_object_to_codeblock(temp_obj, name)
206
+ translated_block = self.translate_block(temp_block, failure_path)
207
+ translated_block.previous_generations[-1] = obj
208
+ translated_block.original = self._janus_object_to_codeblock(obj, name)
209
+ results.append(translated_block)
210
+ continue
211
+ comment_group_indices = list(
212
+ range(0, len(comments), self.eval_items_per_request)
213
+ )
214
+ log.debug(
215
+ f"[{name}] Block contains more than {self.eval_items_per_request}"
216
+ f" comments, splitting {len(comments)} comments into"
217
+ f" {len(comment_group_indices)} groups"
218
+ )
219
+ for comment_ind in comment_group_indices:
220
+ working_comments = comments[
221
+ comment_ind : comment_ind + self.eval_items_per_request
222
+ ]
223
+ start_idx = working_comments[0].start()
224
+ end_idx = working_comments[-1].end()
225
+ prefix = processed_input[:start_idx]
226
+ keeper = processed_input[start_idx:end_idx]
227
+ suffix = processed_input[end_idx:]
228
+
229
+ # Strip all comment placeholders outside of the section of interest
230
+ prefix = re.sub(comment_pattern, "", prefix, flags=re.MULTILINE)
231
+ suffix = re.sub(comment_pattern, "", suffix, flags=re.MULTILINE)
232
+ temp_obj["outputs"] = [prefix + keeper + suffix]
233
+ temp_block = self._janus_object_to_codeblock(temp_obj, name)
234
+ translated_block = self.translate_block(temp_block, failure_path)
235
+ translated_block.previous_generations[-1] = obj
236
+ translated_block.original = self._janus_object_to_codeblock(obj, name)
237
+ results.append(translated_block)
238
+ else:
239
+ raise ValueError(f"Error: unrecognized janus object type: {type(o)}")
240
+ return results
@@ -1,7 +1,4 @@
1
- from pathlib import Path
2
-
3
1
  from janus.converter.converter import Converter
4
- from janus.language.block import TranslatedCodeBlock
5
2
  from janus.parsers.partition_parser import PartitionParser
6
3
  from janus.utils.logger import create_logger
7
4
 
@@ -11,17 +8,12 @@ log = create_logger(__name__)
11
8
  class Partitioner(Converter):
12
9
  def __init__(self, partition_token_limit: int, **kwargs):
13
10
  super().__init__(**kwargs)
14
- self.set_prompt("partition")
11
+ self.set_prompts("partition")
15
12
  self._load_model()
16
13
  self._parser = PartitionParser(
17
14
  token_limit=partition_token_limit,
18
15
  model=self._llm,
19
16
  )
20
17
  self._target_language = self._source_language
21
- self._target_suffix = self._source_suffix
18
+ self._target_suffix = self._source_suffixes[0]
22
19
  self._load_parameters()
23
-
24
- def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
25
- output_str = self._parser.parse_combined_output(block.complete_text)
26
- out_path.parent.mkdir(parents=True, exist_ok=True)
27
- out_path.write_text(output_str, encoding="utf-8")
@@ -1,8 +1,4 @@
1
- import json
2
- from pathlib import Path
3
-
4
1
  from janus.converter.document import Documenter
5
- from janus.language.block import TranslatedCodeBlock
6
2
  from janus.language.combine import ChunkCombiner
7
3
  from janus.parsers.reqs_parser import RequirementsParser
8
4
  from janus.utils.logger import create_logger
@@ -16,41 +12,9 @@ class RequirementsDocumenter(Documenter):
16
12
  A class that translates code from one programming language to its requirements.
17
13
  """
18
14
 
19
- def __init__(self, **kwargs):
20
- super().__init__(**kwargs)
21
- self.set_prompt("requirements")
15
+ def __init__(self, combine_output: bool = False, **kwargs):
16
+ super().__init__(combine_output=combine_output, **kwargs)
17
+ self.set_prompts("requirements")
22
18
  self._combiner = ChunkCombiner()
23
19
  self._parser = RequirementsParser()
24
-
25
- @staticmethod
26
- def get_prompt_replacements(block) -> dict[str, str]:
27
- prompt_replacements: dict[str, str] = {"SOURCE_CODE": block.original.text}
28
- return prompt_replacements
29
-
30
- def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
31
- """Save a file to disk.
32
-
33
- Arguments:
34
- block: The `CodeBlock` to save to a file.
35
- """
36
- output_list = list()
37
- # For each chunk of code, get generation metadata, the text of the code,
38
- # and the LLM generated requirements
39
- blocks = [block for block in block.children] if len(block.children) else [block]
40
- for block in blocks:
41
- code = block.original.text
42
- requirements = self._parser.parse_combined_output(block.complete_text)
43
- metadata = dict(
44
- retries=block.total_retries,
45
- cost=block.total_cost,
46
- processing_time=block.processing_time,
47
- )
48
- # Put them all in a top level 'output' key
49
- output_list.append(
50
- dict(metadata=metadata, code=code, requirements=requirements)
51
- )
52
- obj = dict(
53
- output=output_list,
54
- )
55
- out_path.parent.mkdir(parents=True, exist_ok=True)
56
- out_path.write_text(json.dumps(obj, indent=2), encoding="utf-8")
20
+ self._load_parameters()
@@ -1,8 +1,5 @@
1
1
  from janus.converter.converter import Converter, run_if_changed
2
- from janus.llm.models_info import MODEL_PROMPT_ENGINES
3
2
  from janus.parsers.code_parser import CodeParser
4
- from janus.prompts.prompt import SAME_OUTPUT
5
- from janus.utils.enums import LANGUAGES
6
3
  from janus.utils.logger import create_logger
7
4
 
8
5
  log = create_logger(__name__)
@@ -21,8 +18,7 @@ class Translator(Converter):
21
18
 
22
19
  Arguments:
23
20
  model: The LLM to use for translation. If an OpenAI model, the
24
- `OPENAI_API_KEY` environment variable must be set and the
25
- `OPENAI_ORG_ID` environment variable should be set if needed.
21
+ `OPENAI_API_KEY` environment variable must be set.
26
22
  model_arguments: Additional arguments to pass to the LLM constructor.
27
23
  source_language: The source programming language.
28
24
  target_language: The target programming language.
@@ -30,13 +26,11 @@ class Translator(Converter):
30
26
  max_prompts: The maximum number of prompts to try before giving up.
31
27
  max_tokens: The maximum number of tokens the model will take in.
32
28
  If unspecificed, model's default max will be used.
33
- prompt_template: name of prompt template directory
34
- (see janus/prompts/templates) or path to a directory.
29
+ prompt_templates: name of prompt template directories
30
+ (see janus/prompts/templates) or paths to directories.
35
31
  """
36
32
  super().__init__(**kwargs)
37
33
 
38
- self._target_version: str | None
39
-
40
34
  self.set_target_language(
41
35
  target_language=target_language,
42
36
  target_version=target_version,
@@ -48,56 +42,6 @@ class Translator(Converter):
48
42
  self._load_parser()
49
43
  super()._load_parameters()
50
44
 
51
- def set_target_language(
52
- self, target_language: str, target_version: str | None
53
- ) -> None:
54
- """Validate and set the target language.
55
-
56
- The affected objects will not be updated until translate() is called.
57
-
58
- Arguments:
59
- target_language: The target programming language.
60
- target_version: The target version of the target programming language.
61
- """
62
- target_language = target_language.lower()
63
- if target_language not in LANGUAGES:
64
- raise ValueError(
65
- f"Invalid target language: {target_language}. "
66
- "Valid target languages are found in `janus.utils.enums.LANGUAGES`."
67
- )
68
- self._target_language = target_language
69
- self._target_version = target_version
70
- self._target_suffix = f".{LANGUAGES[target_language]['suffix']}"
71
-
72
- @run_if_changed(
73
- "_prompt_template_name",
74
- "_source_language",
75
- "_target_language",
76
- "_target_version",
77
- "_model_name",
78
- )
79
- def _load_prompt(self) -> None:
80
- """Load the prompt according to this instance's attributes.
81
-
82
- If the relevant fields have not been changed since the last time this
83
- method was called, nothing happens.
84
- """
85
- if self._prompt_template_name in SAME_OUTPUT:
86
- if self._target_language != self._source_language:
87
- raise ValueError(
88
- f"Prompt template ({self._prompt_template_name}) suggests "
89
- f"source and target languages should match, but do not "
90
- f"({self._source_language} != {self._target_language})"
91
- )
92
-
93
- prompt_engine = MODEL_PROMPT_ENGINES[self._llm.short_model_id](
94
- source_language=self._source_language,
95
- target_language=self._target_language,
96
- target_version=self._target_version,
97
- prompt_template=self._prompt_template_name,
98
- )
99
- self._prompt = prompt_engine.prompt
100
-
101
45
  @run_if_changed("_target_language")
102
46
  def _load_parser(self) -> None:
103
47
  """Load the parser according to this instance's attributes.