janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. janus/__init__.py +1 -1
  2. janus/__main__.py +1 -1
  3. janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
  4. janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
  5. janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
  6. janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
  7. janus/_tests/test_cli.py +3 -2
  8. janus/cli/aggregate.py +135 -0
  9. janus/cli/cli.py +117 -0
  10. janus/cli/constants.py +49 -0
  11. janus/cli/database.py +289 -0
  12. janus/cli/diagram.py +207 -0
  13. janus/cli/document.py +183 -0
  14. janus/cli/embedding.py +122 -0
  15. janus/cli/llm.py +191 -0
  16. janus/cli/partition.py +134 -0
  17. janus/cli/pipeline.py +123 -0
  18. janus/cli/self_eval.py +147 -0
  19. janus/cli/translate.py +192 -0
  20. janus/converter/__init__.py +1 -1
  21. janus/converter/_tests/test_translate.py +7 -5
  22. janus/converter/chain.py +180 -0
  23. janus/converter/converter.py +444 -153
  24. janus/converter/diagram.py +8 -6
  25. janus/converter/document.py +27 -16
  26. janus/converter/evaluate.py +143 -144
  27. janus/converter/partition.py +2 -10
  28. janus/converter/requirements.py +4 -40
  29. janus/converter/translate.py +3 -59
  30. janus/embedding/collections.py +1 -1
  31. janus/language/alc/_tests/alc.asm +3779 -0
  32. janus/language/binary/_tests/hello.bin +0 -0
  33. janus/language/block.py +78 -14
  34. janus/language/file.py +1 -1
  35. janus/language/mumps/_tests/mumps.m +235 -0
  36. janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
  37. janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
  38. janus/language/treesitter/_tests/languages/matlab.m +225 -0
  39. janus/llm/models_info.py +9 -1
  40. janus/metrics/_tests/asm_test_file.asm +10 -0
  41. janus/metrics/_tests/mumps_test_file.m +6 -0
  42. janus/metrics/_tests/test_treesitter_metrics.py +1 -1
  43. janus/metrics/metric.py +47 -124
  44. janus/metrics/prompts/clarity.txt +8 -0
  45. janus/metrics/prompts/completeness.txt +16 -0
  46. janus/metrics/prompts/faithfulness.txt +10 -0
  47. janus/metrics/prompts/hallucination.txt +16 -0
  48. janus/metrics/prompts/quality.txt +8 -0
  49. janus/metrics/prompts/readability.txt +16 -0
  50. janus/metrics/prompts/usefulness.txt +16 -0
  51. janus/parsers/code_parser.py +4 -4
  52. janus/parsers/doc_parser.py +12 -9
  53. janus/parsers/parser.py +7 -0
  54. janus/parsers/partition_parser.py +6 -4
  55. janus/parsers/reqs_parser.py +11 -8
  56. janus/parsers/uml.py +5 -4
  57. janus/prompts/prompt.py +2 -2
  58. janus/prompts/templates/README.md +30 -0
  59. janus/prompts/templates/basic_aggregation/human.txt +6 -0
  60. janus/prompts/templates/basic_aggregation/system.txt +1 -0
  61. janus/prompts/templates/basic_refinement/human.txt +14 -0
  62. janus/prompts/templates/basic_refinement/system.txt +1 -0
  63. janus/prompts/templates/diagram/human.txt +9 -0
  64. janus/prompts/templates/diagram/system.txt +1 -0
  65. janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
  66. janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
  67. janus/prompts/templates/document/human.txt +10 -0
  68. janus/prompts/templates/document/system.txt +1 -0
  69. janus/prompts/templates/document_cloze/human.txt +11 -0
  70. janus/prompts/templates/document_cloze/system.txt +1 -0
  71. janus/prompts/templates/document_cloze/variables.json +4 -0
  72. janus/prompts/templates/document_cloze/variables_asm.json +4 -0
  73. janus/prompts/templates/document_inline/human.txt +13 -0
  74. janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
  75. janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
  76. janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
  77. janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
  78. janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
  79. janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
  80. janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
  81. janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
  82. janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
  83. janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
  84. janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
  85. janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
  86. janus/prompts/templates/multidocument/human.txt +15 -0
  87. janus/prompts/templates/multidocument/system.txt +1 -0
  88. janus/prompts/templates/partition/human.txt +22 -0
  89. janus/prompts/templates/partition/system.txt +1 -0
  90. janus/prompts/templates/partition/variables.json +4 -0
  91. janus/prompts/templates/pseudocode/human.txt +7 -0
  92. janus/prompts/templates/pseudocode/system.txt +7 -0
  93. janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
  94. janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
  95. janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
  96. janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
  97. janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
  98. janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
  99. janus/prompts/templates/refinement/hallucination/human.txt +13 -0
  100. janus/prompts/templates/refinement/hallucination/system.txt +1 -0
  101. janus/prompts/templates/refinement/reflection/human.txt +15 -0
  102. janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
  103. janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
  104. janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
  105. janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
  106. janus/prompts/templates/refinement/reflection/system.txt +1 -0
  107. janus/prompts/templates/refinement/revision/human.txt +16 -0
  108. janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
  109. janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
  110. janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
  111. janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
  112. janus/prompts/templates/refinement/revision/system.txt +1 -0
  113. janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
  114. janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
  115. janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
  116. janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
  117. janus/prompts/templates/requirements/human.txt +13 -0
  118. janus/prompts/templates/requirements/system.txt +2 -0
  119. janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
  120. janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
  121. janus/prompts/templates/simple/human.txt +16 -0
  122. janus/prompts/templates/simple/system.txt +3 -0
  123. janus/refiners/format.py +49 -0
  124. janus/refiners/refiner.py +113 -4
  125. janus/utils/enums.py +127 -112
  126. janus/utils/logger.py +2 -0
  127. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
  128. janus_llm-4.4.5.dist-info/RECORD +210 -0
  129. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
  130. janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
  131. janus/cli.py +0 -1488
  132. janus/metrics/_tests/test_llm.py +0 -90
  133. janus/metrics/llm_metrics.py +0 -202
  134. janus_llm-4.3.1.dist-info/RECORD +0 -115
  135. janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
  136. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/cli/self_eval.py ADDED
@@ -0,0 +1,147 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import click
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from janus.cli.constants import REFINERS
9
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
10
+ from janus.utils.enums import LANGUAGES
11
+
12
+
13
def llm_self_eval(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be evaluated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option("--output", "-o", help="The directory to store the evaluations in."),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    evaluation_type: Annotated[
        str,
        typer.Option(
            "--evaluation-type",
            "-e",
            help="Type of output to evaluate.",
            click_type=click.Choice(["incose", "comments"]),
        ),
    ] = "incose",
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit literal concatenation instead of a backslash continuation:
            # a backslash inside the literal pulls the next source line's leading
            # whitespace into the help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    eval_items_per_request: Annotated[
        Optional[int],
        typer.Option(
            "--eval-items-per-request",
            "-rc",
            help="The maximum number of evaluation items per request",
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
):
    # CLI entry point: build an evaluator for the requested evaluation type and
    # run it over `input_dir`, writing results to `output_dir`.
    #
    # Raises ValueError when `evaluation_type` is neither "incose" nor "comments"
    # (only reachable when called directly, since the CLI restricts the choices).
    #
    # NOTE(review): documented with comments rather than a docstring because
    # Typer can surface a command function's docstring as its help text; the
    # command registration is not visible here -- confirm before converting.

    # Imported at call time rather than at module import time.
    from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator

    evaluator_classes = {
        "incose": RequirementEvaluator,
        "comments": InlineCommentEvaluator,
    }
    try:
        evaluator_cls = evaluator_classes[evaluation_type]
    except KeyError:
        # Previously an unknown type left `evaluator` unbound and failed later
        # with an opaque UnboundLocalError.
        raise ValueError(f"Unknown evaluation type: {evaluation_type!r}") from None

    # Map the refiner names chosen on the command line to their REFINERS entries.
    refiners = [REFINERS[r] for r in refiner_types]
    evaluator = evaluator_cls(
        eval_items_per_request=eval_items_per_request,
        model=llm_name,
        model_arguments=dict(temperature=temperature),
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        splitter_type=splitter_type,
        refiner_types=refiners,
    )
    evaluator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
janus/cli/translate.py ADDED
@@ -0,0 +1,192 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import click
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from janus.cli.constants import REFINERS
9
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
10
+ from janus.utils.enums import LANGUAGES
11
+ from janus.utils.logger import create_logger
12
+
13
+ log = create_logger(__name__)
14
+
15
+
16
def translate(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    source_lang: Annotated[
        str,
        typer.Option(
            "--source-language",
            "-s",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    target_lang: Annotated[
        str,
        typer.Option(
            "--target-language",
            "-t",
            # Each fragment ends with a space so the concatenated help text does
            # not run words together ("plaintextresults", "`java-10`,text.").
            help="The desired output language to translate the source code to. The "
            "format can follow a 'language-version' syntax. Use 'text' to get plaintext "
            "results as returned by the LLM. Examples: `python-3.10`, `mumps`, `java-10`, "
            "text.",
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    skip_context: Annotated[
        bool,
        typer.Option(
            "--skip-context",
            help="Prompts will include any context information associated with source"
            " code blocks, unless this option is specified",
        ),
    ] = False,
    temp: Annotated[
        float,
        typer.Option("--temperature", "-T", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    prompt_template: Annotated[
        str,
        typer.Option(
            "--prompt-template",
            "-p",
            help="Name of the Janus prompt template directory or "
            "path to a directory containing those template files.",
        ),
    ] = "simple",
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit literal concatenation instead of a backslash continuation:
            # a backslash inside the literal pulls the next source line's leading
            # whitespace into the help text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    retriever_type: Annotated[
        Optional[str],
        typer.Option(
            "-R",
            "--retriever",
            help="Name of custom retriever to use",
            click_type=click.Choice(["active_usings", "language_docs"]),
        ),
    ] = None,
    max_tokens: Annotated[
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecified, model's default max will be used.",
        ),
    ] = None,
    use_janus_inputs: Annotated[
        bool,
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if translator should use janus files as inputs",
        ),
    ] = False,
):
    # CLI entry point: configure a Translator from the command-line options and
    # translate the files in `input_dir` into `output_dir`.
    #
    # Raises ValueError when the source and target languages match and the input
    # and output directories are the same path, since the output would then
    # overwrite the input.
    #
    # NOTE(review): `skip_context` is accepted but not used in this function.
    # NOTE(review): documented with comments rather than a docstring because
    # Typer can surface a command function's docstring as its help text; the
    # command registration is not visible here -- confirm before converting.

    # Imported at call time rather than at module import time.
    from janus.cli.constants import db_loc, get_collections_config
    from janus.converter.translate import Translator

    # Map the refiner names chosen on the command line to their REFINERS entries.
    refiners = [REFINERS[r] for r in refiner_types]

    # "language-version" splits into exactly two parts; anything else (no hyphen,
    # or more than one) is treated as a bare language name with no version.
    try:
        target_language, target_version = target_lang.split("-")
    except ValueError:
        target_language = target_lang
        target_version = None

    # make sure not overwriting input
    if source_lang.lower() == target_language.lower() and input_dir == output_dir:
        message = "Output files would overwrite input! Aborting..."
        log.error(message)
        raise ValueError(message)

    translator = Translator(
        model=llm_name,
        model_arguments=dict(temperature=temp),
        source_language=source_lang,
        target_language=target_language,
        target_version=target_version,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        prompt_templates=prompt_template,
        db_path=db_loc,
        db_config=get_collections_config(),
        splitter_type=splitter_type,
        refiner_types=refiners,
        retriever_type=retriever_type,
        use_janus_inputs=use_janus_inputs,
    )
    translator.translate(input_dir, output_dir, failure_dir, overwrite, collection)
@@ -1,6 +1,6 @@
1
1
  from janus.converter.converter import Converter
2
2
  from janus.converter.diagram import DiagramGenerator
3
- from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
3
+ from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
4
4
  from janus.converter.evaluate import Evaluator
5
5
  from janus.converter.partition import Partitioner
6
6
  from janus.converter.requirements import RequirementsDocumenter
@@ -11,6 +11,7 @@ from janus.converter.diagram import DiagramGenerator
11
11
  from janus.converter.requirements import RequirementsDocumenter
12
12
  from janus.converter.translate import Translator
13
13
  from janus.language.block import CodeBlock, TranslatedCodeBlock
14
+ from janus.refiners.format import CodeFormatRefiner
14
15
 
15
16
 
16
17
  class MockCollection(VectorStore):
@@ -50,6 +51,7 @@ class TestTranslator(unittest.TestCase):
50
51
  target_language="python",
51
52
  target_version="3.10",
52
53
  splitter_type="ast-flex",
54
+ refiner_types=[CodeFormatRefiner],
53
55
  )
54
56
  self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
55
57
  self.TEST_FILE_EMBEDDING_COUNT = 14
@@ -57,14 +59,14 @@ class TestTranslator(unittest.TestCase):
57
59
  self.req_translator = RequirementsDocumenter(
58
60
  model="gpt-4o-mini",
59
61
  source_language="fortran",
60
- prompt_template="requirements",
62
+ prompt_templates="requirements",
61
63
  )
62
64
 
63
65
  @pytest.mark.translate
64
66
  def test_translate(self):
65
67
  """Test translate method."""
66
68
  # Delete a file if it's already there
67
- python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.py"
69
+ python_file = self.test_file.parent / "python" / f"{self.test_file.stem}.json"
68
70
  python_file.unlink(missing_ok=True)
69
71
  python_file.parent.rmdir() if python_file.parent.is_dir() else None
70
72
  self.translator.translate(self.test_file.parent, self.test_file.parent / "python")
@@ -80,7 +82,7 @@ class TestTranslator(unittest.TestCase):
80
82
  self.assertRaises(
81
83
  ValueError, self.translator.set_source_language, "scribbledy-doop"
82
84
  )
83
- self.translator.set_prompt("pish posh")
85
+ self.translator.set_prompts(["pish posh"])
84
86
  self.assertRaises(ValueError, self.translator._load_parameters)
85
87
 
86
88
 
@@ -147,10 +149,10 @@ def test_language_combinations(
147
149
  translator.set_model("gpt-4o")
148
150
  translator.set_source_language(source_language)
149
151
  translator.set_target_language(expected_target_language, expected_target_version)
150
- translator.set_prompt(prompt_template)
152
+ translator.set_prompts(prompt_template)
151
153
  translator._load_parameters()
152
154
  assert translator._target_language == expected_target_language # nosec
153
155
  assert translator._target_version == expected_target_version # nosec
154
156
  assert translator._splitter.language == source_language # nosec
155
157
  assert translator._splitter.model.model_name == "gpt-4o" # nosec
156
- assert translator._prompt_template_name == prompt_template # nosec
158
+ assert translator._prompt_template_names == [prompt_template] # nosec
@@ -0,0 +1,180 @@
1
+ from pathlib import Path
2
+
3
+ from janus.converter.converter import Converter
4
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
5
+ from janus.utils.logger import create_logger
6
+
7
+ log = create_logger(__name__)
8
+
9
+
10
class ConverterChain(Converter):
    """
    Class for representing multiple converters chained together

    The first converter handles the original input; every later converter is
    given the output of the converter before it.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Build a chain from one or more converters

        Arguments:
            *args: The converters to run, in order. At least one is required.
            **kwargs: Forwarded to `Converter.__init__`. The keys
                `source_language` and `use_janus_inputs` are overwritten from
                the first converter, `target_language` and `target_version`
                from the last.

        Raises:
            ValueError: If no converters are given, or if a positional argument
                is not a `Converter`.
        """
        if not args:
            raise ValueError("Error: Converter chain must be passed at least 1 converter")
        for converter in args:
            if not isinstance(converter, Converter):
                raise ValueError(f"Error: unrecognized type: {type(converter)}")
        self._converters = args
        first, last = self._converters[0], self._converters[-1]
        kwargs.update(
            source_language=first.source_language,
            target_language=last._target_language,
            target_version=last._target_version,
            use_janus_inputs=first._use_janus_inputs,
        )
        super().__init__(**kwargs)

    def _run_converters(
        self, translated_code_block, name: str, failure_path: Path | None = None
    ):
        """Pass the first converter's output through the remaining converters

        Arguments:
            translated_code_block: The output of the first converter in the chain
            name: The name of the file
            failure_path: The path to write the failure file to

        Returns:
            The block produced by the last converter that ran. If a step's
            output is not marked as translated, the chain stops there and that
            block is returned.
        """
        # `step` is the index of the converter that produced the current block;
        # `converter` is the next one in the chain, which consumes it.
        for step, converter in enumerate(self._converters[1:]):
            if not translated_code_block.translated:
                log.info(
                    f"Error: chain failed to translate at step {step}: "
                    f"{self._converters[step].__class__.__name__}"
                )
                break
            if converter._use_janus_inputs:
                janus_obj = self._converters[step]._get_output_obj(translated_code_block)
                translated_code_block = converter.translate_janus_obj(
                    janus_obj, name, failure_path
                )
            else:
                translated_code_block = converter.translate_block(
                    translated_code_block.to_codeblock(), name, failure_path
                )
        else:
            # Reached only when the loop did not break, so the block here is the
            # last converter's output. Checking it after a break would log a
            # second failure and wrongly attribute it to the last converter.
            if not translated_code_block.translated:
                log.info(
                    f"Error: chain failed to translate at step {len(self._converters)-1}: "
                    f"{self._converters[-1].__class__.__name__}"
                )

        return translated_code_block

    def translate_file(
        self, file: Path, failure_path: Path | None = None
    ) -> TranslatedCodeBlock:
        """Translate a file using the chain of converters

        Arguments:
            file: The file to translate
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        first_output = self._converters[0].translate_file(file, failure_path)
        return self._run_converters(first_output, file.name, failure_path)

    def translate_text(
        self, text: str, name: str, failure_path: Path | None = None
    ) -> TranslatedCodeBlock:
        """Translate a text using the chain of converters

        Arguments:
            text: The text to translate
            name: The name of the file
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        first_output = self._converters[0].translate_text(text, name, failure_path)
        return self._run_converters(first_output, name, failure_path)

    def translate_block(
        self,
        input_block: CodeBlock | list[CodeBlock],
        name: str,
        failure_path: Path | None = None,
    ) -> TranslatedCodeBlock:
        """Translate a block of code using the chain of converters

        Arguments:
            input_block: The block of code to translate
            name: The name of the file
            failure_path: The path to write the failure file to

        Returns:
            The translated code block
        """
        first_output = self._converters[0].translate_block(
            input_block, name, failure_path
        )
        return self._run_converters(first_output, name, failure_path)

    def _get_output_obj(
        self, block: TranslatedCodeBlock | list, combine_children: bool = True
    ) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
        """Build the output object for a block produced by the whole chain

        Extends the parent class's output object with an "intermediate_outputs"
        list, adds the totals of all previous generations to the metadata, and
        sets "input" from the first previous generation when there is one.

        Arguments:
            block: The final block produced by the chain. Its
                `previous_generations` are read, so a plain list is not
                handled here despite the annotation.
            combine_children: Forwarded to the parent implementation

        Returns:
            The output object for the chain
        """
        output_obj = super()._get_output_obj(block, combine_children)
        previous = block.previous_generations

        # One entry per earlier generation, then the last converter's own view
        # of the final block. Entries that are not TranslatedCodeBlocks are
        # passed through unchanged.
        # NOTE(review): assumes previous_generations[i] was produced by
        # self._converters[i] -- confirm against how generations are recorded.
        intermediate_outputs = [
            (
                self._converters[i]._get_output_obj(generation)
                if isinstance(generation, TranslatedCodeBlock)
                else generation
            )
            for i, generation in enumerate(previous)
        ]
        intermediate_outputs.append(self._converters[-1]._get_output_obj(block))
        output_obj["intermediate_outputs"] = intermediate_outputs

        # (metadata key, TranslatedCodeBlock attribute holding the same figure).
        # Non-block generations are indexed by the metadata key instead.
        totals = (
            ("cost", "cost"),
            ("processing_time", "processing_time"),
            ("num_requests", "total_num_requests"),
            ("input_tokens", "total_request_input_tokens"),
            ("output_tokens", "total_request_output_tokens"),
        )
        metadata = output_obj["metadata"]
        for key, attribute in totals:
            metadata[key] += sum(
                (
                    getattr(b, attribute)
                    if isinstance(b, TranslatedCodeBlock)
                    else b["metadata"][key]
                )
                for b in previous
            )
        output_obj["metadata"] = metadata

        # Report the input of the first generation as the chain's input.
        if len(previous) > 0:
            earliest = previous[0]
            output_obj["input"] = (
                (earliest.original.text or "")
                if isinstance(earliest, TranslatedCodeBlock)
                else earliest["input"]
            )
        return output_obj