janus-llm 4.4.5__py3-none-any.whl → 4.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. janus/__init__.py +1 -1
  2. janus/cli/pipeline.py +6 -3
  3. janus/cli/self_eval.py +9 -0
  4. janus/converter/__init__.py +2 -0
  5. janus/converter/_tests/test_translate.py +1 -0
  6. janus/converter/chain.py +53 -133
  7. janus/converter/converter.py +199 -77
  8. janus/converter/diagram.py +5 -3
  9. janus/converter/document.py +10 -4
  10. janus/converter/evaluate.py +148 -113
  11. janus/converter/partition.py +4 -1
  12. janus/converter/passthrough.py +29 -0
  13. janus/converter/pool.py +74 -0
  14. janus/converter/requirements.py +4 -1
  15. janus/language/_tests/test_combine.py +1 -0
  16. janus/language/block.py +84 -3
  17. janus/llm/model_callbacks.py +6 -0
  18. janus/llm/models_info.py +19 -0
  19. janus/metrics/_tests/test_reading.py +48 -4
  20. janus/metrics/_tests/test_rouge_score.py +5 -11
  21. janus/metrics/reading.py +48 -28
  22. janus/metrics/rouge_score.py +21 -34
  23. janus/parsers/_tests/test_code_parser.py +1 -1
  24. janus/parsers/code_parser.py +2 -2
  25. janus/parsers/eval_parsers/incose_parser.py +3 -3
  26. janus/prompts/templates/cyclic/human.txt +16 -0
  27. janus/prompts/templates/cyclic/system.txt +1 -0
  28. janus/prompts/templates/eval_prompts/incose/human.txt +1 -1
  29. janus/prompts/templates/extract_variables/human.txt +5 -0
  30. janus/prompts/templates/extract_variables/system.txt +1 -0
  31. {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA +3 -4
  32. {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/RECORD +35 -29
  33. {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/WHEEL +1 -1
  34. {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/LICENSE +0 -0
  35. {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/entry_points.txt +0 -0
janus/__init__.py CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
5
5
  from janus.converter.translate import Translator
6
6
  from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "4.4.5"
8
+ __version__ = "4.5.4"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
janus/cli/pipeline.py CHANGED
@@ -8,6 +8,7 @@ from typing_extensions import Annotated
8
8
 
9
9
  from janus.cli.constants import CONVERTERS
10
10
  from janus.converter.chain import ConverterChain
11
+ from janus.converter.pool import ConverterPool
11
12
  from janus.utils.enums import LANGUAGES
12
13
 
13
14
 
@@ -41,11 +42,13 @@ def instiantiate_pipeline(
41
42
  pipeline[0]["kwargs"] = {}
42
43
  pipeline[0]["kwargs"].update(source_language=language, model=model)
43
44
  if use_janus_inputs is not None:
44
- pipeline[0]["kwargs"].update(janus_inputs=use_janus_inputs)
45
- print(pipeline[0])
45
+ pipeline[0]["kwargs"].update(use_janus_inputs=use_janus_inputs)
46
46
  converters = [instiantiate(pipeline[0])]
47
47
  for p in pipeline[1:]:
48
- p["kwargs"].update(source_language=converters[-1].target_language, model=model)
48
+ if not isinstance(converters[-1], ConverterPool) and p["type"] != "ConverterPool":
49
+ p["kwargs"].update(
50
+ source_language=converters[-1].target_language, model=model
51
+ )
49
52
  converters.append(instiantiate(p))
50
53
  return ConverterChain(*converters)
51
54
 
janus/cli/self_eval.py CHANGED
@@ -123,6 +123,14 @@ def llm_self_eval(
123
123
  "If unspecificed, model's default max will be used.",
124
124
  ),
125
125
  ] = None,
126
+ use_janus_inputs: Annotated[
127
+ bool,
128
+ typer.Option(
129
+ "-j",
130
+ "--use-janus-inputs",
131
+ help="Prsent if translator should use janus files as inputs",
132
+ ),
133
+ ] = False,
126
134
  ):
127
135
  from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator
128
136
 
@@ -137,6 +145,7 @@ def llm_self_eval(
137
145
  max_tokens=max_tokens,
138
146
  splitter_type=splitter_type,
139
147
  refiner_types=refiner_types,
148
+ use_janus_inputs=use_janus_inputs,
140
149
  )
141
150
  # Setting parser type here
142
151
  if evaluation_type == "incose":
janus/converter/__init__.py CHANGED
@@ -3,5 +3,7 @@ from janus.converter.diagram import DiagramGenerator
3
3
  from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
4
4
  from janus.converter.evaluate import Evaluator
5
5
  from janus.converter.partition import Partitioner
6
+ from janus.converter.passthrough import ConverterPassthrough
7
+ from janus.converter.pool import ConverterPool
6
8
  from janus.converter.requirements import RequirementsDocumenter
7
9
  from janus.converter.translate import Translator
janus/converter/_tests/test_translate.py CHANGED
@@ -120,6 +120,7 @@ class TestDiagramGenerator(unittest.TestCase):
120
120
  children=[],
121
121
  ),
122
122
  language="python",
123
+ converter=self.diagram_generator,
123
124
  )
124
125
  self.diagram_generator._add_translation(block)
125
126
  self.assertTrue(block.translated)
janus/converter/chain.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from pathlib import Path
2
2
 
3
3
  from janus.converter.converter import Converter
4
- from janus.language.block import CodeBlock, TranslatedCodeBlock
4
+ from janus.language.block import BlockCollection, CodeBlock, TranslatedCodeBlock
5
5
  from janus.utils.logger import create_logger
6
6
 
7
7
  log = create_logger(__name__)
@@ -24,157 +24,77 @@ class ConverterChain(Converter):
24
24
  target_language=self._converters[-1]._target_language,
25
25
  target_version=self._converters[-1]._target_version,
26
26
  use_janus_inputs=self._converters[0]._use_janus_inputs,
27
+ input_types=self._converters[0]._input_types,
28
+ input_labels=self._converters[0]._input_labels,
29
+ output_type=self._converters[-1]._output_type,
30
+ output_label=self._converters[-1]._output_label,
27
31
  )
28
32
  super().__init__(**kwargs)
29
33
 
30
- def _run_converters(
31
- self, translated_code_block, name: str, failure_path: Path | None = None
34
+ def translate_blocks(
35
+ self, input_blocks: CodeBlock | list[CodeBlock], failure_path: Path | None = None
32
36
  ):
33
- for i, converter in enumerate(self._converters[1:]):
34
- if not translated_code_block.translated:
37
+ failed = False
38
+ for i, converter in enumerate(self._converters):
39
+ translated_code_blocks = converter.translate_blocks(input_blocks)
40
+ if not translated_code_blocks.translation_completed:
35
41
  log.info(
36
42
  f"Error: chain failed to translate at step {i}:"
37
43
  f"{self._converters[i].__class__.__name__}"
38
44
  )
45
+ failed = True
39
46
  break
40
- if converter._use_janus_inputs:
41
- janus_obj = self._converters[i]._get_output_obj(translated_code_block)
42
- translated_code_block = converter.translate_janus_obj(
43
- janus_obj, name, failure_path
44
- )
45
- else:
46
- translated_code_block = converter.translate_block(
47
- translated_code_block.to_codeblock(), name, failure_path
48
- )
49
- if not translated_code_block.translated:
47
+ input_blocks = translated_code_blocks.to_codeblock()
48
+ if not failed and not translated_code_blocks.translation_completed:
50
49
  log.info(
51
50
  f"Error: chain failed to translate at step {len(self._converters)-1}: "
52
51
  f"{self._converters[-1].__class__.__name__}"
53
52
  )
54
-
55
- return translated_code_block
56
-
57
- def translate_file(
58
- self, file: Path, failure_path: Path | None = None
59
- ) -> TranslatedCodeBlock:
60
- """Translate a file using the chain of converters
61
-
62
- Arguments:
63
- file: The file to translate
64
- failure_path: The path to write the failure file to
65
-
66
- Returns:
67
- The translated code block
68
- """
69
- filename = file.name
70
- translated_code_block = self._converters[0].translate_file(file, failure_path)
71
- translated_code_block = self._run_converters(
72
- translated_code_block, filename, failure_path
73
- )
74
- return translated_code_block
75
-
76
- def translate_text(
77
- self, text: str, name: str, failure_path: Path | None = None
78
- ) -> TranslatedCodeBlock:
79
- """Translate a text using the chain of converters
80
-
81
- Arguments:
82
- text: The text to translate
83
- name: The name of the file
84
- failure_path: The path to write the failure file to
85
-
86
- Returns:
87
- The translated code block
88
- """
89
- translated_code_block = self._converters[0].translate_text(
90
- text, name, failure_path
91
- )
92
- translated_code_block = self._run_converters(
93
- translated_code_block, name, failure_path
94
- )
95
- return translated_code_block
96
-
97
- def translate_block(
98
- self,
99
- input_block: CodeBlock | list[CodeBlock],
100
- name: str,
101
- failure_path: Path | None = None,
102
- ) -> TranslatedCodeBlock:
103
- """Translate a block of code using the chain of converters
104
-
105
- Arguments:
106
- input_block: The block of code to translate
107
- name: The name of the file
108
- failure_path: The path to write the failure file to
109
-
110
- Returns:
111
- The translated code block
112
- """
113
- translated_code_block = self._converters[0].translate_block(
114
- input_block, name, failure_path
115
- )
116
- translated_code_block = self._run_converters(
117
- translated_code_block, name, failure_path
118
- )
119
- return translated_code_block
53
+ return translated_code_blocks
54
+
55
+ def _combine_metadata(self, metadatas: list[dict]):
56
+ metadata = super()._combine_metadata(metadatas)
57
+ if isinstance(metadata["type"], list):
58
+ metadata["type"] = metadata["type"][-1]
59
+ if isinstance(metadata["label"], list):
60
+ metadata["label"] = metadata["label"][-1]
61
+ metadata["type"] = metadatas[-1]["type"]
62
+ metadata["label"] = metadatas[-1]["label"]
63
+ return metadata
120
64
 
121
65
  def _get_output_obj(
122
- self, block: TranslatedCodeBlock | list, combine_children: bool = True
66
+ self,
67
+ block: TranslatedCodeBlock | BlockCollection,
68
+ combine_children: bool = True,
69
+ include_previous_outputs: bool = True,
123
70
  ) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
124
- output_obj = super()._get_output_obj(block, combine_children)
125
71
  intermediate_outputs = []
126
- for i, intermediate_out in enumerate(block.previous_generations):
127
- if isinstance(intermediate_out, TranslatedCodeBlock):
72
+ c_index = 0 # current converter index
73
+ start_index = 0 # start index of newly generated intermediate outputs
74
+ for g in block.previous_generations:
75
+ if isinstance(g, dict):
76
+ intermediate_outputs.append(g)
77
+ # Find the first index where we generated code
78
+ start_index += 1
79
+ else:
128
80
  intermediate_outputs.append(
129
- self._converters[i]._get_output_obj(intermediate_out)
81
+ self._converters[c_index]._get_output_obj(
82
+ g, self._converters[c_index]._combine_output, False
83
+ )
130
84
  )
131
- else:
132
- intermediate_outputs.append(intermediate_out)
133
- intermediate_outputs.append(self._converters[-1]._get_output_obj(block))
134
- output_obj["intermediate_outputs"] = intermediate_outputs
135
- metadata = output_obj["metadata"]
136
- metadata["cost"] += sum(
137
- b.cost if isinstance(b, TranslatedCodeBlock) else b["metadata"]["cost"]
138
- for b in block.previous_generations
139
- )
140
- metadata["processing_time"] += sum(
141
- (
142
- b.processing_time
143
- if isinstance(b, TranslatedCodeBlock)
144
- else b["metadata"]["processing_time"]
145
- )
146
- for b in block.previous_generations
147
- )
148
- metadata["num_requests"] += sum(
149
- (
150
- b.total_num_requests
151
- if isinstance(b, TranslatedCodeBlock)
152
- else b["metadata"]["num_requests"]
85
+ c_index += 1
86
+ intermediate_outputs.append(
87
+ self._converters[-1]._get_output_obj(
88
+ block, self._converters[-1]._combine_output, False
153
89
  )
154
- for b in block.previous_generations
155
90
  )
156
- metadata["input_tokens"] += sum(
157
- (
158
- b.total_request_input_tokens
159
- if isinstance(b, TranslatedCodeBlock)
160
- else b["metadata"]["input_tokens"]
161
- )
162
- for b in block.previous_generations
91
+ out = dict(
92
+ input=intermediate_outputs[start_index]["input"],
93
+ metadata=self._combine_metadata(
94
+ [i["metadata"] for i in intermediate_outputs]
95
+ ),
96
+ outputs=intermediate_outputs[-1]["outputs"],
163
97
  )
164
- metadata["output_tokens"] += sum(
165
- (
166
- b.total_request_output_tokens
167
- if isinstance(b, TranslatedCodeBlock)
168
- else b["metadata"]["output_tokens"]
169
- )
170
- for b in block.previous_generations
171
- )
172
- output_obj["metadata"] = metadata
173
- if len(block.previous_generations) > 0:
174
- b = block.previous_generations[0]
175
- output_obj["input"] = (
176
- (b.original.text or "")
177
- if isinstance(b, TranslatedCodeBlock)
178
- else b["input"]
179
- )
180
- return output_obj
98
+ if include_previous_outputs:
99
+ out["intermediate_outputs"] = intermediate_outputs
100
+ return out