janus-llm: janus_llm-4.4.5-py3-none-any.whl → janus_llm-4.5.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +1 -1
- janus/cli/pipeline.py +6 -3
- janus/cli/self_eval.py +9 -0
- janus/converter/__init__.py +2 -0
- janus/converter/_tests/test_translate.py +1 -0
- janus/converter/chain.py +53 -133
- janus/converter/converter.py +199 -77
- janus/converter/diagram.py +5 -3
- janus/converter/document.py +10 -4
- janus/converter/evaluate.py +148 -113
- janus/converter/partition.py +4 -1
- janus/converter/passthrough.py +29 -0
- janus/converter/pool.py +74 -0
- janus/converter/requirements.py +4 -1
- janus/language/_tests/test_combine.py +1 -0
- janus/language/block.py +84 -3
- janus/llm/model_callbacks.py +6 -0
- janus/llm/models_info.py +19 -0
- janus/metrics/_tests/test_reading.py +48 -4
- janus/metrics/_tests/test_rouge_score.py +5 -11
- janus/metrics/reading.py +48 -28
- janus/metrics/rouge_score.py +21 -34
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/eval_parsers/incose_parser.py +3 -3
- janus/prompts/templates/cyclic/human.txt +16 -0
- janus/prompts/templates/cyclic/system.txt +1 -0
- janus/prompts/templates/eval_prompts/incose/human.txt +1 -1
- janus/prompts/templates/extract_variables/human.txt +5 -0
- janus/prompts/templates/extract_variables/system.txt +1 -0
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/METADATA +3 -4
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/RECORD +35 -29
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/WHEEL +1 -1
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/LICENSE +0 -0
- {janus_llm-4.4.5.dist-info → janus_llm-4.5.4.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
|
|
5
5
|
from janus.converter.translate import Translator
|
6
6
|
from janus.metrics import * # noqa: F403
|
7
7
|
|
8
|
-
__version__ = "4.4.5"
|
8
|
+
__version__ = "4.5.4"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/cli/pipeline.py
CHANGED
@@ -8,6 +8,7 @@ from typing_extensions import Annotated
|
|
8
8
|
|
9
9
|
from janus.cli.constants import CONVERTERS
|
10
10
|
from janus.converter.chain import ConverterChain
|
11
|
+
from janus.converter.pool import ConverterPool
|
11
12
|
from janus.utils.enums import LANGUAGES
|
12
13
|
|
13
14
|
|
@@ -41,11 +42,13 @@ def instiantiate_pipeline(
|
|
41
42
|
pipeline[0]["kwargs"] = {}
|
42
43
|
pipeline[0]["kwargs"].update(source_language=language, model=model)
|
43
44
|
if use_janus_inputs is not None:
|
44
|
-
pipeline[0]["kwargs"].update(
|
45
|
-
print(pipeline[0])
|
45
|
+
pipeline[0]["kwargs"].update(use_janus_inputs=use_janus_inputs)
|
46
46
|
converters = [instiantiate(pipeline[0])]
|
47
47
|
for p in pipeline[1:]:
|
48
|
-
|
48
|
+
if not isinstance(converters[-1], ConverterPool) and p["type"] != "ConverterPool":
|
49
|
+
p["kwargs"].update(
|
50
|
+
source_language=converters[-1].target_language, model=model
|
51
|
+
)
|
49
52
|
converters.append(instiantiate(p))
|
50
53
|
return ConverterChain(*converters)
|
51
54
|
|
janus/cli/self_eval.py
CHANGED
@@ -123,6 +123,14 @@ def llm_self_eval(
|
|
123
123
|
"If unspecificed, model's default max will be used.",
|
124
124
|
),
|
125
125
|
] = None,
|
126
|
+
use_janus_inputs: Annotated[
|
127
|
+
bool,
|
128
|
+
typer.Option(
|
129
|
+
"-j",
|
130
|
+
"--use-janus-inputs",
|
131
|
+
help="Prsent if translator should use janus files as inputs",
|
132
|
+
),
|
133
|
+
] = False,
|
126
134
|
):
|
127
135
|
from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator
|
128
136
|
|
@@ -137,6 +145,7 @@ def llm_self_eval(
|
|
137
145
|
max_tokens=max_tokens,
|
138
146
|
splitter_type=splitter_type,
|
139
147
|
refiner_types=refiner_types,
|
148
|
+
use_janus_inputs=use_janus_inputs,
|
140
149
|
)
|
141
150
|
# Setting parser type here
|
142
151
|
if evaluation_type == "incose":
|
janus/converter/__init__.py
CHANGED
@@ -3,5 +3,7 @@ from janus.converter.diagram import DiagramGenerator
|
|
3
3
|
from janus.converter.document import ClozeDocumenter, Documenter, MultiDocumenter
|
4
4
|
from janus.converter.evaluate import Evaluator
|
5
5
|
from janus.converter.partition import Partitioner
|
6
|
+
from janus.converter.passthrough import ConverterPassthrough
|
7
|
+
from janus.converter.pool import ConverterPool
|
6
8
|
from janus.converter.requirements import RequirementsDocumenter
|
7
9
|
from janus.converter.translate import Translator
|
janus/converter/chain.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from pathlib import Path
|
2
2
|
|
3
3
|
from janus.converter.converter import Converter
|
4
|
-
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
4
|
+
from janus.language.block import BlockCollection, CodeBlock, TranslatedCodeBlock
|
5
5
|
from janus.utils.logger import create_logger
|
6
6
|
|
7
7
|
log = create_logger(__name__)
|
@@ -24,157 +24,77 @@ class ConverterChain(Converter):
|
|
24
24
|
target_language=self._converters[-1]._target_language,
|
25
25
|
target_version=self._converters[-1]._target_version,
|
26
26
|
use_janus_inputs=self._converters[0]._use_janus_inputs,
|
27
|
+
input_types=self._converters[0]._input_types,
|
28
|
+
input_labels=self._converters[0]._input_labels,
|
29
|
+
output_type=self._converters[-1]._output_type,
|
30
|
+
output_label=self._converters[-1]._output_label,
|
27
31
|
)
|
28
32
|
super().__init__(**kwargs)
|
29
33
|
|
30
|
-
def
|
31
|
-
self,
|
34
|
+
def translate_blocks(
|
35
|
+
self, input_blocks: CodeBlock | list[CodeBlock], failure_path: Path | None = None
|
32
36
|
):
|
33
|
-
|
34
|
-
|
37
|
+
failed = False
|
38
|
+
for i, converter in enumerate(self._converters):
|
39
|
+
translated_code_blocks = converter.translate_blocks(input_blocks)
|
40
|
+
if not translated_code_blocks.translation_completed:
|
35
41
|
log.info(
|
36
42
|
f"Error: chain failed to translate at step {i}:"
|
37
43
|
f"{self._converters[i].__class__.__name__}"
|
38
44
|
)
|
45
|
+
failed = True
|
39
46
|
break
|
40
|
-
|
41
|
-
|
42
|
-
translated_code_block = converter.translate_janus_obj(
|
43
|
-
janus_obj, name, failure_path
|
44
|
-
)
|
45
|
-
else:
|
46
|
-
translated_code_block = converter.translate_block(
|
47
|
-
translated_code_block.to_codeblock(), name, failure_path
|
48
|
-
)
|
49
|
-
if not translated_code_block.translated:
|
47
|
+
input_blocks = translated_code_blocks.to_codeblock()
|
48
|
+
if not failed and not translated_code_blocks.translation_completed:
|
50
49
|
log.info(
|
51
50
|
f"Error: chain failed to translate at step {len(self._converters)-1}: "
|
52
51
|
f"{self._converters[-1].__class__.__name__}"
|
53
52
|
)
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
""
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
Returns:
|
67
|
-
The translated code block
|
68
|
-
"""
|
69
|
-
filename = file.name
|
70
|
-
translated_code_block = self._converters[0].translate_file(file, failure_path)
|
71
|
-
translated_code_block = self._run_converters(
|
72
|
-
translated_code_block, filename, failure_path
|
73
|
-
)
|
74
|
-
return translated_code_block
|
75
|
-
|
76
|
-
def translate_text(
|
77
|
-
self, text: str, name: str, failure_path: Path | None = None
|
78
|
-
) -> TranslatedCodeBlock:
|
79
|
-
"""Translate a text using the chain of converters
|
80
|
-
|
81
|
-
Arguments:
|
82
|
-
text: The text to translate
|
83
|
-
name: The name of the file
|
84
|
-
failure_path: The path to write the failure file to
|
85
|
-
|
86
|
-
Returns:
|
87
|
-
The translated code block
|
88
|
-
"""
|
89
|
-
translated_code_block = self._converters[0].translate_text(
|
90
|
-
text, name, failure_path
|
91
|
-
)
|
92
|
-
translated_code_block = self._run_converters(
|
93
|
-
translated_code_block, name, failure_path
|
94
|
-
)
|
95
|
-
return translated_code_block
|
96
|
-
|
97
|
-
def translate_block(
|
98
|
-
self,
|
99
|
-
input_block: CodeBlock | list[CodeBlock],
|
100
|
-
name: str,
|
101
|
-
failure_path: Path | None = None,
|
102
|
-
) -> TranslatedCodeBlock:
|
103
|
-
"""Translate a block of code using the chain of converters
|
104
|
-
|
105
|
-
Arguments:
|
106
|
-
input_block: The block of code to translate
|
107
|
-
name: The name of the file
|
108
|
-
failure_path: The path to write the failure file to
|
109
|
-
|
110
|
-
Returns:
|
111
|
-
The translated code block
|
112
|
-
"""
|
113
|
-
translated_code_block = self._converters[0].translate_block(
|
114
|
-
input_block, name, failure_path
|
115
|
-
)
|
116
|
-
translated_code_block = self._run_converters(
|
117
|
-
translated_code_block, name, failure_path
|
118
|
-
)
|
119
|
-
return translated_code_block
|
53
|
+
return translated_code_blocks
|
54
|
+
|
55
|
+
def _combine_metadata(self, metadatas: list[dict]):
|
56
|
+
metadata = super()._combine_metadata(metadatas)
|
57
|
+
if isinstance(metadata["type"], list):
|
58
|
+
metadata["type"] = metadata["type"][-1]
|
59
|
+
if isinstance(metadata["label"], list):
|
60
|
+
metadata["label"] = metadata["label"][-1]
|
61
|
+
metadata["type"] = metadatas[-1]["type"]
|
62
|
+
metadata["label"] = metadatas[-1]["label"]
|
63
|
+
return metadata
|
120
64
|
|
121
65
|
def _get_output_obj(
|
122
|
-
self,
|
66
|
+
self,
|
67
|
+
block: TranslatedCodeBlock | BlockCollection,
|
68
|
+
combine_children: bool = True,
|
69
|
+
include_previous_outputs: bool = True,
|
123
70
|
) -> dict[str, int | float | str | dict[str, str] | dict[str, float]]:
|
124
|
-
output_obj = super()._get_output_obj(block, combine_children)
|
125
71
|
intermediate_outputs = []
|
126
|
-
|
127
|
-
|
72
|
+
c_index = 0 # current converter index
|
73
|
+
start_index = 0 # start index of newly generated intermediate outputs
|
74
|
+
for g in block.previous_generations:
|
75
|
+
if isinstance(g, dict):
|
76
|
+
intermediate_outputs.append(g)
|
77
|
+
# Find the first index where we generated code
|
78
|
+
start_index += 1
|
79
|
+
else:
|
128
80
|
intermediate_outputs.append(
|
129
|
-
self._converters[
|
81
|
+
self._converters[c_index]._get_output_obj(
|
82
|
+
g, self._converters[c_index]._combine_output, False
|
83
|
+
)
|
130
84
|
)
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
metadata = output_obj["metadata"]
|
136
|
-
metadata["cost"] += sum(
|
137
|
-
b.cost if isinstance(b, TranslatedCodeBlock) else b["metadata"]["cost"]
|
138
|
-
for b in block.previous_generations
|
139
|
-
)
|
140
|
-
metadata["processing_time"] += sum(
|
141
|
-
(
|
142
|
-
b.processing_time
|
143
|
-
if isinstance(b, TranslatedCodeBlock)
|
144
|
-
else b["metadata"]["processing_time"]
|
145
|
-
)
|
146
|
-
for b in block.previous_generations
|
147
|
-
)
|
148
|
-
metadata["num_requests"] += sum(
|
149
|
-
(
|
150
|
-
b.total_num_requests
|
151
|
-
if isinstance(b, TranslatedCodeBlock)
|
152
|
-
else b["metadata"]["num_requests"]
|
85
|
+
c_index += 1
|
86
|
+
intermediate_outputs.append(
|
87
|
+
self._converters[-1]._get_output_obj(
|
88
|
+
block, self._converters[-1]._combine_output, False
|
153
89
|
)
|
154
|
-
for b in block.previous_generations
|
155
90
|
)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
for b in block.previous_generations
|
91
|
+
out = dict(
|
92
|
+
input=intermediate_outputs[start_index]["input"],
|
93
|
+
metadata=self._combine_metadata(
|
94
|
+
[i["metadata"] for i in intermediate_outputs]
|
95
|
+
),
|
96
|
+
outputs=intermediate_outputs[-1]["outputs"],
|
163
97
|
)
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
if isinstance(b, TranslatedCodeBlock)
|
168
|
-
else b["metadata"]["output_tokens"]
|
169
|
-
)
|
170
|
-
for b in block.previous_generations
|
171
|
-
)
|
172
|
-
output_obj["metadata"] = metadata
|
173
|
-
if len(block.previous_generations) > 0:
|
174
|
-
b = block.previous_generations[0]
|
175
|
-
output_obj["input"] = (
|
176
|
-
(b.original.text or "")
|
177
|
-
if isinstance(b, TranslatedCodeBlock)
|
178
|
-
else b["input"]
|
179
|
-
)
|
180
|
-
return output_obj
|
98
|
+
if include_previous_outputs:
|
99
|
+
out["intermediate_outputs"] = intermediate_outputs
|
100
|
+
return out
|