janus-llm 1.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
- janus/__init__.py +9 -1
- janus/__main__.py +4 -0
- janus/_tests/test_cli.py +128 -0
- janus/_tests/test_translate.py +49 -7
- janus/cli.py +530 -46
- janus/converter.py +50 -19
- janus/embedding/_tests/test_collections.py +2 -8
- janus/embedding/_tests/test_database.py +32 -0
- janus/embedding/_tests/test_vectorize.py +9 -4
- janus/embedding/collections.py +49 -6
- janus/embedding/embedding_models_info.py +130 -0
- janus/embedding/vectorize.py +53 -62
- janus/language/_tests/__init__.py +0 -0
- janus/language/_tests/test_combine.py +62 -0
- janus/language/_tests/test_splitter.py +16 -0
- janus/language/binary/_tests/test_binary.py +16 -1
- janus/language/binary/binary.py +10 -3
- janus/language/block.py +31 -30
- janus/language/combine.py +26 -34
- janus/language/mumps/_tests/test_mumps.py +2 -2
- janus/language/mumps/mumps.py +93 -9
- janus/language/naive/__init__.py +4 -0
- janus/language/naive/basic_splitter.py +14 -0
- janus/language/naive/chunk_splitter.py +26 -0
- janus/language/naive/registry.py +13 -0
- janus/language/naive/simple_ast.py +18 -0
- janus/language/naive/tag_splitter.py +61 -0
- janus/language/splitter.py +168 -74
- janus/language/treesitter/_tests/test_treesitter.py +19 -14
- janus/language/treesitter/treesitter.py +37 -13
- janus/llm/model_callbacks.py +177 -0
- janus/llm/models_info.py +165 -72
- janus/metrics/__init__.py +8 -0
- janus/metrics/_tests/__init__.py +0 -0
- janus/metrics/_tests/reference.py +2 -0
- janus/metrics/_tests/target.py +2 -0
- janus/metrics/_tests/test_bleu.py +56 -0
- janus/metrics/_tests/test_chrf.py +67 -0
- janus/metrics/_tests/test_file_pairing.py +59 -0
- janus/metrics/_tests/test_llm.py +91 -0
- janus/metrics/_tests/test_reading.py +28 -0
- janus/metrics/_tests/test_rouge_score.py +65 -0
- janus/metrics/_tests/test_similarity_score.py +23 -0
- janus/metrics/_tests/test_treesitter_metrics.py +110 -0
- janus/metrics/bleu.py +66 -0
- janus/metrics/chrf.py +55 -0
- janus/metrics/cli.py +7 -0
- janus/metrics/complexity_metrics.py +208 -0
- janus/metrics/file_pairing.py +113 -0
- janus/metrics/llm_metrics.py +202 -0
- janus/metrics/metric.py +466 -0
- janus/metrics/reading.py +70 -0
- janus/metrics/rouge_score.py +96 -0
- janus/metrics/similarity.py +53 -0
- janus/metrics/splitting.py +38 -0
- janus/parsers/_tests/__init__.py +0 -0
- janus/parsers/_tests/test_code_parser.py +32 -0
- janus/parsers/code_parser.py +24 -253
- janus/parsers/doc_parser.py +169 -0
- janus/parsers/eval_parser.py +80 -0
- janus/parsers/reqs_parser.py +72 -0
- janus/prompts/prompt.py +103 -30
- janus/translate.py +636 -111
- janus/utils/_tests/__init__.py +0 -0
- janus/utils/_tests/test_logger.py +67 -0
- janus/utils/_tests/test_progress.py +20 -0
- janus/utils/enums.py +56 -3
- janus/utils/progress.py +56 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/METADATA +27 -11
- janus_llm-2.0.1.dist-info/RECORD +94 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/WHEEL +1 -1
- janus_llm-1.0.0.dist-info/RECORD +0 -48
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/LICENSE +0 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/entry_points.txt +0 -0
janus/metrics/file_pairing.py (added)
@@ -0,0 +1,113 @@
+from typing import Any, Callable
+
+from ..language.binary import BinarySplitter
+from ..language.mumps import MumpsSplitter
+from ..language.node import NodeType
+from ..language.treesitter import TreeSitterSplitter
+from ..utils.enums import CUSTOM_SPLITTERS
+
+FILE_PAIRING_METHODS: dict[str, Callable[[str, str], list[tuple[str, str]]]] = {}
+
+
+def register_pairing_method(name: None | str = None) -> Callable[[Callable], Callable]:
+    """Registers a pairing method for pairing strings between files
+
+    Arguments:
+        name: The name of the pairing method. If None, the function name is used.
+        help: The help text for the pairing method.
+
+    Returns:
+        The decorator function.
+    """
+
+    def decorator(f: Callable[[str, str], list[tuple[str, str]]]):
+        if name is None:
+            pairing_name = f.__name__
+        else:
+            pairing_name = name
+        FILE_PAIRING_METHODS[pairing_name] = f
+        return f
+
+    return decorator
+
+
+@register_pairing_method(name="file")
+def pair_by_file(
+    target: str, reference: str, **kwargs: dict[str, Any]
+) -> list[tuple[str, str]]:
+    """Pairs the entire contents of a file together
+
+    Arguments:
+        target: The target file text.
+        reference: The reference file text.
+        state: The current evaluation state.
+
+    Returns:
+        A list of tuples of the target and reference file text.
+    """
+    return [(target, reference)]
+
+
+@register_pairing_method(name="line")
+def pair_by_line(
+    target: str, reference: str, **kwargs: dict[str, Any]
+) -> list[tuple[str, str]]:
+    """Pairs the contents of a file together by line
+
+    Arguments:
+        target: The target file text.
+        reference: The reference file text.
+        state: The current evaluation state.
+
+    Returns:
+        A list of tuples of the target and reference file text.
+    """
+    return list(zip(target.split("\n"), reference.split("\n")))
+
+
+@register_pairing_method(name="line-comment")
+def pair_by_line_comment(
+    target: str, reference: str, **kwargs: dict[str, Any]
+) -> list[tuple[str, str]]:
+    """Pairs the comments of a file together by line
+
+    **WARNING**: Do not use, as this method is extremely brittle.
+
+    Arguments:
+        target: The target file text.
+        reference: The reference file text.
+        state: The current evaluation state.
+
+    Returns:
+        A list of tuples of the target and reference file text.
+    """
+    splitter_kwargs = dict(
+        max_tokens=kwargs["token_limit"] // 2.5,
+        model=kwargs["llm"],
+        protected_node_types=(NodeType("comment"),),
+        prune_node_types=tuple(),
+    )
+    if kwargs["target_file"] is None or kwargs["reference_file"] is None:
+        raise ValueError("Error: must provide file for pair by line comment")
+    if kwargs["lang"] is None:
+        raise ValueError("Error: must provide language for pair by line comment")
+    if kwargs["lang"] in CUSTOM_SPLITTERS:
+        if kwargs["lang"] == "mumps":
+            splitter = MumpsSplitter(**splitter_kwargs)
+        elif kwargs["lang"] == "binary":
+            splitter = BinarySplitter(**splitter_kwargs)
+    else:
+        splitter = TreeSitterSplitter(language=kwargs["lang"], **splitter_kwargs)
+    target_tree = splitter.split(kwargs["target_file"])
+    reference_tree = splitter.split(kwargs["reference_file"])
+    pairs = []
+
+    def _parse_pairs(node1, node2, pairs):
+        for c1, c2 in zip(node1.children, node2.children):
+            if c1.node_type == "comment" and c2.node_type == "comment":
+                pairs.append((c1.complete_text, c2.complete_text))
+            else:
+                _parse_pairs(c1, c2, pairs)
+
+    _parse_pairs(target_tree, reference_tree, pairs)
+    return pairs
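The new module is a plain decorator registry: each `@register_pairing_method(...)` call stores its function in `FILE_PAIRING_METHODS` under the given name, and the metrics machinery later looks the pairing method up by that key. A minimal usage sketch, not part of the diff (the import path is assumed from the file listing above):

```python
# Sketch only: exercise the pairing registry added in janus/metrics/file_pairing.py.
from janus.metrics.file_pairing import FILE_PAIRING_METHODS

target_text = "x = 1\ny = 2\n"
reference_text = "a = 1\nb = 2\n"

# "file" and "line" are registered by the decorators in this diff; "line-comment"
# additionally expects llm, token_limit, lang, target_file, and reference_file kwargs.
pair_fn = FILE_PAIRING_METHODS["line"]
for target_line, reference_line in pair_fn(target_text, reference_text):
    print(repr(target_line), "<->", repr(reference_line))
```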
janus/metrics/llm_metrics.py (added)
@@ -0,0 +1,202 @@
+from pathlib import Path
+from typing import Any
+
+import click
+import typer
+from langchain_core.exceptions import OutputParserException
+from langchain_core.output_parsers import BaseOutputParser, JsonOutputParser
+from langchain_core.prompts import PromptTemplate
+from langchain_core.pydantic_v1 import BaseModel, Field
+from typing_extensions import Annotated
+
+from .metric import metric
+
+
+class LLMMetricOutput(BaseModel):
+    """The output of an LLM evaluation metric."""
+
+    thought: str = Field(
+        ...,
+        description=(
+            "The thought process that you took to reach your value determination."
+        ),
+    )
+    value: str | float | int = Field(
+        ..., description="The value of the metric described in the prompt."
+    )
+
+
+def load_prompt(path: Path, language: str, parser: BaseOutputParser) -> PromptTemplate:
+    """Load a default prompt from a file.
+
+    Arguments:
+        path: The path to the file.
+        language: The language of the prompt.
+        pydantic_model: The Pydantic model to use for parsing the output.
+
+    Returns:
+        The prompt text.
+    """
+    if not path.exists():
+        raise FileNotFoundError(f"File not found: {path}")
+    prompt = PromptTemplate.from_template(
+        path.read_text(),
+        template_format="f-string",
+        partial_variables={
+            "language": language,
+            "format_instructions": parser.get_format_instructions(),
+        },
+    )
+    return prompt
+
+
+def evaluate(
+    target: str,
+    language: str,
+    model: str,
+    prompt_path: Path,
+    reference: str | None = None,
+):
+    """Calculate the LLM self evaluation score.
+
+    Arguments:
+        target: The target text.
+        language: The language that the target code is written in.
+        prompt_path: The filepath of the prompt text
+        reference: The reference text.
+
+    Returns:
+        The LLM Evaluation score.
+    """
+    parser = JsonOutputParser(pydantic_object=LLMMetricOutput)
+    prompt = load_prompt(prompt_path, language, parser)
+    chain = prompt | model | parser
+    try:
+        output = (
+            chain.invoke(dict(target=target, reference=reference))
+            if reference
+            else chain.invoke(dict(target=target))
+        )
+        return output["value"]
+    except OutputParserException:
+        return False
+
+
+@metric(use_reference=False, name="llm", help="LLM self-evaluation on a target file")
+def llm_evaluate_option(
+    target: str,
+    metric: Annotated[
+        str,
+        typer.Option(
+            "--metric",
+            "-m",
+            help=("The pre-defined metric to use for evaluation."),
+            click_type=click.Choice(
+                [
+                    "quality",
+                    "clarity",
+                    "faithfulness",
+                    "completeness",
+                    "hallucination",
+                    "readability",
+                    "usefulness",
+                ]
+            ),
+        ),
+    ] = "quality",
+    prompt: Annotated[
+        str,
+        None,
+        typer.Option(
+            "--prompt",
+            "-P",
+            help=("A custom prompt in a .txt file to use for evaluation."),
+        ),
+    ] = None,
+    num_eval: Annotated[
+        int,
+        typer.Option(
+            "-n",
+            "--num-eval",
+            help="Number of times to run the evaluation",
+        ),
+    ] = 1,
+    **kwargs,
+) -> Any:
+    """CLI option to calculate the LLM self evaluation score.
+
+    Arguments:
+        target: The target text.
+        reference: The reference text.
+        metric: The pre-defined metric to use for evaluation.
+        prompt: The prompt text.
+
+    Returns:
+        The LLM Evaluation score.
+    """
+    prompt_path: Path = (
+        Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
+    )
+    if num_eval == 1:
+        return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
+    else:
+        return [
+            evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
+            for _ in range(num_eval)
+        ]
+
+
+@metric(name="llm-ref", help="LLM self-evaluation on a target file and a reference file")
+def llm_evaluate_ref_option(
+    target: str,
+    reference: str,
+    metric: Annotated[
+        str,
+        typer.Option(
+            "--metric",
+            "-m",
+            help=("The pre-defined metric to use for evaluation."),
+            click_type=click.Choice(["faithfulness"]),
+        ),
+    ] = "faithfulness",
+    prompt: Annotated[
+        str,
+        None,
+        typer.Option(
+            "--prompt",
+            "-P",
+            help=("A custom prompt in a .txt file to use for evaluation."),
+        ),
+    ] = None,
+    num_eval: Annotated[
+        int,
+        typer.Option(
+            "-n",
+            "--num-eval",
+            help="Number of times to run evaluation for pair",
+        ),
+    ] = 1,
+    **kwargs,
+) -> Any:
+    """CLI option to calculate the LLM self evaluation score, for evaluations which
+    require a reference file (e.g. faithfulness)
+
+    Arguments:
+        target: The target text.
+        reference: The reference text.
+        metric: The pre-defined metric to use for evaluation.
+        prompt: The prompt text.
+
+    Returns:
+        The LLM Evaluation score.
+    """
+    prompt_path: Path = (
+        Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
+    )
+    if num_eval == 1:
+        return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
+    else:
+        return [
+            evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
+            for _ in range(num_eval)
+        ]
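Here, `evaluate` composes a LangChain chain (`prompt | model | parser`) and returns the parsed output's `value` field, or `False` when parsing fails; the two `@metric`-decorated Typer options only resolve a prompt file (a bundled `prompts/<metric>.txt` or a custom `--prompt` path) and call `evaluate` one or more times. A rough sketch of calling `evaluate` directly, not part of the diff: despite the `model: str` annotation, the chain needs a runnable chat model, and the model backend below is an assumption.

```python
# Sketch only: run the bundled "quality" self-evaluation prompt once.
# The chat-model class is an assumption; janus normally resolves models via janus/llm/models_info.py.
from pathlib import Path

from langchain_openai import ChatOpenAI  # assumed backend, not part of the diff

import janus.metrics.llm_metrics as llm_metrics

# Mirror the lookup the CLI options use: <module dir>/prompts/<metric>.txt
prompt_path = Path(llm_metrics.__file__).parent / "prompts" / "quality.txt"
model = ChatOpenAI(model="gpt-4o")  # any runnable chat model composes into prompt | model | parser

score = llm_metrics.evaluate(
    target='print("hello world")',
    language="python",
    model=model,
    prompt_path=prompt_path,
)
print(score)  # the parsed "value" field, or False if the LLM output could not be parsed
```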