janus-llm 4.3.5__py3-none-any.whl → 4.4.5__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- janus/__init__.py +1 -1
- janus/cli/aggregate.py +2 -2
- janus/cli/cli.py +6 -0
- janus/cli/constants.py +6 -0
- janus/cli/diagram.py +36 -7
- janus/cli/document.py +10 -1
- janus/cli/llm.py +7 -3
- janus/cli/partition.py +10 -1
- janus/cli/pipeline.py +123 -0
- janus/cli/self_eval.py +1 -3
- janus/cli/translate.py +10 -1
- janus/converter/_tests/test_translate.py +5 -5
- janus/converter/chain.py +180 -0
- janus/converter/converter.py +333 -78
- janus/converter/diagram.py +8 -6
- janus/converter/document.py +7 -3
- janus/converter/evaluate.py +140 -148
- janus/converter/partition.py +2 -10
- janus/converter/requirements.py +4 -40
- janus/converter/translate.py +2 -58
- janus/language/block.py +31 -2
- janus/metrics/metric.py +47 -124
- janus/parsers/reqs_parser.py +3 -3
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/METADATA +12 -12
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/RECORD +28 -28
- janus/metrics/_tests/test_llm.py +0 -90
- janus/metrics/llm_metrics.py +0 -202
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +0 -0
- {janus_llm-4.3.5.dist-info → janus_llm-4.4.5.dist-info}/entry_points.txt +0 -0
janus/metrics/llm_metrics.py
DELETED
@@ -1,202 +0,0 @@
-from pathlib import Path
-from typing import Any
-
-import click
-import typer
-from langchain_core.exceptions import OutputParserException
-from langchain_core.output_parsers import BaseOutputParser, JsonOutputParser
-from langchain_core.prompts import PromptTemplate
-from langchain_core.pydantic_v1 import BaseModel, Field
-from typing_extensions import Annotated
-
-from janus.metrics.metric import metric
-
-
-class LLMMetricOutput(BaseModel):
-    """The output of an LLM evaluation metric."""
-
-    thought: str = Field(
-        ...,
-        description=(
-            "The thought process that you took to reach your value determination."
-        ),
-    )
-    value: str | float | int = Field(
-        ..., description="The value of the metric described in the prompt."
-    )
-
-
-def load_prompt(path: Path, language: str, parser: BaseOutputParser) -> PromptTemplate:
-    """Load a default prompt from a file.
-
-    Arguments:
-        path: The path to the file.
-        language: The language of the prompt.
-        pydantic_model: The Pydantic model to use for parsing the output.
-
-    Returns:
-        The prompt text.
-    """
-    if not path.exists():
-        raise FileNotFoundError(f"File not found: {path}")
-    prompt = PromptTemplate.from_template(
-        path.read_text(),
-        template_format="f-string",
-        partial_variables={
-            "language": language,
-            "format_instructions": parser.get_format_instructions(),
-        },
-    )
-    return prompt
-
-
-def evaluate(
-    target: str,
-    language: str,
-    model: str,
-    prompt_path: Path,
-    reference: str | None = None,
-):
-    """Calculate the LLM self evaluation score.
-
-    Arguments:
-        target: The target text.
-        language: The language that the target code is written in.
-        prompt_path: The filepath of the prompt text
-        reference: The reference text.
-
-    Returns:
-        The LLM Evaluation score.
-    """
-    parser = JsonOutputParser(pydantic_object=LLMMetricOutput)
-    prompt = load_prompt(prompt_path, language, parser)
-    chain = prompt | model | parser
-    try:
-        output = (
-            chain.invoke(dict(target=target, reference=reference))
-            if reference
-            else chain.invoke(dict(target=target))
-        )
-        return output["value"]
-    except OutputParserException:
-        return False
-
-
-@metric(use_reference=False, name="llm", help="LLM self-evaluation on a target file")
-def llm_evaluate_option(
-    target: str,
-    metric: Annotated[
-        str,
-        typer.Option(
-            "--metric",
-            "-m",
-            help=("The pre-defined metric to use for evaluation."),
-            click_type=click.Choice(
-                [
-                    "quality",
-                    "clarity",
-                    "faithfulness",
-                    "completeness",
-                    "hallucination",
-                    "readability",
-                    "usefulness",
-                ]
-            ),
-        ),
-    ] = "quality",
-    prompt: Annotated[
-        str,
-        None,
-        typer.Option(
-            "--prompt",
-            "-P",
-            help=("A custom prompt in a .txt file to use for evaluation."),
-        ),
-    ] = None,
-    num_eval: Annotated[
-        int,
-        typer.Option(
-            "-n",
-            "--num-eval",
-            help="Number of times to run the evaluation",
-        ),
-    ] = 1,
-    **kwargs,
-) -> Any:
-    """CLI option to calculate the LLM self evaluation score.
-
-    Arguments:
-        target: The target text.
-        reference: The reference text.
-        metric: The pre-defined metric to use for evaluation.
-        prompt: The prompt text.
-
-    Returns:
-        The LLM Evaluation score.
-    """
-    prompt_path: Path = (
-        Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
-    )
-    if num_eval == 1:
-        return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
-    else:
-        return [
-            evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
-            for _ in range(num_eval)
-        ]
-
-
-@metric(name="llm-ref", help="LLM self-evaluation on a target file and a reference file")
-def llm_evaluate_ref_option(
-    target: str,
-    reference: str,
-    metric: Annotated[
-        str,
-        typer.Option(
-            "--metric",
-            "-m",
-            help=("The pre-defined metric to use for evaluation."),
-            click_type=click.Choice(["faithfulness"]),
-        ),
-    ] = "faithfulness",
-    prompt: Annotated[
-        str,
-        None,
-        typer.Option(
-            "--prompt",
-            "-P",
-            help=("A custom prompt in a .txt file to use for evaluation."),
-        ),
-    ] = None,
-    num_eval: Annotated[
-        int,
-        typer.Option(
-            "-n",
-            "--num-eval",
-            help="Number of times to run evaluation for pair",
-        ),
-    ] = 1,
-    **kwargs,
-) -> Any:
-    """CLI option to calculate the LLM self evaluation score, for evaluations which
-    require a reference file (e.g. faithfulness)
-
-    Arguments:
-        target: The target text.
-        reference: The reference text.
-        metric: The pre-defined metric to use for evaluation.
-        prompt: The prompt text.
-
-    Returns:
-        The LLM Evaluation score.
-    """
-    prompt_path: Path = (
-        Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
-    )
-    if num_eval == 1:
-        return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
-    else:
-        return [
-            evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
-            for _ in range(num_eval)
-        ]