janus-llm 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

Files changed (74)
  1. janus/__init__.py +9 -1
  2. janus/__main__.py +4 -0
  3. janus/_tests/test_cli.py +128 -0
  4. janus/_tests/test_translate.py +49 -7
  5. janus/cli.py +530 -46
  6. janus/converter.py +50 -19
  7. janus/embedding/_tests/test_collections.py +2 -8
  8. janus/embedding/_tests/test_database.py +32 -0
  9. janus/embedding/_tests/test_vectorize.py +9 -4
  10. janus/embedding/collections.py +49 -6
  11. janus/embedding/embedding_models_info.py +120 -0
  12. janus/embedding/vectorize.py +53 -62
  13. janus/language/_tests/__init__.py +0 -0
  14. janus/language/_tests/test_combine.py +62 -0
  15. janus/language/_tests/test_splitter.py +16 -0
  16. janus/language/binary/_tests/test_binary.py +16 -1
  17. janus/language/binary/binary.py +10 -3
  18. janus/language/block.py +31 -30
  19. janus/language/combine.py +26 -34
  20. janus/language/mumps/_tests/test_mumps.py +2 -2
  21. janus/language/mumps/mumps.py +93 -9
  22. janus/language/naive/__init__.py +4 -0
  23. janus/language/naive/basic_splitter.py +14 -0
  24. janus/language/naive/chunk_splitter.py +26 -0
  25. janus/language/naive/registry.py +13 -0
  26. janus/language/naive/simple_ast.py +18 -0
  27. janus/language/naive/tag_splitter.py +61 -0
  28. janus/language/splitter.py +168 -74
  29. janus/language/treesitter/_tests/test_treesitter.py +9 -6
  30. janus/language/treesitter/treesitter.py +37 -13
  31. janus/llm/model_callbacks.py +177 -0
  32. janus/llm/models_info.py +134 -70
  33. janus/metrics/__init__.py +8 -0
  34. janus/metrics/_tests/__init__.py +0 -0
  35. janus/metrics/_tests/reference.py +2 -0
  36. janus/metrics/_tests/target.py +2 -0
  37. janus/metrics/_tests/test_bleu.py +56 -0
  38. janus/metrics/_tests/test_chrf.py +67 -0
  39. janus/metrics/_tests/test_file_pairing.py +59 -0
  40. janus/metrics/_tests/test_llm.py +91 -0
  41. janus/metrics/_tests/test_reading.py +28 -0
  42. janus/metrics/_tests/test_rouge_score.py +65 -0
  43. janus/metrics/_tests/test_similarity_score.py +23 -0
  44. janus/metrics/_tests/test_treesitter_metrics.py +110 -0
  45. janus/metrics/bleu.py +66 -0
  46. janus/metrics/chrf.py +55 -0
  47. janus/metrics/cli.py +7 -0
  48. janus/metrics/complexity_metrics.py +208 -0
  49. janus/metrics/file_pairing.py +113 -0
  50. janus/metrics/llm_metrics.py +202 -0
  51. janus/metrics/metric.py +466 -0
  52. janus/metrics/reading.py +70 -0
  53. janus/metrics/rouge_score.py +96 -0
  54. janus/metrics/similarity.py +53 -0
  55. janus/metrics/splitting.py +38 -0
  56. janus/parsers/_tests/__init__.py +0 -0
  57. janus/parsers/_tests/test_code_parser.py +32 -0
  58. janus/parsers/code_parser.py +24 -253
  59. janus/parsers/doc_parser.py +169 -0
  60. janus/parsers/eval_parser.py +80 -0
  61. janus/parsers/reqs_parser.py +72 -0
  62. janus/prompts/prompt.py +103 -30
  63. janus/translate.py +636 -111
  64. janus/utils/_tests/__init__.py +0 -0
  65. janus/utils/_tests/test_logger.py +67 -0
  66. janus/utils/_tests/test_progress.py +20 -0
  67. janus/utils/enums.py +56 -3
  68. janus/utils/progress.py +56 -0
  69. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/METADATA +23 -10
  70. janus_llm-2.0.0.dist-info/RECORD +94 -0
  71. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/WHEEL +1 -1
  72. janus_llm-1.0.0.dist-info/RECORD +0 -48
  73. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/LICENSE +0 -0
  74. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/entry_points.txt +0 -0
janus/metrics/file_pairing.py
@@ -0,0 +1,113 @@
+ from typing import Any, Callable
+
+ from ..language.binary import BinarySplitter
+ from ..language.mumps import MumpsSplitter
+ from ..language.node import NodeType
+ from ..language.treesitter import TreeSitterSplitter
+ from ..utils.enums import CUSTOM_SPLITTERS
+
+ FILE_PAIRING_METHODS: dict[str, Callable[[str, str], list[tuple[str, str]]]] = {}
+
+
+ def register_pairing_method(name: None | str = None) -> Callable[[Callable], Callable]:
+     """Registers a pairing method for pairing strings between files
+
+     Arguments:
+         name: The name of the pairing method. If None, the function name is used.
+         help: The help text for the pairing method.
+
+     Returns:
+         The decorator function.
+     """
+
+     def decorator(f: Callable[[str, str], list[tuple[str, str]]]):
+         if name is None:
+             pairing_name = f.__name__
+         else:
+             pairing_name = name
+         FILE_PAIRING_METHODS[pairing_name] = f
+         return f
+
+     return decorator
+
+
+ @register_pairing_method(name="file")
+ def pair_by_file(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the entire contents of a file together
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     return [(target, reference)]
+
+
+ @register_pairing_method(name="line")
+ def pair_by_line(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the contents of a file together by line
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     return list(zip(target.split("\n"), reference.split("\n")))
+
+
+ @register_pairing_method(name="line-comment")
+ def pair_by_line_comment(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the comments of a file together by line
+
+     **WARNING**: Do not use, as this method is extremely brittle.
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     splitter_kwargs = dict(
+         max_tokens=kwargs["token_limit"] // 2.5,
+         model=kwargs["llm"],
+         protected_node_types=(NodeType("comment"),),
+         prune_node_types=tuple(),
+     )
+     if kwargs["target_file"] is None or kwargs["reference_file"] is None:
+         raise ValueError("Error: must provide file for pair by line comment")
+     if kwargs["lang"] is None:
+         raise ValueError("Error: must provide language for pair by line comment")
+     if kwargs["lang"] in CUSTOM_SPLITTERS:
+         if kwargs["lang"] == "mumps":
+             splitter = MumpsSplitter(**splitter_kwargs)
+         elif kwargs["lang"] == "binary":
+             splitter = BinarySplitter(**splitter_kwargs)
+     else:
+         splitter = TreeSitterSplitter(language=kwargs["lang"], **splitter_kwargs)
+     target_tree = splitter.split(kwargs["target_file"])
+     reference_tree = splitter.split(kwargs["reference_file"])
+     pairs = []
+
+     def _parse_pairs(node1, node2, pairs):
+         for c1, c2 in zip(node1.children, node2.children):
+             if c1.node_type == "comment" and c2.node_type == "comment":
+                 pairs.append((c1.complete_text, c2.complete_text))
+             else:
+                 _parse_pairs(c1, c2, pairs)
+
+     _parse_pairs(target_tree, reference_tree, pairs)
+     return pairs
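
The new file_pairing module is organized around a simple registry: register_pairing_method stores each decorated function in FILE_PAIRING_METHODS under its chosen name, and the metrics machinery later looks pairing methods up by that name. The sketch below illustrates that pattern with a hypothetical paragraph-based method and a direct registry lookup; it is an assumed usage example, not code from the package, and it presumes janus-llm 2.0.0 is installed so janus.metrics.file_pairing is importable.

# Hypothetical usage sketch: register a custom pairing method and call it
# through the registry, mirroring the built-in "file" and "line" methods above.
from janus.metrics.file_pairing import FILE_PAIRING_METHODS, register_pairing_method


@register_pairing_method(name="paragraph")
def pair_by_paragraph(target: str, reference: str, **kwargs) -> list[tuple[str, str]]:
    # Pair blank-line-separated paragraphs positionally, as pair_by_line does for lines.
    return list(zip(target.split("\n\n"), reference.split("\n\n")))


# The decorator has added the function to the registry under its given name.
pairs = FILE_PAIRING_METHODS["paragraph"]("t1\n\nt2", "r1\n\nr2")
print(pairs)  # [('t1', 'r1'), ('t2', 'r2')]
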
janus/metrics/llm_metrics.py
@@ -0,0 +1,202 @@
+ from pathlib import Path
+ from typing import Any
+
+ import click
+ import typer
+ from langchain_core.exceptions import OutputParserException
+ from langchain_core.output_parsers import BaseOutputParser, JsonOutputParser
+ from langchain_core.prompts import PromptTemplate
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ from typing_extensions import Annotated
+
+ from .metric import metric
+
+
+ class LLMMetricOutput(BaseModel):
+     """The output of an LLM evaluation metric."""
+
+     thought: str = Field(
+         ...,
+         description=(
+             "The thought process that you took to reach your value determination."
+         ),
+     )
+     value: str | float | int = Field(
+         ..., description="The value of the metric described in the prompt."
+     )
+
+
+ def load_prompt(path: Path, language: str, parser: BaseOutputParser) -> PromptTemplate:
+     """Load a default prompt from a file.
+
+     Arguments:
+         path: The path to the file.
+         language: The language of the prompt.
+         pydantic_model: The Pydantic model to use for parsing the output.
+
+     Returns:
+         The prompt text.
+     """
+     if not path.exists():
+         raise FileNotFoundError(f"File not found: {path}")
+     prompt = PromptTemplate.from_template(
+         path.read_text(),
+         template_format="f-string",
+         partial_variables={
+             "language": language,
+             "format_instructions": parser.get_format_instructions(),
+         },
+     )
+     return prompt
+
+
+ def evaluate(
+     target: str,
+     language: str,
+     model: str,
+     prompt_path: Path,
+     reference: str | None = None,
+ ):
+     """Calculate the LLM self evaluation score.
+
+     Arguments:
+         target: The target text.
+         language: The language that the target code is written in.
+         prompt_path: The filepath of the prompt text
+         reference: The reference text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     parser = JsonOutputParser(pydantic_object=LLMMetricOutput)
+     prompt = load_prompt(prompt_path, language, parser)
+     chain = prompt | model | parser
+     try:
+         output = (
+             chain.invoke(dict(target=target, reference=reference))
+             if reference
+             else chain.invoke(dict(target=target))
+         )
+         return output["value"]
+     except OutputParserException:
+         return False
+
+
+ @metric(use_reference=False, name="llm", help="LLM self-evaluation on a target file")
+ def llm_evaluate_option(
+     target: str,
+     metric: Annotated[
+         str,
+         typer.Option(
+             "--metric",
+             "-m",
+             help=("The pre-defined metric to use for evaluation."),
+             click_type=click.Choice(
+                 [
+                     "quality",
+                     "clarity",
+                     "faithfulness",
+                     "completeness",
+                     "hallucination",
+                     "readability",
+                     "usefulness",
+                 ]
+             ),
+         ),
+     ] = "quality",
+     prompt: Annotated[
+         str,
+         None,
+         typer.Option(
+             "--prompt",
+             "-P",
+             help=("A custom prompt in a .txt file to use for evaluation."),
+         ),
+     ] = None,
+     num_eval: Annotated[
+         int,
+         typer.Option(
+             "-n",
+             "--num-eval",
+             help="Number of times to run the evaluation",
+         ),
+     ] = 1,
+     **kwargs,
+ ) -> Any:
+     """CLI option to calculate the LLM self evaluation score.
+
+     Arguments:
+         target: The target text.
+         reference: The reference text.
+         metric: The pre-defined metric to use for evaluation.
+         prompt: The prompt text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     prompt_path: Path = (
+         Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
+     )
+     if num_eval == 1:
+         return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
+     else:
+         return [
+             evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
+             for _ in range(num_eval)
+         ]
+
+
+ @metric(name="llm-ref", help="LLM self-evaluation on a target file and a reference file")
+ def llm_evaluate_ref_option(
+     target: str,
+     reference: str,
+     metric: Annotated[
+         str,
+         typer.Option(
+             "--metric",
+             "-m",
+             help=("The pre-defined metric to use for evaluation."),
+             click_type=click.Choice(["faithfulness"]),
+         ),
+     ] = "faithfulness",
+     prompt: Annotated[
+         str,
+         None,
+         typer.Option(
+             "--prompt",
+             "-P",
+             help=("A custom prompt in a .txt file to use for evaluation."),
+         ),
+     ] = None,
+     num_eval: Annotated[
+         int,
+         typer.Option(
+             "-n",
+             "--num-eval",
+             help="Number of times to run evaluation for pair",
+         ),
+     ] = 1,
+     **kwargs,
+ ) -> Any:
+     """CLI option to calculate the LLM self evaluation score, for evaluations which
+     require a reference file (e.g. faithfulness)
+
+     Arguments:
+         target: The target text.
+         reference: The reference text.
+         metric: The pre-defined metric to use for evaluation.
+         prompt: The prompt text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     prompt_path: Path = (
+         Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
+     )
+     if num_eval == 1:
+         return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
+     else:
+         return [
+             evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
+             for _ in range(num_eval)
+         ]
+ ]