janus-llm 1.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. janus/__init__.py +9 -1
  2. janus/__main__.py +4 -0
  3. janus/_tests/test_cli.py +128 -0
  4. janus/_tests/test_translate.py +49 -7
  5. janus/cli.py +530 -46
  6. janus/converter.py +50 -19
  7. janus/embedding/_tests/test_collections.py +2 -8
  8. janus/embedding/_tests/test_database.py +32 -0
  9. janus/embedding/_tests/test_vectorize.py +9 -4
  10. janus/embedding/collections.py +49 -6
  11. janus/embedding/embedding_models_info.py +130 -0
  12. janus/embedding/vectorize.py +53 -62
  13. janus/language/_tests/__init__.py +0 -0
  14. janus/language/_tests/test_combine.py +62 -0
  15. janus/language/_tests/test_splitter.py +16 -0
  16. janus/language/binary/_tests/test_binary.py +16 -1
  17. janus/language/binary/binary.py +10 -3
  18. janus/language/block.py +31 -30
  19. janus/language/combine.py +26 -34
  20. janus/language/mumps/_tests/test_mumps.py +2 -2
  21. janus/language/mumps/mumps.py +93 -9
  22. janus/language/naive/__init__.py +4 -0
  23. janus/language/naive/basic_splitter.py +14 -0
  24. janus/language/naive/chunk_splitter.py +26 -0
  25. janus/language/naive/registry.py +13 -0
  26. janus/language/naive/simple_ast.py +18 -0
  27. janus/language/naive/tag_splitter.py +61 -0
  28. janus/language/splitter.py +168 -74
  29. janus/language/treesitter/_tests/test_treesitter.py +19 -14
  30. janus/language/treesitter/treesitter.py +37 -13
  31. janus/llm/model_callbacks.py +177 -0
  32. janus/llm/models_info.py +165 -72
  33. janus/metrics/__init__.py +8 -0
  34. janus/metrics/_tests/__init__.py +0 -0
  35. janus/metrics/_tests/reference.py +2 -0
  36. janus/metrics/_tests/target.py +2 -0
  37. janus/metrics/_tests/test_bleu.py +56 -0
  38. janus/metrics/_tests/test_chrf.py +67 -0
  39. janus/metrics/_tests/test_file_pairing.py +59 -0
  40. janus/metrics/_tests/test_llm.py +91 -0
  41. janus/metrics/_tests/test_reading.py +28 -0
  42. janus/metrics/_tests/test_rouge_score.py +65 -0
  43. janus/metrics/_tests/test_similarity_score.py +23 -0
  44. janus/metrics/_tests/test_treesitter_metrics.py +110 -0
  45. janus/metrics/bleu.py +66 -0
  46. janus/metrics/chrf.py +55 -0
  47. janus/metrics/cli.py +7 -0
  48. janus/metrics/complexity_metrics.py +208 -0
  49. janus/metrics/file_pairing.py +113 -0
  50. janus/metrics/llm_metrics.py +202 -0
  51. janus/metrics/metric.py +466 -0
  52. janus/metrics/reading.py +70 -0
  53. janus/metrics/rouge_score.py +96 -0
  54. janus/metrics/similarity.py +53 -0
  55. janus/metrics/splitting.py +38 -0
  56. janus/parsers/_tests/__init__.py +0 -0
  57. janus/parsers/_tests/test_code_parser.py +32 -0
  58. janus/parsers/code_parser.py +24 -253
  59. janus/parsers/doc_parser.py +169 -0
  60. janus/parsers/eval_parser.py +80 -0
  61. janus/parsers/reqs_parser.py +72 -0
  62. janus/prompts/prompt.py +103 -30
  63. janus/translate.py +636 -111
  64. janus/utils/_tests/__init__.py +0 -0
  65. janus/utils/_tests/test_logger.py +67 -0
  66. janus/utils/_tests/test_progress.py +20 -0
  67. janus/utils/enums.py +56 -3
  68. janus/utils/progress.py +56 -0
  69. {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/METADATA +27 -11
  70. janus_llm-2.0.1.dist-info/RECORD +94 -0
  71. {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/WHEEL +1 -1
  72. janus_llm-1.0.0.dist-info/RECORD +0 -48
  73. {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/LICENSE +0 -0
  74. {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/entry_points.txt +0 -0
janus/metrics/file_pairing.py
@@ -0,0 +1,113 @@
+ from typing import Any, Callable
+
+ from ..language.binary import BinarySplitter
+ from ..language.mumps import MumpsSplitter
+ from ..language.node import NodeType
+ from ..language.treesitter import TreeSitterSplitter
+ from ..utils.enums import CUSTOM_SPLITTERS
+
+ FILE_PAIRING_METHODS: dict[str, Callable[[str, str], list[tuple[str, str]]]] = {}
+
+
+ def register_pairing_method(name: None | str = None) -> Callable[[Callable], Callable]:
+     """Registers a pairing method for pairing strings between files
+
+     Arguments:
+         name: The name of the pairing method. If None, the function name is used.
+         help: The help text for the pairing method.
+
+     Returns:
+         The decorator function.
+     """
+
+     def decorator(f: Callable[[str, str], list[tuple[str, str]]]):
+         if name is None:
+             pairing_name = f.__name__
+         else:
+             pairing_name = name
+         FILE_PAIRING_METHODS[pairing_name] = f
+         return f
+
+     return decorator
+
+
+ @register_pairing_method(name="file")
+ def pair_by_file(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the entire contents of a file together
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     return [(target, reference)]
+
+
+ @register_pairing_method(name="line")
+ def pair_by_line(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the contents of a file together by line
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     return list(zip(target.split("\n"), reference.split("\n")))
+
+
+ @register_pairing_method(name="line-comment")
+ def pair_by_line_comment(
+     target: str, reference: str, **kwargs: dict[str, Any]
+ ) -> list[tuple[str, str]]:
+     """Pairs the comments of a file together by line
+
+     **WARNING**: Do not use, as this method is extremely brittle.
+
+     Arguments:
+         target: The target file text.
+         reference: The reference file text.
+         state: The current evaluation state.
+
+     Returns:
+         A list of tuples of the target and reference file text.
+     """
+     splitter_kwargs = dict(
+         max_tokens=kwargs["token_limit"] // 2.5,
+         model=kwargs["llm"],
+         protected_node_types=(NodeType("comment"),),
+         prune_node_types=tuple(),
+     )
+     if kwargs["target_file"] is None or kwargs["reference_file"] is None:
+         raise ValueError("Error: must provide file for pair by line comment")
+     if kwargs["lang"] is None:
+         raise ValueError("Error: must provide language for pair by line comment")
+     if kwargs["lang"] in CUSTOM_SPLITTERS:
+         if kwargs["lang"] == "mumps":
+             splitter = MumpsSplitter(**splitter_kwargs)
+         elif kwargs["lang"] == "binary":
+             splitter = BinarySplitter(**splitter_kwargs)
+     else:
+         splitter = TreeSitterSplitter(language=kwargs["lang"], **splitter_kwargs)
+     target_tree = splitter.split(kwargs["target_file"])
+     reference_tree = splitter.split(kwargs["reference_file"])
+     pairs = []
+
+     def _parse_pairs(node1, node2, pairs):
+         for c1, c2 in zip(node1.children, node2.children):
+             if c1.node_type == "comment" and c2.node_type == "comment":
+                 pairs.append((c1.complete_text, c2.complete_text))
+             else:
+                 _parse_pairs(c1, c2, pairs)
+
+     _parse_pairs(target_tree, reference_tree, pairs)
+     return pairs
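
For orientation, a minimal usage sketch of the registry added above: `register_pairing_method` keys each pairer into `FILE_PAIRING_METHODS`, so a caller can look one up by name and apply it to two file texts. The sample strings below are made up for illustration, not taken from the package; only the "line" method is exercised because it needs no extra keyword arguments.

# Sketch only: look up a registered pairing method by name and apply it.
from janus.metrics.file_pairing import FILE_PAIRING_METHODS

target_text = "int x = 1;\nint y = 2;"
reference_text = "x = 1\ny = 2"

pair_by_line = FILE_PAIRING_METHODS["line"]       # registered above via the decorator
pairs = pair_by_line(target_text, reference_text) # [("int x = 1;", "x = 1"), ("int y = 2;", "y = 2")]
for tgt, ref in pairs:
    print(repr(tgt), "<->", repr(ref))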
janus/metrics/llm_metrics.py
@@ -0,0 +1,202 @@
+ from pathlib import Path
+ from typing import Any
+
+ import click
+ import typer
+ from langchain_core.exceptions import OutputParserException
+ from langchain_core.output_parsers import BaseOutputParser, JsonOutputParser
+ from langchain_core.prompts import PromptTemplate
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ from typing_extensions import Annotated
+
+ from .metric import metric
+
+
+ class LLMMetricOutput(BaseModel):
+     """The output of an LLM evaluation metric."""
+
+     thought: str = Field(
+         ...,
+         description=(
+             "The thought process that you took to reach your value determination."
+         ),
+     )
+     value: str | float | int = Field(
+         ..., description="The value of the metric described in the prompt."
+     )
+
+
+ def load_prompt(path: Path, language: str, parser: BaseOutputParser) -> PromptTemplate:
+     """Load a default prompt from a file.
+
+     Arguments:
+         path: The path to the file.
+         language: The language of the prompt.
+         pydantic_model: The Pydantic model to use for parsing the output.
+
+     Returns:
+         The prompt text.
+     """
+     if not path.exists():
+         raise FileNotFoundError(f"File not found: {path}")
+     prompt = PromptTemplate.from_template(
+         path.read_text(),
+         template_format="f-string",
+         partial_variables={
+             "language": language,
+             "format_instructions": parser.get_format_instructions(),
+         },
+     )
+     return prompt
+
+
+ def evaluate(
+     target: str,
+     language: str,
+     model: str,
+     prompt_path: Path,
+     reference: str | None = None,
+ ):
+     """Calculate the LLM self evaluation score.
+
+     Arguments:
+         target: The target text.
+         language: The language that the target code is written in.
+         prompt_path: The filepath of the prompt text
+         reference: The reference text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     parser = JsonOutputParser(pydantic_object=LLMMetricOutput)
+     prompt = load_prompt(prompt_path, language, parser)
+     chain = prompt | model | parser
+     try:
+         output = (
+             chain.invoke(dict(target=target, reference=reference))
+             if reference
+             else chain.invoke(dict(target=target))
+         )
+         return output["value"]
+     except OutputParserException:
+         return False
+
+
+ @metric(use_reference=False, name="llm", help="LLM self-evaluation on a target file")
+ def llm_evaluate_option(
+     target: str,
+     metric: Annotated[
+         str,
+         typer.Option(
+             "--metric",
+             "-m",
+             help=("The pre-defined metric to use for evaluation."),
+             click_type=click.Choice(
+                 [
+                     "quality",
+                     "clarity",
+                     "faithfulness",
+                     "completeness",
+                     "hallucination",
+                     "readability",
+                     "usefulness",
+                 ]
+             ),
+         ),
+     ] = "quality",
+     prompt: Annotated[
+         str,
+         None,
+         typer.Option(
+             "--prompt",
+             "-P",
+             help=("A custom prompt in a .txt file to use for evaluation."),
+         ),
+     ] = None,
+     num_eval: Annotated[
+         int,
+         typer.Option(
+             "-n",
+             "--num-eval",
+             help="Number of times to run the evaluation",
+         ),
+     ] = 1,
+     **kwargs,
+ ) -> Any:
+     """CLI option to calculate the LLM self evaluation score.
+
+     Arguments:
+         target: The target text.
+         reference: The reference text.
+         metric: The pre-defined metric to use for evaluation.
+         prompt: The prompt text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     prompt_path: Path = (
+         Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
+     )
+     if num_eval == 1:
+         return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
+     else:
+         return [
+             evaluate(target, kwargs["language"], kwargs["llm"], prompt_path)
+             for _ in range(num_eval)
+         ]
+
+
+ @metric(name="llm-ref", help="LLM self-evaluation on a target file and a reference file")
+ def llm_evaluate_ref_option(
+     target: str,
+     reference: str,
+     metric: Annotated[
+         str,
+         typer.Option(
+             "--metric",
+             "-m",
+             help=("The pre-defined metric to use for evaluation."),
+             click_type=click.Choice(["faithfulness"]),
+         ),
+     ] = "faithfulness",
+     prompt: Annotated[
+         str,
+         None,
+         typer.Option(
+             "--prompt",
+             "-P",
+             help=("A custom prompt in a .txt file to use for evaluation."),
+         ),
+     ] = None,
+     num_eval: Annotated[
+         int,
+         typer.Option(
+             "-n",
+             "--num-eval",
+             help="Number of times to run evaluation for pair",
+         ),
+     ] = 1,
+     **kwargs,
+ ) -> Any:
+     """CLI option to calculate the LLM self evaluation score, for evaluations which
+     require a reference file (e.g. faithfulness)
+
+     Arguments:
+         target: The target text.
+         reference: The reference text.
+         metric: The pre-defined metric to use for evaluation.
+         prompt: The prompt text.
+
+     Returns:
+         The LLM Evaluation score.
+     """
+     prompt_path: Path = (
+         Path(prompt) if prompt else Path(__file__).parent / "prompts" / f"{metric}.txt"
+     )
+     if num_eval == 1:
+         return evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
+     else:
+         return [
+             evaluate(target, kwargs["language"], kwargs["llm"], prompt_path, reference)
+             for _ in range(num_eval)
+         ]
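
Taken together, the two CLI options resolve a prompt file named after the chosen metric and call `evaluate`, which builds a `prompt | model | parser` chain and returns the parsed `value` field (or `False` on an `OutputParserException`). A rough sketch of calling `evaluate` directly follows; the model choice and prompt path are assumptions for illustration, and note that despite the `model: str` annotation the argument is piped into a LangChain chain, so a chat-model object is what actually works.

# Sketch only: direct use of evaluate(); model and prompt file are assumed.
from pathlib import Path

from langchain_openai import ChatOpenAI  # any LangChain chat model should do

from janus.metrics.llm_metrics import evaluate

llm = ChatOpenAI(model="gpt-4o-mini")  # hypothetical model choice
score = evaluate(
    target="def add(a, b):\n    return a + b\n",
    language="python",
    model=llm,                          # piped into prompt | model | parser
    prompt_path=Path("quality.txt"),    # assumed local prompt template using {target}
)
print(score)  # the parsed "value" field, or False if parsing failed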