janus-llm 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. janus/__init__.py +9 -1
  2. janus/__main__.py +4 -0
  3. janus/_tests/test_cli.py +128 -0
  4. janus/_tests/test_translate.py +49 -7
  5. janus/cli.py +530 -46
  6. janus/converter.py +50 -19
  7. janus/embedding/_tests/test_collections.py +2 -8
  8. janus/embedding/_tests/test_database.py +32 -0
  9. janus/embedding/_tests/test_vectorize.py +9 -4
  10. janus/embedding/collections.py +49 -6
  11. janus/embedding/embedding_models_info.py +120 -0
  12. janus/embedding/vectorize.py +53 -62
  13. janus/language/_tests/__init__.py +0 -0
  14. janus/language/_tests/test_combine.py +62 -0
  15. janus/language/_tests/test_splitter.py +16 -0
  16. janus/language/binary/_tests/test_binary.py +16 -1
  17. janus/language/binary/binary.py +10 -3
  18. janus/language/block.py +31 -30
  19. janus/language/combine.py +26 -34
  20. janus/language/mumps/_tests/test_mumps.py +2 -2
  21. janus/language/mumps/mumps.py +93 -9
  22. janus/language/naive/__init__.py +4 -0
  23. janus/language/naive/basic_splitter.py +14 -0
  24. janus/language/naive/chunk_splitter.py +26 -0
  25. janus/language/naive/registry.py +13 -0
  26. janus/language/naive/simple_ast.py +18 -0
  27. janus/language/naive/tag_splitter.py +61 -0
  28. janus/language/splitter.py +168 -74
  29. janus/language/treesitter/_tests/test_treesitter.py +9 -6
  30. janus/language/treesitter/treesitter.py +37 -13
  31. janus/llm/model_callbacks.py +177 -0
  32. janus/llm/models_info.py +134 -70
  33. janus/metrics/__init__.py +8 -0
  34. janus/metrics/_tests/__init__.py +0 -0
  35. janus/metrics/_tests/reference.py +2 -0
  36. janus/metrics/_tests/target.py +2 -0
  37. janus/metrics/_tests/test_bleu.py +56 -0
  38. janus/metrics/_tests/test_chrf.py +67 -0
  39. janus/metrics/_tests/test_file_pairing.py +59 -0
  40. janus/metrics/_tests/test_llm.py +91 -0
  41. janus/metrics/_tests/test_reading.py +28 -0
  42. janus/metrics/_tests/test_rouge_score.py +65 -0
  43. janus/metrics/_tests/test_similarity_score.py +23 -0
  44. janus/metrics/_tests/test_treesitter_metrics.py +110 -0
  45. janus/metrics/bleu.py +66 -0
  46. janus/metrics/chrf.py +55 -0
  47. janus/metrics/cli.py +7 -0
  48. janus/metrics/complexity_metrics.py +208 -0
  49. janus/metrics/file_pairing.py +113 -0
  50. janus/metrics/llm_metrics.py +202 -0
  51. janus/metrics/metric.py +466 -0
  52. janus/metrics/reading.py +70 -0
  53. janus/metrics/rouge_score.py +96 -0
  54. janus/metrics/similarity.py +53 -0
  55. janus/metrics/splitting.py +38 -0
  56. janus/parsers/_tests/__init__.py +0 -0
  57. janus/parsers/_tests/test_code_parser.py +32 -0
  58. janus/parsers/code_parser.py +24 -253
  59. janus/parsers/doc_parser.py +169 -0
  60. janus/parsers/eval_parser.py +80 -0
  61. janus/parsers/reqs_parser.py +72 -0
  62. janus/prompts/prompt.py +103 -30
  63. janus/translate.py +636 -111
  64. janus/utils/_tests/__init__.py +0 -0
  65. janus/utils/_tests/test_logger.py +67 -0
  66. janus/utils/_tests/test_progress.py +20 -0
  67. janus/utils/enums.py +56 -3
  68. janus/utils/progress.py +56 -0
  69. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/METADATA +23 -10
  70. janus_llm-2.0.0.dist-info/RECORD +94 -0
  71. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/WHEEL +1 -1
  72. janus_llm-1.0.0.dist-info/RECORD +0 -48
  73. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/LICENSE +0 -0
  74. {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,72 @@
1
+ import json
2
+ import re
3
+
4
+ from langchain.output_parsers.json import parse_json_markdown
5
+ from langchain.schema.output_parser import BaseOutputParser
6
+ from langchain_core.exceptions import OutputParserException
7
+ from langchain_core.messages import AIMessage
8
+
9
+ from ..language.block import CodeBlock
10
+ from ..utils.logger import create_logger
11
+ from .code_parser import JanusParser
12
+
13
+ log = create_logger(__name__)
14
+
15
+
16
+ class RequirementsParser(BaseOutputParser[str], JanusParser):
17
+ block_name: str = ""
18
+
19
+ def __init__(self):
20
+ super().__init__(expected_keys=[])
21
+
22
+ def set_reference(self, block: CodeBlock):
23
+ self.block_name = block.name
24
+
25
+ def parse(self, text: str) -> str:
26
+ if isinstance(text, AIMessage):
27
+ text = text.content
28
+ text = text.lstrip("```json")
29
+ text = text.rstrip("```")
30
+ try:
31
+ obj = parse_json_markdown(text)
32
+ except json.JSONDecodeError as e:
33
+ log.debug(f"Invalid JSON object. Output:\n{text}")
34
+ raise OutputParserException(f"Got invalid JSON object. Error: {e}")
35
+
36
+ if not isinstance(obj, dict):
37
+ raise OutputParserException(
38
+ f"Got invalid return object. Expected a dictionary, but got {type(obj)}"
39
+ )
40
+ return json.dumps(obj)
41
+
42
+ def parse_combined_output(self, text: str):
43
+ """Parse the output text from the LLM when multiple inputs are combined.
44
+
45
+ Arguments:
46
+ text: The output text from the LLM.
47
+
48
+ Returns:
49
+ A parsed version of the text.
50
+ """
51
+ json_strings = re.findall(r"\{.*?\}", text)
52
+ output_list = list()
53
+ for i, json_string in enumerate(json_strings, 1):
54
+ json_dict = json.loads(json_string)
55
+ output_list.append(json_dict["requirements"])
56
+ return output_list
57
+
58
+ def get_format_instructions(self) -> str:
59
+ """Get the format instructions for the parser.
60
+
61
+ Returns:
62
+ The format instructions for the LLM.
63
+ """
64
+ return (
65
+ "Output must contain an ieee style requirements specification "
66
+ "all in a json-formatted string, including the following field: "
67
+ '"requirements".'
68
+ )
69
+
70
+ @property
71
+ def _type(self) -> str:
72
+ return self.__class__.name
janus/prompts/prompt.py CHANGED
@@ -1,15 +1,14 @@
1
1
  import json
2
+ from abc import ABC, abstractmethod
2
3
  from pathlib import Path
3
- from typing import List
4
4
 
5
+ from langchain import PromptTemplate
5
6
  from langchain.prompts import ChatPromptTemplate
6
7
  from langchain.prompts.chat import (
7
8
  HumanMessagePromptTemplate,
8
9
  SystemMessagePromptTemplate,
9
10
  )
10
- from langchain.schema.messages import BaseMessage
11
11
 
12
- from ..language.block import CodeBlock
13
12
  from ..utils.enums import LANGUAGES
14
13
  from ..utils.logger import create_logger
15
14
 
@@ -18,12 +17,13 @@ log = create_logger(__name__)
18
17
 
19
18
  # Prompt names (self.template_map keys) that should output text,
20
19
  # regardless of the `output-lang` argument.
21
- TEXT_OUTPUT = ["document", "requirements"]
20
+ TEXT_OUTPUT = []
21
+
22
22
  # Prompt names (self.template_map keys) that should output the
23
23
  # same language as the input, regardless of the `output-lang` argument.
24
24
  SAME_OUTPUT = ["document_inline"]
25
25
 
26
- JSON_OUTPUT = ["evaluate"]
26
+ JSON_OUTPUT = ["evaluate", "document", "document_madlibs", "requirements"]
27
27
 
28
28
  # Directory containing Janus prompt template directories and files
29
29
  JANUS_PROMPT_TEMPLATES_DIR = Path(__file__).parent / "templates"
@@ -34,7 +34,7 @@ HUMAN_PROMPT_TEMPLATE_FILENAME = "human.txt"
34
34
  PROMPT_VARIABLES_FILENAME = "variables.json"
35
35
 
36
36
 
37
- class PromptEngine:
37
+ class PromptEngine(ABC):
38
38
  """A class defining prompting schemes for the LLM."""
39
39
 
40
40
  def __init__(
@@ -59,22 +59,14 @@ class PromptEngine:
59
59
  template_path = self.get_prompt_template_path(prompt_template)
60
60
  self._template_path = template_path
61
61
  self._template_name = prompt_template
62
- system_prompt_path = SystemMessagePromptTemplate.from_template(
63
- (template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME).read_text()
64
- )
65
- human_prompt_path = HumanMessagePromptTemplate.from_template(
66
- (template_path / HUMAN_PROMPT_TEMPLATE_FILENAME).read_text()
67
- )
68
- self.prompt = ChatPromptTemplate.from_messages(
69
- [system_prompt_path, human_prompt_path]
70
- )
62
+ self.prompt = self.load_prompt_template(template_path)
71
63
 
72
64
  # Define variables to be passed in to the prompt formatter
73
65
  source_language = source_language.lower()
74
66
  target_language = target_language.lower()
75
67
  self.variables = dict(
76
- SOURCE_LANGUAGE=source_language.lower(),
77
- TARGET_LANGUAGE=target_language.lower(),
68
+ SOURCE_LANGUAGE=source_language,
69
+ TARGET_LANGUAGE=target_language,
78
70
  TARGET_LANGUAGE_VERSION=str(target_version),
79
71
  FILE_SUFFIX=LANGUAGES[source_language]["suffix"],
80
72
  SOURCE_CODE_EXAMPLE=LANGUAGES[source_language]["example"],
@@ -83,20 +75,11 @@ class PromptEngine:
83
75
  variables_path = template_path / PROMPT_VARIABLES_FILENAME
84
76
  if variables_path.exists():
85
77
  self.variables.update(json.loads(variables_path.read_text()))
78
+ self.prompt = self.prompt.partial(**self.variables)
86
79
 
87
- def create(self, code: CodeBlock) -> List[BaseMessage]:
88
- """Convert a code block to a Chat GPT prompt.
89
-
90
- Arguments:
91
- code: The code block to convert.
92
-
93
- Returns:
94
- The converted prompt as a list of messages.
95
- """
96
- return self.prompt.format_prompt(
97
- SOURCE_CODE=code.text,
98
- **self.variables,
99
- ).to_messages()
80
+ @abstractmethod
81
+ def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
82
+ pass
100
83
 
101
84
  @staticmethod
102
85
  def get_prompt_template_path(template_name: str) -> Path:
@@ -146,3 +129,93 @@ class PromptEngine:
146
129
  f"Specified prompt template directory {template_path} is "
147
130
  f"missing a {HUMAN_PROMPT_TEMPLATE_FILENAME}"
148
131
  )
132
+
133
+
134
+ class ChatGptPromptEngine(PromptEngine):
135
+ def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
136
+ system_prompt_path = template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME
137
+ system_prompt = system_prompt_path.read_text()
138
+ system_message = SystemMessagePromptTemplate.from_template(system_prompt)
139
+
140
+ human_prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
141
+ human_prompt = human_prompt_path.read_text()
142
+ human_message = HumanMessagePromptTemplate.from_template(human_prompt)
143
+ return ChatPromptTemplate.from_messages([system_message, human_message])
144
+
145
+
146
+ class ClaudePromptEngine(PromptEngine):
147
+ def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
148
+ prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
149
+ prompt = prompt_path.read_text()
150
+ return PromptTemplate.from_template(f"Human: {prompt}\n\nAssistant: ")
151
+
152
+
153
+ class TitanPromptEngine(PromptEngine):
154
+ def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
155
+ prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
156
+ prompt = prompt_path.read_text()
157
+ return PromptTemplate.from_template(f"User: {prompt}\n\nAssistant: ")
158
+
159
+
160
+ class Llama2PromptEngine(PromptEngine):
161
+ def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
162
+ system_prompt_path = template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME
163
+ system_prompt = system_prompt_path.read_text()
164
+
165
+ human_prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
166
+ human_prompt = human_prompt_path.read_text()
167
+
168
+ return PromptTemplate.from_template(
169
+ f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{human_prompt} [/INST]"
170
+ )
171
+
172
+
173
+ class Llama3PromptEngine(PromptEngine):
174
+ # see https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3
175
+ # /#special-tokens-used-with-meta-llama-3
176
+ def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
177
+ system_prompt_path = template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME
178
+ system_prompt = system_prompt_path.read_text()
179
+
180
+ human_prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
181
+ human_prompt = human_prompt_path.read_text()
182
+
183
+ return PromptTemplate.from_template(
184
+ f"<|begin_of_text|>"
185
+ f"<|start_header_id|>"
186
+ f"system"
187
+ f"<|end_header_id|>"
188
+ f"\n\n{system_prompt}"
189
+ f"<|eot_id|>"
190
+ f"<|start_header_id|>"
191
+ f"user"
192
+ f"<|end_header_id|>"
193
+ f"\n\n{human_prompt}"
194
+ f"<|eot_id|>"
195
+ f"<|start_header_id|>"
196
+ f"assistant"
197
+ f"<|end_header_id|>"
198
+ f"\n\n"
199
+ )
200
+
201
+
202
+ class CoherePromptEngine(PromptEngine):
203
+ # see https://docs.cohere.com/docs/prompting-command-r
204
+ def load_prompt_template(self, template_path: Path) -> ChatPromptTemplate:
205
+ system_prompt_path = template_path / SYSTEM_PROMPT_TEMPLATE_FILENAME
206
+ system_prompt = system_prompt_path.read_text()
207
+
208
+ human_prompt_path = template_path / HUMAN_PROMPT_TEMPLATE_FILENAME
209
+ human_prompt = human_prompt_path.read_text()
210
+
211
+ return PromptTemplate.from_template(
212
+ f"<BOS_TOKEN>"
213
+ f"<|START_OF_TURN_TOKEN|>"
214
+ f"<|SYSTEM_TOKEN|>"
215
+ f"{system_prompt}"
216
+ f"<|END_OF_TURN_TOKEN|>"
217
+ f"<|START_OF_TURN_TOKEN|>"
218
+ f"<|USER_TOKEN|>"
219
+ f"{human_prompt}"
220
+ f"<|END_OF_TURN_TOKEN|>"
221
+ )