janus-llm 3.2.1__tar.gz → 3.3.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (107)
  1. {janus_llm-3.2.1 → janus_llm-3.3.0}/PKG-INFO +1 -1
  2. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/__init__.py +1 -1
  3. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/converter/_tests/test_translate.py +3 -2
  4. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/converter/converter.py +49 -7
  5. janus_llm-3.3.0/janus/converter/diagram.py +139 -0
  6. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/llm/models_info.py +2 -0
  7. janus_llm-3.3.0/janus/parsers/refiner_parser.py +49 -0
  8. janus_llm-3.3.0/janus/refiners/refiner.py +63 -0
  9. {janus_llm-3.2.1 → janus_llm-3.3.0}/pyproject.toml +1 -1
  10. janus_llm-3.2.1/janus/converter/diagram.py +0 -126
  11. {janus_llm-3.2.1 → janus_llm-3.3.0}/LICENSE +0 -0
  12. {janus_llm-3.2.1 → janus_llm-3.3.0}/README.md +0 -0
  13. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/__main__.py +0 -0
  14. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/_tests/__init__.py +0 -0
  15. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/_tests/conftest.py +0 -0
  16. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/_tests/test_cli.py +0 -0
  17. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/cli.py +0 -0
  18. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/converter/__init__.py +0 -0
  19. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/converter/_tests/__init__.py +0 -0
  20. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/converter/document.py +0 -0
  21. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/converter/evaluate.py +0 -0
  22. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/converter/requirements.py +0 -0
  23. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/converter/translate.py +0 -0
  24. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/__init__.py +0 -0
  25. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/_tests/__init__.py +0 -0
  26. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/_tests/test_collections.py +0 -0
  27. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/_tests/test_database.py +0 -0
  28. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/_tests/test_vectorize.py +0 -0
  29. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/collections.py +0 -0
  30. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/database.py +0 -0
  31. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/embedding_models_info.py +0 -0
  32. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/embedding/vectorize.py +0 -0
  33. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/__init__.py +0 -0
  34. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/_tests/__init__.py +0 -0
  35. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/_tests/test_combine.py +0 -0
  36. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/_tests/test_splitter.py +0 -0
  37. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/alc/__init__.py +0 -0
  38. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/alc/_tests/__init__.py +0 -0
  39. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/alc/_tests/test_alc.py +0 -0
  40. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/alc/alc.py +0 -0
  41. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/binary/__init__.py +0 -0
  42. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/binary/_tests/__init__.py +0 -0
  43. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/binary/_tests/test_binary.py +0 -0
  44. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/binary/binary.py +0 -0
  45. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/binary/reveng/decompile_script.py +0 -0
  46. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/block.py +0 -0
  47. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/combine.py +0 -0
  48. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/file.py +0 -0
  49. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/mumps/__init__.py +0 -0
  50. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/mumps/_tests/__init__.py +0 -0
  51. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/mumps/_tests/test_mumps.py +0 -0
  52. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/mumps/mumps.py +0 -0
  53. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/mumps/patterns.py +0 -0
  54. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/naive/__init__.py +0 -0
  55. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/naive/basic_splitter.py +0 -0
  56. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/naive/chunk_splitter.py +0 -0
  57. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/naive/registry.py +0 -0
  58. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/naive/simple_ast.py +0 -0
  59. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/naive/tag_splitter.py +0 -0
  60. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/node.py +0 -0
  61. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/splitter.py +0 -0
  62. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/treesitter/__init__.py +0 -0
  63. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/treesitter/_tests/__init__.py +0 -0
  64. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/treesitter/_tests/test_treesitter.py +0 -0
  65. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/language/treesitter/treesitter.py +0 -0
  66. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/llm/__init__.py +0 -0
  67. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/llm/model_callbacks.py +0 -0
  68. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/__init__.py +0 -0
  69. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/__init__.py +0 -0
  70. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/reference.py +0 -0
  71. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/target.py +0 -0
  72. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/test_bleu.py +0 -0
  73. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/test_chrf.py +0 -0
  74. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/test_file_pairing.py +0 -0
  75. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/test_llm.py +0 -0
  76. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/test_reading.py +0 -0
  77. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/test_rouge_score.py +0 -0
  78. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/test_similarity_score.py +0 -0
  79. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/_tests/test_treesitter_metrics.py +0 -0
  80. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/bleu.py +0 -0
  81. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/chrf.py +0 -0
  82. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/cli.py +0 -0
  83. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/complexity_metrics.py +0 -0
  84. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/file_pairing.py +0 -0
  85. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/llm_metrics.py +0 -0
  86. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/metric.py +0 -0
  87. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/reading.py +0 -0
  88. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/rouge_score.py +0 -0
  89. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/similarity.py +0 -0
  90. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/metrics/splitting.py +0 -0
  91. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/parsers/__init__.py +0 -0
  92. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/parsers/_tests/__init__.py +0 -0
  93. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/parsers/_tests/test_code_parser.py +0 -0
  94. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/parsers/code_parser.py +0 -0
  95. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/parsers/doc_parser.py +0 -0
  96. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/parsers/eval_parser.py +0 -0
  97. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/parsers/reqs_parser.py +0 -0
  98. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/parsers/uml.py +0 -0
  99. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/prompts/__init__.py +0 -0
  100. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/prompts/prompt.py +0 -0
  101. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/utils/__init__.py +0 -0
  102. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/utils/_tests/__init__.py +0 -0
  103. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/utils/_tests/test_logger.py +0 -0
  104. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/utils/_tests/test_progress.py +0 -0
  105. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/utils/enums.py +0 -0
  106. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/utils/logger.py +0 -0
  107. {janus_llm-3.2.1 → janus_llm-3.3.0}/janus/utils/progress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: janus-llm
3
- Version: 3.2.1
3
+ Version: 3.3.0
4
4
  Summary: A transcoding library using LLMs.
5
5
  Home-page: https://github.com/janus-llm/janus-llm
6
6
  License: Apache 2.0
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
5
5
  from janus.converter.translate import Translator
6
6
  from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "3.2.1"
8
+ __version__ = "3.3.0"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
@@ -45,16 +45,17 @@ class TestTranslator(unittest.TestCase):
45
45
  def setUp(self):
46
46
  """Set up the tests."""
47
47
  self.translator = Translator(
48
- model="gpt-4o",
48
+ model="gpt-4o-mini",
49
49
  source_language="fortran",
50
50
  target_language="python",
51
51
  target_version="3.10",
52
+ splitter_type="ast-flex",
52
53
  )
53
54
  self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
54
55
  self.TEST_FILE_EMBEDDING_COUNT = 14
55
56
 
56
57
  self.req_translator = RequirementsDocumenter(
57
- model="gpt-4o",
58
+ model="gpt-4o-mini",
58
59
  source_language="fortran",
59
60
  prompt_template="requirements",
60
61
  )
@@ -6,7 +6,6 @@ from pathlib import Path
6
6
  from typing import Any
7
7
 
8
8
  from langchain.output_parsers import RetryWithErrorOutputParser
9
- from langchain.output_parsers.fix import OutputFixingParser
10
9
  from langchain_core.exceptions import OutputParserException
11
10
  from langchain_core.language_models import BaseLanguageModel
12
11
  from langchain_core.output_parsers import BaseOutputParser
@@ -29,6 +28,8 @@ from janus.llm import load_model
29
28
  from janus.llm.model_callbacks import get_model_callback
30
29
  from janus.llm.models_info import MODEL_PROMPT_ENGINES
31
30
  from janus.parsers.code_parser import GenericParser
31
+ from janus.parsers.refiner_parser import RefinerParser
32
+ from janus.refiners.refiner import BasicRefiner, Refiner
32
33
  from janus.utils.enums import LANGUAGES
33
34
  from janus.utils.logger import create_logger
34
35
 
@@ -75,6 +76,7 @@ class Converter:
75
76
  protected_node_types: tuple[str, ...] = (),
76
77
  prune_node_types: tuple[str, ...] = (),
77
78
  splitter_type: str = "file",
79
+ refiner_type: str = "basic",
78
80
  ) -> None:
79
81
  """Initialize a Converter instance.
80
82
 
@@ -84,6 +86,17 @@ class Converter:
84
86
  values are `"code"`, `"text"`, `"eval"`, and `None` (default). If `None`,
85
87
  the `Converter` assumes you won't be parsing an output (i.e., adding to an
86
88
  embedding DB).
89
+ max_prompts: The maximum number of prompts to try before giving up.
90
+ max_tokens: The maximum number of tokens to use in the LLM. If `None`, the
91
+ converter will use half the model's token limit.
92
+ prompt_template: The name of the prompt template to use.
93
+ db_path: The path to the database to use for vectorization.
94
+ db_config: The configuration for the database.
95
+ protected_node_types: A set of node types that aren't to be merged.
96
+ prune_node_types: A set of node types which should be pruned.
97
+ splitter_type: The type of splitter to use. Valid values are `"file"`,
98
+ `"tag"`, `"chunk"`, `"ast-strict"`, and `"ast-flex"`.
99
+ refiner_type: The type of refiner to use. Valid values are `"basic"`.
87
100
  """
88
101
  self._changed_attrs: set = set()
89
102
 
@@ -116,7 +129,11 @@ class Converter:
116
129
  self._parser: BaseOutputParser = GenericParser()
117
130
  self._combiner: Combiner = Combiner()
118
131
 
132
+ self._refiner_type: str
133
+ self._refiner: Refiner
134
+
119
135
  self.set_splitter(splitter_type=splitter_type)
136
+ self.set_refiner(refiner_type=refiner_type)
120
137
  self.set_model(model_name=model, **model_arguments)
121
138
  self.set_prompt(prompt_template=prompt_template)
122
139
  self.set_source_language(source_language)
@@ -142,6 +159,7 @@ class Converter:
142
159
  self._load_prompt()
143
160
  self._load_splitter()
144
161
  self._load_vectorizer()
162
+ self._load_refiner()
145
163
  self._changed_attrs.clear()
146
164
 
147
165
  def set_model(self, model_name: str, **custom_arguments: dict[str, Any]):
@@ -179,6 +197,16 @@ class Converter:
179
197
  """
180
198
  self._splitter_type = splitter_type
181
199
 
200
+ def set_refiner(self, refiner_type: str) -> None:
201
+ """Validate and set the refiner name
202
+
203
+ The affected objects will not be updated until translate is called
204
+
205
+ Arguments:
206
+ refiner_type: the name of the refiner to use
207
+ """
208
+ self._refiner_type = refiner_type
209
+
182
210
  def set_source_language(self, source_language: str) -> None:
183
211
  """Validate and set the source language.
184
212
 
@@ -249,10 +277,24 @@ class Converter:
249
277
  )
250
278
 
251
279
  if self._splitter_type == "tag":
252
- kwargs["tag"] = "<ITMOD_ALC_SPLIT>"
280
+ kwargs["tag"] = "<ITMOD_ALC_SPLIT>" # Hardcoded for now
253
281
 
254
282
  self._splitter = CUSTOM_SPLITTERS[self._splitter_type](**kwargs)
255
283
 
284
+ @run_if_changed("_refiner_type", "_model_name")
285
+ def _load_refiner(self) -> None:
286
+ """Load the refiner according to this instance's attributes.
287
+
288
+ If the relevant fields have not been changed since the last time this method was
289
+ called, nothing happens.
290
+ """
291
+ if self._refiner_type == "basic":
292
+ self._refiner = BasicRefiner(
293
+ "basic_refinement", self._model_name, self._source_language
294
+ )
295
+ else:
296
+ raise ValueError(f"Error: unknown refiner type {self._refiner_type}")
297
+
256
298
  @run_if_changed("_model_name", "_custom_model_arguments")
257
299
  def _load_model(self) -> None:
258
300
  """Load the model according to this instance's attributes.
@@ -561,22 +603,22 @@ class Converter:
561
603
  # Retries with just the input
562
604
  n3 = math.ceil(self.max_prompts / (n1 * n2))
563
605
 
564
- fix_format = OutputFixingParser.from_llm(
565
- llm=self._llm,
606
+ refine_output = RefinerParser(
566
607
  parser=self._parser,
608
+ initial_prompt=self._prompt.format(**{"SOURCE_CODE": block.original.text}),
609
+ refiner=self._refiner,
567
610
  max_retries=n1,
611
+ llm=self._llm,
568
612
  )
569
613
  retry = RetryWithErrorOutputParser.from_llm(
570
614
  llm=self._llm,
571
- parser=fix_format,
615
+ parser=refine_output,
572
616
  max_retries=n2,
573
617
  )
574
-
575
618
  completion_chain = self._prompt | self._llm
576
619
  chain = RunnableParallel(
577
620
  completion=completion_chain, prompt_value=self._prompt
578
621
  ) | RunnableLambda(lambda x: retry.parse_with_prompt(**x))
579
-
580
622
  for _ in range(n3):
581
623
  try:
582
624
  return chain.invoke({"SOURCE_CODE": block.original.text})
@@ -0,0 +1,139 @@
1
+ import math
2
+
3
+ from langchain.output_parsers import RetryWithErrorOutputParser
4
+ from langchain_core.exceptions import OutputParserException
5
+ from langchain_core.runnables import RunnableLambda, RunnableParallel
6
+
7
+ from janus.converter.converter import run_if_changed
8
+ from janus.converter.document import Documenter
9
+ from janus.language.block import TranslatedCodeBlock
10
+ from janus.llm.models_info import MODEL_PROMPT_ENGINES
11
+ from janus.parsers.refiner_parser import RefinerParser
12
+ from janus.parsers.uml import UMLSyntaxParser
13
+ from janus.utils.logger import create_logger
14
+
15
+ log = create_logger(__name__)
16
+
17
+
18
+ class DiagramGenerator(Documenter):
19
+ """DiagramGenerator
20
+
21
+ A class that translates code from one programming language to a set of diagrams.
22
+ """
23
+
24
+ def __init__(
25
+ self,
26
+ diagram_type="Activity",
27
+ add_documentation=False,
28
+ **kwargs,
29
+ ) -> None:
30
+ """Initialize the DiagramGenerator class
31
+
32
+ Arguments:
33
+ model: The LLM to use for translation. If an OpenAI model, the
34
+ `OPENAI_API_KEY` environment variable must be set and the
35
+ `OPENAI_ORG_ID` environment variable should be set if needed.
36
+ model_arguments: Additional arguments to pass to the LLM constructor.
37
+ source_language: The source programming language.
38
+ max_prompts: The maximum number of prompts to try before giving up.
39
+ db_path: path to chroma database
40
+ db_config: database configuration
41
+ diagram_type: type of PLANTUML diagram to generate
42
+ """
43
+ super().__init__(**kwargs)
44
+ self._diagram_type = diagram_type
45
+ self._add_documentation = add_documentation
46
+ self._documenter = None
47
+ self._diagram_parser = UMLSyntaxParser(language="plantuml")
48
+ if add_documentation:
49
+ self._diagram_prompt_template_name = "diagram_with_documentation"
50
+ else:
51
+ self._diagram_prompt_template_name = "diagram"
52
+ self._load_diagram_prompt_engine()
53
+
54
+ def _run_chain(self, block: TranslatedCodeBlock) -> str:
55
+ self._parser.set_reference(block.original)
56
+ n1 = round(self.max_prompts ** (1 / 3))
57
+
58
+ # Retries with the input, output, and error
59
+ n2 = round((self.max_prompts // n1) ** (1 / 2))
60
+
61
+ # Retries with just the input
62
+ n3 = math.ceil(self.max_prompts / (n1 * n2))
63
+
64
+ if self._add_documentation:
65
+ documentation_text = super()._run_chain(block)
66
+ refine_output = RefinerParser(
67
+ parser=self._diagram_parser,
68
+ initial_prompt=self._diagram_prompt.format(
69
+ **{
70
+ "SOURCE_CODE": block.original.text,
71
+ "DOCUMENTATION": documentation_text,
72
+ "DIAGRAM_TYPE": self._diagram_type,
73
+ }
74
+ ),
75
+ refiner=self._refiner,
76
+ max_retries=n1,
77
+ llm=self._llm,
78
+ )
79
+ else:
80
+ refine_output = RefinerParser(
81
+ parser=self._diagram_parser,
82
+ initial_prompt=self._diagram_prompt.format(
83
+ **{
84
+ "SOURCE_CODE": block.original.text,
85
+ "DIAGRAM_TYPE": self._diagram_type,
86
+ }
87
+ ),
88
+ refiner=self._refiner,
89
+ max_retries=n1,
90
+ llm=self._llm,
91
+ )
92
+ retry = RetryWithErrorOutputParser.from_llm(
93
+ llm=self._llm,
94
+ parser=refine_output,
95
+ max_retries=n2,
96
+ )
97
+ completion_chain = self._prompt | self._llm
98
+ chain = RunnableParallel(
99
+ completion=completion_chain, prompt_value=self._diagram_prompt
100
+ ) | RunnableLambda(lambda x: retry.parse_with_prompt(**x))
101
+ for _ in range(n3):
102
+ try:
103
+ if self._add_documentation:
104
+ return chain.invoke(
105
+ {
106
+ "SOURCE_CODE": block.original.text,
107
+ "DOCUMENTATION": documentation_text,
108
+ "DIAGRAM_TYPE": self._diagram_type,
109
+ }
110
+ )
111
+ else:
112
+ return chain.invoke(
113
+ {
114
+ "SOURCE_CODE": block.original.text,
115
+ "DIAGRAM_TYPE": self._diagram_type,
116
+ }
117
+ )
118
+ except OutputParserException:
119
+ pass
120
+
121
+ raise OutputParserException(f"Failed to parse after {n1*n2*n3} retries")
122
+
123
+ @run_if_changed(
124
+ "_diagram_prompt_template_name",
125
+ "_source_language",
126
+ )
127
+ def _load_diagram_prompt_engine(self) -> None:
128
+ """Load the prompt engine according to this instance's attributes.
129
+
130
+ If the relevant fields have not been changed since the last time this method was
131
+ called, nothing happens.
132
+ """
133
+ self._diagram_prompt_engine = MODEL_PROMPT_ENGINES[self._model_name](
134
+ source_language=self._source_language,
135
+ target_language="text",
136
+ target_version=None,
137
+ prompt_template=self._diagram_prompt_template_name,
138
+ )
139
+ self._diagram_prompt = self._diagram_prompt_engine.prompt
@@ -47,6 +47,7 @@ load_dotenv()
47
47
 
48
48
  openai_model_reroutes = {
49
49
  "gpt-4o": "gpt-4o-2024-05-13",
50
+ "gpt-4o-mini": "gpt-4o-mini",
50
51
  "gpt-4": "gpt-4-0613",
51
52
  "gpt-4-turbo": "gpt-4-turbo-2024-04-09",
52
53
  "gpt-4-turbo-preview": "gpt-4-0125-preview",
@@ -56,6 +57,7 @@ openai_model_reroutes = {
56
57
 
57
58
  openai_models = [
58
59
  "gpt-4o",
60
+ "gpt-4o-mini",
59
61
  "gpt-4",
60
62
  "gpt-4-turbo",
61
63
  "gpt-4-turbo-preview",
@@ -0,0 +1,49 @@
1
+ from langchain_core.exceptions import OutputParserException
2
+ from langchain_core.language_models import BaseLanguageModel
3
+ from langchain_core.output_parsers import BaseOutputParser
4
+
5
+ from janus.refiners.refiner import Refiner
6
+
7
+
8
+ class RefinerParser(BaseOutputParser):
9
+ """Parser for performing refinement with a refiner
10
+
11
+ Properties:
12
+ llm: the language model to use
13
+ parser: the parser to use for parsing llm output
14
+ initial_prompt: initial prompt used to generate output
15
+ refiner: refiner that gives new subsequent prompts
16
+ max_retries: maximum number of times to attempt refining
17
+ """
18
+
19
+ class Config:
20
+ arbitrary_types_allowed = True
21
+
22
+ llm: BaseLanguageModel
23
+ parser: BaseOutputParser
24
+ initial_prompt: str
25
+ refiner: Refiner
26
+ max_retries: int
27
+
28
+ def parse(self, text: str) -> str:
29
+ """Parses the text using the refiner
30
+
31
+ Arguments:
32
+ text: text to parse
33
+
34
+ Returns:
35
+ Parsed text
36
+ """
37
+ last_prompt = self.initial_prompt
38
+ for _ in range(self.max_retries):
39
+ try:
40
+ return self.parser.parse(text)
41
+ except OutputParserException as oe:
42
+ err = str(oe)
43
+ new_prompt, prompt_arguments = self.refiner.refine(last_prompt, text, err)
44
+ new_chain = new_prompt | self.llm
45
+ text = new_chain.invoke(prompt_arguments)
46
+ last_prompt = new_prompt.format(**prompt_arguments)
47
+ raise OutputParserException(
48
+ f"Error: unable to correct output after {self.max_retries} attempts"
49
+ )
@@ -0,0 +1,63 @@
1
+ from langchain_core.prompts import ChatPromptTemplate
2
+
3
+ from janus.llm.models_info import MODEL_PROMPT_ENGINES
4
+
5
+
6
+ class Refiner:
7
+ def refine(
8
+ self, original_prompt: str, original_output: str, errors: str, **kwargs
9
+ ) -> tuple[ChatPromptTemplate, dict[str, str]]:
10
+ """Creates a new prompt based on feedback from original results
11
+
12
+ Arguments:
13
+ original_prompt: original prompt used to produce output
14
+ original_output: original output of llm
15
+ errors: list of errors detected by parser
16
+
17
+ Returns:
18
+ Tuple of new prompt and prompt arguments
19
+ """
20
+ raise NotImplementedError
21
+
22
+
23
+ class BasicRefiner(Refiner):
24
+ def __init__(
25
+ self,
26
+ prompt_name: str,
27
+ model_name: str,
28
+ source_language: str,
29
+ ) -> None:
30
+ """Basic refiner, asks llm to fix output of previous prompt given errors
31
+
32
+ Arguments:
33
+ prompt_name: refinement prompt name to use
34
+ model_name: name of llm to use
35
+ source_language: source language to use
36
+ """
37
+ self._prompt_name = prompt_name
38
+ self._model_name = model_name
39
+ self._source_language = source_language
40
+
41
+ def refine(
42
+ self, original_prompt: str, original_output: str, errors: str, **kwargs
43
+ ) -> tuple[ChatPromptTemplate, dict[str, str]]:
44
+ """Creates a new prompt based on feedback from original results
45
+
46
+ Arguments:
47
+ original_prompt: original prompt used to produce output
48
+ original_output: original output of llm
49
+ errors: list of errors detected by parser
50
+
51
+ Returns:
52
+ Tuple of new prompt and prompt arguments
53
+ """
54
+ prompt_engine = MODEL_PROMPT_ENGINES[self._model_name](
55
+ prompt_template=self._prompt_name,
56
+ source_language=self._source_language,
57
+ )
58
+ prompt_arguments = {
59
+ "ORIGINAL_PROMPT": original_prompt,
60
+ "OUTPUT": original_output,
61
+ "ERRORS": errors,
62
+ }
63
+ return prompt_engine.prompt, prompt_arguments
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "janus-llm"
3
- version = "3.2.1"
3
+ version = "3.3.0"
4
4
  description = "A transcoding library using LLMs."
5
5
  authors = ["Michael Doyle <mdoyle@mitre.org>", "Chris Glasz <cglasz@mitre.org>",
6
6
  "Chris Tohline <ctohline@mitre.org>", "William Macke <wmacke@mitre.org>",
@@ -1,126 +0,0 @@
1
- import json
2
- from copy import deepcopy
3
-
4
- from janus.converter.converter import run_if_changed
5
- from janus.converter.document import Documenter
6
- from janus.language.block import TranslatedCodeBlock
7
- from janus.llm.models_info import MODEL_PROMPT_ENGINES
8
- from janus.parsers.uml import UMLSyntaxParser
9
- from janus.utils.logger import create_logger
10
-
11
- log = create_logger(__name__)
12
-
13
-
14
- class DiagramGenerator(Documenter):
15
- """DiagramGenerator
16
-
17
- A class that translates code from one programming language to a set of diagrams.
18
- """
19
-
20
- def __init__(
21
- self,
22
- diagram_type="Activity",
23
- add_documentation=False,
24
- **kwargs,
25
- ) -> None:
26
- """Initialize the DiagramGenerator class
27
-
28
- Arguments:
29
- model: The LLM to use for translation. If an OpenAI model, the
30
- `OPENAI_API_KEY` environment variable must be set and the
31
- `OPENAI_ORG_ID` environment variable should be set if needed.
32
- model_arguments: Additional arguments to pass to the LLM constructor.
33
- source_language: The source programming language.
34
- max_prompts: The maximum number of prompts to try before giving up.
35
- db_path: path to chroma database
36
- db_config: database configuration
37
- diagram_type: type of PLANTUML diagram to generate
38
- """
39
- super().__init__(**kwargs)
40
- self._diagram_type = diagram_type
41
- self._add_documentation = add_documentation
42
- self._documenter = None
43
- self._diagram_parser = UMLSyntaxParser(language="plantuml")
44
- if add_documentation:
45
- self._diagram_prompt_template_name = "diagram_with_documentation"
46
- else:
47
- self._diagram_prompt_template_name = "diagram"
48
- self._load_diagram_prompt_engine()
49
-
50
- def _add_translation(self, block: TranslatedCodeBlock) -> None:
51
- """Given an "empty" `TranslatedCodeBlock`, translate the code represented in
52
- `block.original`, setting the relevant fields in the translated block. The
53
- `TranslatedCodeBlock` is updated in-place, nothing is returned. Note that this
54
- translates *only* the code for this block, not its children.
55
-
56
- Arguments:
57
- block: An empty `TranslatedCodeBlock`
58
- """
59
- if block.translated:
60
- return
61
-
62
- if block.original.text is None:
63
- block.translated = True
64
- return
65
-
66
- if self._add_documentation:
67
- documentation_block = deepcopy(block)
68
- super()._add_translation(documentation_block)
69
- if not documentation_block.translated:
70
- message = "Error: unable to produce documentation for code block"
71
- log.info(message)
72
- raise ValueError(message)
73
- documentation = json.loads(documentation_block.text)["docstring"]
74
-
75
- if self._llm is None:
76
- message = (
77
- "Model not configured correctly, cannot translate. Try setting "
78
- "the model"
79
- )
80
- log.error(message)
81
- raise ValueError(message)
82
-
83
- log.debug(f"[{block.name}] Translating...")
84
- log.debug(f"[{block.name}] Input text:\n{block.original.text}")
85
-
86
- self._parser.set_reference(block.original)
87
-
88
- query_and_parse = self.diagram_prompt | self._llm | self._diagram_parser
89
-
90
- if self._add_documentation:
91
- block.text = query_and_parse.invoke(
92
- {
93
- "SOURCE_CODE": block.original.text,
94
- "DIAGRAM_TYPE": self._diagram_type,
95
- "DOCUMENTATION": documentation,
96
- }
97
- )
98
- else:
99
- block.text = query_and_parse.invoke(
100
- {
101
- "SOURCE_CODE": block.original.text,
102
- "DIAGRAM_TYPE": self._diagram_type,
103
- }
104
- )
105
- block.tokens = self._llm.get_num_tokens(block.text)
106
- block.translated = True
107
-
108
- log.debug(f"[{block.name}] Output code:\n{block.text}")
109
-
110
- @run_if_changed(
111
- "_diagram_prompt_template_name",
112
- "_source_language",
113
- )
114
- def _load_diagram_prompt_engine(self) -> None:
115
- """Load the prompt engine according to this instance's attributes.
116
-
117
- If the relevant fields have not been changed since the last time this method was
118
- called, nothing happens.
119
- """
120
- self._diagram_prompt_engine = MODEL_PROMPT_ENGINES[self._model_name](
121
- source_language=self._source_language,
122
- target_language="text",
123
- target_version=None,
124
- prompt_template=self._diagram_prompt_template_name,
125
- )
126
- self.diagram_prompt = self._diagram_prompt_engine.prompt
File without changes
File without changes
File without changes
File without changes