janus-llm 4.2.0__py3-none-any.whl → 4.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
janus/__init__.py CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
 from janus.converter.translate import Translator
 from janus.metrics import *  # noqa: F403
 
-__version__ = "4.2.0"
+__version__ = "4.3.1"
 
 # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
 warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
janus/cli.py CHANGED
@@ -19,6 +19,7 @@ from janus.converter.aggregator import Aggregator
 from janus.converter.converter import Converter
 from janus.converter.diagram import DiagramGenerator
 from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
+from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator
 from janus.converter.partition import Partitioner
 from janus.converter.requirements import RequirementsDocumenter
 from janus.converter.translate import Translator
@@ -127,7 +128,7 @@ embedding = typer.Typer(
 
 def version_callback(value: bool) -> None:
     if value:
-        from janus import __version__ as version
+        from . import __version__ as version
 
         print(f"Janus CLI [blue]v{version}[/blue]")
         raise typer.Exit()
@@ -655,6 +656,16 @@ def partition(
             click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
         ),
     ] = "file",
+    refiner_types: Annotated[
+        list[str],
+        typer.Option(
+            "-r",
+            "--refiner",
+            help="List of refiner types to use. Add -r for each refiner to use in\
+                refinement chain",
+            click_type=click.Choice(list(REFINERS.keys())),
+        ),
+    ] = ["JanusRefiner"],
     max_tokens: Annotated[
         int,
         typer.Option(
@@ -673,6 +684,7 @@ def partition(
         ),
     ] = 8192,
 ):
+    refiner_types = [REFINERS[r] for r in refiner_types]
     model_arguments = dict(temperature=temperature)
     kwargs = dict(
         model=llm_name,
@@ -681,6 +693,7 @@ def partition(
         max_prompts=max_prompts,
         max_tokens=max_tokens,
         splitter_type=splitter_type,
+        refiner_types=refiner_types,
         partition_token_limit=partition_token_limit,
     )
     partitioner = Partitioner(**kwargs)
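
The new `-r`/`--refiner` option is repeatable, and each name is resolved to a class through the `REFINERS` registry before construction (`refiner_types = [REFINERS[r] for r in refiner_types]`). A minimal sketch of that name-to-class lookup, with a hypothetical two-entry registry standing in for janus's own:

    # Hypothetical stand-ins; the real REFINERS dict maps CLI names to
    # JanusRefiner subclasses defined in janus.refiners.refiner.
    class JanusRefiner: ...
    class FixParserExceptions(JanusRefiner): ...

    REFINERS = {
        "JanusRefiner": JanusRefiner,
        "FixParserExceptions": FixParserExceptions,
    }

    # Mirrors the lookup the CLI performs on repeated -r flags:
    names = ["JanusRefiner", "FixParserExceptions"]
    refiner_types = [REFINERS[r] for r in names]
    assert refiner_types == [JanusRefiner, FixParserExceptions]
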
@@ -815,6 +828,139 @@ def diagram(
     diagram_generator.translate(input_dir, output_dir, overwrite, collection)
 
 
+@app.command(
+    help="LLM self evaluation",
+    no_args_is_help=True,
+)
+def llm_self_eval(
+    input_dir: Annotated[
+        Path,
+        typer.Option(
+            "--input",
+            "-i",
+            help="The directory containing the source code to be evaluated. "
+            "The files should all be in one flat directory.",
+        ),
+    ],
+    language: Annotated[
+        str,
+        typer.Option(
+            "--language",
+            "-l",
+            help="The language of the source code.",
+            click_type=click.Choice(sorted(LANGUAGES)),
+        ),
+    ],
+    output_dir: Annotated[
+        Path,
+        typer.Option(
+            "--output-dir", "-o", help="The directory to store the evaluations in."
+        ),
+    ],
+    llm_name: Annotated[
+        str,
+        typer.Option(
+            "--llm",
+            "-L",
+            help="The custom name of the model set with 'janus llm add'.",
+        ),
+    ] = "gpt-4o",
+    evaluation_type: Annotated[
+        str,
+        typer.Option(
+            "--evaluation-type",
+            "-e",
+            help="Type of output to evaluate.",
+            click_type=click.Choice(["incose", "comments"]),
+        ),
+    ] = "incose",
+    max_prompts: Annotated[
+        int,
+        typer.Option(
+            "--max-prompts",
+            "-m",
+            help="The maximum number of times to prompt a model on one functional block "
+            "before exiting the application. This is to prevent wasting too much money.",
+        ),
+    ] = 10,
+    overwrite: Annotated[
+        bool,
+        typer.Option(
+            "--overwrite/--preserve",
+            help="Whether to overwrite existing files in the output directory",
+        ),
+    ] = False,
+    temperature: Annotated[
+        float,
+        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
+    ] = 0.7,
+    collection: Annotated[
+        str,
+        typer.Option(
+            "--collection",
+            "-c",
+            help="If set, will put the translated result into a Chroma DB "
+            "collection with the name provided.",
+        ),
+    ] = None,
+    splitter_type: Annotated[
+        str,
+        typer.Option(
+            "-S",
+            "--splitter",
+            help="Name of custom splitter to use",
+            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
+        ),
+    ] = "file",
+    refiner_types: Annotated[
+        list[str],
+        typer.Option(
+            "-r",
+            "--refiner",
+            help="List of refiner types to use. Add -r for each refiner to use in\
+                refinement chain",
+            click_type=click.Choice(list(REFINERS.keys())),
+        ),
+    ] = ["JanusRefiner"],
+    eval_items_per_request: Annotated[
+        int,
+        typer.Option(
+            "--eval-items-per-request",
+            "-rc",
+            help="The maximum number of evaluation items per request",
+        ),
+    ] = None,
+    max_tokens: Annotated[
+        int,
+        typer.Option(
+            "--max-tokens",
+            "-M",
+            help="The maximum number of tokens the model will take in. "
+            "If unspecificed, model's default max will be used.",
+        ),
+    ] = None,
+):
+    model_arguments = dict(temperature=temperature)
+    refiner_types = [REFINERS[r] for r in refiner_types]
+    kwargs = dict(
+        eval_items_per_request=eval_items_per_request,
+        model=llm_name,
+        model_arguments=model_arguments,
+        source_language=language,
+        max_prompts=max_prompts,
+        max_tokens=max_tokens,
+        splitter_type=splitter_type,
+        refiner_types=refiner_types,
+    )
+    # Setting parser type here
+    if evaluation_type == "incose":
+        evaluator = RequirementEvaluator(**kwargs)
+    elif evaluation_type == "comments":
+        evaluator = InlineCommentEvaluator(**kwargs)
+
+    evaluator.translate(input_dir, output_dir, overwrite, collection)
+
+
 @db.command("init", help="Connect to or create a database.")
 def db_init(
     path: Annotated[
@@ -1116,13 +1262,12 @@ def llm_add(
         show_choices=False,
     )
     params = dict(
-        # OpenAI uses the "model_name" key for what we're calling "long_model_id"
-        model_name=MODEL_ID_TO_LONG_ID[model_id],
+        model_name=model_name,
         temperature=0.7,
         n=1,
     )
-    max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
-    model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
+    max_tokens = TOKEN_LIMITS[model_name]
+    model_cost = COST_PER_1K_TOKENS[model_name]
     cfg = {
         "model_type": model_type,
         "model_id": model_id,
janus/converter/converter.py CHANGED
@@ -464,6 +464,7 @@ class Converter:
         for in_path, out_path in in_out_pairs:
             # Translate the file, skip it if there's a rate limit error
             try:
+                log.info(f"Processing {in_path.relative_to(input_directory)}")
                 out_block = self.translate_file(in_path)
                 total_cost += out_block.total_cost
             except RateLimitError:
janus/converter/evaluate.py CHANGED
@@ -1,15 +1,241 @@
+import json
+import re
+from copy import deepcopy
+
+from langchain_core.runnables import Runnable, RunnableLambda, RunnableParallel
+
 from janus.converter.converter import Converter
+from janus.language.block import TranslatedCodeBlock
 from janus.language.combine import JsonCombiner
-from janus.parsers.eval_parser import EvaluationParser
+from janus.parsers.eval_parsers.incose_parser import IncoseParser
+from janus.parsers.eval_parsers.inline_comment_parser import InlineCommentParser
 from janus.utils.logger import create_logger
 
 log = create_logger(__name__)
 
 
 class Evaluator(Converter):
-    def __init__(self, **kwargs):
+    """Evaluator
+
+    A class that performs an LLM self evaluation"
+    "on an input target, with an associated prompt.
+
+    Current valid evaluation types:
+    ['incose', 'comments']
+
+    """
+
+    def __init__(self, **kwargs) -> None:
+        """Initialize the Evaluator class
+
+        Arguments:
+            model: The LLM to use for translation. If an OpenAI model, the
+                `OPENAI_API_KEY` environment variable must be set and the
+                `OPENAI_ORG_ID` environment variable should be set if needed.
+            model_arguments: Additional arguments to pass to the LLM constructor.
+            max_prompts: The maximum number of prompts to try before giving up.
+        """
+        super().__init__(**kwargs)
+        self._combiner = JsonCombiner()
+        self._load_parameters()
+
+
+class RequirementEvaluator(Evaluator):
+    """INCOSE Requirement Evaluator
+
+    A class that performs an LLM self evaluation on an input target,
+    with an associated prompt.
+
+    The evaluation prompts are for Incose Evaluations
+
+    """
+
+    def __init__(self, eval_items_per_request: int | None = None, **kwargs) -> None:
+        """Initialize the Evaluator class
+
+        Arguments:
+            model: The LLM to use for translation. If an OpenAI model, the
+                `OPENAI_API_KEY` environment variable must be set and the
+                `OPENAI_ORG_ID` environment variable should be set if needed.
+            model_arguments: Additional arguments to pass to the LLM constructor.
+            max_prompts: The maximum number of prompts to try before giving up.
+        """
+        super().__init__(**kwargs)
+        self.eval_items_per_request = eval_items_per_request
+        self._parser = IncoseParser()
+        self.set_prompt("eval_prompts/incose")
+
+    def _input_runnable(self) -> Runnable:
+        def _get_code(json_text: str) -> str:
+            return json.loads(json_text)["code"]
+
+        def _get_reqs(json_text: str) -> str:
+            return json.dumps(json.loads(json_text)["requirements"])
+
+        return RunnableLambda(self._parser.parse_input) | RunnableParallel(
+            SOURCE_CODE=_get_code,
+            REQUIREMENTS=_get_reqs,
+            context=self._retriever,
+        )
+
+    def _add_translation(self, block: TranslatedCodeBlock):
+        if block.translated:
+            return
+
+        if block.original.text is None:
+            block.translated = True
+            return
+
+        if self.eval_items_per_request is None:
+            return super()._add_translation(block)
+
+        input_obj = json.loads(block.original.text)
+        requirements = input_obj.get("requirements", [])
+
+        if not requirements:
+            log.debug(f"[{block.name}] Skipping empty block")
+            block.translated = True
+            block.text = None
+            block.complete = True
+            return
+
+        # For some reason requirements objects are in nested lists?
+        while isinstance(requirements[0], list):
+            requirements = [r for lst in requirements for r in lst]
+
+        if len(requirements) <= self.eval_items_per_request:
+            input_obj["requirements"] = requirements
+            block.original.text = json.dumps(input_obj)
+            return super()._add_translation(block)
+
+        block.processing_time = 0
+        block.cost = 0
+        block.retries = 0
+        obj = {}
+        for i in range(0, len(requirements), self.eval_items_per_request):
+            # Build a new TranslatedBlock using the new working text
+            working_requirements = requirements[i : i + self.eval_items_per_request]
+            working_copy = deepcopy(block.original)
+            working_obj = json.loads(working_copy.text)  # type: ignore
+            working_obj["requirements"] = working_requirements
+            working_copy.text = json.dumps(working_obj)
+            working_block = TranslatedCodeBlock(working_copy, self._target_language)
+
+            # Run the LLM on the working text
+            super()._add_translation(working_block)
+
+            # Update metadata to include for all runs
+            block.retries += working_block.retries
+            block.cost += working_block.cost
+            block.processing_time += working_block.processing_time
+
+            # Update the output text to merge this section's output in
+            obj.update(json.loads(working_block.text))
+
+        block.text = json.dumps(obj)
+        block.tokens = self._llm.get_num_tokens(block.text)
+        block.translated = True
+
+        log.debug(
+            f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
+        )
+
+
+class InlineCommentEvaluator(Evaluator):
+    """Inline Comment Evaluator
+
+    A class that performs an LLM self evaluation on inline comments,
+    with an associated prompt.
+    """
+
+    def __init__(self, eval_items_per_request: int | None = None, **kwargs) -> None:
+        """Initialize the Evaluator class
+
+        Arguments:
+            model: The LLM to use for translation. If an OpenAI model, the
+                `OPENAI_API_KEY` environment variable must be set and the
+                `OPENAI_ORG_ID` environment variable should be set if needed.
+            model_arguments: Additional arguments to pass to the LLM constructor.
+            max_prompts: The maximum number of prompts to try before giving up.
+        """
         super().__init__(**kwargs)
-        self.set_prompt("evaluate")
         self._combiner = JsonCombiner()
-        self._parser = EvaluationParser()
         self._load_parameters()
+        self._parser = InlineCommentParser()
+        self.set_prompt("eval_prompts/inline_comments")
+        self.eval_items_per_request = eval_items_per_request
+
+    def _add_translation(self, block: TranslatedCodeBlock):
+        if block.translated:
+            return
+
+        if block.original.text is None:
+            block.translated = True
+            return
+
+        if self.eval_items_per_request is None:
+            return super()._add_translation(block)
+
+        comment_pattern = r"<(?:INLINE|BLOCK)_COMMENT \w{8}>.*$"
+        comments = list(
+            re.finditer(comment_pattern, block.original.text, flags=re.MULTILINE)
+        )
+
+        if not comments:
+            log.info(f"[{block.name}] Skipping commentless block")
+            block.translated = True
+            block.text = None
+            block.complete = True
+            return
+
+        if len(comments) <= self.eval_items_per_request:
+            return super()._add_translation(block)
+
+        comment_group_indices = list(range(0, len(comments), self.eval_items_per_request))
+        log.debug(
+            f"[{block.name}] Block contains more than {self.eval_items_per_request}"
+            f" comments, splitting {len(comments)} comments into"
+            f" {len(comment_group_indices)} groups"
+        )
+
+        block.processing_time = 0
+        block.cost = 0
+        block.retries = 0
+        obj = {}
+        for i in range(0, len(comments), self.eval_items_per_request):
+            # Split the text into the section containing comments of interest,
+            # all the text prior to those comments, and all the text after them
+            working_comments = comments[i : i + self.eval_items_per_request]
+            start_idx = working_comments[0].start()
+            end_idx = working_comments[-1].end()
+            prefix = block.original.text[:start_idx]
+            keeper = block.original.text[start_idx:end_idx]
+            suffix = block.original.text[end_idx:]
+
+            # Strip all comment placeholders outside of the section of interest
+            prefix = re.sub(comment_pattern, "", prefix, flags=re.MULTILINE)
+            suffix = re.sub(comment_pattern, "", suffix, flags=re.MULTILINE)
+
+            # Build a new TranslatedBlock using the new working text
+            working_copy = deepcopy(block.original)
+            working_copy.text = prefix + keeper + suffix
+            working_block = TranslatedCodeBlock(working_copy, self._target_language)
+
+            # Run the LLM on the working text
+            super()._add_translation(working_block)
+
+            # Update metadata to include for all runs
+            block.retries += working_block.retries
+            block.cost += working_block.cost
+            block.processing_time += working_block.processing_time
+
+            # Update the output text to merge this section's output in
+            obj.update(json.loads(working_block.text))
+
+        block.text = json.dumps(obj)
+        block.tokens = self._llm.get_num_tokens(block.text)
+        block.translated = True
+
+        log.debug(
+            f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
+        )
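
Both evaluators share the same batching pattern: when a block holds more items than `eval_items_per_request`, it is split into fixed-size groups, each group is evaluated separately, and the per-group JSON outputs (plus cost/retry/time metadata) are merged back into the parent block. A self-contained sketch of that accumulate-and-merge loop over plain dicts, with a stub in place of the LLM call:

    import json

    def evaluate_group(items: list[str]) -> dict:
        # Stand-in for one LLM request; returns one result object per item.
        return {item: {"score": "pass"} for item in items}

    def evaluate_in_batches(items: list[str], items_per_request: int) -> str:
        merged: dict = {}
        for i in range(0, len(items), items_per_request):
            group = items[i : i + items_per_request]
            merged.update(evaluate_group(group))  # merge this group's output in
        return json.dumps(merged)

    print(evaluate_in_batches([f"req-{n}" for n in range(5)], items_per_request=2))
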
janus/language/alc/_tests/test_alc.py CHANGED
@@ -20,7 +20,7 @@ class TestAlcSplitter(unittest.TestCase):
     def test_split(self):
         """Test the split method."""
         tree_root = self.splitter.split(self.test_file)
-        self.assertAlmostEqual(tree_root.n_descendents, 32, delta=5)
+        self.assertAlmostEqual(tree_root.n_descendents, 16, delta=2)
         self.assertLessEqual(tree_root.max_tokens, self.splitter.max_tokens)
         self.assertFalse(tree_root.complete)
         self.combiner.combine_children(tree_root)
janus/language/alc/alc.py CHANGED
@@ -79,10 +79,15 @@ class AlcSplitter(TreeSitterSplitter):
         if len(sects) > 1:
             block.children = []
             for sect in sects:
-                if sect[0].node_type in sect_types:
-                    sect_node = self.merge_nodes(sect)
-                    sect_node.children = sect
-                    sect_node.node_type = NodeType(str(sect[0].node_type)[:5])
+                node_type = sect[0].node_type
+                if node_type in sect_types:
+                    if len(sect) == 1:
+                        # Don't make a node its own child
+                        sect_node = sect[0]
+                    else:
+                        sect_node = self.merge_nodes(sect)
+                        sect_node.children = sect
+                    sect_node.node_type = NodeType(str(node_type)[:5])
                     block.children.append(sect_node)
                 else:
                     block.children.extend(sect)
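
The alc.py change avoids wrapping a one-node section in a merged parent: previously even a single-node section was passed through merge_nodes and then assigned as the new node's children, adding a redundant wrapper node per section and inflating descendant counts (hence the 32 to 16 drop in test_alc.py above). A toy illustration of the difference, with a stand-in for janus's node type:

    class Node:
        def __init__(self, name, children=()):
            self.name, self.children = name, list(children)
        def n_descendents(self):
            return len(self.children) + sum(c.n_descendents() for c in self.children)

    def merge_nodes(sect):
        # Stand-in for Splitter.merge_nodes: wraps the nodes in a new parent.
        return Node("+".join(n.name for n in sect), children=sect)

    sect = [Node("csect_instruction")]

    # Old behavior: even a one-node section got a wrapper, so each such
    # section contributed an extra node to the tree.
    old = merge_nodes(sect)
    print(old.n_descendents())  # 1

    # New behavior: a single node is used as-is, so no extra descendent.
    new = sect[0] if len(sect) == 1 else merge_nodes(sect)
    print(new.n_descendents())  # 0
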
janus/language/splitter.py CHANGED
@@ -275,42 +275,50 @@ class Splitter(FileManager):
 
         groups = [[n] for n in nodes]
         while len(groups) > 1 and min(adj_sums) <= self.max_tokens and any(merge_allowed):
-            # Get the indices of the adjacent nodes that would result in the
-            # smallest possible merged snippet. Ignore protected nodes.
+            # Get the index of the node that would result in the smallest
+            # merged snippet when merged with the node that follows it.
+            # Ignore protected nodes.
             mergeable_indices = compress(range(len(adj_sums)), merge_allowed)
-            i0 = int(min(mergeable_indices, key=adj_sums.__getitem__))
-            i1 = i0 + 1
+            C = int(min(mergeable_indices, key=adj_sums.__getitem__))
+
+            # C: Central index
+            # L: Index to the left
+            # R: Index to the right (to be merged in to C)
+            # N: Next index (to the right of R, the "new R")
+            L, R, N = C - 1, C + 1, C + 2
 
             # Recalculate the length. We can't simply use the adj_sum, because
             # it is an underestimate due to the adjoining suffix/prefix.
-            central_node = groups[i0][-1]
-            merged_text = "".join([text_chunks[i0], central_node.suffix, text_chunks[i1]])
+            central_node = groups[C][-1]
+            merged_text = "".join([text_chunks[C], central_node.suffix, text_chunks[R]])
             merged_text_length = self._count_tokens(merged_text)
 
             # If the true length of the merged pair is too long, don't merge them
             # Instead, correct the estimate, since shorter pairs may yet exist
             if merged_text_length > self.max_tokens:
-                adj_sums[i0] = merged_text_length
+                adj_sums[C] = merged_text_length
                 continue
 
             # Update adjacent sum estimates
-            if i0 > 0:
-                adj_sums[i0 - 1] += merged_text_length
-            if i1 < len(adj_sums) - 1:
-                adj_sums[i1 + 1] += merged_text_length
-
-            if i0 > 0 and i1 < len(merge_allowed) - 1:
-                if not (merge_allowed[i0 - 1] and merge_allowed[i1 + 1]):
-                    merge_allowed[i0 - 1] = merge_allowed[i1 + 1] = False
+            if L >= 0:
+                adj_sums[L] = lengths[L] + merged_text_length
+            if N < len(adj_sums):
+                adj_sums[R] = lengths[N] + merged_text_length
 
             # The potential merge length for this pair is removed
-            adj_sums.pop(i0)
-            merge_allowed.pop(i0)
+            adj_sums.pop(C)
+
+            # The merged-in node is removed from the protected list
+            # The merge_allowed list need not be updated - if the node now to
+            # its right is protected, the merge_allowed element corresponding
+            # to the merged neighbor will have been True, and now corresponds
+            # to the merged node.
+            merge_allowed.pop(C)
 
             # Merge the pair of node groups
-            groups[i0 : i1 + 1] = [groups[i0] + groups[i1]]
-            text_chunks[i0 : i1 + 1] = [merged_text]
-            lengths[i0 : i1 + 1] = [merged_text_length]
+            groups[C:N] = [groups[C] + groups[R]]
+            text_chunks[C:N] = [merged_text]
+            lengths[C:N] = [merged_text_length]
 
         return groups
 
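The splitter change renames the merge indices (C for the chosen node, L/R/N for its neighbors) and fixes the adjacent-sum bookkeeping: instead of adding the merged length onto stale estimates, the neighboring sums are recomputed from the true `lengths` of the outer nodes plus the new merged length. A compact sketch of the same greedy merge loop over plain strings, assuming a whitespace token count:

    def count_tokens(text: str) -> int:
        return len(text.split())

    def greedy_merge(chunks: list[str], max_tokens: int) -> list[str]:
        lengths = [count_tokens(c) for c in chunks]
        # Estimated cost of merging chunk i with chunk i + 1
        adj_sums = [lengths[i] + lengths[i + 1] for i in range(len(chunks) - 1)]
        while len(chunks) > 1 and min(adj_sums) <= max_tokens:
            C = min(range(len(adj_sums)), key=adj_sums.__getitem__)
            L, R, N = C - 1, C + 1, C + 2
            merged = chunks[C] + " " + chunks[R]
            merged_len = count_tokens(merged)
            if merged_len > max_tokens:
                adj_sums[C] = merged_len  # correct the estimate, try elsewhere
                continue
            # Recompute neighbor estimates from true lengths (the fixed bookkeeping)
            if L >= 0:
                adj_sums[L] = lengths[L] + merged_len
            if N < len(adj_sums):
                adj_sums[R] = lengths[N] + merged_len
            adj_sums.pop(C)
            chunks[C:N] = [merged]
            lengths[C:N] = [merged_len]
        return chunks

    print(greedy_merge(["a b", "c", "d e f", "g"], max_tokens=4))
    # ['a b c', 'd e f g']
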
@@ -403,13 +411,13 @@ class Splitter(FileManager):
             self._split_into_lines(node)
 
     def _split_into_lines(self, node: CodeBlock):
-        split_text = re.split(r"(\n+)", node.text)
+        split_text = list(re.split(r"(\n+)", node.text))
 
         # If the string didn't start/end with newlines, make sure to include
         # empty strings for the prefix/suffixes
-        if split_text[0].strip("\n"):
+        if not re.match(r"^\n+$", split_text[0]):
            split_text = [""] + split_text
-        if split_text[-1].strip("\n"):
+        if not re.match(r"^\n+$", split_text[-1]):
             split_text.append("")
         betweens = split_text[::2]
         lines = split_text[1::2]
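
The `_split_into_lines` fix changes the boundary test: `re.split` with a captured separator alternates content and newline runs, and the downstream slicing expects separators at even indices and lines at odd indices, so sentinels must be added whenever the first or last element is not purely newlines. The old `strip("\n")` test misread an empty first element (produced when the text begins with a newline) as "already a separator" and skipped the sentinel. A small demonstration of the split shape the code relies on:

    import re

    def normalize(text: str) -> tuple[list[str], list[str]]:
        split_text = list(re.split(r"(\n+)", text))
        # Keep separators at even indices and content lines at odd indices,
        # even when re.split yields an empty first/last element.
        if not re.match(r"^\n+$", split_text[0]):
            split_text = [""] + split_text
        if not re.match(r"^\n+$", split_text[-1]):
            split_text.append("")
        return split_text[::2], split_text[1::2]

    print(normalize("abc\ndef"))  # (['', '\n', ''], ['abc', 'def'])
    print(normalize("\nabc"))     # the empty first element still gets a sentinel
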
janus/language/treesitter/treesitter.py CHANGED
@@ -154,7 +154,15 @@ class TreeSitterSplitter(Splitter):
             The pointer to the language.
         """
         lib = cdll.LoadLibrary(os.fspath(so_file))
-        language_function = getattr(lib, f"tree_sitter_{self.language}")
+        # Added this try-except block to handle the case where the language is not
+        # supported in lowercase by the creator of the grammar. Ex: COBOL
+        # https://github.com/yutaro-sakamoto/tree-sitter-cobol/blob/main/grammar.js#L13
+        try:
+            language_function = getattr(lib, f"tree_sitter_{self.language}")
+        except AttributeError:
+            language = self.language.upper()
+            language_function = getattr(lib, f"tree_sitter_{language}")
+
         language_function.restype = c_void_p
         pointer = language_function()
         return pointer
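
The loader change falls back to an upper-case symbol name when the conventional lower-case `tree_sitter_<language>` entry point is missing, as with tree-sitter-cobol's `tree_sitter_COBOL`. The same fallback pattern on a stand-in object (a real run would load the grammar's shared library with ctypes):

    class _Lib:
        # Stand-in for a ctypes CDLL whose grammar exports an upper-case symbol.
        tree_sitter_COBOL = staticmethod(lambda: "ok")

    def resolve(lib, language: str):
        # Try the conventional lower-case symbol first, then the upper-case
        # variant some grammars export.
        try:
            return getattr(lib, f"tree_sitter_{language}")
        except AttributeError:
            return getattr(lib, f"tree_sitter_{language.upper()}")

    print(resolve(_Lib, "cobol")())  # ok
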
janus/llm/models_info.py CHANGED
@@ -6,9 +6,13 @@ from typing import Callable, Protocol, TypeVar
 from dotenv import load_dotenv
 from langchain_community.llms import HuggingFaceTextGenInference
 from langchain_core.runnables import Runnable
-from langchain_openai import AzureChatOpenAI
+from langchain_openai import AzureChatOpenAI, ChatOpenAI
 
-from janus.llm.model_callbacks import COST_PER_1K_TOKENS, azure_model_reroutes
+from janus.llm.model_callbacks import (
+    COST_PER_1K_TOKENS,
+    azure_model_reroutes,
+    openai_model_reroutes,
+)
 from janus.prompts.prompt import (
     ChatGptPromptEngine,
     ClaudePromptEngine,
@@ -127,7 +131,7 @@ bedrock_models = [
 all_models = [*azure_models, *bedrock_models]
 
 MODEL_TYPE_CONSTRUCTORS: dict[str, ModelType] = {
-    # "OpenAI": ChatOpenAI,
+    "OpenAI": ChatOpenAI,
     "HuggingFace": HuggingFaceTextGenInference,
     "Azure": AzureChatOpenAI,
     "Bedrock": Bedrock,
@@ -137,7 +141,7 @@ MODEL_TYPE_CONSTRUCTORS: dict[str, ModelType] = {
 
 
 MODEL_PROMPT_ENGINES: dict[str, Callable[..., PromptEngine]] = {
-    # **{m: ChatGptPromptEngine for m in openai_models},
+    **{m: ChatGptPromptEngine for m in openai_models},
     **{m: ChatGptPromptEngine for m in azure_models},
     **{m: ClaudePromptEngine for m in claude_models},
     **{m: Llama2PromptEngine for m in llama2_models},
@@ -148,7 +152,7 @@ MODEL_PROMPT_ENGINES: dict[str, Callable[..., PromptEngine]] = {
 }
 
 MODEL_ID_TO_LONG_ID = {
-    # **{m: mr for m, mr in openai_model_reroutes.items()},
+    **{m: mr for m, mr in openai_model_reroutes.items()},
    **{m: mr for m, mr in azure_model_reroutes.items()},
     "bedrock-claude-v2": "anthropic.claude-v2",
     "bedrock-claude-instant-v1": "anthropic.claude-instant-v1",
@@ -181,7 +185,7 @@ DEFAULT_MODELS = list(MODEL_DEFAULT_ARGUMENTS.keys())
 MODEL_CONFIG_DIR = Path.home().expanduser() / ".janus" / "llm"
 
 MODEL_TYPES: dict[str, PromptEngine] = {
-    # **{m: "OpenAI" for m in openai_models},
+    **{m: "OpenAI" for m in openai_models},
     **{m: "Azure" for m in azure_models},
     **{m: "BedrockChat" for m in bedrock_models},
 }
@@ -289,15 +293,16 @@ def load_model(model_id) -> JanusModel:
         # log.warning("Waiting 10 seconds...")
         # Give enough time for the user to read the warnings and cancel
         # time.sleep(10)
-        raise DeprecationWarning("OpenAI models are no longer supported.")
+        # raise DeprecationWarning("OpenAI models are no longer supported.")
 
     elif model_type_name == "Azure":
         model_args.update(
-            {
-                "api_key": os.getenv("AZURE_OPENAI_API_KEY"),
-                "azure_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
-                "api_version": os.getenv("OPENAI_API_VERSION", "2024-02-01"),
-            }
+            api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+            api_version=os.getenv("OPENAI_API_VERSION", "2024-02-01"),
+            azure_deployment=model_id,
+            request_timeout=3600,
+            max_tokens=4096,
         )
 
     model_type = MODEL_TYPE_CONSTRUCTORS[model_type_name]
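
The Azure branch now passes keyword arguments to `model_args.update` instead of a dict literal, which is equivalent for string keys, and adds `azure_deployment`, `request_timeout`, and `max_tokens` to the constructor arguments. A quick check of the equivalence:

    a = {"temperature": 0.7}
    b = dict(a)
    a.update({"api_version": "2024-02-01", "max_tokens": 4096})
    b.update(api_version="2024-02-01", max_tokens=4096)
    assert a == b  # keyword-style update is identical for str keys
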
janus/parsers/eval_parsers/incose_parser.py ADDED
@@ -0,0 +1,134 @@
+import json
+import random
+import uuid
+from typing import List
+
+from langchain.output_parsers import PydanticOutputParser
+from langchain_core.exceptions import OutputParserException
+from langchain_core.messages import BaseMessage
+from langchain_core.pydantic_v1 import BaseModel, Field, validator
+
+from janus.language.block import CodeBlock
+from janus.parsers.parser import JanusParser
+from janus.utils.logger import create_logger
+
+log = create_logger(__name__)
+RNG = random.Random()
+
+
+class Criteria(BaseModel):
+    reasoning: str = Field(description="A short explanation for the given assessment")
+    score: str = Field("A simple `pass` or `fail`")
+
+    @validator("score")
+    def score_is_valid(cls, v: str):
+        v = v.lower().strip()
+        if v not in {"pass", "fail"}:
+            raise OutputParserException("Score must be either 'pass' or 'fail'")
+        return v
+
+
+class Requirement(BaseModel):
+    requirement_id: str = Field(description="The 8-character comment ID")
+    requirement: str = Field(description="The original requirement being evaluated")
+    C1: Criteria
+    C2: Criteria
+    C3: Criteria
+    C4: Criteria
+    C5: Criteria
+    C6: Criteria
+    C7: Criteria
+    C8: Criteria
+    C9: Criteria
+
+
+class RequirementList(BaseModel):
+    __root__: List[Requirement] = Field(
+        description=(
+            "A list of requirement evaluations. Each element should include"
+            " the requirement's 8-character ID in the `requirement_id` field,"
+            " the original requirement in the 'requirement' field, "
+            " and nine score objects corresponding to each criterion."
+        )
+    )
+
+
+class IncoseParser(JanusParser, PydanticOutputParser):
+    requirements: dict[str, str]
+
+    def __init__(self):
+        PydanticOutputParser.__init__(
+            self,
+            pydantic_object=RequirementList,
+            requirements={},
+        )
+
+    def parse_input(self, block: CodeBlock) -> str:
+        # TODO: Perform comment stripping/placeholding here rather than in script
+        text = super().parse_input(block)
+        RNG.seed(text)
+
+        obj = json.loads(text)
+
+        # For some reason requirements objects are in a double list?
+        reqs = obj["requirements"]
+
+        # Generate a unique ID for each requirement (ensure they are unique)
+        req_ids = set()
+        while len(req_ids) < len(reqs):
+            req_ids.add(str(uuid.UUID(int=RNG.getrandbits(128), version=4))[:8])
+
+        self.requirements = dict(zip(req_ids, reqs))
+        reqs_str = "\n\n".join(
+            f"Requirement {rid} : {req}" for rid, req in self.requirements.items()
+        )
+        obj["requirements"] = reqs_str
+        return json.dumps(obj)
+
+    def parse(self, text: str | BaseMessage) -> str:
+        if isinstance(text, BaseMessage):
+            text = str(text.content)
+
+        # Strip everything outside the JSON object
+        begin, end = text.find("["), text.rfind("]")
+        text = text[begin : end + 1]
+
+        try:
+            out: RequirementList = super().parse(text)
+        except json.JSONDecodeError as e:
+            log.debug(f"Invalid JSON object. Output:\n{text}")
+            raise OutputParserException(f"Got invalid JSON object. Error: {e}")
+
+        evals: dict[str, dict] = {c.requirement_id: c.dict() for c in out.__root__}
+
+        seen_keys = set(evals.keys())
+        expected_keys = set(self.requirements.keys())
+        missing_keys = expected_keys.difference(seen_keys)
+        invalid_keys = seen_keys.difference(expected_keys)
+        if missing_keys:
+            log.debug(f"Missing keys: {missing_keys}")
+            if invalid_keys:
+                log.debug(f"Invalid keys: {invalid_keys}")
+            log.debug(f"Missing keys: {missing_keys}")
+            raise OutputParserException(
+                f"Got invalid return object. Missing the following expected "
+                f"keys: {missing_keys}"
+            )
+
+        for key in invalid_keys:
+            del evals[key]
+
+        for rid in evals.keys():
+            evals[rid]["requirement"] = self.requirements[rid]
+            evals[rid].pop("requirement_id")
+
+        return json.dumps(evals)
+
+    def parse_combined_output(self, text: str) -> str:
+        if not text.strip():
+            return str({})
+        objs = [json.loads(line.strip()) for line in text.split("\n") if line.strip()]
+        output_obj = {}
+        for obj in objs:
+            output_obj.update(obj)
+        return json.dumps(output_obj)
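
IncoseParser seeds the module RNG with the input text before drawing UUIDs, so the 8-character requirement IDs are deterministic per input: the same source block always yields the same IDs, which helps reproducibility across runs. A sketch of that seeded-ID scheme:

    import random
    import uuid

    RNG = random.Random()

    def make_ids(text: str, n: int) -> set[str]:
        RNG.seed(text)  # same input text -> same sequence of IDs
        ids: set[str] = set()
        while len(ids) < n:  # the loop guards against truncation collisions
            ids.add(str(uuid.UUID(int=RNG.getrandbits(128), version=4))[:8])
        return ids

    assert make_ids("some source", 3) == make_ids("some source", 3)
    print(make_ids("some source", 3))
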
janus/parsers/eval_parsers/inline_comment_parser.py ADDED
@@ -0,0 +1,112 @@
+import json
+import re
+from typing import Any
+
+from langchain.output_parsers import PydanticOutputParser
+from langchain_core.exceptions import OutputParserException
+from langchain_core.messages import BaseMessage
+from langchain_core.pydantic_v1 import BaseModel, Field, conint
+
+from janus.language.block import CodeBlock
+from janus.parsers.parser import JanusParser
+from janus.utils.logger import create_logger
+
+log = create_logger(__name__)
+
+
+class Criteria(BaseModel):
+    reasoning: str = Field(description="A short explanation for the given score")
+    # Constrained to an integer between 1 and 4
+    score: conint(ge=1, le=4) = Field(  # type: ignore
+        description="An integer score between 1 and 4 (inclusive), 4 being the best"
+    )
+
+
+class Comment(BaseModel):
+    comment_id: str = Field(description="The 8-character comment ID")
+    completeness: Criteria = Field(description="The completeness of the comment")
+    hallucination: Criteria = Field(description="The factualness of the comment")
+    readability: Criteria = Field(description="The readability of the comment")
+    usefulness: Criteria = Field(description="The usefulness of the comment")
+
+
+class CommentList(BaseModel):
+    __root__: list[Comment] = Field(
+        description=(
+            "A list of inline comment evaluations. Each element should include"
+            " the comment's 8-character ID in the `comment_id` field, and four"
+            " score objects corresponding to each metric (`completeness`,"
+            " `hallucination`, `readability`, and `usefulness`)."
+        )
+    )
+
+
+class InlineCommentParser(JanusParser, PydanticOutputParser):
+    comments: dict[str, str]
+
+    def __init__(self):
+        PydanticOutputParser.__init__(
+            self,
+            pydantic_object=CommentList,
+            comments=[],
+        )
+
+    def parse_input(self, block: CodeBlock) -> str:
+        # TODO: Perform comment stripping/placeholding here rather than in script
+        text = super().parse_input(block)
+        self.comments = dict(
+            re.findall(
+                r"<(?:BLOCK|INLINE)_COMMENT (\w{8})> (.*)$",
+                text,
+                flags=re.MULTILINE,
+            )
+        )
+        return text
+
+    def parse(self, text: str | BaseMessage) -> str:
+        if isinstance(text, BaseMessage):
+            text = str(text.content)
+
+        # Strip everything outside the JSON object
+        begin, end = text.find("["), text.rfind("]")
+        text = text[begin : end + 1]
+
+        try:
+            out: CommentList = super().parse(text)
+        except json.JSONDecodeError as e:
+            log.debug(f"Invalid JSON object. Output:\n{text}")
+            raise OutputParserException(f"Got invalid JSON object. Error: {e}")
+
+        evals: dict[str, Any] = {c.comment_id: c.dict() for c in out.__root__}
+
+        seen_keys = set(evals.keys())
+        expected_keys = set(self.comments.keys())
+        missing_keys = expected_keys.difference(seen_keys)
+        invalid_keys = seen_keys.difference(expected_keys)
+        if missing_keys:
+            log.debug(f"Missing keys: {missing_keys}")
+            if invalid_keys:
+                log.debug(f"Invalid keys: {invalid_keys}")
+            log.debug(f"Missing keys: {missing_keys}")
+            raise OutputParserException(
+                f"Got invalid return object. Missing the following expected "
+                f"keys: {missing_keys}"
+            )
+
+        for key in invalid_keys:
+            del evals[key]
+
+        for cid in evals.keys():
+            evals[cid]["comment"] = self.comments[cid]
+            evals[cid].pop("comment_id")
+
+        return json.dumps(evals)
+
+    def parse_combined_output(self, text: str) -> str:
+        if not text.strip():
+            return str({})
+        objs = [json.loads(line.strip()) for line in text.split("\n") if line.strip()]
+        output_obj = {}
+        for obj in objs:
+            output_obj.update(obj)
+        return json.dumps(output_obj)
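
InlineCommentParser recovers its id-to-comment mapping from placeholder tags of the form `<INLINE_COMMENT xxxxxxxx>` or `<BLOCK_COMMENT xxxxxxxx>` left in the source text. A quick check of that regex on a synthetic snippet:

    import re

    text = (
        "int x = 0;  <INLINE_COMMENT 1a2b3c4d> loop counter\n"
        "<BLOCK_COMMENT a0b1c2d3> initializes the accumulator\n"
    )
    comments = dict(
        re.findall(r"<(?:BLOCK|INLINE)_COMMENT (\w{8})> (.*)$", text, flags=re.MULTILINE)
    )
    print(comments)
    # {'1a2b3c4d': 'loop counter', 'a0b1c2d3': 'initializes the accumulator'}
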
janus/parsers/partition_parser.py CHANGED
@@ -36,6 +36,29 @@ class PartitionList(BaseModel):
     )
 
 
+# The following IDs appear in the prompt example. If the LLM produces them,
+# they should be ignored
+EXAMPLE_IDS = {
+    "0d2f4f8d",
+    "def2a953",
+    "75315253",
+    "e7f928da",
+    "1781b2a9",
+    "2fe21e27",
+    "9aef6179",
+    "6061bd82",
+    "22bd0c30",
+    "5d85e19e",
+    "06027969",
+    "91b722fb",
+    "4b3f79be",
+    "k57w964a",
+    "51638s96",
+    "065o6q32",
+    "j5q6p852",
+}
+
+
 class PartitionParser(JanusParser, PydanticOutputParser):
     token_limit: int
     model: BaseLanguageModel
@@ -59,7 +82,10 @@ class PartitionParser(JanusParser, PydanticOutputParser):
         # Generate a unique ID for each line (ensure they are unique)
         line_ids = set()
         while len(line_ids) < len(self.lines):
-            line_ids.add(str(uuid.UUID(int=RNG.getrandbits(128), version=4))[:8])
+            line_id = str(uuid.UUID(int=RNG.getrandbits(128), version=4))[:8]
+            if line_id in EXAMPLE_IDS:
+                continue
+            line_ids.add(line_id)
 
         # Prepend each line with the corresponding ID, save the mapping
         self.line_id_to_index = {lid: i for i, lid in enumerate(line_ids)}
@@ -72,18 +98,24 @@ class PartitionParser(JanusParser, PydanticOutputParser):
         if isinstance(text, BaseMessage):
             text = str(text.content)
 
+        # Strip everything outside the JSON object
+        begin, end = text.find("["), text.rfind("]")
+        text = text[begin : end + 1]
+
         try:
             out: PartitionList = super().parse(text)
         except (OutputParserException, json.JSONDecodeError):
             log.debug(f"Invalid JSON object. Output:\n{text}")
             raise
 
+        # Get partition locations, discard reasoning
+        partition_locations = {partition.location for partition in out.__root__}
+
+        # Ignore IDs from the example input
+        partition_locations.difference_update(EXAMPLE_IDS)
+
         # Locate any invalid line IDs, raise exception if any found
-        invalid_splits = [
-            partition.location
-            for partition in out.__root__
-            if partition.location not in self.line_id_to_index
-        ]
+        invalid_splits = partition_locations.difference(self.line_id_to_index)
         if invalid_splits:
             err_msg = (
                 f"{len(invalid_splits)} line ID(s) not found in input: "
@@ -95,9 +127,9 @@ class PartitionParser(JanusParser, PydanticOutputParser):
         # Map line IDs to indices (so they can be sorted and lines indexed)
         index_to_line_id = {0: "START", None: "END"}
         split_points = {0}
-        for partition in out.__root__:
-            index = self.line_id_to_index[partition.location]
-            index_to_line_id[index] = partition.location
+        for partition in partition_locations:
+            index = self.line_id_to_index[partition]
+            index_to_line_id[index] = partition
             split_points.add(index)
 
         # Get partition start/ends, chunks, chunk lengths
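
The partition parser hardens ID handling in two directions: freshly generated line IDs skip anything in `EXAMPLE_IDS`, and IDs the model echoes back from the prompt's worked example are silently discarded before validation. A set-based sketch of the discard-and-validate step, with hypothetical IDs:

    EXAMPLE_IDS = {"0d2f4f8d", "def2a953"}            # IDs used in the prompt example
    line_id_to_index = {"aa11bb22": 0, "cc33dd44": 7}  # hypothetical real line IDs

    model_output = {"aa11bb22", "def2a953", "ee55ff66"}

    locations = set(model_output)
    locations.difference_update(EXAMPLE_IDS)           # drop echoed example IDs
    invalid = locations.difference(line_id_to_index)   # anything left must be real
    print(sorted(locations), sorted(invalid))
    # ['aa11bb22', 'ee55ff66'] ['ee55ff66']
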
janus/refiners/refiner.py CHANGED
@@ -2,6 +2,7 @@ import re
 from typing import Any
 
 from langchain.output_parsers import RetryWithErrorOutputParser
+from langchain_core.exceptions import OutputParserException
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompt_values import PromptValue
 from langchain_core.runnables import RunnableSerializable
@@ -26,6 +27,35 @@ class JanusRefiner(JanusParser):
         raise NotImplementedError
 
 
+class SimpleRetry(JanusRefiner):
+    max_retries: int
+    retry_chain: RunnableSerializable
+
+    def __init__(
+        self,
+        llm: JanusModel,
+        parser: JanusParser,
+        max_retries: int,
+    ):
+        retry_chain = llm | StrOutputParser()
+        super().__init__(
+            retry_chain=retry_chain,
+            parser=parser,
+            max_retries=max_retries,
+        )
+
+    def parse_completion(
+        self, completion: str, prompt_value: PromptValue, **kwargs
+    ) -> Any:
+        for retry_number in range(self.max_retries):
+            try:
+                return self.parser.parse(completion)
+            except OutputParserException:
+                completion = self.retry_chain.invoke(prompt_value)
+
+        return self.parser.parse(completion)
+
+
 class FixParserExceptions(JanusRefiner, RetryWithErrorOutputParser):
     def __init__(self, llm: JanusModel, parser: JanusParser, max_retries: int):
         retry_prompt = MODEL_PROMPT_ENGINES[llm.short_model_id](
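
SimpleRetry re-invokes the underlying chain on the same prompt whenever parsing fails, up to `max_retries` times, then parses one final time and lets any remaining exception propagate. The same control flow in isolation, with stub parser and chain (the real class wraps a JanusModel and JanusParser):

    class ParseError(Exception): ...

    class FlakyParser:
        """Stub parser that fails the first two attempts."""
        def __init__(self): self.calls = 0
        def parse(self, completion: str) -> str:
            self.calls += 1
            if self.calls < 3:
                raise ParseError("not valid yet")
            return completion.upper()

    def retry_parse(parser, invoke, prompt, max_retries: int):
        completion = invoke(prompt)
        for _ in range(max_retries):
            try:
                return parser.parse(completion)
            except ParseError:
                completion = invoke(prompt)  # ask the model again
        return parser.parse(completion)      # last chance; may raise

    print(retry_parse(FlakyParser(), lambda p: p, "some llm output", max_retries=3))
    # SOME LLM OUTPUT
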
janus/utils/enums.py CHANGED
@@ -89,6 +89,20 @@ LANGUAGES: Dict[str, Dict[str, Any]] = {
         "url": "https://github.com/stsewd/tree-sitter-comment",
         "example": "# This is a comment\n",
     },
+    "cobol": {
+        "comment": "*",
+        "suffix": "cbl",
+        "url": "https://github.com/yutaro-sakamoto/tree-sitter-cobol",
+        "example": (
+            "       IDENTIFICATION DIVISION.\n"
+            "       PROGRAM-ID. HelloWorld.\n"
+            "       ENVIRONMENT DIVISION.\n"
+            "       DATA DIVISION.\n"
+            "       PROCEDURE DIVISION.\n"
+            '           DISPLAY "Hello, World!".\n'
+            "       STOP RUN.\n"
+        ),
+    },
     "commonlisp": {
         "comment": ";;",
         "suffix": "lisp",
janus_llm-4.2.0.dist-info/METADATA → janus_llm-4.3.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: janus-llm
-Version: 4.2.0
+Version: 4.3.1
 Summary: A transcoding library using LLMs.
 Home-page: https://github.com/janus-llm/janus-llm
 License: Apache 2.0
janus_llm-4.2.0.dist-info/RECORD → janus_llm-4.3.1.dist-info/RECORD RENAMED
@@ -1,17 +1,17 @@
-janus/__init__.py,sha256=8ZZh7ctoYQaClu_ak9pFc5eYVEcaSju33Ru0vZBp_iM,361
+janus/__init__.py,sha256=hbiNcSyVowLc5sEqV1GU1B22molrn1w3rOxtKlgrl2E,361
 janus/__main__.py,sha256=lEkpNtLVPtFo8ySDZeXJ_NXDHb0GVdZFPWB4gD4RPS8,64
 janus/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 janus/_tests/conftest.py,sha256=V7uW-oq3YbFiRPvrq15YoVVrA1n_83pjgiyTZ-IUGW8,963
 janus/_tests/test_cli.py,sha256=6ef7h11bg4i7Q6L1-r0ZdcY7YrH4n472kvDiA03T4c8,4275
-janus/cli.py,sha256=eGmzu8aei1QNN_WaWeMYltgIHdKr1MPwG2Er0AEBIuo,42563
+janus/cli.py,sha256=zo8EEp0Y33jPCzMUGGRXxjr629ZPMIrVGk3FxinpyDQ,46851
 janus/converter/__init__.py,sha256=Jnp3TsJ4M1LWDAzXFSyxzMpygbYOxkR-qYxU-G6Gi1k,395
 janus/converter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 janus/converter/_tests/test_translate.py,sha256=T5CzNrwHqJWfb39Izq84R9WvM3toSlJq31SeA_U7d_4,5641
 janus/converter/aggregator.py,sha256=MuAXMKmq6PuUo_w6ljyiuDn81Gk2dN-Ci7FVeLc6vhs,1966
-janus/converter/converter.py,sha256=1WFGy8LozO8pVjbPcYJa9-TTZqgNxwUs7oDca86TcvE,26174
+janus/converter/converter.py,sha256=citSpcCsI1bDfckK38smGNafDHsc8DC9quSoXD2J-Kc,26253
 janus/converter/diagram.py,sha256=-wktVBPrSBgNIQfHIfa2bJNg6L9CYJQgrr9-xU8DFPw,1646
 janus/converter/document.py,sha256=qNt2UncMheUBadXCFHGq74tqCrvZub5DCgZpd3Qa54o,4564
-janus/converter/evaluate.py,sha256=APWQUY3gjAXqkJkPzvj0UA4wPK3Cv9QSJLM-YK9t-ng,476
+janus/converter/evaluate.py,sha256=Bdue1ESQfMVFFRK4l0CvqwLyzt5bqOKy1LB9a8Hqub0,9150
 janus/converter/partition.py,sha256=ASvv4hAue44qHobO4kqr_tKr-eJsXCPPdD3NtNd9V-E,993
 janus/converter/requirements.py,sha256=9tvQ40FZJtG8niIFn45gPQCgKKHVPPoFLinBv6RAqO4,2027
 janus/converter/translate.py,sha256=S1DPZdmX9Vrn_sJPcobvXmhmS8U53yl5cRXjsmXPtas,4246
@@ -30,8 +30,8 @@ janus/language/_tests/test_combine.py,sha256=sjVVPUg4LYkAmazXGUw_S1xPrzWm67_0tCx
 janus/language/_tests/test_splitter.py,sha256=Hqexa39LLEXlK3ZUw7Zot4PUIACvye2vkq0Jaox0T10,373
 janus/language/alc/__init__.py,sha256=j7vOMGhT1Vri6p8dsjSaY-fkO5uFn0sJ0nrNGGvcizM,42
 janus/language/alc/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-janus/language/alc/_tests/test_alc.py,sha256=jrvZCsz3uvbq6pqTKmymWNDgb20HHs69tj9TRbx69aM,1018
-janus/language/alc/alc.py,sha256=fKZDtbeLXiJ2e8t-eWSjeAW6WCMCTn2Fw7_jxvMPdNc,6863
+janus/language/alc/_tests/test_alc.py,sha256=8LKidOPJDlMonRBX9w8AVOKHhyR-O2srW4ntzw5rEEs,1018
+janus/language/alc/alc.py,sha256=YteDO6DR5hnQULjI3j8Je-w05MH50ZARtXB66FqkZi4,7088
 janus/language/binary/__init__.py,sha256=AlNAe12ZA366kcGSrQ1FJyOdbwxFqGBFkYR2K6yL818,51
 janus/language/binary/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 janus/language/binary/_tests/test_binary.py,sha256=cIKIxjj6kIY3rcxLwqUPESP9bxWrHqMHx9TNuICgfeQ,1724
@@ -52,14 +52,14 @@ janus/language/naive/registry.py,sha256=8YQX1q0IdAm7t69-oC_00I-vfkdRnHuX-OD3KEjE
 janus/language/naive/simple_ast.py,sha256=YzeUJomVsnttJc8tI9eDROb2Hx9Vm9XKmOnLEp3TkzI,3112
 janus/language/naive/tag_splitter.py,sha256=IXWMn9tBVUGAtzvQi89GhoZ6g7fPXk5MzO0kMCr2mb0,2045
 janus/language/node.py,sha256=baoYFtapwBQqBtUN6EvHFYRkbR-EcEw1b3fQvH9zIAM,204
-janus/language/splitter.py,sha256=pYvDhGAYDDP7E4CZeNn76I6zVnHMNj6gTFUegKXyRPk,17005
+janus/language/splitter.py,sha256=ZpNIzv0ijbcH7EMnY8DIxAf0ji7-ym1iYJXS9ei_F78,17389
 janus/language/treesitter/__init__.py,sha256=mUliw7ZJLZ8NkJKyUQMSoUV82hYXE0HvLHrEdGPJF4Q,43
 janus/language/treesitter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 janus/language/treesitter/_tests/test_treesitter.py,sha256=fmr_mFSja7vaCVu0TVyLDua3A94jMjY4AqSC5NqnOdQ,2179
-janus/language/treesitter/treesitter.py,sha256=q7fqfFxt7QsqM6tE39uqutRMsOfEgBd3omv7zVZSEOc,7517
+janus/language/treesitter/treesitter.py,sha256=FdsBO8CEo6l9D77aHXns5jRSoZzkvrRGZFCW3oNw15c,7928
 janus/llm/__init__.py,sha256=TKLYvnsWKWfxMucy-lCLQ-4bkN9ENotJZDywDEQmrKg,45
 janus/llm/model_callbacks.py,sha256=cHRZBpYgAwiYbA2k0GQ7DBwBFQZJpEGMUBV3Q_5GTpU,7940
-janus/llm/models_info.py,sha256=6ImXTgCeNkMPtW-9swdaWXISixb-UUqq6OCUl8kPxCs,10612
+janus/llm/models_info.py,sha256=tHH5Hf7zWBpD5zSuhxx_Tp1fQMPTKPr9EuevacDiUTU,10711
 janus/metrics/__init__.py,sha256=AsxtZJUzZiXJPr2ehPPltuYP-ddechjg6X85WZUO7mA,241
 janus/metrics/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 janus/metrics/_tests/reference.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE8,13
@@ -89,25 +89,27 @@ janus/parsers/_tests/test_code_parser.py,sha256=3ay5QpUPcynX_EJ-YLl3PR28poutUkT7
 janus/parsers/code_parser.py,sha256=3l0HfzgrvJuiwk779s9ZsgUl3xbp1nE1qZxh8aDYRBI,873
 janus/parsers/doc_parser.py,sha256=0pUsNZ9hKQLjIi8L8BgkOBHQZ_EGoFLHrBQ4hoDkjSw,5862
 janus/parsers/eval_parser.py,sha256=Gjh6aTZgpYd2ASJUEPMo4LpCL00cBmbOqc4KM3hy8x8,2922
+janus/parsers/eval_parsers/incose_parser.py,sha256=udyK-24ocfrB1SzmggcERm73dBynrCj4MFSBV8k7YDM,4478
+janus/parsers/eval_parsers/inline_comment_parser.py,sha256=QzKgzeWPhyIEkLxJBpeutSocSJjjXEcWRRS635bXEO8,3973
 janus/parsers/parser.py,sha256=y6VV64bgVidf-oEFla3I--_28tnJsPBc6QUD_SkbfSE,1614
-janus/parsers/partition_parser.py,sha256=z9EoqttHacegZzhkoGa-j4vxuzaleDuq32FonzaXsW8,4974
+janus/parsers/partition_parser.py,sha256=IW5_aNYL4g-PzB_qJ0g0NlwLiaAGGewR5iUYF19PVL4,5738
 janus/parsers/reqs_parser.py,sha256=uRQC41Iqp22GjIvakb5UKv70UWHkcOTbOVl_RDnipYw,2438
 janus/parsers/uml.py,sha256=SwaoG9QrHKQP8rSxlf3qu_rp7OMQqYSmLgDYBapOa9M,3379
 janus/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 janus/prompts/prompt.py,sha256=3796YXIzzIec9b0iUzd8VZlq-AdQbzq8qUGXLy4KH-0,10586
-janus/refiners/refiner.py,sha256=f2YDLnG2TF3Kws40chVOBQ91DD6zf2B1wcoP6WeQcIk,3829
+janus/refiners/refiner.py,sha256=ZHP0hUIv8eLpHJSd2SP1Sex6q6SdJgH7HIPgXPBw_gI,4672
 janus/refiners/uml.py,sha256=ZFvFLxOdbolYuOmZh_8K6kiHCWKuudqP71sr_TammxM,866
 janus/retrievers/retriever.py,sha256=n6MzoNZs0GJCH4eqQPS3gFlVHZ3eETr7FuHYbyPzTuo,3506
 janus/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 janus/utils/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 janus/utils/_tests/test_logger.py,sha256=jkkvrCTKwsFCsZtmyuvc-WJ0rC7LJi2Z91sIe4IiKzA,2209
 janus/utils/_tests/test_progress.py,sha256=Rs_u5PiGjP-L-o6C1fhwfE1ig8jYu9Xo9s4p8yPysl8,491
-janus/utils/enums.py,sha256=AoilbdiYyMvY2Mp0AM4xlbLSELfut2XMwhIM1S_msP4,27610
+janus/utils/enums.py,sha256=gmvX3MYnHAwu4ZypidENIZ27M5NI_YegY3PpCDJS34Q,28094
 janus/utils/logger.py,sha256=KZeuaMAnlSZCsj4yL0P6N-JzZwpxXygzACWfdZFeuek,2337
 janus/utils/pdf_docs_reader.py,sha256=beMKHdYrFwg0m_i7n0OTJrut3sf4rEWFd7P_80A76WY,5140
 janus/utils/progress.py,sha256=PIpcQec7SrhsfqB25LHj2CDDkfm9umZx90d9LZnAx6k,1469
-janus_llm-4.2.0.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
-janus_llm-4.2.0.dist-info/METADATA,sha256=5iwBiBTpucpwF3UxClv2P25y9QOpaWsaEGFFyF7mmTU,4574
-janus_llm-4.2.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-janus_llm-4.2.0.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
-janus_llm-4.2.0.dist-info/RECORD,,
+janus_llm-4.3.1.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
+janus_llm-4.3.1.dist-info/METADATA,sha256=ZeUGDDKbJjHSk2Wkzf-4zXLIwaYZqua-5_HVFbzV2yg,4574
+janus_llm-4.3.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+janus_llm-4.3.1.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
+janus_llm-4.3.1.dist-info/RECORD,,