janus-llm 4.1.0__py3-none-any.whl → 4.2.0__py3-none-any.whl

janus/__init__.py CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
  from janus.converter.translate import Translator
  from janus.metrics import *  # noqa: F403
 
- __version__ = "4.1.0"
+ __version__ = "4.2.0"
 
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
janus/cli.py CHANGED
@@ -13,10 +13,13 @@ from rich.console import Console
  from rich.prompt import Confirm
  from typing_extensions import Annotated
 
+ import janus.refiners.refiner
+ import janus.refiners.uml
  from janus.converter.aggregator import Aggregator
  from janus.converter.converter import Converter
  from janus.converter.diagram import DiagramGenerator
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
+ from janus.converter.partition import Partitioner
  from janus.converter.requirements import RequirementsDocumenter
  from janus.converter.translate import Translator
  from janus.embedding.collections import Collections
@@ -44,7 +47,6 @@ from janus.llm.models_info import (
      openai_models,
  )
  from janus.metrics.cli import evaluate
- from janus.refiners.refiner import REFINERS
  from janus.utils.enums import LANGUAGES
  from janus.utils.logger import create_logger
 
@@ -69,6 +71,18 @@ with open(db_file, "r") as f:
  collections_config_file = Path(db_loc) / "collections.json"
 
 
+ def get_subclasses(cls):
+     return set(cls.__subclasses__()).union(
+         set(s for c in cls.__subclasses__() for s in get_subclasses(c))
+     )
+
+
+ REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
+     {janus.refiners.refiner.JanusRefiner}
+ )
+ REFINERS = {r.__name__: r for r in REFINER_TYPES}
+
+
  def get_collections_config():
      if collections_config_file.exists():
          with open(collections_config_file, "r") as f:
@@ -244,22 +258,23 @@ def translate(
              click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
          ),
      ] = "file",
-     refiner_type: Annotated[
-         str,
+     refiner_types: Annotated[
+         list[str],
          typer.Option(
              "-r",
              "--refiner",
-             help="Name of custom refiner to use",
+             help="List of refiner types to use. Add -r for each refiner to use in\
+                 refinement chain",
              click_type=click.Choice(list(REFINERS.keys())),
          ),
-     ] = "none",
+     ] = ["JanusRefiner"],
      retriever_type: Annotated[
          str,
          typer.Option(
              "-R",
              "--retriever",
              help="Name of custom retriever to use",
-             click_type=click.Choice(["active_usings"]),
+             click_type=click.Choice(["active_usings", "language_docs"]),
          ),
      ] = None,
      max_tokens: Annotated[
@@ -272,6 +287,7 @@ def translate(
          ),
      ] = None,
  ):
+     refiner_types = [REFINERS[r] for r in refiner_types]
      try:
          target_language, target_version = target_lang.split("-")
      except ValueError:
@@ -296,7 +312,7 @@ def translate(
          db_path=db_loc,
          db_config=collections_config,
          splitter_type=splitter_type,
-         refiner_type=refiner_type,
+         refiner_types=refiner_types,
          retriever_type=retriever_type,
      )
      translator.translate(input_dir, output_dir, overwrite, collection)
@@ -402,22 +418,23 @@ def document(
              click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
          ),
      ] = "file",
-     refiner_type: Annotated[
-         str,
+     refiner_types: Annotated[
+         list[str],
          typer.Option(
              "-r",
              "--refiner",
-             help="Name of custom refiner to use",
+             help="List of refiner types to use. Add -r for each refiner to use in\
+                 refinement chain",
              click_type=click.Choice(list(REFINERS.keys())),
          ),
-     ] = "none",
+     ] = ["JanusRefiner"],
      retriever_type: Annotated[
          str,
          typer.Option(
              "-R",
              "--retriever",
              help="Name of custom retriever to use",
-             click_type=click.Choice(["active_usings"]),
+             click_type=click.Choice(["active_usings", "language_docs"]),
          ),
      ] = None,
      max_tokens: Annotated[
@@ -430,6 +447,7 @@ def document(
          ),
      ] = None,
  ):
+     refiner_types = [REFINERS[r] for r in refiner_types]
      model_arguments = dict(temperature=temperature)
      collections_config = get_collections_config()
      kwargs = dict(
@@ -441,7 +459,7 @@ def document(
          db_path=db_loc,
          db_config=collections_config,
          splitter_type=splitter_type,
-         refiner_type=refiner_type,
+         refiner_types=refiner_types,
          retriever_type=retriever_type,
      )
      if doc_mode == "madlibs":
@@ -458,12 +476,6 @@ def document(
      documenter.translate(input_dir, output_dir, overwrite, collection)
 
 
- def get_subclasses(cls):
-     return set(cls.__subclasses__()).union(
-         set(s for c in cls.__subclasses__() for s in get_subclasses(c))
-     )
-
-
  @app.command()
  def aggregate(
      input_dir: Annotated[
@@ -578,6 +590,103 @@ def aggregate(
      aggregator.translate(input_dir, output_dir, overwrite, collection)
 
 
+ @app.command(
+     help="Partition input code using an LLM.",
+     no_args_is_help=True,
+ )
+ def partition(
+     input_dir: Annotated[
+         Path,
+         typer.Option(
+             "--input",
+             "-i",
+             help="The directory containing the source code to be partitioned.",
+         ),
+     ],
+     language: Annotated[
+         str,
+         typer.Option(
+             "--language",
+             "-l",
+             help="The language of the source code.",
+             click_type=click.Choice(sorted(LANGUAGES)),
+         ),
+     ],
+     output_dir: Annotated[
+         Path,
+         typer.Option(
+             "--output-dir", "-o", help="The directory to store the partitioned code in."
+         ),
+     ],
+     llm_name: Annotated[
+         str,
+         typer.Option(
+             "--llm",
+             "-L",
+             help="The custom name of the model set with 'janus llm add'.",
+         ),
+     ] = "gpt-4o",
+     max_prompts: Annotated[
+         int,
+         typer.Option(
+             "--max-prompts",
+             "-m",
+             help="The maximum number of times to prompt a model on one functional block "
+             "before exiting the application. This is to prevent wasting too much money.",
+         ),
+     ] = 10,
+     overwrite: Annotated[
+         bool,
+         typer.Option(
+             "--overwrite/--preserve",
+             help="Whether to overwrite existing files in the output directory",
+         ),
+     ] = False,
+     temperature: Annotated[
+         float,
+         typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
+     ] = 0.7,
+     splitter_type: Annotated[
+         str,
+         typer.Option(
+             "-S",
+             "--splitter",
+             help="Name of custom splitter to use",
+             click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
+         ),
+     ] = "file",
+     max_tokens: Annotated[
+         int,
+         typer.Option(
+             "--max-tokens",
+             "-M",
+             help="The maximum number of tokens the model will take in. "
+             "If unspecified, model's default max will be used.",
+         ),
+     ] = None,
+     partition_token_limit: Annotated[
+         int,
+         typer.Option(
+             "--partition-tokens",
+             "-pt",
+             help="The limit on the number of tokens per partition.",
+         ),
+     ] = 8192,
+ ):
+     model_arguments = dict(temperature=temperature)
+     kwargs = dict(
+         model=llm_name,
+         model_arguments=model_arguments,
+         source_language=language,
+         max_prompts=max_prompts,
+         max_tokens=max_tokens,
+         splitter_type=splitter_type,
+         partition_token_limit=partition_token_limit,
+     )
+     partitioner = Partitioner(**kwargs)
+     partitioner.translate(input_dir, output_dir, overwrite)
+
+
  @app.command(
      help="Diagram input code using an LLM.",
      no_args_is_help=True,
@@ -667,25 +776,27 @@ def diagram(
              click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
          ),
      ] = "file",
-     refiner_type: Annotated[
-         str,
+     refiner_types: Annotated[
+         list[str],
          typer.Option(
              "-r",
              "--refiner",
-             help="Name of custom refiner to use",
+             help="List of refiner types to use. Add -r for each refiner to use in\
+                 refinement chain",
              click_type=click.Choice(list(REFINERS.keys())),
          ),
-     ] = "none",
+     ] = ["JanusRefiner"],
      retriever_type: Annotated[
          str,
          typer.Option(
              "-R",
              "--retriever",
              help="Name of custom retriever to use",
-             click_type=click.Choice(["active_usings"]),
+             click_type=click.Choice(["active_usings", "language_docs"]),
          ),
      ] = None,
  ):
+     refiner_types = [REFINERS[r] for r in refiner_types]
      model_arguments = dict(temperature=temperature)
      collections_config = get_collections_config()
      diagram_generator = DiagramGenerator(
@@ -696,7 +807,7 @@ def diagram(
          db_path=db_loc,
          db_config=collections_config,
          splitter_type=splitter_type,
-         refiner_type=refiner_type,
+         refiner_types=refiner_types,
          retriever_type=retriever_type,
          diagram_type=diagram_type,
          add_documentation=add_documentation,
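
Note: the refiner registry that used to be hard-coded in janus/refiners/refiner.py (see its diff below) is now built at CLI startup by walking JanusRefiner's subclass tree, so any refiner class imported here, including the new janus.refiners.uml refiners, becomes selectable by its class name, and -r may now be repeated to compose a refinement chain. A minimal sketch of the discovery pattern, using stand-in classes rather than the real janus hierarchy:

    class JanusRefiner: ...
    class ReflectionRefiner(JanusRefiner): ...
    class HallucinationRefiner(ReflectionRefiner): ...

    def get_subclasses(cls):
        # Recurse so that grandchildren (e.g. HallucinationRefiner) are found too
        return set(cls.__subclasses__()).union(
            s for c in cls.__subclasses__() for s in get_subclasses(c)
        )

    REFINERS = {r.__name__: r for r in get_subclasses(JanusRefiner) | {JanusRefiner}}
    print(sorted(REFINERS))
    # ['HallucinationRefiner', 'JanusRefiner', 'ReflectionRefiner']

With this in place, a hypothetical invocation such as `janus translate ... -r HallucinationRefiner -r ReflectionRefiner` runs both refiners in order.
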
janus/converter/__init__.py CHANGED
@@ -2,5 +2,6 @@ from janus.converter.converter import Converter
  from janus.converter.diagram import DiagramGenerator
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
  from janus.converter.evaluate import Evaluator
+ from janus.converter.partition import Partitioner
  from janus.converter.requirements import RequirementsDocumenter
  from janus.converter.translate import Translator
janus/converter/converter.py CHANGED
@@ -6,7 +6,12 @@ from typing import Any
 
  from langchain_core.exceptions import OutputParserException
  from langchain_core.prompts import ChatPromptTemplate
- from langchain_core.runnables import Runnable, RunnableParallel, RunnablePassthrough
+ from langchain_core.runnables import (
+     Runnable,
+     RunnableLambda,
+     RunnableParallel,
+     RunnablePassthrough,
+ )
  from openai import BadRequestError, RateLimitError
  from pydantic import ValidationError
 
@@ -23,15 +28,14 @@ from janus.language.splitter import (
  from janus.llm.model_callbacks import get_model_callback
  from janus.llm.models_info import MODEL_PROMPT_ENGINES, JanusModel, load_model
  from janus.parsers.parser import GenericParser, JanusParser
- from janus.refiners.refiner import (
-     FixParserExceptions,
-     HallucinationRefiner,
-     JanusRefiner,
-     ReflectionRefiner,
- )
+ from janus.refiners.refiner import JanusRefiner
 
  # from janus.refiners.refiner import BasicRefiner, Refiner
- from janus.retrievers.retriever import ActiveUsingsRetriever, JanusRetriever
+ from janus.retrievers.retriever import (
+     ActiveUsingsRetriever,
+     JanusRetriever,
+     LanguageDocsRetriever,
+ )
  from janus.utils.enums import LANGUAGES
  from janus.utils.logger import create_logger
 
@@ -78,7 +82,7 @@ class Converter:
          protected_node_types: tuple[str, ...] = (),
          prune_node_types: tuple[str, ...] = (),
          splitter_type: str = "file",
-         refiner_type: str | None = None,
+         refiner_types: list[type[JanusRefiner]] = [JanusRefiner],
          retriever_type: str | None = None,
      ) -> None:
          """Initialize a Converter instance.
@@ -105,6 +109,7 @@ class Converter:
                  - None
              retriever_type: The type of retriever to use. Valid values:
                  - "active_usings"
+                 - "language_docs"
                  - None
          """
          self._changed_attrs: set = set()
@@ -133,10 +138,11 @@ class Converter:
          self._prompt: ChatPromptTemplate
 
          self._parser: JanusParser = GenericParser()
+         self._base_parser: JanusParser = GenericParser()
          self._combiner: Combiner = Combiner()
 
          self._splitter_type: str
-         self._refiner_type: str | None
+         self._refiner_types: list[type[JanusRefiner]]
          self._retriever_type: str | None
 
          self._splitter: Splitter
@@ -144,7 +150,7 @@ class Converter:
          self._retriever: JanusRetriever
 
          self.set_splitter(splitter_type=splitter_type)
-         self.set_refiner(refiner_type=refiner_type)
+         self.set_refiner_types(refiner_types=refiner_types)
          self.set_retriever(retriever_type=retriever_type)
          self.set_model(model_name=model, **model_arguments)
          self.set_prompt(prompt_template=prompt_template)
@@ -170,7 +176,7 @@ class Converter:
          self._load_model()
          self._load_prompt()
          self._load_retriever()
-         self._load_refiner()
+         self._load_refiner_chain()
          self._load_splitter()
          self._load_vectorizer()
          self._load_chain()
@@ -210,13 +216,13 @@ class Converter:
 
          self._splitter_type = splitter_type
 
-     def set_refiner(self, refiner_type: str | None) -> None:
+     def set_refiner_types(self, refiner_types: list[type[JanusRefiner]]) -> None:
          """Validate and set the refiner type
 
          Arguments:
              refiner_type: the type of refiner to use
          """
-         self._refiner_type = refiner_type
+         self._refiner_types = refiner_types
 
      def set_retriever(self, retriever_type: str | None) -> None:
          """Validate and set the retriever type
@@ -355,48 +361,40 @@ class Converter:
      def _load_retriever(self):
          if self._retriever_type == "active_usings":
              self._retriever = ActiveUsingsRetriever()
+         elif self._retriever_type == "language_docs":
+             self._retriever = LanguageDocsRetriever(self._llm, self._source_language)
          else:
              self._retriever = JanusRetriever()
 
-     @run_if_changed("_refiner_type", "_model_name", "max_prompts", "_parser", "_llm")
-     def _load_refiner(self) -> None:
-         """Load the refiner according to this instance's attributes.
-
-         If the relevant fields have not been changed since the last time this method was
-         called, nothing happens.
-         """
-         if self._refiner_type == "parser":
-             self._refiner = FixParserExceptions(
-                 llm=self._llm,
-                 parser=self._parser,
-                 max_retries=self.max_prompts,
-             )
-         elif self._refiner_type == "reflection":
-             self._refiner = ReflectionRefiner(
-                 llm=self._llm,
-                 parser=self._parser,
-                 max_retries=self.max_prompts,
+     @run_if_changed("_refiner_types", "_model_name", "max_prompts", "_parser")
+     def _load_refiner_chain(self) -> None:
+         self._refiner_chain = RunnableParallel(
+             completion=self._llm,
+             prompt_value=RunnablePassthrough(),
+         )
+         for refiner_type in self._refiner_types[:-1]:
+             # NOTE: Do NOT remove refiner_type=refiner_type from lambda.
+             # Due to lambda capture, must be present or chain will not
+             # be correctly constructed.
+             self._refiner_chain = self._refiner_chain | RunnableParallel(
+                 completion=lambda x, refiner_type=refiner_type: refiner_type(
+                     llm=self._llm,
+                     parser=self._base_parser,
+                     max_retries=self.max_prompts,
+                 ).parse_completion(**x),
+                 prompt_value=lambda x: x["prompt_value"],
              )
-         elif self._refiner_type == "hallucination":
-             self._refiner = HallucinationRefiner(
+         self._refiner_chain = self._refiner_chain | RunnableLambda(
+             lambda x: self._refiner_types[-1](
                  llm=self._llm,
                  parser=self._parser,
                  max_retries=self.max_prompts,
-             )
-         else:
-             self._refiner = JanusRefiner(parser=self._parser)
+             ).parse_completion(**x)
+         )
 
-     @run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner")
+     @run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner_chain")
      def _load_chain(self):
-         self.chain = (
-             self._input_runnable()
-             | self._prompt
-             | RunnableParallel(
-                 completion=self._llm,
-                 prompt_value=RunnablePassthrough(),
-             )
-             | self._refiner.parse_runnable
-         )
+         self.chain = self._input_runnable() | self._prompt | self._refiner_chain
 
      def _input_runnable(self) -> Runnable:
          return RunnableParallel(
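
Note: the NOTE in `_load_refiner_chain` above is warranted. Python closures capture variables by reference, so binding `refiner_type=refiner_type` as a default argument freezes the current loop value; without it, every stage of the chain would construct the same (final) refiner. A self-contained demonstration of the pitfall:

    funcs = []
    for tag in ("first", "second"):
        funcs.append(lambda x, tag=tag: f"{tag}:{x}")  # default arg freezes `tag`
    broken = [lambda x: f"{tag}:{x}" for tag in ("first", "second")]

    print(funcs[0]("a"), funcs[1]("a"))    # first:a second:a
    print(broken[0]("a"), broken[1]("a"))  # second:a second:a (late binding)
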
janus/converter/partition.py ADDED
@@ -0,0 +1,27 @@
+ from pathlib import Path
+
+ from janus.converter.converter import Converter
+ from janus.language.block import TranslatedCodeBlock
+ from janus.parsers.partition_parser import PartitionParser
+ from janus.utils.logger import create_logger
+
+ log = create_logger(__name__)
+
+
+ class Partitioner(Converter):
+     def __init__(self, partition_token_limit: int, **kwargs):
+         super().__init__(**kwargs)
+         self.set_prompt("partition")
+         self._load_model()
+         self._parser = PartitionParser(
+             token_limit=partition_token_limit,
+             model=self._llm,
+         )
+         self._target_language = self._source_language
+         self._target_suffix = self._source_suffix
+         self._load_parameters()
+
+     def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
+         output_str = self._parser.parse_combined_output(block.complete_text)
+         out_path.parent.mkdir(parents=True, exist_ok=True)
+         out_path.write_text(output_str, encoding="utf-8")
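
Note: Partitioner reuses the Converter pipeline but keeps source and target language identical, so the "translation" it writes out is the input split into LLM-chosen chunks. A hypothetical programmatic sketch (the keyword names mirror the kwargs the new `janus partition` command assembles above; the model name, language, and paths are placeholders):

    from pathlib import Path

    from janus.converter.partition import Partitioner

    partitioner = Partitioner(
        model="gpt-4o",
        source_language="mumps",
        partition_token_limit=8192,
    )
    partitioner.translate(Path("input/"), Path("output/"), overwrite=True)
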
janus/language/combine.py CHANGED
@@ -1,3 +1,5 @@
+ import re
+
  from janus.language.block import CodeBlock, TranslatedCodeBlock
  from janus.language.file import FileManager
  from janus.utils.logger import create_logger
@@ -90,3 +92,23 @@ class ChunkCombiner(Combiner):
              root: The functional code block to combine with its children.
          """
          return root
+
+
+ class PartitionCombiner(Combiner):
+     @staticmethod
+     def combine(root: CodeBlock) -> None:
+         """A combiner which inserts partition tags between code blocks"""
+         queue = [root]
+         while queue:
+             block = queue.pop(0)
+             if block.children:
+                 queue.extend(block.children)
+             else:
+                 block.affixes = (block.prefix, block.suffix + "\n<JANUS_PARTITION>\n")
+
+         super(PartitionCombiner, PartitionCombiner).combine(root)
+         root.text = re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.text)
+         root.affixes = (
+             root.prefix,
+             re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.suffix),
+         )
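
Note: PartitionCombiner marks every leaf block's suffix with a separator tag, then trims any run of trailing separators so the combined output never ends with one. The trimming regex in isolation:

    import re

    text = "chunk a\n<JANUS_PARTITION>\nchunk b\n<JANUS_PARTITION>\n\n<JANUS_PARTITION>\n"
    print(re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", text))
    # chunk a
    # <JANUS_PARTITION>
    # chunk b
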
janus/llm/models_info.py CHANGED
@@ -90,6 +90,7 @@ claude_models = [
      "bedrock-claude-instant-v1",
      "bedrock-claude-haiku",
      "bedrock-claude-sonnet",
+     "bedrock-claude-sonnet-3.5",
  ]
  llama2_models = [
      "bedrock-llama2-70b",
@@ -153,6 +154,7 @@ MODEL_ID_TO_LONG_ID = {
      "bedrock-claude-instant-v1": "anthropic.claude-instant-v1",
      "bedrock-claude-haiku": "anthropic.claude-3-haiku-20240307-v1:0",
      "bedrock-claude-sonnet": "anthropic.claude-3-sonnet-20240229-v1:0",
+     "bedrock-claude-sonnet-3.5": "anthropic.claude-3-5-sonnet-20240620-v1:0",
      "bedrock-llama2-70b": "meta.llama2-70b-v1",
      "bedrock-llama2-70b-chat": "meta.llama2-70b-chat-v1",
      "bedrock-llama2-13b": "meta.llama2-13b-chat-v1",
@@ -200,6 +202,7 @@ TOKEN_LIMITS: dict[str, int] = {
      "anthropic.claude-instant-v1": 100_000,
      "anthropic.claude-3-haiku-20240307-v1:0": 248_000,
      "anthropic.claude-3-sonnet-20240229-v1:0": 248_000,
+     "anthropic.claude-3-5-sonnet-20240620-v1:0": 200_000,
      "meta.llama2-70b-v1": 4096,
      "meta.llama2-70b-chat-v1": 4096,
      "meta.llama2-13b-chat-v1": 4096,
janus/parsers/partition_parser.py ADDED
@@ -0,0 +1,136 @@
+ import json
+ import random
+ import uuid
+
+ from langchain.output_parsers import PydanticOutputParser
+ from langchain_core.exceptions import OutputParserException
+ from langchain_core.language_models import BaseLanguageModel
+ from langchain_core.messages import BaseMessage
+ from langchain_core.pydantic_v1 import BaseModel, Field
+
+ from janus.language.block import CodeBlock
+ from janus.parsers.parser import JanusParser
+ from janus.utils.logger import create_logger
+
+ log = create_logger(__name__)
+ RNG = random.Random()
+
+
+ class PartitionObject(BaseModel):
+     reasoning: str = Field(
+         description="An explanation for why the code should be split at this point"
+     )
+     location: str = Field(
+         description="The 8-character line label which should start a new chunk"
+     )
+
+
+ class PartitionList(BaseModel):
+     __root__: list[PartitionObject] = Field(
+         description=(
+             "A list of appropriate split points, each with a `reasoning` field "
+             "that explains a justification for splitting the code at that point, "
+             "and a `location` field which is simply the 8-character line ID. "
+             "The `reasoning` field should always be included first."
+         )
+     )
+
+
+ class PartitionParser(JanusParser, PydanticOutputParser):
+     token_limit: int
+     model: BaseLanguageModel
+     lines: list[str] = []
+     line_id_to_index: dict[str, int] = {}
+
+     def __init__(self, token_limit: int, model: BaseLanguageModel):
+         PydanticOutputParser.__init__(
+             self,
+             pydantic_object=PartitionList,
+             model=model,
+             token_limit=token_limit,
+         )
+
+     def parse_input(self, block: CodeBlock) -> str:
+         code = str(block.text)
+         RNG.seed(code)
+
+         self.lines = code.split("\n")
+
+         # Generate a unique ID for each line (ensure they are unique)
+         line_ids = set()
+         while len(line_ids) < len(self.lines):
+             line_ids.add(str(uuid.UUID(int=RNG.getrandbits(128), version=4))[:8])
+
+         # Prepend each line with the corresponding ID, save the mapping
+         self.line_id_to_index = {lid: i for i, lid in enumerate(line_ids)}
+         processed = "\n".join(
+             f"{line_id}\t{self.lines[i]}" for line_id, i in self.line_id_to_index.items()
+         )
+         return processed
+
+     def parse(self, text: str | BaseMessage) -> str:
+         if isinstance(text, BaseMessage):
+             text = str(text.content)
+
+         try:
+             out: PartitionList = super().parse(text)
+         except (OutputParserException, json.JSONDecodeError):
+             log.debug(f"Invalid JSON object. Output:\n{text}")
+             raise
+
+         # Locate any invalid line IDs, raise exception if any found
+         invalid_splits = [
+             partition.location
+             for partition in out.__root__
+             if partition.location not in self.line_id_to_index
+         ]
+         if invalid_splits:
+             err_msg = (
+                 f"{len(invalid_splits)} line ID(s) not found in input: "
+                 + ", ".join(invalid_splits)
+             )
+             log.warning(err_msg)
+             raise OutputParserException(err_msg)
+
+         # Map line IDs to indices (so they can be sorted and lines indexed)
+         index_to_line_id = {0: "START", None: "END"}
+         split_points = {0}
+         for partition in out.__root__:
+             index = self.line_id_to_index[partition.location]
+             index_to_line_id[index] = partition.location
+             split_points.add(index)
+
+         # Get partition start/ends, chunks, chunk lengths
+         split_points = sorted(split_points) + [None]
+         partition_indices = list(zip(split_points, split_points[1:]))
+         partition_points = [
+             (index_to_line_id[i0], index_to_line_id[i1]) for i0, i1 in partition_indices
+         ]
+         chunks = ["\n".join(self.lines[i0:i1]) for i0, i1 in partition_indices]
+         chunk_tokens = list(map(self.model.get_num_tokens, chunks))
+
+         # Collect any chunks that exceed token limit
+         oversized_indices: list[int] = [
+             i for i, n in enumerate(chunk_tokens) if n > self.token_limit
+         ]
+         if oversized_indices:
+             data = list(zip(partition_points, chunks, chunk_tokens))
+             data = [data[i] for i in oversized_indices]
+
+             problem_points = "\n".join(
+                 [
+                     f"{i0} to {i1} ({t / self.token_limit:.1f}x maximum length)"
+                     for (i0, i1), _, t in data
+                 ]
+             )
+             log.warning(f"Found {len(data)} oversized chunks:\n{problem_points}")
+             log.debug(
+                 "Oversized chunks:\n"
+                 + "\n#############\n".join(chunk for _, chunk, _ in data)
+             )
+             raise OutputParserException(
+                 f"The following segments are too long and must be "
+                 f"further subdivided:\n{problem_points}"
+             )
+
+         return "\n<JANUS_PARTITION>\n".join(chunks)
janus/refiners/refiner.py CHANGED
@@ -1,3 +1,4 @@
+ import re
  from typing import Any
 
  from langchain.output_parsers import RetryWithErrorOutputParser
@@ -27,7 +28,7 @@ class JanusRefiner(JanusParser):
 
  class FixParserExceptions(JanusRefiner, RetryWithErrorOutputParser):
      def __init__(self, llm: JanusModel, parser: JanusParser, max_retries: int):
-         retry_prompt = MODEL_PROMPT_ENGINES[llm.model_id](
+         retry_prompt = MODEL_PROMPT_ENGINES[llm.short_model_id](
              source_language="text",
              prompt_template="refinement/fix_exceptions",
          ).prompt
@@ -46,6 +47,7 @@ class ReflectionRefiner(JanusRefiner):
      max_retries: int
      reflection_chain: RunnableSerializable
      revision_chain: RunnableSerializable
+     reflection_prompt_name: str
 
      def __init__(
          self,
@@ -54,11 +56,11 @@ class ReflectionRefiner(JanusRefiner):
          max_retries: int,
          prompt_template_name: str = "refinement/reflection",
      ):
-         reflection_prompt = MODEL_PROMPT_ENGINES[llm.model_id](
+         reflection_prompt = MODEL_PROMPT_ENGINES[llm.short_model_id](
              source_language="text",
              prompt_template=prompt_template_name,
          ).prompt
-         revision_prompt = MODEL_PROMPT_ENGINES[llm.model_id](
+         revision_prompt = MODEL_PROMPT_ENGINES[llm.short_model_id](
              source_language="text",
              prompt_template="refinement/revision",
          ).prompt
@@ -66,6 +68,7 @@ class ReflectionRefiner(JanusRefiner):
          reflection_chain = reflection_prompt | llm | StrOutputParser()
          revision_chain = revision_prompt | llm | StrOutputParser()
          super().__init__(
+             reflection_prompt_name=prompt_template_name,
              reflection_chain=reflection_chain,
              revision_chain=revision_chain,
              parser=parser,
@@ -75,6 +78,7 @@ class ReflectionRefiner(JanusRefiner):
      def parse_completion(
          self, completion: str, prompt_value: PromptValue, **kwargs
      ) -> Any:
+         log.info(f"Reflection Prompt: {self.reflection_prompt_name}")
          for retry_number in range(self.max_retries):
              reflection = self.reflection_chain.invoke(
                  dict(
@@ -82,7 +86,7 @@ class ReflectionRefiner(JanusRefiner):
                      completion=completion,
                  )
              )
-             if reflection.strip() == "LGTM":
+             if re.search(r"\bLGTM\b", reflection) is not None:
                  return self.parser.parse(completion)
              if not retry_number:
                  log.info(f"Completion:\n{completion}")
@@ -105,11 +109,3 @@ class HallucinationRefiner(ReflectionRefiner):
              prompt_template_name="refinement/hallucination",
              **kwargs,
          )
-
-
- REFINERS = dict(
-     none=JanusRefiner,
-     parser=FixParserExceptions,
-     reflection=ReflectionRefiner,
-     hallucination=HallucinationRefiner,
- )
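
Note: the reflection check was loosened from an exact match to a whole-word search, so a verdict like "Looks good. LGTM!" now counts as approval while "LGTMX" still does not:

    import re

    for reply in ("LGTM", "Looks good. LGTM!", "LGTMX"):
        print(reply, "->", re.search(r"\bLGTM\b", reply) is not None)
    # LGTM -> True
    # Looks good. LGTM! -> True
    # LGTMX -> False
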
janus/refiners/uml.py ADDED
@@ -0,0 +1,33 @@
+ from janus.llm.models_info import JanusModel
+ from janus.parsers.parser import JanusParser
+ from janus.refiners.refiner import ReflectionRefiner
+
+
+ class ALCFixUMLVariablesRefiner(ReflectionRefiner):
+     def __init__(
+         self,
+         llm: JanusModel,
+         parser: JanusParser,
+         max_retries: int,
+     ):
+         super().__init__(
+             llm=llm,
+             parser=parser,
+             max_retries=max_retries,
+             prompt_template_name="refinement/uml/alc_fix_variables",
+         )
+
+
+ class FixUMLConnectionsRefiner(ReflectionRefiner):
+     def __init__(
+         self,
+         llm: JanusModel,
+         parser: JanusParser,
+         max_retries: int,
+     ):
+         super().__init__(
+             llm=llm,
+             parser=parser,
+             max_retries=max_retries,
+             prompt_template_name="refinement/uml/fix_connections",
+         )
janus/retrievers/retriever.py CHANGED
@@ -1,7 +1,16 @@
+ from typing import List
+
+ from langchain_core.documents import Document
+ from langchain_core.output_parsers import StrOutputParser
  from langchain_core.retrievers import BaseRetriever
  from langchain_core.runnables import Runnable, RunnableConfig
 
  from janus.language.block import CodeBlock
+ from janus.llm.models_info import MODEL_PROMPT_ENGINES, JanusModel
+ from janus.utils.logger import create_logger
+ from janus.utils.pdf_docs_reader import PDFDocsReader
+
+ log = create_logger(__name__)
 
 
  class JanusRetriever(Runnable):
@@ -40,3 +49,54 @@ class TextSearchRetriever(JanusRetriever):
          docs = self.retriever.invoke(code_block.text)
          context = "\n\n".join(doc.page_content for doc in docs)
          return f"You may use the following additional context: {context}"
+
+
+ class LanguageDocsRetriever(JanusRetriever):
+     def __init__(
+         self,
+         llm: JanusModel,
+         language_name: str,
+         prompt_template_name: str = "retrieval/language_docs",
+     ):
+         super().__init__()
+         self.llm: JanusModel = llm
+         self.language: str = language_name
+
+         self.PDF_reader = PDFDocsReader(
+             language=self.language,
+         )
+
+         language_docs_prompt = MODEL_PROMPT_ENGINES[self.llm.short_model_id](
+             source_language=self.language,
+             prompt_template=prompt_template_name,
+         ).prompt
+
+         parser: StrOutputParser = StrOutputParser()
+         self.chain = language_docs_prompt | self.llm | parser
+
+     def get_context(self, code_block: CodeBlock) -> str:
+         functionality_to_reference: str = self.chain.invoke(
+             dict({"SOURCE_CODE": code_block.text, "SOURCE_LANGUAGE": self.language})
+         )
+         if functionality_to_reference == "NODOCS":
+             log.debug("No Opcodes requested from language docs retriever.")
+             return ""
+         else:
+             functionality_to_reference: List = functionality_to_reference.split(", ")
+             log.debug(
+                 f"List of opcodes requested by language docs retriever "
+                 f"to search the {self.language} "
+                 f"docs for: {functionality_to_reference}"
+             )
+
+             docs: List[Document] = self.PDF_reader.search_language_reference(
+                 functionality_to_reference
+             )
+             context = "\n\n".join(doc.page_content for doc in docs)
+             if context:
+                 return (
+                     f"You may reference the following excerpts from the {self.language} "
+                     f"language documentation: {context}"
+                 )
+             else:
+                 return ""
janus/utils/pdf_docs_reader.py ADDED
@@ -0,0 +1,134 @@
+ import os
+ import time
+ from pathlib import Path
+ from typing import List, Optional
+
+ import joblib
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_core.documents import Document
+ from langchain_unstructured import UnstructuredLoader
+ from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ from janus.utils.logger import create_logger
+
+ log = create_logger(__name__)
+
+
+ class PDFDocsReader:
+     def __init__(
+         self,
+         language: str,
+         chunk_size: int = 1000,
+         chunk_overlap: int = 100,
+         start_page: Optional[int] = None,
+         end_page: Optional[int] = None,
+         vectorizer: CountVectorizer = TfidfVectorizer(),
+     ):
+         self.retrieval_docs_dir: Path = Path(
+             os.getenv("RETRIEVAL_DOCS_DIR", "retrieval_docs")
+         )
+         self.language = language
+         self.chunk_size = chunk_size
+         self.chunk_overlap = chunk_overlap
+         self.start_page = start_page
+         self.end_page = end_page
+         self.vectorizer = vectorizer
+         self.documents = self.load_and_chunk_pdf()
+         self.doc_vectors = self.vectorize_documents()
+
+     def load_and_chunk_pdf(self) -> List[str]:
+         pdf_path = self.retrieval_docs_dir / f"{self.language}.pdf"
+         pickled_documents_path = (
+             self.retrieval_docs_dir / f"{self.language}_documents.pkl"
+         )
+
+         if pickled_documents_path.exists():
+             log.debug(
+                 f"Loading pre-chunked PDF from {pickled_documents_path}. "
+                 f"If you want to regenerate retrieval docs for {self.language}, "
+                 f"delete the file at {pickled_documents_path}, "
+                 f"then add a new {self.language}.pdf."
+             )
+             documents = joblib.load(pickled_documents_path)
+         else:
+             if not pdf_path.exists():
+                 raise FileNotFoundError(
+                     f"Language docs retrieval is enabled, but no PDF for language "
+                     f"'{self.language}' was found. Move a "
+                     f"{self.language} reference manual to "
+                     f"{pdf_path.absolute()} "
+                     f"(the path to the directory of PDF docs can be "
+                     f"set with the env variable 'RETRIEVAL_DOCS_DIR')."
+                 )
+             log.info(
+                 f"Chunking reference PDF for {self.language} using unstructured - "
+                 f"if your PDF has many pages, this could take a while..."
+             )
+             start_time = time.time()
+             loader = UnstructuredLoader(
+                 pdf_path,
+                 chunking_strategy="basic",
+                 max_characters=1000000,
+                 include_orig_elements=False,
+                 start_page=self.start_page,
+                 end_page=self.end_page,
+             )
+             docs = loader.load()
+             text = "\n\n".join([doc.page_content for doc in docs])
+             text_splitter = RecursiveCharacterTextSplitter(
+                 chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
+             )
+             documents = text_splitter.split_text(text)
+             log.info(f"Document store created for language: {self.language}")
+             end_time = time.time()
+             log.info(
+                 f"Processing time for {self.language} PDF: "
+                 f"{end_time - start_time} seconds"
+             )
+
+             joblib.dump(documents, pickled_documents_path)
+             log.debug(f"Documents saved to {pickled_documents_path}")
+
+         return documents
+
+     def vectorize_documents(self) -> (TfidfVectorizer, any):
+         doc_vectors = self.vectorizer.fit_transform(self.documents)
+         return doc_vectors
+
+     def search_language_reference(
+         self,
+         query: List[str],
+         top_k: int = 1,
+         min_similarity: float = 0.1,
+     ) -> List[Document]:
+         """Searches through the vectorized PDF for the query using
+         tf-idf and returns a list of langchain Documents."""
+
+         docs: List[Document] = []
+
+         for item in query:
+             # Transform the query using the TF-IDF vectorizer
+             query_vector = self.vectorizer.transform([item])
+
+             # Calculate cosine similarities between the query and document vectors
+             similarities = cosine_similarity(query_vector, self.doc_vectors).flatten()
+
+             # Get the indices of documents with similarity above the threshold
+             valid_indices = [
+                 i for i, sim in enumerate(similarities) if sim >= min_similarity
+             ]
+
+             # Sort the valid indices by similarity score in descending order
+             sorted_indices = sorted(
+                 valid_indices, key=lambda i: similarities[i], reverse=True
+             )
+
+             # Limit to top-k results
+             top_indices = sorted_indices[:top_k]
+
+             # Retrieve the top-k most relevant documents
+             docs += [Document(page_content=self.documents[i]) for i in top_indices]
+             log.debug(f"Language documentation search result: {docs}")
+
+         return docs
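
Note: the reader ranks chunks with plain TF-IDF cosine similarity rather than an embedding store, so retrieval needs no external service. A self-contained sketch of the same search on toy documents (the document strings are invented for illustration):

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    documents = [
        "The MVC instruction moves characters between storage locations.",
        "DCB defines a data control block for dataset access.",
        "Decimal arithmetic is performed with AP, SP, MP, and ZAP.",
    ]
    vectorizer = TfidfVectorizer()
    doc_vectors = vectorizer.fit_transform(documents)

    query_vector = vectorizer.transform(["ZAP"])
    similarities = cosine_similarity(query_vector, doc_vectors).flatten()
    best = max(range(len(documents)), key=lambda i: similarities[i])
    print(documents[best])  # the decimal-arithmetic chunk
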
{janus_llm-4.1.0.dist-info → janus_llm-4.2.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: janus-llm
- Version: 4.1.0
+ Version: 4.2.0
  Summary: A transcoding library using LLMs.
  Home-page: https://github.com/janus-llm/janus-llm
  License: Apache 2.0
@@ -23,20 +23,28 @@ Requires-Dist: langchain-anthropic (>=0.1.15,<0.2.0)
  Requires-Dist: langchain-community (>=0.2.0,<0.3.0)
  Requires-Dist: langchain-core (>=0.2.0,<0.3.0)
  Requires-Dist: langchain-openai (>=0.1.8,<0.2.0)
+ Requires-Dist: langchain-unstructured (>=0.1.2,<0.2.0)
  Requires-Dist: nltk (>=3.8.1,<4.0.0)
  Requires-Dist: numpy (>=1.24.3,<2.0.0)
  Requires-Dist: openai (>=1.14.0,<2.0.0)
+ Requires-Dist: pi-heif (>=0.20.0,<0.21.0)
  Requires-Dist: py-readability-metrics (>=1.4.5,<2.0.0)
  Requires-Dist: py-rouge (>=1.1,<2.0)
+ Requires-Dist: pytesseract (>=0.3.13,<0.4.0)
  Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
  Requires-Dist: rich (>=13.7.1,<14.0.0)
  Requires-Dist: sacrebleu (>=2.4.1,<3.0.0)
+ Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
  Requires-Dist: sentence-transformers (>=2.6.1,<3.0.0) ; extra == "hf-local" or extra == "all"
+ Requires-Dist: tesseract (>=0.1.3,<0.2.0)
  Requires-Dist: text-generation (>=0.6.0,<0.7.0)
  Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
  Requires-Dist: transformers (>=4.31.0,<5.0.0)
  Requires-Dist: tree-sitter (>=0.21.0,<0.22.0)
  Requires-Dist: typer (>=0.9.0,<0.10.0)
+ Requires-Dist: unstructured (>=0.15.9,<0.16.0)
+ Requires-Dist: unstructured-inference (>=0.7.36,<0.8.0)
+ Requires-Dist: unstructured-pytesseract (>=0.3.13,<0.4.0)
  Project-URL: Documentation, https://janus-llm.github.io/janus-llm
  Project-URL: Repository, https://github.com/janus-llm/janus-llm
  Description-Content-Type: text/markdown
{janus_llm-4.1.0.dist-info → janus_llm-4.2.0.dist-info}/RECORD RENAMED
@@ -1,17 +1,18 @@
- janus/__init__.py,sha256=DBaGeUwOFIZW4QsQV06WFYM6RAz6pNdOZxJjTj3Uf7I,361
+ janus/__init__.py,sha256=8ZZh7ctoYQaClu_ak9pFc5eYVEcaSju33Ru0vZBp_iM,361
  janus/__main__.py,sha256=lEkpNtLVPtFo8ySDZeXJ_NXDHb0GVdZFPWB4gD4RPS8,64
  janus/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  janus/_tests/conftest.py,sha256=V7uW-oq3YbFiRPvrq15YoVVrA1n_83pjgiyTZ-IUGW8,963
  janus/_tests/test_cli.py,sha256=6ef7h11bg4i7Q6L1-r0ZdcY7YrH4n472kvDiA03T4c8,4275
- janus/cli.py,sha256=MkeV0FiYXmEcMnERne6pSrLSyc5xhny2bORLY-19A6o,39091
- janus/converter/__init__.py,sha256=U2EOMcCykiC0ZqhorNefOP_04hOF18qhYoPKrVp1Vrk,345
+ janus/cli.py,sha256=eGmzu8aei1QNN_WaWeMYltgIHdKr1MPwG2Er0AEBIuo,42563
+ janus/converter/__init__.py,sha256=Jnp3TsJ4M1LWDAzXFSyxzMpygbYOxkR-qYxU-G6Gi1k,395
  janus/converter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  janus/converter/_tests/test_translate.py,sha256=T5CzNrwHqJWfb39Izq84R9WvM3toSlJq31SeA_U7d_4,5641
  janus/converter/aggregator.py,sha256=MuAXMKmq6PuUo_w6ljyiuDn81Gk2dN-Ci7FVeLc6vhs,1966
- janus/converter/converter.py,sha256=HWw-a4j-Qq0AWDyyk6cMVOwYjJeXxa3bvs9OOlKo_KI,25965
+ janus/converter/converter.py,sha256=1WFGy8LozO8pVjbPcYJa9-TTZqgNxwUs7oDca86TcvE,26174
  janus/converter/diagram.py,sha256=-wktVBPrSBgNIQfHIfa2bJNg6L9CYJQgrr9-xU8DFPw,1646
  janus/converter/document.py,sha256=qNt2UncMheUBadXCFHGq74tqCrvZub5DCgZpd3Qa54o,4564
  janus/converter/evaluate.py,sha256=APWQUY3gjAXqkJkPzvj0UA4wPK3Cv9QSJLM-YK9t-ng,476
+ janus/converter/partition.py,sha256=ASvv4hAue44qHobO4kqr_tKr-eJsXCPPdD3NtNd9V-E,993
  janus/converter/requirements.py,sha256=9tvQ40FZJtG8niIFn45gPQCgKKHVPPoFLinBv6RAqO4,2027
  janus/converter/translate.py,sha256=S1DPZdmX9Vrn_sJPcobvXmhmS8U53yl5cRXjsmXPtas,4246
  janus/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -37,7 +38,7 @@ janus/language/binary/_tests/test_binary.py,sha256=cIKIxjj6kIY3rcxLwqUPESP9bxWrH
  janus/language/binary/binary.py,sha256=PHsVa8jcM7sW9aTboGRWXj6ewQznz0kVPNWtP4B9YPU,6555
  janus/language/binary/reveng/decompile_script.py,sha256=veW51oJzuO-4UD3Er062jXZ_FYtTFo9OCkl82Z2xr6A,2182
  janus/language/block.py,sha256=2rjAYUosHFfWRgLnzf50uAgTMST4Md9Kx6JrlUfEfX4,9398
- janus/language/combine.py,sha256=Wtve06fa-_Wjv_V5RIf1Nfmg0UxcOEtFNj4vVHpSNbo,2940
+ janus/language/combine.py,sha256=egZRl1xZXAFXa2ZjjfqnNckc9uxuo6e1MJgkRrCgvd8,3650
  janus/language/file.py,sha256=jy-cReAoI6F97TXR5bbhPyt8XyUZCdFYnVboubDA_y4,571
  janus/language/mumps/__init__.py,sha256=-Ou_wJ-JgHezfp1dub2_qCYNiK9wO-zo2MlqxM9qiwE,48
  janus/language/mumps/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -58,7 +59,7 @@ janus/language/treesitter/_tests/test_treesitter.py,sha256=fmr_mFSja7vaCVu0TVyLD
  janus/language/treesitter/treesitter.py,sha256=q7fqfFxt7QsqM6tE39uqutRMsOfEgBd3omv7zVZSEOc,7517
  janus/llm/__init__.py,sha256=TKLYvnsWKWfxMucy-lCLQ-4bkN9ENotJZDywDEQmrKg,45
  janus/llm/model_callbacks.py,sha256=cHRZBpYgAwiYbA2k0GQ7DBwBFQZJpEGMUBV3Q_5GTpU,7940
- janus/llm/models_info.py,sha256=KWOQiWU1oAJB9nspI6N6Q32vHRH8loO8xM-Ys497fxc,10443
+ janus/llm/models_info.py,sha256=6ImXTgCeNkMPtW-9swdaWXISixb-UUqq6OCUl8kPxCs,10612
  janus/metrics/__init__.py,sha256=AsxtZJUzZiXJPr2ehPPltuYP-ddechjg6X85WZUO7mA,241
  janus/metrics/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  janus/metrics/_tests/reference.py,sha256=hiaJPP9CXkvFBV_wL-gOe_BzELTw0nvB6uCxhxtIiE8,13
@@ -89,21 +90,24 @@ janus/parsers/code_parser.py,sha256=3l0HfzgrvJuiwk779s9ZsgUl3xbp1nE1qZxh8aDYRBI,
  janus/parsers/doc_parser.py,sha256=0pUsNZ9hKQLjIi8L8BgkOBHQZ_EGoFLHrBQ4hoDkjSw,5862
  janus/parsers/eval_parser.py,sha256=Gjh6aTZgpYd2ASJUEPMo4LpCL00cBmbOqc4KM3hy8x8,2922
  janus/parsers/parser.py,sha256=y6VV64bgVidf-oEFla3I--_28tnJsPBc6QUD_SkbfSE,1614
+ janus/parsers/partition_parser.py,sha256=z9EoqttHacegZzhkoGa-j4vxuzaleDuq32FonzaXsW8,4974
  janus/parsers/reqs_parser.py,sha256=uRQC41Iqp22GjIvakb5UKv70UWHkcOTbOVl_RDnipYw,2438
  janus/parsers/uml.py,sha256=SwaoG9QrHKQP8rSxlf3qu_rp7OMQqYSmLgDYBapOa9M,3379
  janus/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  janus/prompts/prompt.py,sha256=3796YXIzzIec9b0iUzd8VZlq-AdQbzq8qUGXLy4KH-0,10586
- janus/refiners/refiner.py,sha256=lKAVS9Nszx8aO7hLB0xzdMmUNfhX7K5SaXwcit2QBHs,3774
- janus/retrievers/retriever.py,sha256=WzsYn329GbyJfmJ9KyU9yiYdhLkuS6WPKTKoHx5fBYQ,1339
+ janus/refiners/refiner.py,sha256=f2YDLnG2TF3Kws40chVOBQ91DD6zf2B1wcoP6WeQcIk,3829
+ janus/refiners/uml.py,sha256=ZFvFLxOdbolYuOmZh_8K6kiHCWKuudqP71sr_TammxM,866
+ janus/retrievers/retriever.py,sha256=n6MzoNZs0GJCH4eqQPS3gFlVHZ3eETr7FuHYbyPzTuo,3506
  janus/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  janus/utils/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  janus/utils/_tests/test_logger.py,sha256=jkkvrCTKwsFCsZtmyuvc-WJ0rC7LJi2Z91sIe4IiKzA,2209
  janus/utils/_tests/test_progress.py,sha256=Rs_u5PiGjP-L-o6C1fhwfE1ig8jYu9Xo9s4p8yPysl8,491
  janus/utils/enums.py,sha256=AoilbdiYyMvY2Mp0AM4xlbLSELfut2XMwhIM1S_msP4,27610
  janus/utils/logger.py,sha256=KZeuaMAnlSZCsj4yL0P6N-JzZwpxXygzACWfdZFeuek,2337
+ janus/utils/pdf_docs_reader.py,sha256=beMKHdYrFwg0m_i7n0OTJrut3sf4rEWFd7P_80A76WY,5140
  janus/utils/progress.py,sha256=PIpcQec7SrhsfqB25LHj2CDDkfm9umZx90d9LZnAx6k,1469
- janus_llm-4.1.0.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
- janus_llm-4.1.0.dist-info/METADATA,sha256=RPA9U5cyO_-ulE45F1gBbTOnkRcjdBTOARyJs9XAvgg,4184
- janus_llm-4.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- janus_llm-4.1.0.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
- janus_llm-4.1.0.dist-info/RECORD,,
+ janus_llm-4.2.0.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
+ janus_llm-4.2.0.dist-info/METADATA,sha256=5iwBiBTpucpwF3UxClv2P25y9QOpaWsaEGFFyF7mmTU,4574
+ janus_llm-4.2.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ janus_llm-4.2.0.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
+ janus_llm-4.2.0.dist-info/RECORD,,
{janus_llm-4.1.0.dist-info → janus_llm-4.2.0.dist-info}/WHEEL RENAMED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.9.0
+ Generator: poetry-core 1.9.1
  Root-Is-Purelib: true
  Tag: py3-none-any