janus-llm 3.4.3__py3-none-any.whl → 3.5.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
janus/__init__.py CHANGED
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
5
5
  from janus.converter.translate import Translator
6
6
  from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "3.4.3"
8
+ __version__ = "3.5.1"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
janus/cli.py CHANGED
@@ -2,7 +2,7 @@ import json
2
2
  import logging
3
3
  import os
4
4
  from pathlib import Path
5
- from typing import Optional
5
+ from typing import List, Optional
6
6
 
7
7
  import click
8
8
  import typer
@@ -12,6 +12,8 @@ from rich.console import Console
12
12
  from rich.prompt import Confirm
13
13
  from typing_extensions import Annotated
14
14
 
15
+ from janus.converter.aggregator import Aggregator
16
+ from janus.converter.converter import Converter
15
17
  from janus.converter.diagram import DiagramGenerator
16
18
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
17
19
  from janus.converter.requirements import RequirementsDocumenter
@@ -439,6 +441,126 @@ def document(
439
441
  documenter.translate(input_dir, output_dir, overwrite, collection)
440
442
 
441
443
 
444
def get_subclasses(cls):
    """Return every direct and indirect subclass of ``cls``.

    Arguments:
        cls: The class whose subclass tree should be walked.

    Returns:
        A set of all classes that inherit from ``cls`` at any depth.
        ``cls`` itself is not part of the result.
    """
    found = set()
    # Work-list traversal instead of recursive set unions: every class is
    # expanded exactly once, even when it is reachable through several parents
    # (diamond inheritance), and ``__subclasses__()`` is called once per class.
    pending = list(cls.__subclasses__())
    while pending:
        sub = pending.pop()
        if sub in found:
            continue
        found.add(sub)
        pending.extend(sub.__subclasses__())
    return found
448
+
449
+
450
def _intermediate_converter_classes():
    """Map class name to class for each Converter subclass usable as an intermediate."""
    return {
        c.__name__: c
        for c in get_subclasses(Converter)
        # Aggregator is itself a Converter subclass, but it requires an
        # ``intermediate_converters`` argument that this command never passes
        # to intermediates, so offering it as a choice could only crash.
        if not issubclass(c, Aggregator)
    }


@app.command()
def aggregate(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    intermediate_converters: Annotated[
        List[str],
        typer.Option(
            "-C",
            "--converter",
            help="Name of an intermediate converter to use",
            # Sorted so the choices appear in a stable order in --help; the
            # underlying subclass collection is an unordered set.
            click_type=click.Choice(sorted(_intermediate_converter_classes())),
        ),
    ] = ["Documenter"],
):
    """Run intermediate converters on the input code and aggregate their outputs."""
    converter_classes = _intermediate_converter_classes()
    model_arguments = dict(temperature=temperature)
    collections_config = get_collections_config()

    # Every intermediate converter and the aggregator itself are configured
    # with the same model, language, database and splitter settings.
    shared_kwargs = dict(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
    )

    # One converter instance per requested name, in the order given on the
    # command line.
    converters = [
        converter_classes[name](**shared_kwargs) for name in intermediate_converters
    ]

    aggregator = Aggregator(
        intermediate_converters=converters,
        prompt_template="basic_aggregation",
        **shared_kwargs,
    )
    aggregator.translate(input_dir, output_dir, overwrite, collection)
562
+
563
+
442
564
  @app.command(
443
565
  help="Diagram input code using an LLM.",
444
566
  no_args_is_help=True,
@@ -0,0 +1,52 @@
1
+ from copy import deepcopy
2
+ from typing import List
3
+
4
+ from janus.converter.converter import Converter
5
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
6
+
7
+
8
class Aggregator(Converter):
    """Converter that combines the outputs of several intermediate converters.

    For a block without children, each intermediate converter is run on its
    own copy of the block; the resulting texts are joined with a separator and
    used as the input text for this converter's own translation step. For a
    block with children, the children are processed first and their texts are
    joined and used as the input instead.
    """

    def __init__(
        self,
        intermediate_converters: List[Converter],
        separator: str = "\n==============\n",
        **kwargs,
    ):
        """
        Initialization Method
        Arguments:
            intermediate_converters - list of converters to use
            separator - separator string to partition different outputs before
                combination
            **kwargs - forwarded unchanged to the Converter initializer
        """
        # Set before the base initializer runs so the attributes already
        # exist during base-class setup.
        self._intermediate_converters = intermediate_converters
        self._separator = separator
        super().__init__(**kwargs)
        self._load_parameters()

    def _iterative_translate(self, root: CodeBlock) -> TranslatedCodeBlock:
        """
        Wraps the root block in a TranslatedCodeBlock and aggregates it
        Arguments:
            root - the root code block to process
        Returns:
            the processed TranslatedCodeBlock wrapping root
        """
        res = TranslatedCodeBlock(root, language=self._target_language)
        return self._recursive_translate(res)

    def _recursive_translate(self, root: TranslatedCodeBlock) -> TranslatedCodeBlock:
        """
        Recursively translates code blocks from a bottom up approach
        Arguments:
            root - the block to process; its children are processed first
        Returns:
            the same block instance that was passed in
        """
        original_text = root.original.text
        try:
            if root.children:
                for child in root.children:
                    self._recursive_translate(child)
                root.original.text = self._combine_blocks(
                    root.children, self._separator
                )
            else:
                # Each intermediate converter works on its own deep copy so
                # they do not see or overwrite one another's output.
                int_reps = [deepcopy(root) for _ in self._intermediate_converters]
                for ic, rep in zip(self._intermediate_converters, int_reps):
                    ic._add_translation(rep)
                root.original.text = self._combine_blocks(int_reps, self._separator)
            self._add_translation(root)
        finally:
            # The original text was only replaced temporarily to feed the
            # combined output into _add_translation; restore it even if a
            # translation step raises.
            root.original.text = original_text
        return root

    def _combine_blocks(self, blocks: List[TranslatedCodeBlock], separator: str) -> str:
        """
        Combines code blocks into a single piece of text
        Arguments:
            blocks - blocks whose text attributes are joined, in order
            separator - string placed between consecutive block texts
        Returns:
            the joined text
        """
        return separator.join(block.text for block in blocks)
@@ -130,7 +130,7 @@ class DiagramGenerator(Documenter):
130
130
  If the relevant fields have not been changed since the last time this method was
131
131
  called, nothing happens.
132
132
  """
133
- self._diagram_prompt_engine = MODEL_PROMPT_ENGINES[self._model_name](
133
+ self._diagram_prompt_engine = MODEL_PROMPT_ENGINES[self._model_id](
134
134
  source_language=self._source_language,
135
135
  target_language="text",
136
136
  target_version=None,
@@ -125,7 +125,7 @@ class MadLibsDocumenter(Documenter):
125
125
  out_text = self._parser.parse(working_block.text)
126
126
  obj.update(json.loads(out_text))
127
127
 
128
- self._parser.set_reference(block.original)
128
+ self._parser.parse_input(block.original)
129
129
  block.text = self._parser.parse(json.dumps(obj))
130
130
  block.tokens = self._llm.get_num_tokens(block.text)
131
131
  block.translated = True
@@ -92,11 +92,12 @@ class MadlibsDocumentationParser(JanusParser):
92
92
  def __init__(self):
93
93
  super().__init__(expected_keys=[])
94
94
 
95
- def parse_input(self, block: CodeBlock):
95
+ def parse_input(self, block: CodeBlock) -> str:
96
96
  # TODO: Perform comment stripping/placeholding here rather than in script
97
97
  text = super().parse_input(block)
98
98
  comment_ids = re.findall(r"<(?:BLOCK|INLINE)_COMMENT (\w{8})>", text)
99
99
  self.expected_keys = set(comment_ids)
100
+ return text
100
101
 
101
102
  def parse(self, text: str | BaseMessage) -> str:
102
103
  if isinstance(text, BaseMessage):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: janus-llm
3
- Version: 3.4.3
3
+ Version: 3.5.1
4
4
  Summary: A transcoding library using LLMs.
5
5
  Home-page: https://github.com/janus-llm/janus-llm
6
6
  License: Apache 2.0
@@ -1,15 +1,16 @@
1
- janus/__init__.py,sha256=HX3kYLkqXTi8C9sXb0P1mGki4DMJJzxHSQSxoilv7sA,361
1
+ janus/__init__.py,sha256=nMslKA6BZu1FW0pfi-tziluM8r6gETz2o0YNYzKC6Lo,361
2
2
  janus/__main__.py,sha256=lEkpNtLVPtFo8ySDZeXJ_NXDHb0GVdZFPWB4gD4RPS8,64
3
3
  janus/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  janus/_tests/conftest.py,sha256=V7uW-oq3YbFiRPvrq15YoVVrA1n_83pjgiyTZ-IUGW8,963
5
5
  janus/_tests/test_cli.py,sha256=oYJsUGWfpBJWEGRG5NGxdJedU5DU_m6fwJ7xEbJVYl0,4244
6
- janus/cli.py,sha256=_92FvDV4qza0nSmyiXqacYxyo1gY6IPwD4gCm6kZfqI,33213
6
+ janus/cli.py,sha256=84deHkbRWxT2ThHa9b08HBIIThy-mw1b5WCfjWOcuME,36950
7
7
  janus/converter/__init__.py,sha256=U2EOMcCykiC0ZqhorNefOP_04hOF18qhYoPKrVp1Vrk,345
8
8
  janus/converter/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  janus/converter/_tests/test_translate.py,sha256=yzcFEGc_z8QmBBBmC9dZnfL9tT8CD1rkpc8Hz44Jp4c,5631
10
+ janus/converter/aggregator.py,sha256=MuAXMKmq6PuUo_w6ljyiuDn81Gk2dN-Ci7FVeLc6vhs,1966
10
11
  janus/converter/converter.py,sha256=GL4moCOi5vvvsEjAf_7Vrn5GHq12nptH47AAP4soeQc,27956
11
- janus/converter/diagram.py,sha256=AyxkoyfMoQ7jpuOqyJ3bIvYqHJSEhmR-gYrIsq-d8tk,5285
12
- janus/converter/document.py,sha256=hsW512veNjFWbdl5WriuUdNmMEqZy8ktRvqn9rRmA6E,4566
12
+ janus/converter/diagram.py,sha256=sqztG_8T_gQ4njk5V7tg1QCgT6gFbL3CUy7t2l0X5i4,5283
13
+ janus/converter/document.py,sha256=qNt2UncMheUBadXCFHGq74tqCrvZub5DCgZpd3Qa54o,4564
13
14
  janus/converter/evaluate.py,sha256=APWQUY3gjAXqkJkPzvj0UA4wPK3Cv9QSJLM-YK9t-ng,476
14
15
  janus/converter/requirements.py,sha256=9tvQ40FZJtG8niIFn45gPQCgKKHVPPoFLinBv6RAqO4,2027
15
16
  janus/converter/translate.py,sha256=0brQTlSfBYmXtoM8QYIOiyr0LrTr0S1n68Du-BR7_WQ,4236
@@ -85,7 +86,7 @@ janus/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
86
  janus/parsers/_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
87
  janus/parsers/_tests/test_code_parser.py,sha256=3ay5QpUPcynX_EJ-YLl3PR28poutUkT7qvyWUSQ7Too,928
87
88
  janus/parsers/code_parser.py,sha256=3l0HfzgrvJuiwk779s9ZsgUl3xbp1nE1qZxh8aDYRBI,873
88
- janus/parsers/doc_parser.py,sha256=G7kDdJ3davHhEFXqwNYw7hOB26hKH_d_0XdpyyBHq_U,5835
89
+ janus/parsers/doc_parser.py,sha256=0pUsNZ9hKQLjIi8L8BgkOBHQZ_EGoFLHrBQ4hoDkjSw,5862
89
90
  janus/parsers/eval_parser.py,sha256=Gjh6aTZgpYd2ASJUEPMo4LpCL00cBmbOqc4KM3hy8x8,2922
90
91
  janus/parsers/parser.py,sha256=y6VV64bgVidf-oEFla3I--_28tnJsPBc6QUD_SkbfSE,1614
91
92
  janus/parsers/refiner_parser.py,sha256=W7ecDX7j6N-uWtRBYFD1EiEzDpDQws16nrVFzyArzf8,1632
@@ -101,8 +102,8 @@ janus/utils/_tests/test_progress.py,sha256=Rs_u5PiGjP-L-o6C1fhwfE1ig8jYu9Xo9s4p8
101
102
  janus/utils/enums.py,sha256=AoilbdiYyMvY2Mp0AM4xlbLSELfut2XMwhIM1S_msP4,27610
102
103
  janus/utils/logger.py,sha256=KZeuaMAnlSZCsj4yL0P6N-JzZwpxXygzACWfdZFeuek,2337
103
104
  janus/utils/progress.py,sha256=PIpcQec7SrhsfqB25LHj2CDDkfm9umZx90d9LZnAx6k,1469
104
- janus_llm-3.4.3.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
105
- janus_llm-3.4.3.dist-info/METADATA,sha256=rFGRSJIXyld0d04oeG6s7iC9HfQlyuN6I7b1UaCXXWo,4184
106
- janus_llm-3.4.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
107
- janus_llm-3.4.3.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
108
- janus_llm-3.4.3.dist-info/RECORD,,
105
+ janus_llm-3.5.1.dist-info/LICENSE,sha256=_j0st0a-HB6MRbP3_BW3PUqpS16v54luyy-1zVyl8NU,10789
106
+ janus_llm-3.5.1.dist-info/METADATA,sha256=kIZdLtkybJ-S_dyglL8kPRwnt2wND8wS2nEyM76FL-U,4184
107
+ janus_llm-3.5.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
108
+ janus_llm-3.5.1.dist-info/entry_points.txt,sha256=OGhQwzj6pvXp79B0SaBD5apGekCu7Dwe9fZZT_TZ544,39
109
+ janus_llm-3.5.1.dist-info/RECORD,,