janus-llm 3.4.3__tar.gz → 3.5.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. {janus_llm-3.4.3 → janus_llm-3.5.1}/PKG-INFO +1 -1
  2. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/__init__.py +1 -1
  3. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/cli.py +123 -1
  4. janus_llm-3.5.1/janus/converter/aggregator.py +52 -0
  5. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/diagram.py +1 -1
  6. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/document.py +1 -1
  7. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/doc_parser.py +2 -1
  8. {janus_llm-3.4.3 → janus_llm-3.5.1}/pyproject.toml +1 -1
  9. {janus_llm-3.4.3 → janus_llm-3.5.1}/LICENSE +0 -0
  10. {janus_llm-3.4.3 → janus_llm-3.5.1}/README.md +0 -0
  11. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/__main__.py +0 -0
  12. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/_tests/__init__.py +0 -0
  13. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/_tests/conftest.py +0 -0
  14. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/_tests/test_cli.py +0 -0
  15. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/__init__.py +0 -0
  16. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/_tests/__init__.py +0 -0
  17. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/_tests/test_translate.py +0 -0
  18. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/converter.py +0 -0
  19. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/evaluate.py +0 -0
  20. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/requirements.py +0 -0
  21. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/converter/translate.py +0 -0
  22. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/__init__.py +0 -0
  23. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/_tests/__init__.py +0 -0
  24. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/_tests/test_collections.py +0 -0
  25. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/_tests/test_database.py +0 -0
  26. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/_tests/test_vectorize.py +0 -0
  27. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/collections.py +0 -0
  28. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/database.py +0 -0
  29. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/embedding_models_info.py +0 -0
  30. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/embedding/vectorize.py +0 -0
  31. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/__init__.py +0 -0
  32. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/_tests/__init__.py +0 -0
  33. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/_tests/test_combine.py +0 -0
  34. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/_tests/test_splitter.py +0 -0
  35. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/alc/__init__.py +0 -0
  36. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/alc/_tests/__init__.py +0 -0
  37. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/alc/_tests/test_alc.py +0 -0
  38. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/alc/alc.py +0 -0
  39. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/binary/__init__.py +0 -0
  40. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/binary/_tests/__init__.py +0 -0
  41. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/binary/_tests/test_binary.py +0 -0
  42. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/binary/binary.py +0 -0
  43. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/binary/reveng/decompile_script.py +0 -0
  44. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/block.py +0 -0
  45. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/combine.py +0 -0
  46. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/file.py +0 -0
  47. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/mumps/__init__.py +0 -0
  48. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/mumps/_tests/__init__.py +0 -0
  49. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/mumps/_tests/test_mumps.py +0 -0
  50. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/mumps/mumps.py +0 -0
  51. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/mumps/patterns.py +0 -0
  52. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/naive/__init__.py +0 -0
  53. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/naive/basic_splitter.py +0 -0
  54. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/naive/chunk_splitter.py +0 -0
  55. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/naive/registry.py +0 -0
  56. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/naive/simple_ast.py +0 -0
  57. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/naive/tag_splitter.py +0 -0
  58. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/node.py +0 -0
  59. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/splitter.py +0 -0
  60. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/treesitter/__init__.py +0 -0
  61. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/treesitter/_tests/__init__.py +0 -0
  62. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/treesitter/_tests/test_treesitter.py +0 -0
  63. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/language/treesitter/treesitter.py +0 -0
  64. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/llm/__init__.py +0 -0
  65. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/llm/model_callbacks.py +0 -0
  66. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/llm/models_info.py +0 -0
  67. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/__init__.py +0 -0
  68. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/__init__.py +0 -0
  69. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/reference.py +0 -0
  70. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/target.py +0 -0
  71. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/test_bleu.py +0 -0
  72. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/test_chrf.py +0 -0
  73. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/test_file_pairing.py +0 -0
  74. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/test_llm.py +0 -0
  75. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/test_reading.py +0 -0
  76. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/test_rouge_score.py +0 -0
  77. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/test_similarity_score.py +0 -0
  78. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/_tests/test_treesitter_metrics.py +0 -0
  79. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/bleu.py +0 -0
  80. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/chrf.py +0 -0
  81. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/cli.py +0 -0
  82. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/complexity_metrics.py +0 -0
  83. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/file_pairing.py +0 -0
  84. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/llm_metrics.py +0 -0
  85. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/metric.py +0 -0
  86. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/reading.py +0 -0
  87. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/rouge_score.py +0 -0
  88. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/similarity.py +0 -0
  89. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/metrics/splitting.py +0 -0
  90. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/__init__.py +0 -0
  91. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/_tests/__init__.py +0 -0
  92. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/_tests/test_code_parser.py +0 -0
  93. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/code_parser.py +0 -0
  94. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/eval_parser.py +0 -0
  95. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/parser.py +0 -0
  96. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/refiner_parser.py +0 -0
  97. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/reqs_parser.py +0 -0
  98. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/parsers/uml.py +0 -0
  99. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/prompts/__init__.py +0 -0
  100. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/prompts/prompt.py +0 -0
  101. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/refiners/refiner.py +0 -0
  102. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/utils/__init__.py +0 -0
  103. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/utils/_tests/__init__.py +0 -0
  104. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/utils/_tests/test_logger.py +0 -0
  105. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/utils/_tests/test_progress.py +0 -0
  106. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/utils/enums.py +0 -0
  107. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/utils/logger.py +0 -0
  108. {janus_llm-3.4.3 → janus_llm-3.5.1}/janus/utils/progress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: janus-llm
3
- Version: 3.4.3
3
+ Version: 3.5.1
4
4
  Summary: A transcoding library using LLMs.
5
5
  Home-page: https://github.com/janus-llm/janus-llm
6
6
  License: Apache 2.0
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
5
5
  from janus.converter.translate import Translator
6
6
  from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "3.4.3"
8
+ __version__ = "3.5.1"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
@@ -2,7 +2,7 @@ import json
2
2
  import logging
3
3
  import os
4
4
  from pathlib import Path
5
- from typing import Optional
5
+ from typing import List, Optional
6
6
 
7
7
  import click
8
8
  import typer
@@ -12,6 +12,8 @@ from rich.console import Console
12
12
  from rich.prompt import Confirm
13
13
  from typing_extensions import Annotated
14
14
 
15
+ from janus.converter.aggregator import Aggregator
16
+ from janus.converter.converter import Converter
15
17
  from janus.converter.diagram import DiagramGenerator
16
18
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
17
19
  from janus.converter.requirements import RequirementsDocumenter
@@ -439,6 +441,126 @@ def document(
439
441
  documenter.translate(input_dir, output_dir, overwrite, collection)
440
442
 
441
443
 
444
def get_subclasses(cls):
    """Return every direct and indirect subclass of ``cls``.

    The subclass tree is walked iteratively with a visited set, so a class
    that is reachable through several bases (diamond inheritance) is only
    expanded once.

    Args:
        cls: The class whose ``__subclasses__()`` tree is walked.

    Returns:
        A set containing all classes that inherit from ``cls``, directly or
        transitively. ``cls`` itself is not included.
    """
    found = set()
    pending = list(cls.__subclasses__())
    while pending:
        candidate = pending.pop()
        if candidate in found:
            # Already reached through another base class.
            continue
        found.add(candidate)
        pending.extend(candidate.__subclasses__())
    return found
448
+
449
+
450
@app.command()
def aggregate(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    collection: Annotated[
        str,
        typer.Option(
            "--collection",
            "-c",
            help="If set, will put the translated result into a Chroma DB "
            "collection with the name provided.",
        ),
    ] = None,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    intermediate_converters: Annotated[
        List[str],
        typer.Option(
            "-C",
            "--converter",
            help="Name of an intermediate converter to use",
            # Sorted so the choices are listed in a stable order; the subclass
            # lookup returns an unordered set.
            click_type=click.Choice(
                sorted(c.__name__ for c in get_subclasses(Converter))
            ),
        ),
    ] = ["Documenter"],
):
    """Aggregate the outputs of intermediate converters using an LLM."""
    # Map the converter names accepted on the command line to their classes.
    converter_classes = {c.__name__: c for c in get_subclasses(Converter)}
    model_arguments = dict(temperature=temperature)
    collections_config = get_collections_config()

    # Every intermediate converter and the aggregator itself are built with
    # the same model, language, database and splitter settings.
    shared_kwargs = dict(
        model=llm_name,
        model_arguments=model_arguments,
        source_language=language,
        max_prompts=max_prompts,
        db_path=db_loc,
        db_config=collections_config,
        splitter_type=splitter_type,
    )
    converters = [
        converter_classes[name](**shared_kwargs) for name in intermediate_converters
    ]

    aggregator = Aggregator(
        intermediate_converters=converters,
        prompt_template="basic_aggregation",
        **shared_kwargs,
    )
    aggregator.translate(input_dir, output_dir, overwrite, collection)
562
+
563
+
442
564
  @app.command(
443
565
  help="Diagram input code using an LLM.",
444
566
  no_args_is_help=True,
@@ -0,0 +1,52 @@
1
+ from copy import deepcopy
2
+ from typing import List
3
+
4
+ from janus.converter.converter import Converter
5
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
6
+
7
+
8
class Aggregator(Converter):
    """Converter that combines the outputs of several intermediate converters.

    Leaf blocks are passed through each intermediate converter, the resulting
    texts are joined with a separator, and the joined text is then translated
    by this converter. Parent blocks are translated from the joined texts of
    their already-translated children.
    """

    def __init__(
        self,
        intermediate_converters: List[Converter],
        separator: str = "\n==============\n",
        **kwargs,
    ):
        """
        Initialization Method
        Arguments:
            intermediate_converters: list of converters to use
            separator: separator string to partition different outputs before
                combination
            kwargs: forwarded unchanged to ``Converter.__init__``
        """
        self._intermediate_converters = intermediate_converters
        self._separator = separator
        super().__init__(**kwargs)
        self._load_parameters()

    def _iterative_translate(self, root: CodeBlock) -> TranslatedCodeBlock:
        """
        Wraps ``root`` in a TranslatedCodeBlock and translates the whole tree
        """
        res = TranslatedCodeBlock(root, language=self._target_language)
        return self._recursive_translate(res)

    def _recursive_translate(self, root: TranslatedCodeBlock) -> TranslatedCodeBlock:
        """
        Recursively translates code blocks from a bottom up approach

        Returns the same ``root`` object that was passed in. While the block is
        being translated, ``root.original.text`` is temporarily replaced by the
        combined text; it is restored afterwards, also when a step raises.
        """
        original_text = root.original.text
        try:
            if root.children:
                # Children first, so their translated text can be combined.
                for child in root.children:
                    self._recursive_translate(child)
                sources = root.children
            else:
                # Each intermediate converter works on its own copy of the leaf.
                sources = [deepcopy(root) for _ in self._intermediate_converters]
                for converter, rep in zip(self._intermediate_converters, sources):
                    converter._add_translation(rep)
            root.original.text = self._combine_blocks(sources, self._separator)
            self._add_translation(root)
        finally:
            root.original.text = original_text
        return root

    def _combine_blocks(self, blocks: List[TranslatedCodeBlock], separator: str) -> str:
        """
        Combines code blocks into a single piece of text
        """
        return separator.join([block.text for block in blocks])
@@ -130,7 +130,7 @@ class DiagramGenerator(Documenter):
130
130
  If the relevant fields have not been changed since the last time this method was
131
131
  called, nothing happens.
132
132
  """
133
- self._diagram_prompt_engine = MODEL_PROMPT_ENGINES[self._model_name](
133
+ self._diagram_prompt_engine = MODEL_PROMPT_ENGINES[self._model_id](
134
134
  source_language=self._source_language,
135
135
  target_language="text",
136
136
  target_version=None,
@@ -125,7 +125,7 @@ class MadLibsDocumenter(Documenter):
125
125
  out_text = self._parser.parse(working_block.text)
126
126
  obj.update(json.loads(out_text))
127
127
 
128
- self._parser.set_reference(block.original)
128
+ self._parser.parse_input(block.original)
129
129
  block.text = self._parser.parse(json.dumps(obj))
130
130
  block.tokens = self._llm.get_num_tokens(block.text)
131
131
  block.translated = True
@@ -92,11 +92,12 @@ class MadlibsDocumentationParser(JanusParser):
92
92
  def __init__(self):
93
93
  super().__init__(expected_keys=[])
94
94
 
95
- def parse_input(self, block: CodeBlock):
95
+ def parse_input(self, block: CodeBlock) -> str:
96
96
  # TODO: Perform comment stripping/placeholding here rather than in script
97
97
  text = super().parse_input(block)
98
98
  comment_ids = re.findall(r"<(?:BLOCK|INLINE)_COMMENT (\w{8})>", text)
99
99
  self.expected_keys = set(comment_ids)
100
+ return text
100
101
 
101
102
  def parse(self, text: str | BaseMessage) -> str:
102
103
  if isinstance(text, BaseMessage):
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "janus-llm"
3
- version = "3.4.3"
3
+ version = "3.5.1"
4
4
  description = "A transcoding library using LLMs."
5
5
  authors = ["Michael Doyle <mdoyle@mitre.org>", "Chris Glasz <cglasz@mitre.org>",
6
6
  "Chris Tohline <ctohline@mitre.org>", "William Macke <wmacke@mitre.org>",
File without changes
File without changes
File without changes