janus-llm 4.2.0__tar.gz → 4.3.1__tar.gz

Files changed (115)
  1. {janus_llm-4.2.0 → janus_llm-4.3.1}/PKG-INFO +1 -1
  2. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/__init__.py +1 -1
  3. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/cli.py +150 -5
  4. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/converter.py +1 -0
  5. janus_llm-4.3.1/janus/converter/evaluate.py +241 -0
  6. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/alc/_tests/test_alc.py +1 -1
  7. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/alc/alc.py +9 -4
  8. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/splitter.py +31 -23
  9. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/treesitter/treesitter.py +9 -1
  10. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/llm/models_info.py +17 -12
  11. janus_llm-4.3.1/janus/parsers/eval_parsers/incose_parser.py +134 -0
  12. janus_llm-4.3.1/janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
  13. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/partition_parser.py +41 -9
  14. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/refiners/refiner.py +30 -0
  15. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/utils/enums.py +14 -0
  16. {janus_llm-4.2.0 → janus_llm-4.3.1}/pyproject.toml +1 -1
  17. janus_llm-4.2.0/janus/converter/evaluate.py +0 -15
  18. {janus_llm-4.2.0 → janus_llm-4.3.1}/LICENSE +0 -0
  19. {janus_llm-4.2.0 → janus_llm-4.3.1}/README.md +0 -0
  20. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/__main__.py +0 -0
  21. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/_tests/__init__.py +0 -0
  22. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/_tests/conftest.py +0 -0
  23. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/_tests/test_cli.py +0 -0
  24. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/__init__.py +0 -0
  25. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/_tests/__init__.py +0 -0
  26. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/_tests/test_translate.py +0 -0
  27. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/aggregator.py +0 -0
  28. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/diagram.py +0 -0
  29. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/document.py +0 -0
  30. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/partition.py +0 -0
  31. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/requirements.py +0 -0
  32. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/translate.py +0 -0
  33. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/__init__.py +0 -0
  34. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/_tests/__init__.py +0 -0
  35. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_collections.py +0 -0
  36. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_database.py +0 -0
  37. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_vectorize.py +0 -0
  38. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/collections.py +0 -0
  39. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/database.py +0 -0
  40. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/embedding_models_info.py +0 -0
  41. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/embedding/vectorize.py +0 -0
  42. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/__init__.py +0 -0
  43. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/_tests/__init__.py +0 -0
  44. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/_tests/test_combine.py +0 -0
  45. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/_tests/test_splitter.py +0 -0
  46. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/alc/__init__.py +0 -0
  47. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/alc/_tests/__init__.py +0 -0
  48. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/binary/__init__.py +0 -0
  49. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/binary/_tests/__init__.py +0 -0
  50. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/binary/_tests/test_binary.py +0 -0
  51. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/binary/binary.py +0 -0
  52. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/binary/reveng/decompile_script.py +0 -0
  53. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/block.py +0 -0
  54. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/combine.py +0 -0
  55. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/file.py +0 -0
  56. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/mumps/__init__.py +0 -0
  57. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/mumps/_tests/__init__.py +0 -0
  58. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/mumps/_tests/test_mumps.py +0 -0
  59. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/mumps/mumps.py +0 -0
  60. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/mumps/patterns.py +0 -0
  61. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/naive/__init__.py +0 -0
  62. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/naive/basic_splitter.py +0 -0
  63. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/naive/chunk_splitter.py +0 -0
  64. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/naive/registry.py +0 -0
  65. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/naive/simple_ast.py +0 -0
  66. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/naive/tag_splitter.py +0 -0
  67. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/node.py +0 -0
  68. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/treesitter/__init__.py +0 -0
  69. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/treesitter/_tests/__init__.py +0 -0
  70. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/treesitter/_tests/test_treesitter.py +0 -0
  71. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/llm/__init__.py +0 -0
  72. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/llm/model_callbacks.py +0 -0
  73. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/__init__.py +0 -0
  74. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/__init__.py +0 -0
  75. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/reference.py +0 -0
  76. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/target.py +0 -0
  77. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_bleu.py +0 -0
  78. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_chrf.py +0 -0
  79. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_file_pairing.py +0 -0
  80. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_llm.py +0 -0
  81. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_reading.py +0 -0
  82. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_rouge_score.py +0 -0
  83. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_similarity_score.py +0 -0
  84. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_treesitter_metrics.py +0 -0
  85. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/bleu.py +0 -0
  86. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/chrf.py +0 -0
  87. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/cli.py +0 -0
  88. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/complexity_metrics.py +0 -0
  89. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/file_pairing.py +0 -0
  90. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/llm_metrics.py +0 -0
  91. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/metric.py +0 -0
  92. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/reading.py +0 -0
  93. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/rouge_score.py +0 -0
  94. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/similarity.py +0 -0
  95. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/metrics/splitting.py +0 -0
  96. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/__init__.py +0 -0
  97. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/_tests/__init__.py +0 -0
  98. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/_tests/test_code_parser.py +0 -0
  99. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/code_parser.py +0 -0
  100. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/doc_parser.py +0 -0
  101. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/eval_parser.py +0 -0
  102. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/parser.py +0 -0
  103. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/reqs_parser.py +0 -0
  104. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/parsers/uml.py +0 -0
  105. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/prompts/__init__.py +0 -0
  106. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/prompts/prompt.py +0 -0
  107. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/refiners/uml.py +0 -0
  108. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/retrievers/retriever.py +0 -0
  109. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/utils/__init__.py +0 -0
  110. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/utils/_tests/__init__.py +0 -0
  111. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/utils/_tests/test_logger.py +0 -0
  112. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/utils/_tests/test_progress.py +0 -0
  113. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/utils/logger.py +0 -0
  114. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/utils/pdf_docs_reader.py +0 -0
  115. {janus_llm-4.2.0 → janus_llm-4.3.1}/janus/utils/progress.py +0 -0
{janus_llm-4.2.0 → janus_llm-4.3.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: janus-llm
-Version: 4.2.0
+Version: 4.3.1
 Summary: A transcoding library using LLMs.
 Home-page: https://github.com/janus-llm/janus-llm
 License: Apache 2.0
{janus_llm-4.2.0 → janus_llm-4.3.1}/janus/__init__.py
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
 from janus.converter.translate import Translator
 from janus.metrics import *  # noqa: F403

-__version__ = "4.2.0"
+__version__ = "4.3.1"

 # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
 warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
{janus_llm-4.2.0 → janus_llm-4.3.1}/janus/cli.py
@@ -19,6 +19,7 @@ from janus.converter.aggregator import Aggregator
 from janus.converter.converter import Converter
 from janus.converter.diagram import DiagramGenerator
 from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
+from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator
 from janus.converter.partition import Partitioner
 from janus.converter.requirements import RequirementsDocumenter
 from janus.converter.translate import Translator
@@ -127,7 +128,7 @@ embedding = typer.Typer(

 def version_callback(value: bool) -> None:
     if value:
-        from janus import __version__ as version
+        from . import __version__ as version

         print(f"Janus CLI [blue]v{version}[/blue]")
         raise typer.Exit()
@@ -655,6 +656,16 @@ def partition(
             click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
         ),
     ] = "file",
+    refiner_types: Annotated[
+        list[str],
+        typer.Option(
+            "-r",
+            "--refiner",
+            help="List of refiner types to use. Add -r for each refiner to use in\
+                refinement chain",
+            click_type=click.Choice(list(REFINERS.keys())),
+        ),
+    ] = ["JanusRefiner"],
     max_tokens: Annotated[
         int,
         typer.Option(
@@ -673,6 +684,7 @@ def partition(
         ),
     ] = 8192,
 ):
+    refiner_types = [REFINERS[r] for r in refiner_types]
     model_arguments = dict(temperature=temperature)
     kwargs = dict(
         model=llm_name,
@@ -681,6 +693,7 @@ def partition(
         max_prompts=max_prompts,
         max_tokens=max_tokens,
         splitter_type=splitter_type,
+        refiner_types=refiner_types,
        partition_token_limit=partition_token_limit,
     )
     partitioner = Partitioner(**kwargs)
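
Note: taken together, the partition hunks above resolve each repeated -r flag through the REFINERS registry and pass the resulting classes to Partitioner. A hedged usage sketch (only the default JanusRefiner is named in this diff; any other name would have to be a registered REFINERS key, and the elided options are unchanged from 4.2.0):

    janus partition ... -S file -r JanusRefiner

Each additional -r flag appends one more refiner to the refinement chain, in order.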
@@ -815,6 +828,139 @@ def diagram(
     diagram_generator.translate(input_dir, output_dir, overwrite, collection)


+@app.command(
+    help="LLM self evaluation",
+    no_args_is_help=True,
+)
+def llm_self_eval(
+    input_dir: Annotated[
+        Path,
+        typer.Option(
+            "--input",
+            "-i",
+            help="The directory containing the source code to be evaluated. "
+            "The files should all be in one flat directory.",
+        ),
+    ],
+    language: Annotated[
+        str,
+        typer.Option(
+            "--language",
+            "-l",
+            help="The language of the source code.",
+            click_type=click.Choice(sorted(LANGUAGES)),
+        ),
+    ],
+    output_dir: Annotated[
+        Path,
+        typer.Option(
+            "--output-dir", "-o", help="The directory to store the evaluations in."
+        ),
+    ],
+    llm_name: Annotated[
+        str,
+        typer.Option(
+            "--llm",
+            "-L",
+            help="The custom name of the model set with 'janus llm add'.",
+        ),
+    ] = "gpt-4o",
+    evaluation_type: Annotated[
+        str,
+        typer.Option(
+            "--evaluation-type",
+            "-e",
+            help="Type of output to evaluate.",
+            click_type=click.Choice(["incose", "comments"]),
+        ),
+    ] = "incose",
+    max_prompts: Annotated[
+        int,
+        typer.Option(
+            "--max-prompts",
+            "-m",
+            help="The maximum number of times to prompt a model on one functional block "
+            "before exiting the application. This is to prevent wasting too much money.",
+        ),
+    ] = 10,
+    overwrite: Annotated[
+        bool,
+        typer.Option(
+            "--overwrite/--preserve",
+            help="Whether to overwrite existing files in the output directory",
+        ),
+    ] = False,
+    temperature: Annotated[
+        float,
+        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
+    ] = 0.7,
+    collection: Annotated[
+        str,
+        typer.Option(
+            "--collection",
+            "-c",
+            help="If set, will put the translated result into a Chroma DB "
+            "collection with the name provided.",
+        ),
+    ] = None,
+    splitter_type: Annotated[
+        str,
+        typer.Option(
+            "-S",
+            "--splitter",
+            help="Name of custom splitter to use",
+            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
+        ),
+    ] = "file",
+    refiner_types: Annotated[
+        list[str],
+        typer.Option(
+            "-r",
+            "--refiner",
+            help="List of refiner types to use. Add -r for each refiner to use in\
+                refinement chain",
+            click_type=click.Choice(list(REFINERS.keys())),
+        ),
+    ] = ["JanusRefiner"],
+    eval_items_per_request: Annotated[
+        int,
+        typer.Option(
+            "--eval-items-per-request",
+            "-rc",
+            help="The maximum number of evaluation items per request",
+        ),
+    ] = None,
+    max_tokens: Annotated[
+        int,
+        typer.Option(
+            "--max-tokens",
+            "-M",
+            help="The maximum number of tokens the model will take in. "
+            "If unspecified, model's default max will be used.",
+        ),
+    ] = None,
+):
+    model_arguments = dict(temperature=temperature)
+    refiner_types = [REFINERS[r] for r in refiner_types]
+    kwargs = dict(
+        eval_items_per_request=eval_items_per_request,
+        model=llm_name,
+        model_arguments=model_arguments,
+        source_language=language,
+        max_prompts=max_prompts,
+        max_tokens=max_tokens,
+        splitter_type=splitter_type,
+        refiner_types=refiner_types,
+    )
+    # Setting parser type here
+    if evaluation_type == "incose":
+        evaluator = RequirementEvaluator(**kwargs)
+    elif evaluation_type == "comments":
+        evaluator = InlineCommentEvaluator(**kwargs)
+
+    evaluator.translate(input_dir, output_dir, overwrite, collection)
+
+
 @db.command("init", help="Connect to or create a database.")
 def db_init(
     path: Annotated[
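
Note: the new llm_self_eval command dispatches on --evaluation-type, building a RequirementEvaluator for "incose" or an InlineCommentEvaluator for "comments", then reuses the standard translate() pipeline. A hedged invocation sketch (Typer conventionally exposes the function llm_self_eval as the subcommand llm-self-eval; all flags are defined above):

    janus llm-self-eval -i ./src -l python -o ./evals -e comments -rc 5

This would evaluate inline comments in batches of five items per request with the default gpt-4o model.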
@@ -1116,13 +1262,12 @@ def llm_add(
         show_choices=False,
     )
     params = dict(
-        # OpenAI uses the "model_name" key for what we're calling "long_model_id"
-        model_name=MODEL_ID_TO_LONG_ID[model_id],
+        model_name=model_name,
         temperature=0.7,
         n=1,
     )
-    max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
-    model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
+    max_tokens = TOKEN_LIMITS[model_name]
+    model_cost = COST_PER_1K_TOKENS[model_name]
     cfg = {
         "model_type": model_type,
         "model_id": model_id,
{janus_llm-4.2.0 → janus_llm-4.3.1}/janus/converter/converter.py
@@ -464,6 +464,7 @@ class Converter:
         for in_path, out_path in in_out_pairs:
             # Translate the file, skip it if there's a rate limit error
             try:
+                log.info(f"Processing {in_path.relative_to(input_directory)}")
                 out_block = self.translate_file(in_path)
                 total_cost += out_block.total_cost
             except RateLimitError:
janus_llm-4.3.1/janus/converter/evaluate.py (new file)
@@ -0,0 +1,241 @@
+import json
+import re
+from copy import deepcopy
+
+from langchain_core.runnables import Runnable, RunnableLambda, RunnableParallel
+
+from janus.converter.converter import Converter
+from janus.language.block import TranslatedCodeBlock
+from janus.language.combine import JsonCombiner
+from janus.parsers.eval_parsers.incose_parser import IncoseParser
+from janus.parsers.eval_parsers.inline_comment_parser import InlineCommentParser
+from janus.utils.logger import create_logger
+
+log = create_logger(__name__)
+
+
+class Evaluator(Converter):
+    """Evaluator
+
+    A class that performs an LLM self evaluation
+    on an input target, with an associated prompt.
+
+    Current valid evaluation types:
+    ['incose', 'comments']
+
+    """
+
+    def __init__(self, **kwargs) -> None:
+        """Initialize the Evaluator class
+
+        Arguments:
+            model: The LLM to use for translation. If an OpenAI model, the
+                `OPENAI_API_KEY` environment variable must be set and the
+                `OPENAI_ORG_ID` environment variable should be set if needed.
+            model_arguments: Additional arguments to pass to the LLM constructor.
+            max_prompts: The maximum number of prompts to try before giving up.
+        """
+        super().__init__(**kwargs)
+        self._combiner = JsonCombiner()
+        self._load_parameters()
+
+
+class RequirementEvaluator(Evaluator):
+    """INCOSE Requirement Evaluator
+
+    A class that performs an LLM self evaluation on an input target,
+    with an associated prompt.
+
+    The evaluation prompts are for INCOSE evaluations.
+
+    """
+
+    def __init__(self, eval_items_per_request: int | None = None, **kwargs) -> None:
+        """Initialize the Evaluator class
+
+        Arguments:
+            model: The LLM to use for translation. If an OpenAI model, the
+                `OPENAI_API_KEY` environment variable must be set and the
+                `OPENAI_ORG_ID` environment variable should be set if needed.
+            model_arguments: Additional arguments to pass to the LLM constructor.
+            max_prompts: The maximum number of prompts to try before giving up.
+        """
+        super().__init__(**kwargs)
+        self.eval_items_per_request = eval_items_per_request
+        self._parser = IncoseParser()
+        self.set_prompt("eval_prompts/incose")
+
+    def _input_runnable(self) -> Runnable:
+        def _get_code(json_text: str) -> str:
+            return json.loads(json_text)["code"]
+
+        def _get_reqs(json_text: str) -> str:
+            return json.dumps(json.loads(json_text)["requirements"])
+
+        return RunnableLambda(self._parser.parse_input) | RunnableParallel(
+            SOURCE_CODE=_get_code,
+            REQUIREMENTS=_get_reqs,
+            context=self._retriever,
+        )
+
+    def _add_translation(self, block: TranslatedCodeBlock):
+        if block.translated:
+            return
+
+        if block.original.text is None:
+            block.translated = True
+            return
+
+        if self.eval_items_per_request is None:
+            return super()._add_translation(block)
+
+        input_obj = json.loads(block.original.text)
+        requirements = input_obj.get("requirements", [])
+
+        if not requirements:
+            log.debug(f"[{block.name}] Skipping empty block")
+            block.translated = True
+            block.text = None
+            block.complete = True
+            return
+
+        # For some reason requirements objects are in nested lists?
+        while isinstance(requirements[0], list):
+            requirements = [r for lst in requirements for r in lst]
+
+        if len(requirements) <= self.eval_items_per_request:
+            input_obj["requirements"] = requirements
+            block.original.text = json.dumps(input_obj)
+            return super()._add_translation(block)
+
+        block.processing_time = 0
+        block.cost = 0
+        block.retries = 0
+        obj = {}
+        for i in range(0, len(requirements), self.eval_items_per_request):
+            # Build a new TranslatedBlock using the new working text
+            working_requirements = requirements[i : i + self.eval_items_per_request]
+            working_copy = deepcopy(block.original)
+            working_obj = json.loads(working_copy.text)  # type: ignore
+            working_obj["requirements"] = working_requirements
+            working_copy.text = json.dumps(working_obj)
+            working_block = TranslatedCodeBlock(working_copy, self._target_language)
+
+            # Run the LLM on the working text
+            super()._add_translation(working_block)
+
+            # Update metadata to include for all runs
+            block.retries += working_block.retries
+            block.cost += working_block.cost
+            block.processing_time += working_block.processing_time
+
+            # Update the output text to merge this section's output in
+            obj.update(json.loads(working_block.text))
+
+        block.text = json.dumps(obj)
+        block.tokens = self._llm.get_num_tokens(block.text)
+        block.translated = True
+
+        log.debug(
+            f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
+        )
+
+
+class InlineCommentEvaluator(Evaluator):
+    """Inline Comment Evaluator
+
+    A class that performs an LLM self evaluation on inline comments,
+    with an associated prompt.
+    """
+
+    def __init__(self, eval_items_per_request: int | None = None, **kwargs) -> None:
+        """Initialize the Evaluator class
+
+        Arguments:
+            model: The LLM to use for translation. If an OpenAI model, the
+                `OPENAI_API_KEY` environment variable must be set and the
+                `OPENAI_ORG_ID` environment variable should be set if needed.
+            model_arguments: Additional arguments to pass to the LLM constructor.
+            max_prompts: The maximum number of prompts to try before giving up.
+        """
+        super().__init__(**kwargs)
+        self._combiner = JsonCombiner()
+        self._load_parameters()
+        self._parser = InlineCommentParser()
+        self.set_prompt("eval_prompts/inline_comments")
+        self.eval_items_per_request = eval_items_per_request
+
+    def _add_translation(self, block: TranslatedCodeBlock):
+        if block.translated:
+            return
+
+        if block.original.text is None:
+            block.translated = True
+            return
+
+        if self.eval_items_per_request is None:
+            return super()._add_translation(block)
+
+        comment_pattern = r"<(?:INLINE|BLOCK)_COMMENT \w{8}>.*$"
+        comments = list(
+            re.finditer(comment_pattern, block.original.text, flags=re.MULTILINE)
+        )
+
+        if not comments:
+            log.info(f"[{block.name}] Skipping commentless block")
+            block.translated = True
+            block.text = None
+            block.complete = True
+            return
+
+        if len(comments) <= self.eval_items_per_request:
+            return super()._add_translation(block)
+
+        comment_group_indices = list(range(0, len(comments), self.eval_items_per_request))
+        log.debug(
+            f"[{block.name}] Block contains more than {self.eval_items_per_request}"
+            f" comments, splitting {len(comments)} comments into"
+            f" {len(comment_group_indices)} groups"
+        )
+
+        block.processing_time = 0
+        block.cost = 0
+        block.retries = 0
+        obj = {}
+        for i in range(0, len(comments), self.eval_items_per_request):
+            # Split the text into the section containing comments of interest,
+            # all the text prior to those comments, and all the text after them
+            working_comments = comments[i : i + self.eval_items_per_request]
+            start_idx = working_comments[0].start()
+            end_idx = working_comments[-1].end()
+            prefix = block.original.text[:start_idx]
+            keeper = block.original.text[start_idx:end_idx]
+            suffix = block.original.text[end_idx:]
+
+            # Strip all comment placeholders outside of the section of interest
+            prefix = re.sub(comment_pattern, "", prefix, flags=re.MULTILINE)
+            suffix = re.sub(comment_pattern, "", suffix, flags=re.MULTILINE)
+
+            # Build a new TranslatedBlock using the new working text
+            working_copy = deepcopy(block.original)
+            working_copy.text = prefix + keeper + suffix
+            working_block = TranslatedCodeBlock(working_copy, self._target_language)
+
+            # Run the LLM on the working text
+            super()._add_translation(working_block)
+
+            # Update metadata to include for all runs
+            block.retries += working_block.retries
+            block.cost += working_block.cost
+            block.processing_time += working_block.processing_time
+
+            # Update the output text to merge this section's output in
+            obj.update(json.loads(working_block.text))
+
+        block.text = json.dumps(obj)
+        block.tokens = self._llm.get_num_tokens(block.text)
+        block.translated = True
+
+        log.debug(
+            f"[{block.name}] Output code:\n{json.dumps(json.loads(block.text), indent=2)}"
+        )
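
Note: both evaluators share one batching pattern: split the evaluation items into groups of eval_items_per_request, run one LLM request per group via super()._add_translation, merge the per-group JSON outputs, and accumulate cost/retries/processing time across runs. A minimal, self-contained sketch of that pattern (illustrative only; evaluate_group stands in for the LLM call):

    import json

    def evaluate_in_batches(items, per_request, evaluate_group):
        merged = {}
        total_cost = 0.0
        for i in range(0, len(items), per_request):
            group = items[i : i + per_request]      # one request's worth of items
            result_json, cost = evaluate_group(group)
            merged.update(json.loads(result_json))  # fold this group's output in
            total_cost += cost                      # accumulate metadata across runs
        return json.dumps(merged), total_cost

    # Stub standing in for a single LLM request:
    stub = lambda group: (json.dumps({str(x): "ok" for x in group}), 0.01)
    print(evaluate_in_batches(list(range(7)), 3, stub))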
{janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/alc/_tests/test_alc.py
@@ -20,7 +20,7 @@ class TestAlcSplitter(unittest.TestCase):
     def test_split(self):
         """Test the split method."""
         tree_root = self.splitter.split(self.test_file)
-        self.assertAlmostEqual(tree_root.n_descendents, 32, delta=5)
+        self.assertAlmostEqual(tree_root.n_descendents, 16, delta=2)
         self.assertLessEqual(tree_root.max_tokens, self.splitter.max_tokens)
         self.assertFalse(tree_root.complete)
         self.combiner.combine_children(tree_root)
{janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/alc/alc.py
@@ -79,10 +79,15 @@ class AlcSplitter(TreeSitterSplitter):
         if len(sects) > 1:
             block.children = []
             for sect in sects:
-                if sect[0].node_type in sect_types:
-                    sect_node = self.merge_nodes(sect)
-                    sect_node.children = sect
-                    sect_node.node_type = NodeType(str(sect[0].node_type)[:5])
+                node_type = sect[0].node_type
+                if node_type in sect_types:
+                    if len(sect) == 1:
+                        # Don't make a node its own child
+                        sect_node = sect[0]
+                    else:
+                        sect_node = self.merge_nodes(sect)
+                        sect_node.children = sect
+                        sect_node.node_type = NodeType(str(node_type)[:5])
                     block.children.append(sect_node)
                 else:
                     block.children.extend(sect)
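
Note: the guard above keeps a one-node section from being wrapped in a merged parent whose only child is itself. A self-contained sketch of the same logic (Node and merge are illustrative stand-ins for the package's CodeBlock and merge_nodes):

    class Node:
        def __init__(self, text, children=()):
            self.text, self.children = text, list(children)

    def merge(sect):
        # Stand-in for merge_nodes: concatenate a section's text
        return Node("".join(n.text for n in sect))

    def section_root(sect):
        if len(sect) == 1:
            return sect[0]    # one-node section: reuse the node directly
        parent = merge(sect)  # multi-node section: wrap in a merged parent
        parent.children = sect
        return parent

    a = Node("CSECT A\n")
    assert section_root([a]) is a  # no redundant self-parenting
    assert len(section_root([a, Node("DS 0H\n")]).children) == 2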
{janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/splitter.py
@@ -275,42 +275,50 @@ class Splitter(FileManager):

         groups = [[n] for n in nodes]
         while len(groups) > 1 and min(adj_sums) <= self.max_tokens and any(merge_allowed):
-            # Get the indices of the adjacent nodes that would result in the
-            # smallest possible merged snippet. Ignore protected nodes.
+            # Get the index of the node that would result in the smallest
+            # merged snippet when merged with the node that follows it.
+            # Ignore protected nodes.
             mergeable_indices = compress(range(len(adj_sums)), merge_allowed)
-            i0 = int(min(mergeable_indices, key=adj_sums.__getitem__))
-            i1 = i0 + 1
+            C = int(min(mergeable_indices, key=adj_sums.__getitem__))
+
+            # C: Central index
+            # L: Index to the left
+            # R: Index to the right (to be merged in to C)
+            # N: Next index (to the right of R, the "new R")
+            L, R, N = C - 1, C + 1, C + 2

             # Recalculate the length. We can't simply use the adj_sum, because
             # it is an underestimate due to the adjoining suffix/prefix.
-            central_node = groups[i0][-1]
-            merged_text = "".join([text_chunks[i0], central_node.suffix, text_chunks[i1]])
+            central_node = groups[C][-1]
+            merged_text = "".join([text_chunks[C], central_node.suffix, text_chunks[R]])
             merged_text_length = self._count_tokens(merged_text)

             # If the true length of the merged pair is too long, don't merge them
             # Instead, correct the estimate, since shorter pairs may yet exist
             if merged_text_length > self.max_tokens:
-                adj_sums[i0] = merged_text_length
+                adj_sums[C] = merged_text_length
                 continue

             # Update adjacent sum estimates
-            if i0 > 0:
-                adj_sums[i0 - 1] += merged_text_length
-            if i1 < len(adj_sums) - 1:
-                adj_sums[i1 + 1] += merged_text_length
-
-            if i0 > 0 and i1 < len(merge_allowed) - 1:
-                if not (merge_allowed[i0 - 1] and merge_allowed[i1 + 1]):
-                    merge_allowed[i0 - 1] = merge_allowed[i1 + 1] = False
+            if L >= 0:
+                adj_sums[L] = lengths[L] + merged_text_length
+            if N < len(adj_sums):
+                adj_sums[R] = lengths[N] + merged_text_length

             # The potential merge length for this pair is removed
-            adj_sums.pop(i0)
-            merge_allowed.pop(i0)
+            adj_sums.pop(C)
+
+            # The merged-in node is removed from the protected list
+            # The merge_allowed list need not be updated - if the node now to
+            # its right is protected, the merge_allowed element corresponding
+            # to the merged neighbor will have been True, and now corresponds
+            # to the merged node.
+            merge_allowed.pop(C)

             # Merge the pair of node groups
-            groups[i0 : i1 + 1] = [groups[i0] + groups[i1]]
-            text_chunks[i0 : i1 + 1] = [merged_text]
-            lengths[i0 : i1 + 1] = [merged_text_length]
+            groups[C:N] = [groups[C] + groups[R]]
+            text_chunks[C:N] = [merged_text]
+            lengths[C:N] = [merged_text_length]

         return groups

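Note: this loop greedily merges whichever adjacent pair is cheapest until nothing more fits under max_tokens. A runnable sketch of the 4.3.1 bookkeeping, with token counting stubbed as len() and the suffix/prefix and merge_allowed details omitted (the real code uses self._count_tokens and tracks protected nodes):

    def greedy_merge(chunks, max_tokens):
        lengths = [len(c) for c in chunks]
        # adj_sums[i] estimates the cost of merging chunks i and i + 1
        adj_sums = [lengths[i] + lengths[i + 1] for i in range(len(chunks) - 1)]
        while len(chunks) > 1 and min(adj_sums) <= max_tokens:
            C = min(range(len(adj_sums)), key=adj_sums.__getitem__)
            L, R, N = C - 1, C + 1, C + 2
            merged = chunks[C] + chunks[R]
            if len(merged) > max_tokens:
                adj_sums[C] = len(merged)  # correct the underestimate, retry
                continue
            # Recompute (rather than increment) the neighboring estimates
            if L >= 0:
                adj_sums[L] = lengths[L] + len(merged)
            if N < len(adj_sums):
                adj_sums[R] = lengths[N] + len(merged)
            adj_sums.pop(C)
            chunks[C:N] = [merged]
            lengths[C:N] = [len(merged)]
        return chunks

    print(greedy_merge(["aa", "bb", "cc", "dddddd"], 6))  # ['aabbcc', 'dddddd']

The 4.2.0 code incremented the neighbors' estimates in place, which overcounts the merged chunk's old length after every merge; recomputing from lengths keeps each estimate exact up to the adjoining suffix/prefix.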
@@ -403,13 +411,13 @@ class Splitter(FileManager):
             self._split_into_lines(node)

     def _split_into_lines(self, node: CodeBlock):
-        split_text = re.split(r"(\n+)", node.text)
+        split_text = list(re.split(r"(\n+)", node.text))

         # If the string didn't start/end with newlines, make sure to include
         # empty strings for the prefix/suffixes
-        if split_text[0].strip("\n"):
+        if not re.match(r"^\n+$", split_text[0]):
             split_text = [""] + split_text
-        if split_text[-1].strip("\n"):
+        if not re.match(r"^\n+$", split_text[-1]):
             split_text.append("")
         betweens = split_text[::2]
         lines = split_text[1::2]
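
Note: re.split with a capturing group keeps the newline separators in the result list, and already yields empty strings when the text starts or ends with a separator; the guards above normalize the remaining cases so the [::2] and [1::2] slices stay aligned. A quick illustration:

    import re

    print(re.split(r"(\n+)", "line1\n\nline2"))  # ['line1', '\n\n', 'line2']
    print(re.split(r"(\n+)", "\nline1\n"))       # ['', '\n', 'line1', '\n', '']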
{janus_llm-4.2.0 → janus_llm-4.3.1}/janus/language/treesitter/treesitter.py
@@ -154,7 +154,15 @@ class TreeSitterSplitter(Splitter):
             The pointer to the language.
         """
         lib = cdll.LoadLibrary(os.fspath(so_file))
-        language_function = getattr(lib, f"tree_sitter_{self.language}")
+        # Added this try-except block to handle the case where the language is not
+        # supported in lowercase by the creator of the grammar. Ex: COBOL
+        # https://github.com/yutaro-sakamoto/tree-sitter-cobol/blob/main/grammar.js#L13
+        try:
+            language_function = getattr(lib, f"tree_sitter_{self.language}")
+        except AttributeError:
+            language = self.language.upper()
+            language_function = getattr(lib, f"tree_sitter_{language}")
+
         language_function.restype = c_void_p
         pointer = language_function()
         return pointer