janus-llm 4.0.0__tar.gz → 4.2.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. {janus_llm-4.0.0 → janus_llm-4.2.0}/PKG-INFO +9 -1
  2. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/__init__.py +1 -1
  3. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/cli.py +161 -26
  4. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/__init__.py +1 -0
  5. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/_tests/test_translate.py +2 -2
  6. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/converter.py +45 -47
  7. janus_llm-4.2.0/janus/converter/partition.py +27 -0
  8. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/combine.py +22 -0
  9. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/llm/model_callbacks.py +9 -0
  10. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/llm/models_info.py +41 -17
  11. janus_llm-4.2.0/janus/parsers/partition_parser.py +136 -0
  12. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/refiners/refiner.py +8 -12
  13. janus_llm-4.2.0/janus/refiners/uml.py +33 -0
  14. janus_llm-4.2.0/janus/retrievers/retriever.py +102 -0
  15. janus_llm-4.2.0/janus/utils/pdf_docs_reader.py +134 -0
  16. {janus_llm-4.0.0 → janus_llm-4.2.0}/pyproject.toml +9 -1
  17. janus_llm-4.0.0/janus/retrievers/retriever.py +0 -42
  18. {janus_llm-4.0.0 → janus_llm-4.2.0}/LICENSE +0 -0
  19. {janus_llm-4.0.0 → janus_llm-4.2.0}/README.md +0 -0
  20. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/__main__.py +0 -0
  21. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/_tests/__init__.py +0 -0
  22. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/_tests/conftest.py +0 -0
  23. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/_tests/test_cli.py +0 -0
  24. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/_tests/__init__.py +0 -0
  25. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/aggregator.py +0 -0
  26. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/diagram.py +0 -0
  27. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/document.py +0 -0
  28. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/evaluate.py +0 -0
  29. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/requirements.py +0 -0
  30. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/converter/translate.py +0 -0
  31. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/__init__.py +0 -0
  32. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/_tests/__init__.py +0 -0
  33. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/_tests/test_collections.py +0 -0
  34. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/_tests/test_database.py +0 -0
  35. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/_tests/test_vectorize.py +0 -0
  36. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/collections.py +0 -0
  37. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/database.py +0 -0
  38. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/embedding_models_info.py +0 -0
  39. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/embedding/vectorize.py +0 -0
  40. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/__init__.py +0 -0
  41. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/_tests/__init__.py +0 -0
  42. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/_tests/test_combine.py +0 -0
  43. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/_tests/test_splitter.py +0 -0
  44. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/alc/__init__.py +0 -0
  45. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/alc/_tests/__init__.py +0 -0
  46. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/alc/_tests/test_alc.py +0 -0
  47. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/alc/alc.py +0 -0
  48. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/binary/__init__.py +0 -0
  49. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/binary/_tests/__init__.py +0 -0
  50. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/binary/_tests/test_binary.py +0 -0
  51. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/binary/binary.py +0 -0
  52. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/binary/reveng/decompile_script.py +0 -0
  53. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/block.py +0 -0
  54. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/file.py +0 -0
  55. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/mumps/__init__.py +0 -0
  56. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/mumps/_tests/__init__.py +0 -0
  57. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/mumps/_tests/test_mumps.py +0 -0
  58. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/mumps/mumps.py +0 -0
  59. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/mumps/patterns.py +0 -0
  60. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/naive/__init__.py +0 -0
  61. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/naive/basic_splitter.py +0 -0
  62. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/naive/chunk_splitter.py +0 -0
  63. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/naive/registry.py +0 -0
  64. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/naive/simple_ast.py +0 -0
  65. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/naive/tag_splitter.py +0 -0
  66. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/node.py +0 -0
  67. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/splitter.py +0 -0
  68. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/treesitter/__init__.py +0 -0
  69. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/treesitter/_tests/__init__.py +0 -0
  70. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/treesitter/_tests/test_treesitter.py +0 -0
  71. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/language/treesitter/treesitter.py +0 -0
  72. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/llm/__init__.py +0 -0
  73. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/__init__.py +0 -0
  74. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/__init__.py +0 -0
  75. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/reference.py +0 -0
  76. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/target.py +0 -0
  77. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_bleu.py +0 -0
  78. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_chrf.py +0 -0
  79. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_file_pairing.py +0 -0
  80. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_llm.py +0 -0
  81. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_reading.py +0 -0
  82. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_rouge_score.py +0 -0
  83. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_similarity_score.py +0 -0
  84. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_treesitter_metrics.py +0 -0
  85. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/bleu.py +0 -0
  86. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/chrf.py +0 -0
  87. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/cli.py +0 -0
  88. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/complexity_metrics.py +0 -0
  89. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/file_pairing.py +0 -0
  90. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/llm_metrics.py +0 -0
  91. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/metric.py +0 -0
  92. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/reading.py +0 -0
  93. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/rouge_score.py +0 -0
  94. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/similarity.py +0 -0
  95. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/metrics/splitting.py +0 -0
  96. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/__init__.py +0 -0
  97. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/_tests/__init__.py +0 -0
  98. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/_tests/test_code_parser.py +0 -0
  99. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/code_parser.py +0 -0
  100. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/doc_parser.py +0 -0
  101. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/eval_parser.py +0 -0
  102. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/parser.py +0 -0
  103. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/reqs_parser.py +0 -0
  104. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/parsers/uml.py +0 -0
  105. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/prompts/__init__.py +0 -0
  106. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/prompts/prompt.py +0 -0
  107. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/utils/__init__.py +0 -0
  108. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/utils/_tests/__init__.py +0 -0
  109. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/utils/_tests/test_logger.py +0 -0
  110. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/utils/_tests/test_progress.py +0 -0
  111. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/utils/enums.py +0 -0
  112. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/utils/logger.py +0 -0
  113. {janus_llm-4.0.0 → janus_llm-4.2.0}/janus/utils/progress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: janus-llm
3
- Version: 4.0.0
3
+ Version: 4.2.0
4
4
  Summary: A transcoding library using LLMs.
5
5
  Home-page: https://github.com/janus-llm/janus-llm
6
6
  License: Apache 2.0
@@ -23,20 +23,28 @@ Requires-Dist: langchain-anthropic (>=0.1.15,<0.2.0)
23
23
  Requires-Dist: langchain-community (>=0.2.0,<0.3.0)
24
24
  Requires-Dist: langchain-core (>=0.2.0,<0.3.0)
25
25
  Requires-Dist: langchain-openai (>=0.1.8,<0.2.0)
26
+ Requires-Dist: langchain-unstructured (>=0.1.2,<0.2.0)
26
27
  Requires-Dist: nltk (>=3.8.1,<4.0.0)
27
28
  Requires-Dist: numpy (>=1.24.3,<2.0.0)
28
29
  Requires-Dist: openai (>=1.14.0,<2.0.0)
30
+ Requires-Dist: pi-heif (>=0.20.0,<0.21.0)
29
31
  Requires-Dist: py-readability-metrics (>=1.4.5,<2.0.0)
30
32
  Requires-Dist: py-rouge (>=1.1,<2.0)
33
+ Requires-Dist: pytesseract (>=0.3.13,<0.4.0)
31
34
  Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
32
35
  Requires-Dist: rich (>=13.7.1,<14.0.0)
33
36
  Requires-Dist: sacrebleu (>=2.4.1,<3.0.0)
37
+ Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
34
38
  Requires-Dist: sentence-transformers (>=2.6.1,<3.0.0) ; extra == "hf-local" or extra == "all"
39
+ Requires-Dist: tesseract (>=0.1.3,<0.2.0)
35
40
  Requires-Dist: text-generation (>=0.6.0,<0.7.0)
36
41
  Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
37
42
  Requires-Dist: transformers (>=4.31.0,<5.0.0)
38
43
  Requires-Dist: tree-sitter (>=0.21.0,<0.22.0)
39
44
  Requires-Dist: typer (>=0.9.0,<0.10.0)
45
+ Requires-Dist: unstructured (>=0.15.9,<0.16.0)
46
+ Requires-Dist: unstructured-inference (>=0.7.36,<0.8.0)
47
+ Requires-Dist: unstructured-pytesseract (>=0.3.13,<0.4.0)
40
48
  Project-URL: Documentation, https://janus-llm.github.io/janus-llm
41
49
  Project-URL: Repository, https://github.com/janus-llm/janus-llm
42
50
  Description-Content-Type: text/markdown
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
5
5
  from janus.converter.translate import Translator
6
6
  from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "4.0.0"
8
+ __version__ = "4.2.0"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
@@ -13,10 +13,13 @@ from rich.console import Console
13
13
  from rich.prompt import Confirm
14
14
  from typing_extensions import Annotated
15
15
 
16
+ import janus.refiners.refiner
17
+ import janus.refiners.uml
16
18
  from janus.converter.aggregator import Aggregator
17
19
  from janus.converter.converter import Converter
18
20
  from janus.converter.diagram import DiagramGenerator
19
21
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
22
+ from janus.converter.partition import Partitioner
20
23
  from janus.converter.requirements import RequirementsDocumenter
21
24
  from janus.converter.translate import Translator
22
25
  from janus.embedding.collections import Collections
@@ -39,11 +42,11 @@ from janus.llm.models_info import (
39
42
  MODEL_TYPE_CONSTRUCTORS,
40
43
  MODEL_TYPES,
41
44
  TOKEN_LIMITS,
45
+ azure_models,
42
46
  bedrock_models,
43
47
  openai_models,
44
48
  )
45
49
  from janus.metrics.cli import evaluate
46
- from janus.refiners.refiner import REFINERS
47
50
  from janus.utils.enums import LANGUAGES
48
51
  from janus.utils.logger import create_logger
49
52
 
@@ -68,6 +71,18 @@ with open(db_file, "r") as f:
68
71
  collections_config_file = Path(db_loc) / "collections.json"
69
72
 
70
73
 
74
+ def get_subclasses(cls):
75
+ return set(cls.__subclasses__()).union(
76
+ set(s for c in cls.__subclasses__() for s in get_subclasses(c))
77
+ )
78
+
79
+
80
+ REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
81
+ {janus.refiners.refiner.JanusRefiner}
82
+ )
83
+ REFINERS = {r.__name__: r for r in REFINER_TYPES}
84
+
85
+
71
86
  def get_collections_config():
72
87
  if collections_config_file.exists():
73
88
  with open(collections_config_file, "r") as f:
@@ -243,22 +258,23 @@ def translate(
243
258
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
244
259
  ),
245
260
  ] = "file",
246
- refiner_type: Annotated[
247
- str,
261
+ refiner_types: Annotated[
262
+ list[str],
248
263
  typer.Option(
249
264
  "-r",
250
265
  "--refiner",
251
- help="Name of custom refiner to use",
266
+ help="List of refiner types to use. Add -r for each refiner to use in\
267
+ refinement chain",
252
268
  click_type=click.Choice(list(REFINERS.keys())),
253
269
  ),
254
- ] = "none",
270
+ ] = ["JanusRefiner"],
255
271
  retriever_type: Annotated[
256
272
  str,
257
273
  typer.Option(
258
274
  "-R",
259
275
  "--retriever",
260
276
  help="Name of custom retriever to use",
261
- click_type=click.Choice(["active_usings"]),
277
+ click_type=click.Choice(["active_usings", "language_docs"]),
262
278
  ),
263
279
  ] = None,
264
280
  max_tokens: Annotated[
@@ -271,6 +287,7 @@ def translate(
271
287
  ),
272
288
  ] = None,
273
289
  ):
290
+ refiner_types = [REFINERS[r] for r in refiner_types]
274
291
  try:
275
292
  target_language, target_version = target_lang.split("-")
276
293
  except ValueError:
@@ -295,7 +312,7 @@ def translate(
295
312
  db_path=db_loc,
296
313
  db_config=collections_config,
297
314
  splitter_type=splitter_type,
298
- refiner_type=refiner_type,
315
+ refiner_types=refiner_types,
299
316
  retriever_type=retriever_type,
300
317
  )
301
318
  translator.translate(input_dir, output_dir, overwrite, collection)
@@ -401,22 +418,23 @@ def document(
401
418
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
402
419
  ),
403
420
  ] = "file",
404
- refiner_type: Annotated[
405
- str,
421
+ refiner_types: Annotated[
422
+ list[str],
406
423
  typer.Option(
407
424
  "-r",
408
425
  "--refiner",
409
- help="Name of custom refiner to use",
426
+ help="List of refiner types to use. Add -r for each refiner to use in\
427
+ refinement chain",
410
428
  click_type=click.Choice(list(REFINERS.keys())),
411
429
  ),
412
- ] = "none",
430
+ ] = ["JanusRefiner"],
413
431
  retriever_type: Annotated[
414
432
  str,
415
433
  typer.Option(
416
434
  "-R",
417
435
  "--retriever",
418
436
  help="Name of custom retriever to use",
419
- click_type=click.Choice(["active_usings"]),
437
+ click_type=click.Choice(["active_usings", "language_docs"]),
420
438
  ),
421
439
  ] = None,
422
440
  max_tokens: Annotated[
@@ -429,6 +447,7 @@ def document(
429
447
  ),
430
448
  ] = None,
431
449
  ):
450
+ refiner_types = [REFINERS[r] for r in refiner_types]
432
451
  model_arguments = dict(temperature=temperature)
433
452
  collections_config = get_collections_config()
434
453
  kwargs = dict(
@@ -440,7 +459,7 @@ def document(
440
459
  db_path=db_loc,
441
460
  db_config=collections_config,
442
461
  splitter_type=splitter_type,
443
- refiner_type=refiner_type,
462
+ refiner_types=refiner_types,
444
463
  retriever_type=retriever_type,
445
464
  )
446
465
  if doc_mode == "madlibs":
@@ -457,12 +476,6 @@ def document(
457
476
  documenter.translate(input_dir, output_dir, overwrite, collection)
458
477
 
459
478
 
460
- def get_subclasses(cls):
461
- return set(cls.__subclasses__()).union(
462
- set(s for c in cls.__subclasses__() for s in get_subclasses(c))
463
- )
464
-
465
-
466
479
  @app.command()
467
480
  def aggregate(
468
481
  input_dir: Annotated[
@@ -577,6 +590,103 @@ def aggregate(
577
590
  aggregator.translate(input_dir, output_dir, overwrite, collection)
578
591
 
579
592
 
593
+ @app.command(
594
+ help="Partition input code using an LLM.",
595
+ no_args_is_help=True,
596
+ )
597
+ def partition(
598
+ input_dir: Annotated[
599
+ Path,
600
+ typer.Option(
601
+ "--input",
602
+ "-i",
603
+ help="The directory containing the source code to be partitioned. ",
604
+ ),
605
+ ],
606
+ language: Annotated[
607
+ str,
608
+ typer.Option(
609
+ "--language",
610
+ "-l",
611
+ help="The language of the source code.",
612
+ click_type=click.Choice(sorted(LANGUAGES)),
613
+ ),
614
+ ],
615
+ output_dir: Annotated[
616
+ Path,
617
+ typer.Option(
618
+ "--output-dir", "-o", help="The directory to store the partitioned code in."
619
+ ),
620
+ ],
621
+ llm_name: Annotated[
622
+ str,
623
+ typer.Option(
624
+ "--llm",
625
+ "-L",
626
+ help="The custom name of the model set with 'janus llm add'.",
627
+ ),
628
+ ] = "gpt-4o",
629
+ max_prompts: Annotated[
630
+ int,
631
+ typer.Option(
632
+ "--max-prompts",
633
+ "-m",
634
+ help="The maximum number of times to prompt a model on one functional block "
635
+ "before exiting the application. This is to prevent wasting too much money.",
636
+ ),
637
+ ] = 10,
638
+ overwrite: Annotated[
639
+ bool,
640
+ typer.Option(
641
+ "--overwrite/--preserve",
642
+ help="Whether to overwrite existing files in the output directory",
643
+ ),
644
+ ] = False,
645
+ temperature: Annotated[
646
+ float,
647
+ typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
648
+ ] = 0.7,
649
+ splitter_type: Annotated[
650
+ str,
651
+ typer.Option(
652
+ "-S",
653
+ "--splitter",
654
+ help="Name of custom splitter to use",
655
+ click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
656
+ ),
657
+ ] = "file",
658
+ max_tokens: Annotated[
659
+ int,
660
+ typer.Option(
661
+ "--max-tokens",
662
+ "-M",
663
+ help="The maximum number of tokens the model will take in. "
664
+ "If unspecificed, model's default max will be used.",
665
+ ),
666
+ ] = None,
667
+ partition_token_limit: Annotated[
668
+ int,
669
+ typer.Option(
670
+ "--partition-tokens",
671
+ "-pt",
672
+ help="The limit on the number of tokens per partition.",
673
+ ),
674
+ ] = 8192,
675
+ ):
676
+ model_arguments = dict(temperature=temperature)
677
+ kwargs = dict(
678
+ model=llm_name,
679
+ model_arguments=model_arguments,
680
+ source_language=language,
681
+ max_prompts=max_prompts,
682
+ max_tokens=max_tokens,
683
+ splitter_type=splitter_type,
684
+ partition_token_limit=partition_token_limit,
685
+ )
686
+ partitioner = Partitioner(**kwargs)
687
+ partitioner.translate(input_dir, output_dir, overwrite)
688
+
689
+
580
690
  @app.command(
581
691
  help="Diagram input code using an LLM.",
582
692
  no_args_is_help=True,
@@ -666,25 +776,27 @@ def diagram(
666
776
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
667
777
  ),
668
778
  ] = "file",
669
- refiner_type: Annotated[
670
- str,
779
+ refiner_types: Annotated[
780
+ list[str],
671
781
  typer.Option(
672
782
  "-r",
673
783
  "--refiner",
674
- help="Name of custom refiner to use",
784
+ help="List of refiner types to use. Add -r for each refiner to use in\
785
+ refinement chain",
675
786
  click_type=click.Choice(list(REFINERS.keys())),
676
787
  ),
677
- ] = "none",
788
+ ] = ["JanusRefiner"],
678
789
  retriever_type: Annotated[
679
790
  str,
680
791
  typer.Option(
681
792
  "-R",
682
793
  "--retriever",
683
794
  help="Name of custom retriever to use",
684
- click_type=click.Choice(["active_usings"]),
795
+ click_type=click.Choice(["active_usings", "language_docs"]),
685
796
  ),
686
797
  ] = None,
687
798
  ):
799
+ refiner_types = [REFINERS[r] for r in refiner_types]
688
800
  model_arguments = dict(temperature=temperature)
689
801
  collections_config = get_collections_config()
690
802
  diagram_generator = DiagramGenerator(
@@ -695,7 +807,7 @@ def diagram(
695
807
  db_path=db_loc,
696
808
  db_config=collections_config,
697
809
  splitter_type=splitter_type,
698
- refiner_type=refiner_type,
810
+ refiner_types=refiner_types,
699
811
  retriever_type=retriever_type,
700
812
  diagram_type=diagram_type,
701
813
  add_documentation=add_documentation,
@@ -952,7 +1064,7 @@ def llm_add(
952
1064
  help="The type of the model",
953
1065
  click_type=click.Choice(sorted(list(MODEL_TYPE_CONSTRUCTORS.keys()))),
954
1066
  ),
955
- ] = "OpenAI",
1067
+ ] = "Azure",
956
1068
  ):
957
1069
  if not MODEL_CONFIG_DIR.exists():
958
1070
  MODEL_CONFIG_DIR.mkdir(parents=True)
@@ -996,6 +1108,7 @@ def llm_add(
996
1108
  "model_cost": {"input": in_cost, "output": out_cost},
997
1109
  }
998
1110
  elif model_type == "OpenAI":
1111
+ print("DEPRECATED: Use 'Azure' instead. CTRL+C to exit.")
999
1112
  model_id = typer.prompt(
1000
1113
  "Enter the model ID (list model IDs with `janus llm ls -a`)",
1001
1114
  default="gpt-4o",
@@ -1017,6 +1130,28 @@ def llm_add(
1017
1130
  "token_limit": max_tokens,
1018
1131
  "model_cost": model_cost,
1019
1132
  }
1133
+ elif model_type == "Azure":
1134
+ model_id = typer.prompt(
1135
+ "Enter the model ID (list model IDs with `janus llm ls -a`)",
1136
+ default="gpt-4o",
1137
+ type=click.Choice(azure_models),
1138
+ show_choices=False,
1139
+ )
1140
+ params = dict(
1141
+ # Azure uses the "azure_deployment" key for what we're calling "long_model_id"
1142
+ azure_deployment=MODEL_ID_TO_LONG_ID[model_id],
1143
+ temperature=0.7,
1144
+ n=1,
1145
+ )
1146
+ max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
1147
+ model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
1148
+ cfg = {
1149
+ "model_type": model_type,
1150
+ "model_id": model_id,
1151
+ "model_args": params,
1152
+ "token_limit": max_tokens,
1153
+ "model_cost": model_cost,
1154
+ }
1020
1155
  elif model_type == "BedrockChat" or model_type == "Bedrock":
1021
1156
  model_id = typer.prompt(
1022
1157
  "Enter the model ID (list model IDs with `janus llm ls -a`)",
@@ -2,5 +2,6 @@ from janus.converter.converter import Converter
2
2
  from janus.converter.diagram import DiagramGenerator
3
3
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
4
4
  from janus.converter.evaluate import Evaluator
5
+ from janus.converter.partition import Partitioner
5
6
  from janus.converter.requirements import RequirementsDocumenter
6
7
  from janus.converter.translate import Translator
@@ -90,14 +90,14 @@ class TestDiagramGenerator(unittest.TestCase):
90
90
  def setUp(self):
91
91
  """Set up the tests."""
92
92
  self.diagram_generator = DiagramGenerator(
93
- model="gpt-4o",
93
+ model="gpt-4o-mini",
94
94
  source_language="fortran",
95
95
  diagram_type="Activity",
96
96
  )
97
97
 
98
98
  def test_init(self):
99
99
  """Test __init__ method."""
100
- self.assertEqual(self.diagram_generator._model_name, "gpt-4o")
100
+ self.assertEqual(self.diagram_generator._model_name, "gpt-4o-mini")
101
101
  self.assertEqual(self.diagram_generator._source_language, "fortran")
102
102
  self.assertEqual(self.diagram_generator._diagram_type, "Activity")
103
103
 
@@ -6,7 +6,12 @@ from typing import Any
6
6
 
7
7
  from langchain_core.exceptions import OutputParserException
8
8
  from langchain_core.prompts import ChatPromptTemplate
9
- from langchain_core.runnables import Runnable, RunnableParallel, RunnablePassthrough
9
+ from langchain_core.runnables import (
10
+ Runnable,
11
+ RunnableLambda,
12
+ RunnableParallel,
13
+ RunnablePassthrough,
14
+ )
10
15
  from openai import BadRequestError, RateLimitError
11
16
  from pydantic import ValidationError
12
17
 
@@ -23,15 +28,14 @@ from janus.language.splitter import (
23
28
  from janus.llm.model_callbacks import get_model_callback
24
29
  from janus.llm.models_info import MODEL_PROMPT_ENGINES, JanusModel, load_model
25
30
  from janus.parsers.parser import GenericParser, JanusParser
26
- from janus.refiners.refiner import (
27
- FixParserExceptions,
28
- HallucinationRefiner,
29
- JanusRefiner,
30
- ReflectionRefiner,
31
- )
31
+ from janus.refiners.refiner import JanusRefiner
32
32
 
33
33
  # from janus.refiners.refiner import BasicRefiner, Refiner
34
- from janus.retrievers.retriever import ActiveUsingsRetriever, JanusRetriever
34
+ from janus.retrievers.retriever import (
35
+ ActiveUsingsRetriever,
36
+ JanusRetriever,
37
+ LanguageDocsRetriever,
38
+ )
35
39
  from janus.utils.enums import LANGUAGES
36
40
  from janus.utils.logger import create_logger
37
41
 
@@ -78,7 +82,7 @@ class Converter:
78
82
  protected_node_types: tuple[str, ...] = (),
79
83
  prune_node_types: tuple[str, ...] = (),
80
84
  splitter_type: str = "file",
81
- refiner_type: str | None = None,
85
+ refiner_types: list[type[JanusRefiner]] = [JanusRefiner],
82
86
  retriever_type: str | None = None,
83
87
  ) -> None:
84
88
  """Initialize a Converter instance.
@@ -105,6 +109,7 @@ class Converter:
105
109
  - None
106
110
  retriever_type: The type of retriever to use. Valid values:
107
111
  - "active_usings"
112
+ - "language_docs"
108
113
  - None
109
114
  """
110
115
  self._changed_attrs: set = set()
@@ -133,10 +138,11 @@ class Converter:
133
138
  self._prompt: ChatPromptTemplate
134
139
 
135
140
  self._parser: JanusParser = GenericParser()
141
+ self._base_parser: JanusParser = GenericParser()
136
142
  self._combiner: Combiner = Combiner()
137
143
 
138
144
  self._splitter_type: str
139
- self._refiner_type: str | None
145
+ self._refiner_types: list[type[JanusRefiner]]
140
146
  self._retriever_type: str | None
141
147
 
142
148
  self._splitter: Splitter
@@ -144,7 +150,7 @@ class Converter:
144
150
  self._retriever: JanusRetriever
145
151
 
146
152
  self.set_splitter(splitter_type=splitter_type)
147
- self.set_refiner(refiner_type=refiner_type)
153
+ self.set_refiner_types(refiner_types=refiner_types)
148
154
  self.set_retriever(retriever_type=retriever_type)
149
155
  self.set_model(model_name=model, **model_arguments)
150
156
  self.set_prompt(prompt_template=prompt_template)
@@ -170,7 +176,7 @@ class Converter:
170
176
  self._load_model()
171
177
  self._load_prompt()
172
178
  self._load_retriever()
173
- self._load_refiner()
179
+ self._load_refiner_chain()
174
180
  self._load_splitter()
175
181
  self._load_vectorizer()
176
182
  self._load_chain()
@@ -210,13 +216,13 @@ class Converter:
210
216
 
211
217
  self._splitter_type = splitter_type
212
218
 
213
- def set_refiner(self, refiner_type: str | None) -> None:
219
+ def set_refiner_types(self, refiner_types: list[type[JanusRefiner]]) -> None:
214
220
  """Validate and set the refiner type
215
221
 
216
222
  Arguments:
217
223
  refiner_type: the type of refiner to use
218
224
  """
219
- self._refiner_type = refiner_type
225
+ self._refiner_types = refiner_types
220
226
 
221
227
  def set_retriever(self, retriever_type: str | None) -> None:
222
228
  """Validate and set the retriever type
@@ -355,48 +361,40 @@ class Converter:
355
361
  def _load_retriever(self):
356
362
  if self._retriever_type == "active_usings":
357
363
  self._retriever = ActiveUsingsRetriever()
364
+ elif self._retriever_type == "language_docs":
365
+ self._retriever = LanguageDocsRetriever(self._llm, self._source_language)
358
366
  else:
359
367
  self._retriever = JanusRetriever()
360
368
 
361
- @run_if_changed("_refiner_type", "_model_name", "max_prompts", "_parser", "_llm")
362
- def _load_refiner(self) -> None:
363
- """Load the refiner according to this instance's attributes.
364
-
365
- If the relevant fields have not been changed since the last time this method was
366
- called, nothing happens.
367
- """
368
- if self._refiner_type == "parser":
369
- self._refiner = FixParserExceptions(
370
- llm=self._llm,
371
- parser=self._parser,
372
- max_retries=self.max_prompts,
373
- )
374
- elif self._refiner_type == "reflection":
375
- self._refiner = ReflectionRefiner(
376
- llm=self._llm,
377
- parser=self._parser,
378
- max_retries=self.max_prompts,
369
+ @run_if_changed("_refiner_types", "_model_name", "max_prompts", "_parser")
370
+ def _load_refiner_chain(self) -> None:
371
+ self._refiner_chain = RunnableParallel(
372
+ completion=self._llm,
373
+ prompt_value=RunnablePassthrough(),
374
+ )
375
+ for refiner_type in self._refiner_types[:-1]:
376
+ # NOTE: Do NOT remove refiner_type=refiner_type from lambda.
377
+ # Due to lambda capture, must be present or chain will not
378
+ # be correctly constructed.
379
+ self._refiner_chain = self._refiner_chain | RunnableParallel(
380
+ completion=lambda x, refiner_type=refiner_type: refiner_type(
381
+ llm=self._llm,
382
+ parser=self._base_parser,
383
+ max_retries=self.max_prompts,
384
+ ).parse_completion(**x),
385
+ prompt_value=lambda x: x["prompt_value"],
379
386
  )
380
- elif self._refiner_type == "hallucination":
381
- self._refiner = HallucinationRefiner(
387
+ self._refiner_chain = self._refiner_chain | RunnableLambda(
388
+ lambda x: self._refiner_types[-1](
382
389
  llm=self._llm,
383
390
  parser=self._parser,
384
391
  max_retries=self.max_prompts,
385
- )
386
- else:
387
- self._refiner = JanusRefiner(parser=self._parser)
392
+ ).parse_completion(**x)
393
+ )
388
394
 
389
- @run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner")
395
+ @run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner_chain")
390
396
  def _load_chain(self):
391
- self.chain = (
392
- self._input_runnable()
393
- | self._prompt
394
- | RunnableParallel(
395
- completion=self._llm,
396
- prompt_value=RunnablePassthrough(),
397
- )
398
- | self._refiner.parse_runnable
399
- )
397
+ self.chain = self._input_runnable() | self._prompt | self._refiner_chain
400
398
 
401
399
  def _input_runnable(self) -> Runnable:
402
400
  return RunnableParallel(
@@ -0,0 +1,27 @@
1
+ from pathlib import Path
2
+
3
+ from janus.converter.converter import Converter
4
+ from janus.language.block import TranslatedCodeBlock
5
+ from janus.parsers.partition_parser import PartitionParser
6
+ from janus.utils.logger import create_logger
7
+
8
+ log = create_logger(__name__)
9
+
10
+
11
+ class Partitioner(Converter):
12
+ def __init__(self, partition_token_limit: int, **kwargs):
13
+ super().__init__(**kwargs)
14
+ self.set_prompt("partition")
15
+ self._load_model()
16
+ self._parser = PartitionParser(
17
+ token_limit=partition_token_limit,
18
+ model=self._llm,
19
+ )
20
+ self._target_language = self._source_language
21
+ self._target_suffix = self._source_suffix
22
+ self._load_parameters()
23
+
24
+ def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
25
+ output_str = self._parser.parse_combined_output(block.complete_text)
26
+ out_path.parent.mkdir(parents=True, exist_ok=True)
27
+ out_path.write_text(output_str, encoding="utf-8")
@@ -1,3 +1,5 @@
1
+ import re
2
+
1
3
  from janus.language.block import CodeBlock, TranslatedCodeBlock
2
4
  from janus.language.file import FileManager
3
5
  from janus.utils.logger import create_logger
@@ -90,3 +92,23 @@ class ChunkCombiner(Combiner):
90
92
  root: The functional code block to combine with its children.
91
93
  """
92
94
  return root
95
+
96
+
97
+ class PartitionCombiner(Combiner):
98
+ @staticmethod
99
+ def combine(root: CodeBlock) -> None:
100
+ """A combiner which inserts partition tags between code blocks"""
101
+ queue = [root]
102
+ while queue:
103
+ block = queue.pop(0)
104
+ if block.children:
105
+ queue.extend(block.children)
106
+ else:
107
+ block.affixes = (block.prefix, block.suffix + "\n<JANUS_PARTITION>\n")
108
+
109
+ super(PartitionCombiner, PartitionCombiner).combine(root)
110
+ root.text = re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.text)
111
+ root.affixes = (
112
+ root.prefix,
113
+ re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.suffix),
114
+ )
@@ -23,6 +23,11 @@ openai_model_reroutes = {
23
23
  "gpt-3.5-turbo-16k-0613": "gpt-3.5-turbo-0125",
24
24
  }
25
25
 
26
+ azure_model_reroutes = {
27
+ "gpt-4o": "gpt-4o-2024-08-06",
28
+ "gpt-4o-mini": "gpt-4o-mini",
29
+ "gpt-3.5-turbo-16k": "gpt35-turbo-16k",
30
+ }
26
31
 
27
32
  # Updated 2024-06-21
28
33
  COST_PER_1K_TOKENS: dict[str, dict[str, float]] = {
@@ -31,6 +36,10 @@ COST_PER_1K_TOKENS: dict[str, dict[str, float]] = {
31
36
  "gpt-4-0125-preview": {"input": 0.01, "output": 0.03},
32
37
  "gpt-4-0613": {"input": 0.03, "output": 0.06},
33
38
  "gpt-4o-2024-05-13": {"input": 0.005, "output": 0.015},
39
+ "gpt-4o-2024-08-06": {"input": 0.00275, "output": 0.011},
40
+ "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
41
+ "gpt35-turbo-16k": {"input": 0.003, "output": 0.004},
42
+ "gpt-35-turbo-16k": {"input": 0.003, "output": 0.004},
34
43
  "anthropic.claude-v2": {"input": 0.008, "output": 0.024},
35
44
  "anthropic.claude-instant-v1": {"input": 0.0008, "output": 0.0024},
36
45
  "anthropic.claude-3-haiku-20240307-v1:0": {"input": 0.00025, "output": 0.00125},