janus-llm 4.1.0__tar.gz → 4.2.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. {janus_llm-4.1.0 → janus_llm-4.2.0}/PKG-INFO +9 -1
  2. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/__init__.py +1 -1
  3. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/cli.py +136 -25
  4. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/__init__.py +1 -0
  5. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/converter.py +45 -47
  6. janus_llm-4.2.0/janus/converter/partition.py +27 -0
  7. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/combine.py +22 -0
  8. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/llm/models_info.py +3 -0
  9. janus_llm-4.2.0/janus/parsers/partition_parser.py +136 -0
  10. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/refiners/refiner.py +8 -12
  11. janus_llm-4.2.0/janus/refiners/uml.py +33 -0
  12. janus_llm-4.2.0/janus/retrievers/retriever.py +102 -0
  13. janus_llm-4.2.0/janus/utils/pdf_docs_reader.py +134 -0
  14. {janus_llm-4.1.0 → janus_llm-4.2.0}/pyproject.toml +9 -1
  15. janus_llm-4.1.0/janus/retrievers/retriever.py +0 -42
  16. {janus_llm-4.1.0 → janus_llm-4.2.0}/LICENSE +0 -0
  17. {janus_llm-4.1.0 → janus_llm-4.2.0}/README.md +0 -0
  18. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/__main__.py +0 -0
  19. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/_tests/__init__.py +0 -0
  20. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/_tests/conftest.py +0 -0
  21. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/_tests/test_cli.py +0 -0
  22. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/_tests/__init__.py +0 -0
  23. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/_tests/test_translate.py +0 -0
  24. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/aggregator.py +0 -0
  25. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/diagram.py +0 -0
  26. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/document.py +0 -0
  27. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/evaluate.py +0 -0
  28. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/requirements.py +0 -0
  29. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/converter/translate.py +0 -0
  30. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/__init__.py +0 -0
  31. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/_tests/__init__.py +0 -0
  32. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/_tests/test_collections.py +0 -0
  33. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/_tests/test_database.py +0 -0
  34. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/_tests/test_vectorize.py +0 -0
  35. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/collections.py +0 -0
  36. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/database.py +0 -0
  37. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/embedding_models_info.py +0 -0
  38. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/embedding/vectorize.py +0 -0
  39. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/__init__.py +0 -0
  40. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/_tests/__init__.py +0 -0
  41. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/_tests/test_combine.py +0 -0
  42. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/_tests/test_splitter.py +0 -0
  43. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/alc/__init__.py +0 -0
  44. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/alc/_tests/__init__.py +0 -0
  45. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/alc/_tests/test_alc.py +0 -0
  46. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/alc/alc.py +0 -0
  47. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/binary/__init__.py +0 -0
  48. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/binary/_tests/__init__.py +0 -0
  49. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/binary/_tests/test_binary.py +0 -0
  50. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/binary/binary.py +0 -0
  51. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/binary/reveng/decompile_script.py +0 -0
  52. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/block.py +0 -0
  53. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/file.py +0 -0
  54. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/mumps/__init__.py +0 -0
  55. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/mumps/_tests/__init__.py +0 -0
  56. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/mumps/_tests/test_mumps.py +0 -0
  57. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/mumps/mumps.py +0 -0
  58. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/mumps/patterns.py +0 -0
  59. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/naive/__init__.py +0 -0
  60. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/naive/basic_splitter.py +0 -0
  61. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/naive/chunk_splitter.py +0 -0
  62. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/naive/registry.py +0 -0
  63. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/naive/simple_ast.py +0 -0
  64. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/naive/tag_splitter.py +0 -0
  65. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/node.py +0 -0
  66. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/splitter.py +0 -0
  67. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/treesitter/__init__.py +0 -0
  68. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/treesitter/_tests/__init__.py +0 -0
  69. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/treesitter/_tests/test_treesitter.py +0 -0
  70. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/language/treesitter/treesitter.py +0 -0
  71. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/llm/__init__.py +0 -0
  72. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/llm/model_callbacks.py +0 -0
  73. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/__init__.py +0 -0
  74. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/__init__.py +0 -0
  75. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/reference.py +0 -0
  76. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/target.py +0 -0
  77. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_bleu.py +0 -0
  78. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_chrf.py +0 -0
  79. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_file_pairing.py +0 -0
  80. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_llm.py +0 -0
  81. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_reading.py +0 -0
  82. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_rouge_score.py +0 -0
  83. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_similarity_score.py +0 -0
  84. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/_tests/test_treesitter_metrics.py +0 -0
  85. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/bleu.py +0 -0
  86. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/chrf.py +0 -0
  87. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/cli.py +0 -0
  88. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/complexity_metrics.py +0 -0
  89. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/file_pairing.py +0 -0
  90. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/llm_metrics.py +0 -0
  91. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/metric.py +0 -0
  92. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/reading.py +0 -0
  93. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/rouge_score.py +0 -0
  94. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/similarity.py +0 -0
  95. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/metrics/splitting.py +0 -0
  96. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/__init__.py +0 -0
  97. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/_tests/__init__.py +0 -0
  98. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/_tests/test_code_parser.py +0 -0
  99. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/code_parser.py +0 -0
  100. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/doc_parser.py +0 -0
  101. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/eval_parser.py +0 -0
  102. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/parser.py +0 -0
  103. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/reqs_parser.py +0 -0
  104. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/parsers/uml.py +0 -0
  105. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/prompts/__init__.py +0 -0
  106. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/prompts/prompt.py +0 -0
  107. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/utils/__init__.py +0 -0
  108. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/utils/_tests/__init__.py +0 -0
  109. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/utils/_tests/test_logger.py +0 -0
  110. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/utils/_tests/test_progress.py +0 -0
  111. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/utils/enums.py +0 -0
  112. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/utils/logger.py +0 -0
  113. {janus_llm-4.1.0 → janus_llm-4.2.0}/janus/utils/progress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: janus-llm
3
- Version: 4.1.0
3
+ Version: 4.2.0
4
4
  Summary: A transcoding library using LLMs.
5
5
  Home-page: https://github.com/janus-llm/janus-llm
6
6
  License: Apache 2.0
@@ -23,20 +23,28 @@ Requires-Dist: langchain-anthropic (>=0.1.15,<0.2.0)
23
23
  Requires-Dist: langchain-community (>=0.2.0,<0.3.0)
24
24
  Requires-Dist: langchain-core (>=0.2.0,<0.3.0)
25
25
  Requires-Dist: langchain-openai (>=0.1.8,<0.2.0)
26
+ Requires-Dist: langchain-unstructured (>=0.1.2,<0.2.0)
26
27
  Requires-Dist: nltk (>=3.8.1,<4.0.0)
27
28
  Requires-Dist: numpy (>=1.24.3,<2.0.0)
28
29
  Requires-Dist: openai (>=1.14.0,<2.0.0)
30
+ Requires-Dist: pi-heif (>=0.20.0,<0.21.0)
29
31
  Requires-Dist: py-readability-metrics (>=1.4.5,<2.0.0)
30
32
  Requires-Dist: py-rouge (>=1.1,<2.0)
33
+ Requires-Dist: pytesseract (>=0.3.13,<0.4.0)
31
34
  Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
32
35
  Requires-Dist: rich (>=13.7.1,<14.0.0)
33
36
  Requires-Dist: sacrebleu (>=2.4.1,<3.0.0)
37
+ Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
34
38
  Requires-Dist: sentence-transformers (>=2.6.1,<3.0.0) ; extra == "hf-local" or extra == "all"
39
+ Requires-Dist: tesseract (>=0.1.3,<0.2.0)
35
40
  Requires-Dist: text-generation (>=0.6.0,<0.7.0)
36
41
  Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
37
42
  Requires-Dist: transformers (>=4.31.0,<5.0.0)
38
43
  Requires-Dist: tree-sitter (>=0.21.0,<0.22.0)
39
44
  Requires-Dist: typer (>=0.9.0,<0.10.0)
45
+ Requires-Dist: unstructured (>=0.15.9,<0.16.0)
46
+ Requires-Dist: unstructured-inference (>=0.7.36,<0.8.0)
47
+ Requires-Dist: unstructured-pytesseract (>=0.3.13,<0.4.0)
40
48
  Project-URL: Documentation, https://janus-llm.github.io/janus-llm
41
49
  Project-URL: Repository, https://github.com/janus-llm/janus-llm
42
50
  Description-Content-Type: text/markdown
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
5
5
  from janus.converter.translate import Translator
6
6
  from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "4.1.0"
8
+ __version__ = "4.2.0"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
@@ -13,10 +13,13 @@ from rich.console import Console
13
13
  from rich.prompt import Confirm
14
14
  from typing_extensions import Annotated
15
15
 
16
+ import janus.refiners.refiner
17
+ import janus.refiners.uml
16
18
  from janus.converter.aggregator import Aggregator
17
19
  from janus.converter.converter import Converter
18
20
  from janus.converter.diagram import DiagramGenerator
19
21
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
22
+ from janus.converter.partition import Partitioner
20
23
  from janus.converter.requirements import RequirementsDocumenter
21
24
  from janus.converter.translate import Translator
22
25
  from janus.embedding.collections import Collections
@@ -44,7 +47,6 @@ from janus.llm.models_info import (
44
47
  openai_models,
45
48
  )
46
49
  from janus.metrics.cli import evaluate
47
- from janus.refiners.refiner import REFINERS
48
50
  from janus.utils.enums import LANGUAGES
49
51
  from janus.utils.logger import create_logger
50
52
 
@@ -69,6 +71,18 @@ with open(db_file, "r") as f:
69
71
  collections_config_file = Path(db_loc) / "collections.json"
70
72
 
71
73
 
74
+ def get_subclasses(cls):
75
+ return set(cls.__subclasses__()).union(
76
+ set(s for c in cls.__subclasses__() for s in get_subclasses(c))
77
+ )
78
+
79
+
80
+ REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
81
+ {janus.refiners.refiner.JanusRefiner}
82
+ )
83
+ REFINERS = {r.__name__: r for r in REFINER_TYPES}
84
+
85
+
72
86
  def get_collections_config():
73
87
  if collections_config_file.exists():
74
88
  with open(collections_config_file, "r") as f:
@@ -244,22 +258,23 @@ def translate(
244
258
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
245
259
  ),
246
260
  ] = "file",
247
- refiner_type: Annotated[
248
- str,
261
+ refiner_types: Annotated[
262
+ list[str],
249
263
  typer.Option(
250
264
  "-r",
251
265
  "--refiner",
252
- help="Name of custom refiner to use",
266
+ help="List of refiner types to use. Add -r for each refiner to use in\
267
+ refinement chain",
253
268
  click_type=click.Choice(list(REFINERS.keys())),
254
269
  ),
255
- ] = "none",
270
+ ] = ["JanusRefiner"],
256
271
  retriever_type: Annotated[
257
272
  str,
258
273
  typer.Option(
259
274
  "-R",
260
275
  "--retriever",
261
276
  help="Name of custom retriever to use",
262
- click_type=click.Choice(["active_usings"]),
277
+ click_type=click.Choice(["active_usings", "language_docs"]),
263
278
  ),
264
279
  ] = None,
265
280
  max_tokens: Annotated[
@@ -272,6 +287,7 @@ def translate(
272
287
  ),
273
288
  ] = None,
274
289
  ):
290
+ refiner_types = [REFINERS[r] for r in refiner_types]
275
291
  try:
276
292
  target_language, target_version = target_lang.split("-")
277
293
  except ValueError:
@@ -296,7 +312,7 @@ def translate(
296
312
  db_path=db_loc,
297
313
  db_config=collections_config,
298
314
  splitter_type=splitter_type,
299
- refiner_type=refiner_type,
315
+ refiner_types=refiner_types,
300
316
  retriever_type=retriever_type,
301
317
  )
302
318
  translator.translate(input_dir, output_dir, overwrite, collection)
@@ -402,22 +418,23 @@ def document(
402
418
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
403
419
  ),
404
420
  ] = "file",
405
- refiner_type: Annotated[
406
- str,
421
+ refiner_types: Annotated[
422
+ list[str],
407
423
  typer.Option(
408
424
  "-r",
409
425
  "--refiner",
410
- help="Name of custom refiner to use",
426
+ help="List of refiner types to use. Add -r for each refiner to use in\
427
+ refinement chain",
411
428
  click_type=click.Choice(list(REFINERS.keys())),
412
429
  ),
413
- ] = "none",
430
+ ] = ["JanusRefiner"],
414
431
  retriever_type: Annotated[
415
432
  str,
416
433
  typer.Option(
417
434
  "-R",
418
435
  "--retriever",
419
436
  help="Name of custom retriever to use",
420
- click_type=click.Choice(["active_usings"]),
437
+ click_type=click.Choice(["active_usings", "language_docs"]),
421
438
  ),
422
439
  ] = None,
423
440
  max_tokens: Annotated[
@@ -430,6 +447,7 @@ def document(
430
447
  ),
431
448
  ] = None,
432
449
  ):
450
+ refiner_types = [REFINERS[r] for r in refiner_types]
433
451
  model_arguments = dict(temperature=temperature)
434
452
  collections_config = get_collections_config()
435
453
  kwargs = dict(
@@ -441,7 +459,7 @@ def document(
441
459
  db_path=db_loc,
442
460
  db_config=collections_config,
443
461
  splitter_type=splitter_type,
444
- refiner_type=refiner_type,
462
+ refiner_types=refiner_types,
445
463
  retriever_type=retriever_type,
446
464
  )
447
465
  if doc_mode == "madlibs":
@@ -458,12 +476,6 @@ def document(
458
476
  documenter.translate(input_dir, output_dir, overwrite, collection)
459
477
 
460
478
 
461
- def get_subclasses(cls):
462
- return set(cls.__subclasses__()).union(
463
- set(s for c in cls.__subclasses__() for s in get_subclasses(c))
464
- )
465
-
466
-
467
479
  @app.command()
468
480
  def aggregate(
469
481
  input_dir: Annotated[
@@ -578,6 +590,103 @@ def aggregate(
578
590
  aggregator.translate(input_dir, output_dir, overwrite, collection)
579
591
 
580
592
 
593
+ @app.command(
594
+ help="Partition input code using an LLM.",
595
+ no_args_is_help=True,
596
+ )
597
+ def partition(
598
+ input_dir: Annotated[
599
+ Path,
600
+ typer.Option(
601
+ "--input",
602
+ "-i",
603
+ help="The directory containing the source code to be partitioned. ",
604
+ ),
605
+ ],
606
+ language: Annotated[
607
+ str,
608
+ typer.Option(
609
+ "--language",
610
+ "-l",
611
+ help="The language of the source code.",
612
+ click_type=click.Choice(sorted(LANGUAGES)),
613
+ ),
614
+ ],
615
+ output_dir: Annotated[
616
+ Path,
617
+ typer.Option(
618
+ "--output-dir", "-o", help="The directory to store the partitioned code in."
619
+ ),
620
+ ],
621
+ llm_name: Annotated[
622
+ str,
623
+ typer.Option(
624
+ "--llm",
625
+ "-L",
626
+ help="The custom name of the model set with 'janus llm add'.",
627
+ ),
628
+ ] = "gpt-4o",
629
+ max_prompts: Annotated[
630
+ int,
631
+ typer.Option(
632
+ "--max-prompts",
633
+ "-m",
634
+ help="The maximum number of times to prompt a model on one functional block "
635
+ "before exiting the application. This is to prevent wasting too much money.",
636
+ ),
637
+ ] = 10,
638
+ overwrite: Annotated[
639
+ bool,
640
+ typer.Option(
641
+ "--overwrite/--preserve",
642
+ help="Whether to overwrite existing files in the output directory",
643
+ ),
644
+ ] = False,
645
+ temperature: Annotated[
646
+ float,
647
+ typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
648
+ ] = 0.7,
649
+ splitter_type: Annotated[
650
+ str,
651
+ typer.Option(
652
+ "-S",
653
+ "--splitter",
654
+ help="Name of custom splitter to use",
655
+ click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
656
+ ),
657
+ ] = "file",
658
+ max_tokens: Annotated[
659
+ int,
660
+ typer.Option(
661
+ "--max-tokens",
662
+ "-M",
663
+ help="The maximum number of tokens the model will take in. "
664
+ "If unspecificed, model's default max will be used.",
665
+ ),
666
+ ] = None,
667
+ partition_token_limit: Annotated[
668
+ int,
669
+ typer.Option(
670
+ "--partition-tokens",
671
+ "-pt",
672
+ help="The limit on the number of tokens per partition.",
673
+ ),
674
+ ] = 8192,
675
+ ):
676
+ model_arguments = dict(temperature=temperature)
677
+ kwargs = dict(
678
+ model=llm_name,
679
+ model_arguments=model_arguments,
680
+ source_language=language,
681
+ max_prompts=max_prompts,
682
+ max_tokens=max_tokens,
683
+ splitter_type=splitter_type,
684
+ partition_token_limit=partition_token_limit,
685
+ )
686
+ partitioner = Partitioner(**kwargs)
687
+ partitioner.translate(input_dir, output_dir, overwrite)
688
+
689
+
581
690
  @app.command(
582
691
  help="Diagram input code using an LLM.",
583
692
  no_args_is_help=True,
@@ -667,25 +776,27 @@ def diagram(
667
776
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
668
777
  ),
669
778
  ] = "file",
670
- refiner_type: Annotated[
671
- str,
779
+ refiner_types: Annotated[
780
+ list[str],
672
781
  typer.Option(
673
782
  "-r",
674
783
  "--refiner",
675
- help="Name of custom refiner to use",
784
+ help="List of refiner types to use. Add -r for each refiner to use in\
785
+ refinement chain",
676
786
  click_type=click.Choice(list(REFINERS.keys())),
677
787
  ),
678
- ] = "none",
788
+ ] = ["JanusRefiner"],
679
789
  retriever_type: Annotated[
680
790
  str,
681
791
  typer.Option(
682
792
  "-R",
683
793
  "--retriever",
684
794
  help="Name of custom retriever to use",
685
- click_type=click.Choice(["active_usings"]),
795
+ click_type=click.Choice(["active_usings", "language_docs"]),
686
796
  ),
687
797
  ] = None,
688
798
  ):
799
+ refiner_types = [REFINERS[r] for r in refiner_types]
689
800
  model_arguments = dict(temperature=temperature)
690
801
  collections_config = get_collections_config()
691
802
  diagram_generator = DiagramGenerator(
@@ -696,7 +807,7 @@ def diagram(
696
807
  db_path=db_loc,
697
808
  db_config=collections_config,
698
809
  splitter_type=splitter_type,
699
- refiner_type=refiner_type,
810
+ refiner_types=refiner_types,
700
811
  retriever_type=retriever_type,
701
812
  diagram_type=diagram_type,
702
813
  add_documentation=add_documentation,
@@ -2,5 +2,6 @@ from janus.converter.converter import Converter
2
2
  from janus.converter.diagram import DiagramGenerator
3
3
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
4
4
  from janus.converter.evaluate import Evaluator
5
+ from janus.converter.partition import Partitioner
5
6
  from janus.converter.requirements import RequirementsDocumenter
6
7
  from janus.converter.translate import Translator
@@ -6,7 +6,12 @@ from typing import Any
6
6
 
7
7
  from langchain_core.exceptions import OutputParserException
8
8
  from langchain_core.prompts import ChatPromptTemplate
9
- from langchain_core.runnables import Runnable, RunnableParallel, RunnablePassthrough
9
+ from langchain_core.runnables import (
10
+ Runnable,
11
+ RunnableLambda,
12
+ RunnableParallel,
13
+ RunnablePassthrough,
14
+ )
10
15
  from openai import BadRequestError, RateLimitError
11
16
  from pydantic import ValidationError
12
17
 
@@ -23,15 +28,14 @@ from janus.language.splitter import (
23
28
  from janus.llm.model_callbacks import get_model_callback
24
29
  from janus.llm.models_info import MODEL_PROMPT_ENGINES, JanusModel, load_model
25
30
  from janus.parsers.parser import GenericParser, JanusParser
26
- from janus.refiners.refiner import (
27
- FixParserExceptions,
28
- HallucinationRefiner,
29
- JanusRefiner,
30
- ReflectionRefiner,
31
- )
31
+ from janus.refiners.refiner import JanusRefiner
32
32
 
33
33
  # from janus.refiners.refiner import BasicRefiner, Refiner
34
- from janus.retrievers.retriever import ActiveUsingsRetriever, JanusRetriever
34
+ from janus.retrievers.retriever import (
35
+ ActiveUsingsRetriever,
36
+ JanusRetriever,
37
+ LanguageDocsRetriever,
38
+ )
35
39
  from janus.utils.enums import LANGUAGES
36
40
  from janus.utils.logger import create_logger
37
41
 
@@ -78,7 +82,7 @@ class Converter:
78
82
  protected_node_types: tuple[str, ...] = (),
79
83
  prune_node_types: tuple[str, ...] = (),
80
84
  splitter_type: str = "file",
81
- refiner_type: str | None = None,
85
+ refiner_types: list[type[JanusRefiner]] = [JanusRefiner],
82
86
  retriever_type: str | None = None,
83
87
  ) -> None:
84
88
  """Initialize a Converter instance.
@@ -105,6 +109,7 @@ class Converter:
105
109
  - None
106
110
  retriever_type: The type of retriever to use. Valid values:
107
111
  - "active_usings"
112
+ - "language_docs"
108
113
  - None
109
114
  """
110
115
  self._changed_attrs: set = set()
@@ -133,10 +138,11 @@ class Converter:
133
138
  self._prompt: ChatPromptTemplate
134
139
 
135
140
  self._parser: JanusParser = GenericParser()
141
+ self._base_parser: JanusParser = GenericParser()
136
142
  self._combiner: Combiner = Combiner()
137
143
 
138
144
  self._splitter_type: str
139
- self._refiner_type: str | None
145
+ self._refiner_types: list[type[JanusRefiner]]
140
146
  self._retriever_type: str | None
141
147
 
142
148
  self._splitter: Splitter
@@ -144,7 +150,7 @@ class Converter:
144
150
  self._retriever: JanusRetriever
145
151
 
146
152
  self.set_splitter(splitter_type=splitter_type)
147
- self.set_refiner(refiner_type=refiner_type)
153
+ self.set_refiner_types(refiner_types=refiner_types)
148
154
  self.set_retriever(retriever_type=retriever_type)
149
155
  self.set_model(model_name=model, **model_arguments)
150
156
  self.set_prompt(prompt_template=prompt_template)
@@ -170,7 +176,7 @@ class Converter:
170
176
  self._load_model()
171
177
  self._load_prompt()
172
178
  self._load_retriever()
173
- self._load_refiner()
179
+ self._load_refiner_chain()
174
180
  self._load_splitter()
175
181
  self._load_vectorizer()
176
182
  self._load_chain()
@@ -210,13 +216,13 @@ class Converter:
210
216
 
211
217
  self._splitter_type = splitter_type
212
218
 
213
- def set_refiner(self, refiner_type: str | None) -> None:
219
+ def set_refiner_types(self, refiner_types: list[type[JanusRefiner]]) -> None:
214
220
  """Validate and set the refiner type
215
221
 
216
222
  Arguments:
217
223
  refiner_type: the type of refiner to use
218
224
  """
219
- self._refiner_type = refiner_type
225
+ self._refiner_types = refiner_types
220
226
 
221
227
  def set_retriever(self, retriever_type: str | None) -> None:
222
228
  """Validate and set the retriever type
@@ -355,48 +361,40 @@ class Converter:
355
361
  def _load_retriever(self):
356
362
  if self._retriever_type == "active_usings":
357
363
  self._retriever = ActiveUsingsRetriever()
364
+ elif self._retriever_type == "language_docs":
365
+ self._retriever = LanguageDocsRetriever(self._llm, self._source_language)
358
366
  else:
359
367
  self._retriever = JanusRetriever()
360
368
 
361
- @run_if_changed("_refiner_type", "_model_name", "max_prompts", "_parser", "_llm")
362
- def _load_refiner(self) -> None:
363
- """Load the refiner according to this instance's attributes.
364
-
365
- If the relevant fields have not been changed since the last time this method was
366
- called, nothing happens.
367
- """
368
- if self._refiner_type == "parser":
369
- self._refiner = FixParserExceptions(
370
- llm=self._llm,
371
- parser=self._parser,
372
- max_retries=self.max_prompts,
373
- )
374
- elif self._refiner_type == "reflection":
375
- self._refiner = ReflectionRefiner(
376
- llm=self._llm,
377
- parser=self._parser,
378
- max_retries=self.max_prompts,
369
+ @run_if_changed("_refiner_types", "_model_name", "max_prompts", "_parser")
370
+ def _load_refiner_chain(self) -> None:
371
+ self._refiner_chain = RunnableParallel(
372
+ completion=self._llm,
373
+ prompt_value=RunnablePassthrough(),
374
+ )
375
+ for refiner_type in self._refiner_types[:-1]:
376
+ # NOTE: Do NOT remove refiner_type=refiner_type from lambda.
377
+ # Due to lambda capture, must be present or chain will not
378
+ # be correctly constructed.
379
+ self._refiner_chain = self._refiner_chain | RunnableParallel(
380
+ completion=lambda x, refiner_type=refiner_type: refiner_type(
381
+ llm=self._llm,
382
+ parser=self._base_parser,
383
+ max_retries=self.max_prompts,
384
+ ).parse_completion(**x),
385
+ prompt_value=lambda x: x["prompt_value"],
379
386
  )
380
- elif self._refiner_type == "hallucination":
381
- self._refiner = HallucinationRefiner(
387
+ self._refiner_chain = self._refiner_chain | RunnableLambda(
388
+ lambda x: self._refiner_types[-1](
382
389
  llm=self._llm,
383
390
  parser=self._parser,
384
391
  max_retries=self.max_prompts,
385
- )
386
- else:
387
- self._refiner = JanusRefiner(parser=self._parser)
392
+ ).parse_completion(**x)
393
+ )
388
394
 
389
- @run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner")
395
+ @run_if_changed("_parser", "_retriever", "_prompt", "_llm", "_refiner_chain")
390
396
  def _load_chain(self):
391
- self.chain = (
392
- self._input_runnable()
393
- | self._prompt
394
- | RunnableParallel(
395
- completion=self._llm,
396
- prompt_value=RunnablePassthrough(),
397
- )
398
- | self._refiner.parse_runnable
399
- )
397
+ self.chain = self._input_runnable() | self._prompt | self._refiner_chain
400
398
 
401
399
  def _input_runnable(self) -> Runnable:
402
400
  return RunnableParallel(
@@ -0,0 +1,27 @@
1
+ from pathlib import Path
2
+
3
+ from janus.converter.converter import Converter
4
+ from janus.language.block import TranslatedCodeBlock
5
+ from janus.parsers.partition_parser import PartitionParser
6
+ from janus.utils.logger import create_logger
7
+
8
+ log = create_logger(__name__)
9
+
10
+
11
+ class Partitioner(Converter):
12
+ def __init__(self, partition_token_limit: int, **kwargs):
13
+ super().__init__(**kwargs)
14
+ self.set_prompt("partition")
15
+ self._load_model()
16
+ self._parser = PartitionParser(
17
+ token_limit=partition_token_limit,
18
+ model=self._llm,
19
+ )
20
+ self._target_language = self._source_language
21
+ self._target_suffix = self._source_suffix
22
+ self._load_parameters()
23
+
24
+ def _save_to_file(self, block: TranslatedCodeBlock, out_path: Path) -> None:
25
+ output_str = self._parser.parse_combined_output(block.complete_text)
26
+ out_path.parent.mkdir(parents=True, exist_ok=True)
27
+ out_path.write_text(output_str, encoding="utf-8")
@@ -1,3 +1,5 @@
1
+ import re
2
+
1
3
  from janus.language.block import CodeBlock, TranslatedCodeBlock
2
4
  from janus.language.file import FileManager
3
5
  from janus.utils.logger import create_logger
@@ -90,3 +92,23 @@ class ChunkCombiner(Combiner):
90
92
  root: The functional code block to combine with its children.
91
93
  """
92
94
  return root
95
+
96
+
97
+ class PartitionCombiner(Combiner):
98
+ @staticmethod
99
+ def combine(root: CodeBlock) -> None:
100
+ """A combiner which inserts partition tags between code blocks"""
101
+ queue = [root]
102
+ while queue:
103
+ block = queue.pop(0)
104
+ if block.children:
105
+ queue.extend(block.children)
106
+ else:
107
+ block.affixes = (block.prefix, block.suffix + "\n<JANUS_PARTITION>\n")
108
+
109
+ super(PartitionCombiner, PartitionCombiner).combine(root)
110
+ root.text = re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.text)
111
+ root.affixes = (
112
+ root.prefix,
113
+ re.sub(r"(?:\n<JANUS_PARTITION>\n)+$", "", root.suffix),
114
+ )
@@ -90,6 +90,7 @@ claude_models = [
90
90
  "bedrock-claude-instant-v1",
91
91
  "bedrock-claude-haiku",
92
92
  "bedrock-claude-sonnet",
93
+ "bedrock-claude-sonnet-3.5",
93
94
  ]
94
95
  llama2_models = [
95
96
  "bedrock-llama2-70b",
@@ -153,6 +154,7 @@ MODEL_ID_TO_LONG_ID = {
153
154
  "bedrock-claude-instant-v1": "anthropic.claude-instant-v1",
154
155
  "bedrock-claude-haiku": "anthropic.claude-3-haiku-20240307-v1:0",
155
156
  "bedrock-claude-sonnet": "anthropic.claude-3-sonnet-20240229-v1:0",
157
+ "bedrock-claude-sonnet-3.5": "anthropic.claude-3-5-sonnet-20240620-v1:0",
156
158
  "bedrock-llama2-70b": "meta.llama2-70b-v1",
157
159
  "bedrock-llama2-70b-chat": "meta.llama2-70b-chat-v1",
158
160
  "bedrock-llama2-13b": "meta.llama2-13b-chat-v1",
@@ -200,6 +202,7 @@ TOKEN_LIMITS: dict[str, int] = {
200
202
  "anthropic.claude-instant-v1": 100_000,
201
203
  "anthropic.claude-3-haiku-20240307-v1:0": 248_000,
202
204
  "anthropic.claude-3-sonnet-20240229-v1:0": 248_000,
205
+ "anthropic.claude-3-5-sonnet-20240620-v1:0": 200_000,
203
206
  "meta.llama2-70b-v1": 4096,
204
207
  "meta.llama2-70b-chat-v1": 4096,
205
208
  "meta.llama2-13b-chat-v1": 4096,