janus-llm 4.1.0__tar.gz → 4.3.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. {janus_llm-4.1.0 → janus_llm-4.3.1}/PKG-INFO +9 -1
  2. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/__init__.py +1 -1
  3. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/cli.py +286 -30
  4. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/__init__.py +1 -0
  5. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/converter.py +46 -47
  6. janus_llm-4.3.1/janus/converter/evaluate.py +241 -0
  7. janus_llm-4.3.1/janus/converter/partition.py +27 -0
  8. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/alc/_tests/test_alc.py +1 -1
  9. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/alc/alc.py +9 -4
  10. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/combine.py +22 -0
  11. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/splitter.py +31 -23
  12. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/treesitter/treesitter.py +9 -1
  13. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/llm/models_info.py +20 -12
  14. janus_llm-4.3.1/janus/parsers/eval_parsers/incose_parser.py +134 -0
  15. janus_llm-4.3.1/janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
  16. janus_llm-4.3.1/janus/parsers/partition_parser.py +168 -0
  17. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/refiners/refiner.py +38 -12
  18. janus_llm-4.3.1/janus/refiners/uml.py +33 -0
  19. janus_llm-4.3.1/janus/retrievers/retriever.py +102 -0
  20. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/enums.py +14 -0
  21. janus_llm-4.3.1/janus/utils/pdf_docs_reader.py +134 -0
  22. {janus_llm-4.1.0 → janus_llm-4.3.1}/pyproject.toml +9 -1
  23. janus_llm-4.1.0/janus/converter/evaluate.py +0 -15
  24. janus_llm-4.1.0/janus/retrievers/retriever.py +0 -42
  25. {janus_llm-4.1.0 → janus_llm-4.3.1}/LICENSE +0 -0
  26. {janus_llm-4.1.0 → janus_llm-4.3.1}/README.md +0 -0
  27. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/__main__.py +0 -0
  28. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/_tests/__init__.py +0 -0
  29. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/_tests/conftest.py +0 -0
  30. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/_tests/test_cli.py +0 -0
  31. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/_tests/__init__.py +0 -0
  32. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/_tests/test_translate.py +0 -0
  33. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/aggregator.py +0 -0
  34. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/diagram.py +0 -0
  35. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/document.py +0 -0
  36. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/requirements.py +0 -0
  37. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/converter/translate.py +0 -0
  38. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/__init__.py +0 -0
  39. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/_tests/__init__.py +0 -0
  40. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_collections.py +0 -0
  41. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_database.py +0 -0
  42. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/_tests/test_vectorize.py +0 -0
  43. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/collections.py +0 -0
  44. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/database.py +0 -0
  45. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/embedding_models_info.py +0 -0
  46. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/embedding/vectorize.py +0 -0
  47. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/__init__.py +0 -0
  48. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/_tests/__init__.py +0 -0
  49. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/_tests/test_combine.py +0 -0
  50. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/_tests/test_splitter.py +0 -0
  51. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/alc/__init__.py +0 -0
  52. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/alc/_tests/__init__.py +0 -0
  53. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/__init__.py +0 -0
  54. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/_tests/__init__.py +0 -0
  55. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/_tests/test_binary.py +0 -0
  56. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/binary.py +0 -0
  57. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/binary/reveng/decompile_script.py +0 -0
  58. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/block.py +0 -0
  59. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/file.py +0 -0
  60. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/__init__.py +0 -0
  61. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/_tests/__init__.py +0 -0
  62. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/_tests/test_mumps.py +0 -0
  63. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/mumps.py +0 -0
  64. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/mumps/patterns.py +0 -0
  65. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/__init__.py +0 -0
  66. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/basic_splitter.py +0 -0
  67. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/chunk_splitter.py +0 -0
  68. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/registry.py +0 -0
  69. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/simple_ast.py +0 -0
  70. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/naive/tag_splitter.py +0 -0
  71. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/node.py +0 -0
  72. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/treesitter/__init__.py +0 -0
  73. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/treesitter/_tests/__init__.py +0 -0
  74. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/language/treesitter/_tests/test_treesitter.py +0 -0
  75. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/llm/__init__.py +0 -0
  76. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/llm/model_callbacks.py +0 -0
  77. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/__init__.py +0 -0
  78. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/__init__.py +0 -0
  79. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/reference.py +0 -0
  80. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/target.py +0 -0
  81. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_bleu.py +0 -0
  82. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_chrf.py +0 -0
  83. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_file_pairing.py +0 -0
  84. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_llm.py +0 -0
  85. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_reading.py +0 -0
  86. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_rouge_score.py +0 -0
  87. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_similarity_score.py +0 -0
  88. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/_tests/test_treesitter_metrics.py +0 -0
  89. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/bleu.py +0 -0
  90. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/chrf.py +0 -0
  91. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/cli.py +0 -0
  92. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/complexity_metrics.py +0 -0
  93. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/file_pairing.py +0 -0
  94. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/llm_metrics.py +0 -0
  95. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/metric.py +0 -0
  96. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/reading.py +0 -0
  97. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/rouge_score.py +0 -0
  98. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/similarity.py +0 -0
  99. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/metrics/splitting.py +0 -0
  100. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/__init__.py +0 -0
  101. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/_tests/__init__.py +0 -0
  102. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/_tests/test_code_parser.py +0 -0
  103. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/code_parser.py +0 -0
  104. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/doc_parser.py +0 -0
  105. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/eval_parser.py +0 -0
  106. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/parser.py +0 -0
  107. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/reqs_parser.py +0 -0
  108. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/parsers/uml.py +0 -0
  109. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/prompts/__init__.py +0 -0
  110. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/prompts/prompt.py +0 -0
  111. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/__init__.py +0 -0
  112. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/_tests/__init__.py +0 -0
  113. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/_tests/test_logger.py +0 -0
  114. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/_tests/test_progress.py +0 -0
  115. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/logger.py +0 -0
  116. {janus_llm-4.1.0 → janus_llm-4.3.1}/janus/utils/progress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: janus-llm
3
- Version: 4.1.0
3
+ Version: 4.3.1
4
4
  Summary: A transcoding library using LLMs.
5
5
  Home-page: https://github.com/janus-llm/janus-llm
6
6
  License: Apache 2.0
@@ -23,20 +23,28 @@ Requires-Dist: langchain-anthropic (>=0.1.15,<0.2.0)
23
23
  Requires-Dist: langchain-community (>=0.2.0,<0.3.0)
24
24
  Requires-Dist: langchain-core (>=0.2.0,<0.3.0)
25
25
  Requires-Dist: langchain-openai (>=0.1.8,<0.2.0)
26
+ Requires-Dist: langchain-unstructured (>=0.1.2,<0.2.0)
26
27
  Requires-Dist: nltk (>=3.8.1,<4.0.0)
27
28
  Requires-Dist: numpy (>=1.24.3,<2.0.0)
28
29
  Requires-Dist: openai (>=1.14.0,<2.0.0)
30
+ Requires-Dist: pi-heif (>=0.20.0,<0.21.0)
29
31
  Requires-Dist: py-readability-metrics (>=1.4.5,<2.0.0)
30
32
  Requires-Dist: py-rouge (>=1.1,<2.0)
33
+ Requires-Dist: pytesseract (>=0.3.13,<0.4.0)
31
34
  Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
32
35
  Requires-Dist: rich (>=13.7.1,<14.0.0)
33
36
  Requires-Dist: sacrebleu (>=2.4.1,<3.0.0)
37
+ Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
34
38
  Requires-Dist: sentence-transformers (>=2.6.1,<3.0.0) ; extra == "hf-local" or extra == "all"
39
+ Requires-Dist: tesseract (>=0.1.3,<0.2.0)
35
40
  Requires-Dist: text-generation (>=0.6.0,<0.7.0)
36
41
  Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
37
42
  Requires-Dist: transformers (>=4.31.0,<5.0.0)
38
43
  Requires-Dist: tree-sitter (>=0.21.0,<0.22.0)
39
44
  Requires-Dist: typer (>=0.9.0,<0.10.0)
45
+ Requires-Dist: unstructured (>=0.15.9,<0.16.0)
46
+ Requires-Dist: unstructured-inference (>=0.7.36,<0.8.0)
47
+ Requires-Dist: unstructured-pytesseract (>=0.3.13,<0.4.0)
40
48
  Project-URL: Documentation, https://janus-llm.github.io/janus-llm
41
49
  Project-URL: Repository, https://github.com/janus-llm/janus-llm
42
50
  Description-Content-Type: text/markdown
@@ -5,7 +5,7 @@ from langchain_core._api.deprecation import LangChainDeprecationWarning
5
5
  from janus.converter.translate import Translator
6
6
  from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "4.1.0"
8
+ __version__ = "4.3.1"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
@@ -13,10 +13,14 @@ from rich.console import Console
13
13
  from rich.prompt import Confirm
14
14
  from typing_extensions import Annotated
15
15
 
16
+ import janus.refiners.refiner
17
+ import janus.refiners.uml
16
18
  from janus.converter.aggregator import Aggregator
17
19
  from janus.converter.converter import Converter
18
20
  from janus.converter.diagram import DiagramGenerator
19
21
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
22
+ from janus.converter.evaluate import InlineCommentEvaluator, RequirementEvaluator
23
+ from janus.converter.partition import Partitioner
20
24
  from janus.converter.requirements import RequirementsDocumenter
21
25
  from janus.converter.translate import Translator
22
26
  from janus.embedding.collections import Collections
@@ -44,7 +48,6 @@ from janus.llm.models_info import (
44
48
  openai_models,
45
49
  )
46
50
  from janus.metrics.cli import evaluate
47
- from janus.refiners.refiner import REFINERS
48
51
  from janus.utils.enums import LANGUAGES
49
52
  from janus.utils.logger import create_logger
50
53
 
@@ -69,6 +72,18 @@ with open(db_file, "r") as f:
69
72
  collections_config_file = Path(db_loc) / "collections.json"
70
73
 
71
74
 
75
+ def get_subclasses(cls):
76
+ return set(cls.__subclasses__()).union(
77
+ set(s for c in cls.__subclasses__() for s in get_subclasses(c))
78
+ )
79
+
80
+
81
+ REFINER_TYPES = get_subclasses(janus.refiners.refiner.JanusRefiner).union(
82
+ {janus.refiners.refiner.JanusRefiner}
83
+ )
84
+ REFINERS = {r.__name__: r for r in REFINER_TYPES}
85
+
86
+
72
87
  def get_collections_config():
73
88
  if collections_config_file.exists():
74
89
  with open(collections_config_file, "r") as f:
@@ -113,7 +128,7 @@ embedding = typer.Typer(
113
128
 
114
129
  def version_callback(value: bool) -> None:
115
130
  if value:
116
- from janus import __version__ as version
131
+ from . import __version__ as version
117
132
 
118
133
  print(f"Janus CLI [blue]v{version}[/blue]")
119
134
  raise typer.Exit()
@@ -244,22 +259,23 @@ def translate(
244
259
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
245
260
  ),
246
261
  ] = "file",
247
- refiner_type: Annotated[
248
- str,
262
+ refiner_types: Annotated[
263
+ list[str],
249
264
  typer.Option(
250
265
  "-r",
251
266
  "--refiner",
252
- help="Name of custom refiner to use",
267
+ help="List of refiner types to use. Add -r for each refiner to use in\
268
+ refinement chain",
253
269
  click_type=click.Choice(list(REFINERS.keys())),
254
270
  ),
255
- ] = "none",
271
+ ] = ["JanusRefiner"],
256
272
  retriever_type: Annotated[
257
273
  str,
258
274
  typer.Option(
259
275
  "-R",
260
276
  "--retriever",
261
277
  help="Name of custom retriever to use",
262
- click_type=click.Choice(["active_usings"]),
278
+ click_type=click.Choice(["active_usings", "language_docs"]),
263
279
  ),
264
280
  ] = None,
265
281
  max_tokens: Annotated[
@@ -272,6 +288,7 @@ def translate(
272
288
  ),
273
289
  ] = None,
274
290
  ):
291
+ refiner_types = [REFINERS[r] for r in refiner_types]
275
292
  try:
276
293
  target_language, target_version = target_lang.split("-")
277
294
  except ValueError:
@@ -296,7 +313,7 @@ def translate(
296
313
  db_path=db_loc,
297
314
  db_config=collections_config,
298
315
  splitter_type=splitter_type,
299
- refiner_type=refiner_type,
316
+ refiner_types=refiner_types,
300
317
  retriever_type=retriever_type,
301
318
  )
302
319
  translator.translate(input_dir, output_dir, overwrite, collection)
@@ -402,22 +419,23 @@ def document(
402
419
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
403
420
  ),
404
421
  ] = "file",
405
- refiner_type: Annotated[
406
- str,
422
+ refiner_types: Annotated[
423
+ list[str],
407
424
  typer.Option(
408
425
  "-r",
409
426
  "--refiner",
410
- help="Name of custom refiner to use",
427
+ help="List of refiner types to use. Add -r for each refiner to use in\
428
+ refinement chain",
411
429
  click_type=click.Choice(list(REFINERS.keys())),
412
430
  ),
413
- ] = "none",
431
+ ] = ["JanusRefiner"],
414
432
  retriever_type: Annotated[
415
433
  str,
416
434
  typer.Option(
417
435
  "-R",
418
436
  "--retriever",
419
437
  help="Name of custom retriever to use",
420
- click_type=click.Choice(["active_usings"]),
438
+ click_type=click.Choice(["active_usings", "language_docs"]),
421
439
  ),
422
440
  ] = None,
423
441
  max_tokens: Annotated[
@@ -430,6 +448,7 @@ def document(
430
448
  ),
431
449
  ] = None,
432
450
  ):
451
+ refiner_types = [REFINERS[r] for r in refiner_types]
433
452
  model_arguments = dict(temperature=temperature)
434
453
  collections_config = get_collections_config()
435
454
  kwargs = dict(
@@ -441,7 +460,7 @@ def document(
441
460
  db_path=db_loc,
442
461
  db_config=collections_config,
443
462
  splitter_type=splitter_type,
444
- refiner_type=refiner_type,
463
+ refiner_types=refiner_types,
445
464
  retriever_type=retriever_type,
446
465
  )
447
466
  if doc_mode == "madlibs":
@@ -458,12 +477,6 @@ def document(
458
477
  documenter.translate(input_dir, output_dir, overwrite, collection)
459
478
 
460
479
 
461
- def get_subclasses(cls):
462
- return set(cls.__subclasses__()).union(
463
- set(s for c in cls.__subclasses__() for s in get_subclasses(c))
464
- )
465
-
466
-
467
480
  @app.command()
468
481
  def aggregate(
469
482
  input_dir: Annotated[
@@ -578,6 +591,115 @@ def aggregate(
578
591
  aggregator.translate(input_dir, output_dir, overwrite, collection)
579
592
 
580
593
 
594
+ @app.command(
595
+ help="Partition input code using an LLM.",
596
+ no_args_is_help=True,
597
+ )
598
+ def partition(
599
+ input_dir: Annotated[
600
+ Path,
601
+ typer.Option(
602
+ "--input",
603
+ "-i",
604
+ help="The directory containing the source code to be partitioned. ",
605
+ ),
606
+ ],
607
+ language: Annotated[
608
+ str,
609
+ typer.Option(
610
+ "--language",
611
+ "-l",
612
+ help="The language of the source code.",
613
+ click_type=click.Choice(sorted(LANGUAGES)),
614
+ ),
615
+ ],
616
+ output_dir: Annotated[
617
+ Path,
618
+ typer.Option(
619
+ "--output-dir", "-o", help="The directory to store the partitioned code in."
620
+ ),
621
+ ],
622
+ llm_name: Annotated[
623
+ str,
624
+ typer.Option(
625
+ "--llm",
626
+ "-L",
627
+ help="The custom name of the model set with 'janus llm add'.",
628
+ ),
629
+ ] = "gpt-4o",
630
+ max_prompts: Annotated[
631
+ int,
632
+ typer.Option(
633
+ "--max-prompts",
634
+ "-m",
635
+ help="The maximum number of times to prompt a model on one functional block "
636
+ "before exiting the application. This is to prevent wasting too much money.",
637
+ ),
638
+ ] = 10,
639
+ overwrite: Annotated[
640
+ bool,
641
+ typer.Option(
642
+ "--overwrite/--preserve",
643
+ help="Whether to overwrite existing files in the output directory",
644
+ ),
645
+ ] = False,
646
+ temperature: Annotated[
647
+ float,
648
+ typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
649
+ ] = 0.7,
650
+ splitter_type: Annotated[
651
+ str,
652
+ typer.Option(
653
+ "-S",
654
+ "--splitter",
655
+ help="Name of custom splitter to use",
656
+ click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
657
+ ),
658
+ ] = "file",
659
+ refiner_types: Annotated[
660
+ list[str],
661
+ typer.Option(
662
+ "-r",
663
+ "--refiner",
664
+ help="List of refiner types to use. Add -r for each refiner to use in\
665
+ refinement chain",
666
+ click_type=click.Choice(list(REFINERS.keys())),
667
+ ),
668
+ ] = ["JanusRefiner"],
669
+ max_tokens: Annotated[
670
+ int,
671
+ typer.Option(
672
+ "--max-tokens",
673
+ "-M",
674
+ help="The maximum number of tokens the model will take in. "
675
+ "If unspecificed, model's default max will be used.",
676
+ ),
677
+ ] = None,
678
+ partition_token_limit: Annotated[
679
+ int,
680
+ typer.Option(
681
+ "--partition-tokens",
682
+ "-pt",
683
+ help="The limit on the number of tokens per partition.",
684
+ ),
685
+ ] = 8192,
686
+ ):
687
+ refiner_types = [REFINERS[r] for r in refiner_types]
688
+ model_arguments = dict(temperature=temperature)
689
+ kwargs = dict(
690
+ model=llm_name,
691
+ model_arguments=model_arguments,
692
+ source_language=language,
693
+ max_prompts=max_prompts,
694
+ max_tokens=max_tokens,
695
+ splitter_type=splitter_type,
696
+ refiner_types=refiner_types,
697
+ partition_token_limit=partition_token_limit,
698
+ )
699
+ partitioner = Partitioner(**kwargs)
700
+ partitioner.translate(input_dir, output_dir, overwrite)
701
+
702
+
581
703
  @app.command(
582
704
  help="Diagram input code using an LLM.",
583
705
  no_args_is_help=True,
@@ -667,25 +789,27 @@ def diagram(
667
789
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
668
790
  ),
669
791
  ] = "file",
670
- refiner_type: Annotated[
671
- str,
792
+ refiner_types: Annotated[
793
+ list[str],
672
794
  typer.Option(
673
795
  "-r",
674
796
  "--refiner",
675
- help="Name of custom refiner to use",
797
+ help="List of refiner types to use. Add -r for each refiner to use in\
798
+ refinement chain",
676
799
  click_type=click.Choice(list(REFINERS.keys())),
677
800
  ),
678
- ] = "none",
801
+ ] = ["JanusRefiner"],
679
802
  retriever_type: Annotated[
680
803
  str,
681
804
  typer.Option(
682
805
  "-R",
683
806
  "--retriever",
684
807
  help="Name of custom retriever to use",
685
- click_type=click.Choice(["active_usings"]),
808
+ click_type=click.Choice(["active_usings", "language_docs"]),
686
809
  ),
687
810
  ] = None,
688
811
  ):
812
+ refiner_types = [REFINERS[r] for r in refiner_types]
689
813
  model_arguments = dict(temperature=temperature)
690
814
  collections_config = get_collections_config()
691
815
  diagram_generator = DiagramGenerator(
@@ -696,7 +820,7 @@ def diagram(
696
820
  db_path=db_loc,
697
821
  db_config=collections_config,
698
822
  splitter_type=splitter_type,
699
- refiner_type=refiner_type,
823
+ refiner_types=refiner_types,
700
824
  retriever_type=retriever_type,
701
825
  diagram_type=diagram_type,
702
826
  add_documentation=add_documentation,
@@ -704,6 +828,139 @@ def diagram(
704
828
  diagram_generator.translate(input_dir, output_dir, overwrite, collection)
705
829
 
706
830
 
831
+ @app.command(
832
+ help="LLM self evaluation",
833
+ no_args_is_help=True,
834
+ )
835
+ def llm_self_eval(
836
+ input_dir: Annotated[
837
+ Path,
838
+ typer.Option(
839
+ "--input",
840
+ "-i",
841
+ help="The directory containing the source code to be evaluated. "
842
+ "The files should all be in one flat directory.",
843
+ ),
844
+ ],
845
+ language: Annotated[
846
+ str,
847
+ typer.Option(
848
+ "--language",
849
+ "-l",
850
+ help="The language of the source code.",
851
+ click_type=click.Choice(sorted(LANGUAGES)),
852
+ ),
853
+ ],
854
+ output_dir: Annotated[
855
+ Path,
856
+ typer.Option(
857
+ "--output-dir", "-o", help="The directory to store the evaluations in."
858
+ ),
859
+ ],
860
+ llm_name: Annotated[
861
+ str,
862
+ typer.Option(
863
+ "--llm",
864
+ "-L",
865
+ help="The custom name of the model set with 'janus llm add'.",
866
+ ),
867
+ ] = "gpt-4o",
868
+ evaluation_type: Annotated[
869
+ str,
870
+ typer.Option(
871
+ "--evaluation-type",
872
+ "-e",
873
+ help="Type of output to evaluate.",
874
+ click_type=click.Choice(["incose", "comments"]),
875
+ ),
876
+ ] = "incose",
877
+ max_prompts: Annotated[
878
+ int,
879
+ typer.Option(
880
+ "--max-prompts",
881
+ "-m",
882
+ help="The maximum number of times to prompt a model on one functional block "
883
+ "before exiting the application. This is to prevent wasting too much money.",
884
+ ),
885
+ ] = 10,
886
+ overwrite: Annotated[
887
+ bool,
888
+ typer.Option(
889
+ "--overwrite/--preserve",
890
+ help="Whether to overwrite existing files in the output directory",
891
+ ),
892
+ ] = False,
893
+ temperature: Annotated[
894
+ float,
895
+ typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
896
+ ] = 0.7,
897
+ collection: Annotated[
898
+ str,
899
+ typer.Option(
900
+ "--collection",
901
+ "-c",
902
+ help="If set, will put the translated result into a Chroma DB "
903
+ "collection with the name provided.",
904
+ ),
905
+ ] = None,
906
+ splitter_type: Annotated[
907
+ str,
908
+ typer.Option(
909
+ "-S",
910
+ "--splitter",
911
+ help="Name of custom splitter to use",
912
+ click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
913
+ ),
914
+ ] = "file",
915
+ refiner_types: Annotated[
916
+ list[str],
917
+ typer.Option(
918
+ "-r",
919
+ "--refiner",
920
+ help="List of refiner types to use. Add -r for each refiner to use in\
921
+ refinement chain",
922
+ click_type=click.Choice(list(REFINERS.keys())),
923
+ ),
924
+ ] = ["JanusRefiner"],
925
+ eval_items_per_request: Annotated[
926
+ int,
927
+ typer.Option(
928
+ "--eval-items-per-request",
929
+ "-rc",
930
+ help="The maximum number of evaluation items per request",
931
+ ),
932
+ ] = None,
933
+ max_tokens: Annotated[
934
+ int,
935
+ typer.Option(
936
+ "--max-tokens",
937
+ "-M",
938
+ help="The maximum number of tokens the model will take in. "
939
+ "If unspecificed, model's default max will be used.",
940
+ ),
941
+ ] = None,
942
+ ):
943
+ model_arguments = dict(temperature=temperature)
944
+ refiner_types = [REFINERS[r] for r in refiner_types]
945
+ kwargs = dict(
946
+ eval_items_per_request=eval_items_per_request,
947
+ model=llm_name,
948
+ model_arguments=model_arguments,
949
+ source_language=language,
950
+ max_prompts=max_prompts,
951
+ max_tokens=max_tokens,
952
+ splitter_type=splitter_type,
953
+ refiner_types=refiner_types,
954
+ )
955
+ # Setting parser type here
956
+ if evaluation_type == "incose":
957
+ evaluator = RequirementEvaluator(**kwargs)
958
+ elif evaluation_type == "comments":
959
+ evaluator = InlineCommentEvaluator(**kwargs)
960
+
961
+ evaluator.translate(input_dir, output_dir, overwrite, collection)
962
+
963
+
707
964
  @db.command("init", help="Connect to or create a database.")
708
965
  def db_init(
709
966
  path: Annotated[
@@ -1005,13 +1262,12 @@ def llm_add(
1005
1262
  show_choices=False,
1006
1263
  )
1007
1264
  params = dict(
1008
- # OpenAI uses the "model_name" key for what we're calling "long_model_id"
1009
- model_name=MODEL_ID_TO_LONG_ID[model_id],
1265
+ model_name=model_name,
1010
1266
  temperature=0.7,
1011
1267
  n=1,
1012
1268
  )
1013
- max_tokens = TOKEN_LIMITS[MODEL_ID_TO_LONG_ID[model_id]]
1014
- model_cost = COST_PER_1K_TOKENS[MODEL_ID_TO_LONG_ID[model_id]]
1269
+ max_tokens = TOKEN_LIMITS[model_name]
1270
+ model_cost = COST_PER_1K_TOKENS[model_name]
1015
1271
  cfg = {
1016
1272
  "model_type": model_type,
1017
1273
  "model_id": model_id,
@@ -2,5 +2,6 @@ from janus.converter.converter import Converter
2
2
  from janus.converter.diagram import DiagramGenerator
3
3
  from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
4
4
  from janus.converter.evaluate import Evaluator
5
+ from janus.converter.partition import Partitioner
5
6
  from janus.converter.requirements import RequirementsDocumenter
6
7
  from janus.converter.translate import Translator