janus-llm 4.3.1__py3-none-any.whl → 4.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. janus/__init__.py +1 -1
  2. janus/__main__.py +1 -1
  3. janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
  4. janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
  5. janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
  6. janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
  7. janus/_tests/test_cli.py +3 -2
  8. janus/cli/aggregate.py +135 -0
  9. janus/cli/cli.py +117 -0
  10. janus/cli/constants.py +49 -0
  11. janus/cli/database.py +289 -0
  12. janus/cli/diagram.py +207 -0
  13. janus/cli/document.py +183 -0
  14. janus/cli/embedding.py +122 -0
  15. janus/cli/llm.py +191 -0
  16. janus/cli/partition.py +134 -0
  17. janus/cli/pipeline.py +123 -0
  18. janus/cli/self_eval.py +147 -0
  19. janus/cli/translate.py +192 -0
  20. janus/converter/__init__.py +1 -1
  21. janus/converter/_tests/test_translate.py +7 -5
  22. janus/converter/chain.py +180 -0
  23. janus/converter/converter.py +444 -153
  24. janus/converter/diagram.py +8 -6
  25. janus/converter/document.py +27 -16
  26. janus/converter/evaluate.py +143 -144
  27. janus/converter/partition.py +2 -10
  28. janus/converter/requirements.py +4 -40
  29. janus/converter/translate.py +3 -59
  30. janus/embedding/collections.py +1 -1
  31. janus/language/alc/_tests/alc.asm +3779 -0
  32. janus/language/binary/_tests/hello.bin +0 -0
  33. janus/language/block.py +78 -14
  34. janus/language/file.py +1 -1
  35. janus/language/mumps/_tests/mumps.m +235 -0
  36. janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
  37. janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
  38. janus/language/treesitter/_tests/languages/matlab.m +225 -0
  39. janus/llm/models_info.py +9 -1
  40. janus/metrics/_tests/asm_test_file.asm +10 -0
  41. janus/metrics/_tests/mumps_test_file.m +6 -0
  42. janus/metrics/_tests/test_treesitter_metrics.py +1 -1
  43. janus/metrics/metric.py +47 -124
  44. janus/metrics/prompts/clarity.txt +8 -0
  45. janus/metrics/prompts/completeness.txt +16 -0
  46. janus/metrics/prompts/faithfulness.txt +10 -0
  47. janus/metrics/prompts/hallucination.txt +16 -0
  48. janus/metrics/prompts/quality.txt +8 -0
  49. janus/metrics/prompts/readability.txt +16 -0
  50. janus/metrics/prompts/usefulness.txt +16 -0
  51. janus/parsers/code_parser.py +4 -4
  52. janus/parsers/doc_parser.py +12 -9
  53. janus/parsers/parser.py +7 -0
  54. janus/parsers/partition_parser.py +6 -4
  55. janus/parsers/reqs_parser.py +11 -8
  56. janus/parsers/uml.py +5 -4
  57. janus/prompts/prompt.py +2 -2
  58. janus/prompts/templates/README.md +30 -0
  59. janus/prompts/templates/basic_aggregation/human.txt +6 -0
  60. janus/prompts/templates/basic_aggregation/system.txt +1 -0
  61. janus/prompts/templates/basic_refinement/human.txt +14 -0
  62. janus/prompts/templates/basic_refinement/system.txt +1 -0
  63. janus/prompts/templates/diagram/human.txt +9 -0
  64. janus/prompts/templates/diagram/system.txt +1 -0
  65. janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
  66. janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
  67. janus/prompts/templates/document/human.txt +10 -0
  68. janus/prompts/templates/document/system.txt +1 -0
  69. janus/prompts/templates/document_cloze/human.txt +11 -0
  70. janus/prompts/templates/document_cloze/system.txt +1 -0
  71. janus/prompts/templates/document_cloze/variables.json +4 -0
  72. janus/prompts/templates/document_cloze/variables_asm.json +4 -0
  73. janus/prompts/templates/document_inline/human.txt +13 -0
  74. janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
  75. janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
  76. janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
  77. janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
  78. janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
  79. janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
  80. janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
  81. janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
  82. janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
  83. janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
  84. janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
  85. janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
  86. janus/prompts/templates/multidocument/human.txt +15 -0
  87. janus/prompts/templates/multidocument/system.txt +1 -0
  88. janus/prompts/templates/partition/human.txt +22 -0
  89. janus/prompts/templates/partition/system.txt +1 -0
  90. janus/prompts/templates/partition/variables.json +4 -0
  91. janus/prompts/templates/pseudocode/human.txt +7 -0
  92. janus/prompts/templates/pseudocode/system.txt +7 -0
  93. janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
  94. janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
  95. janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
  96. janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
  97. janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
  98. janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
  99. janus/prompts/templates/refinement/hallucination/human.txt +13 -0
  100. janus/prompts/templates/refinement/hallucination/system.txt +1 -0
  101. janus/prompts/templates/refinement/reflection/human.txt +15 -0
  102. janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
  103. janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
  104. janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
  105. janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
  106. janus/prompts/templates/refinement/reflection/system.txt +1 -0
  107. janus/prompts/templates/refinement/revision/human.txt +16 -0
  108. janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
  109. janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
  110. janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
  111. janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
  112. janus/prompts/templates/refinement/revision/system.txt +1 -0
  113. janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
  114. janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
  115. janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
  116. janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
  117. janus/prompts/templates/requirements/human.txt +13 -0
  118. janus/prompts/templates/requirements/system.txt +2 -0
  119. janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
  120. janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
  121. janus/prompts/templates/simple/human.txt +16 -0
  122. janus/prompts/templates/simple/system.txt +3 -0
  123. janus/refiners/format.py +49 -0
  124. janus/refiners/refiner.py +113 -4
  125. janus/utils/enums.py +127 -112
  126. janus/utils/logger.py +2 -0
  127. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/METADATA +18 -18
  128. janus_llm-4.4.5.dist-info/RECORD +210 -0
  129. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/WHEEL +1 -1
  130. janus_llm-4.4.5.dist-info/entry_points.txt +3 -0
  131. janus/cli.py +0 -1488
  132. janus/metrics/_tests/test_llm.py +0 -90
  133. janus/metrics/llm_metrics.py +0 -202
  134. janus_llm-4.3.1.dist-info/RECORD +0 -115
  135. janus_llm-4.3.1.dist-info/entry_points.txt +0 -3
  136. {janus_llm-4.3.1.dist-info → janus_llm-4.4.5.dist-info}/LICENSE +0 -0
janus/cli/embedding.py ADDED
@@ -0,0 +1,122 @@
1
+ import click
2
+ import typer
3
+ from typing_extensions import Annotated
4
+
5
+ from janus.embedding.embedding_models_info import EmbeddingModelType
6
+
7
# Typer sub-application that groups the embedding-model commands. Running it
# with no arguments prints the help text, and both -h and --help are accepted.
embedding = typer.Typer(
    context_settings=dict(help_option_names=["-h", "--help"]),
    no_args_is_help=True,
    add_completion=False,
    help="Embedding model commands",
)
13
+
14
+
15
@embedding.command("add", help="Add an embedding model config to janus")
def embedding_add(
    model_name: Annotated[
        str, typer.Argument(help="The user's custom name for the model")
    ],
    model_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="The type of the model",
            click_type=click.Choice([val.value for val in EmbeddingModelType]),
        ),
    ] = "OpenAI",
):
    """Interactively build an embedding model config and write it to disk.

    The remaining settings are collected with prompts that depend on
    ``model_type``, and the result is saved as
    ``EMBEDDING_MODEL_CONFIG_DIR / "<model_name>.json"``.

    Args:
        model_name: The user's custom name for the model; used as the stem of
            the config file.
        model_type: One of the ``EmbeddingModelType`` values.

    Raises:
        ValueError: If ``model_type`` matches none of the handled types.
    """
    import json
    from pathlib import Path

    from pydantic import AnyHttpUrl

    from janus.embedding.embedding_models_info import (
        EMBEDDING_COST_PER_MODEL,
        EMBEDDING_MODEL_CONFIG_DIR,
        EMBEDDING_TOKEN_LIMITS,
    )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    EMBEDDING_MODEL_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    model_cfg = EMBEDDING_MODEL_CONFIG_DIR / f"{model_name}.json"

    if model_type in EmbeddingModelType.HuggingFaceInferenceAPI.values:
        hf = typer.style("HuggingFaceInferenceAPI", fg="yellow")
        # value_proc makes the prompt re-ask until the answer parses as a URL.
        url = typer.prompt(f"Enter the {hf} model's URL", type=str, value_proc=AnyHttpUrl)
        api_model_name = typer.prompt("Enter the model's name", type=str, default="")
        api_key = typer.prompt("Enter the API key", type=str, default="")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=8191, type=int
        )
        in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
        out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
        cfg = {
            "model_type": model_type,
            "model_identifier": str(url),
            "model_args": {"model_name": api_model_name, "api_key": api_key},
            "token_limit": max_tokens,
            "model_cost": {"input": in_cost, "output": out_cost},
        }
    elif model_type in EmbeddingModelType.HuggingFaceLocal.values:
        hf = typer.style("HuggingFace", fg="yellow")
        model_id = typer.prompt(
            f"Enter the {hf} model ID",
            default="sentence-transformers/all-MiniLM-L6-v2",
            type=str,
        )
        cache_folder = str(
            Path(
                typer.prompt(
                    "Enter the model's cache folder",
                    default=EMBEDDING_MODEL_CONFIG_DIR / "cache",
                    type=str,
                )
            )
        )
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=8191, type=int
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": model_id,
            "model_args": {"cache_folder": cache_folder},
            "token_limit": max_tokens,
            # Locally hosted models are recorded as free to run.
            "model_cost": {"input": 0, "output": 0},
        }
    elif model_type in EmbeddingModelType.OpenAI.values:
        available_models = list(EMBEDDING_COST_PER_MODEL.keys())

        open_ai = typer.style("OpenAI", fg="green")
        # Kept in its own variable so the user's custom ``model_name`` (the
        # config file stem) is not silently rebound to the OpenAI model name.
        openai_model_name = typer.prompt(
            f"Enter the {open_ai} model name",
            default="text-embedding-3-small",
            type=click.types.Choice(available_models),
            show_choices=False,
        )
        cfg = {
            "model_type": model_type,
            "model_identifier": openai_model_name,
            "model_args": {"model": openai_model_name},
            "token_limit": EMBEDDING_TOKEN_LIMITS[openai_model_name],
            "model_cost": EMBEDDING_COST_PER_MODEL[openai_model_name],
        }
    else:
        raise ValueError(f"Unknown model type {model_type}")

    with open(model_cfg, "w", encoding="utf-8") as f:
        json.dump(cfg, f, indent=2)
    print(f"Model config written to {model_cfg}")
janus/cli/llm.py ADDED
@@ -0,0 +1,191 @@
1
+ import click
2
+ import typer
3
+ from rich import print
4
+ from typing_extensions import Annotated
5
+
6
+ from janus.llm.models_info import MODEL_TYPE_CONSTRUCTORS
7
+
8
# Typer sub-application that groups the LLM configuration commands. Running it
# with no arguments prints the help text, and both -h and --help are accepted.
llm = typer.Typer(
    context_settings=dict(help_option_names=["-h", "--help"]),
    no_args_is_help=True,
    add_completion=False,
    help="LLM commands",
)
14
+
15
+
16
@llm.command("add", help="Add a model config to janus")
def llm_add(
    model_name: Annotated[
        str, typer.Argument(help="The user's custom name of the model")
    ],
    model_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="The type of the model",
            click_type=click.Choice(sorted(MODEL_TYPE_CONSTRUCTORS)),
        ),
    ] = "Azure",
):
    """Interactively build an LLM config and write it to disk.

    The remaining settings are collected with prompts that depend on
    ``model_type``, and the result is saved as
    ``MODEL_CONFIG_DIR / "<model_name>.json"``.

    Args:
        model_name: The user's custom name of the model; used as the stem of
            the config file.
        model_type: One of the keys of ``MODEL_TYPE_CONSTRUCTORS``.

    Raises:
        ValueError: If ``model_type`` is not one of the handled types.
    """
    import json

    from janus.llm.models_info import (
        COST_PER_1K_TOKENS,
        MODEL_CONFIG_DIR,
        MODEL_ID_TO_LONG_ID,
        TOKEN_LIMITS,
        azure_models,
        bedrock_models,
        openai_models,
    )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    MODEL_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    model_cfg = MODEL_CONFIG_DIR / f"{model_name}.json"

    if model_type == "HuggingFace":
        url = typer.prompt("Enter the model's URL")
        # The overall token limit and the output-token cap are separate
        # answers; storing both in one variable would discard the first.
        token_limit = typer.prompt(
            "Enter the model's token limit", default=65536, type=int
        )
        max_new_tokens = typer.prompt(
            "Enter the model's max output tokens", default=8192, type=int
        )
        in_cost = typer.prompt("Enter the cost per input token", default=0, type=float)
        out_cost = typer.prompt("Enter the cost per output token", default=0, type=float)
        params = dict(
            inference_server_url=url,
            max_new_tokens=max_new_tokens,
            top_k=10,
            top_p=0.95,
            typical_p=0.95,
            temperature=0.01,
            repetition_penalty=1.03,
            timeout=240,
        )
        cfg = {
            "model_type": model_type,
            "model_id": "gpt-4o",  # This is a placeholder to use the Azure PromptEngine
            "model_args": params,
            "token_limit": token_limit,
            "model_cost": {"input": in_cost, "output": out_cost},
            "input_token_proportion": 0.4,
        }
    elif model_type == "HuggingFaceLocal":
        model_id = typer.prompt("Enter the model ID")
        task = typer.prompt("Enter the task")
        max_tokens = typer.prompt(
            "Enter the model's maximum tokens", default=4096, type=int
        )
        cfg = {
            "model_type": model_type,
            "model_args": {"model_id": model_id, "task": task},
            "token_limit": max_tokens,
            # Locally hosted models are recorded as free to run.
            "model_cost": {"input": 0, "output": 0},
            "input_token_proportion": 0.4,
        }
    elif model_type == "OpenAI":
        print("DEPRECATED: Use 'Azure' instead. CTRL+C to exit.")
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="gpt-4o",
            type=click.Choice(openai_models),
            show_choices=False,
        )
        # Look the limits up by the selected model, not by the user's custom
        # config name (``model_name``), which is generally not a key of the
        # lookup tables. This mirrors the Azure and Bedrock branches.
        long_model_id = MODEL_ID_TO_LONG_ID[model_id]
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            "model_args": dict(model_name=long_model_id, temperature=0.7, n=1),
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    elif model_type == "Azure":
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="gpt-4o",
            type=click.Choice(azure_models),
            show_choices=False,
        )
        long_model_id = MODEL_ID_TO_LONG_ID[model_id]
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            # Azure uses the "azure_deployment" key for what we're calling
            # "long_model_id"
            "model_args": dict(azure_deployment=long_model_id, temperature=0.7, n=1),
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    elif model_type in ("BedrockChat", "Bedrock"):
        model_id = typer.prompt(
            "Enter the model ID (list model IDs with `janus llm ls -a`)",
            default="bedrock-claude-sonnet",
            type=click.Choice(bedrock_models),
            show_choices=False,
        )
        long_model_id = MODEL_ID_TO_LONG_ID[model_id]
        cfg = {
            "model_type": model_type,
            "model_id": model_id,
            # Bedrock uses the "model_id" key for what we're calling
            # "long_model_id"
            "model_args": dict(
                model_id=long_model_id, model_kwargs={"temperature": 0.7}
            ),
            "token_limit": TOKEN_LIMITS[long_model_id],
            "model_cost": COST_PER_1K_TOKENS[long_model_id],
            "input_token_proportion": 0.4,
        }
    else:
        raise ValueError(f"Unknown model type {model_type}")

    with open(model_cfg, "w", encoding="utf-8") as f:
        json.dump(cfg, f, indent=2)
    print(f"Model config written to {model_cfg}")
163
+
164
+
165
@llm.command("ls", help="List all of the user-configured models")
def llm_ls(
    # NOTE: ``all`` shadows the builtin inside this function; the name is kept
    # because it is part of the command's signature.
    all: Annotated[
        bool,
        typer.Option(
            "--all",
            "-a",
            is_flag=True,
            help="List all models, including the default model IDs.",
        ),
    ] = False,
):
    """Print the user-configured models and, optionally, every known model ID.

    Each ``*.json`` file in ``MODEL_CONFIG_DIR`` is listed by file stem
    together with the ``model_type`` stored in it.

    Args:
        all: When true, also print every entry of ``MODEL_TYPES``.
    """
    import json

    from janus.llm.models_info import MODEL_CONFIG_DIR, MODEL_TYPES

    print("\n[green]User-configured models[/green]:")
    # Sorted so the listing is stable regardless of directory iteration order.
    for model_cfg in sorted(MODEL_CONFIG_DIR.glob("*.json")):
        with open(model_cfg, "r", encoding="utf-8") as f:
            cfg = json.load(f)
        print(f"\t[blue]{model_cfg.stem}[/blue]: [purple]{cfg['model_type']}[/purple]")

    if all:
        print("\n[green]Available model IDs[/green]:")
        for model_id, model_type in MODEL_TYPES.items():
            print(f"\t[blue]{model_id}[/blue]: [purple]{model_type}[/purple]")
janus/cli/partition.py ADDED
@@ -0,0 +1,134 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import click
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from janus.cli.constants import REFINERS
9
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
10
+ from janus.utils.enums import LANGUAGES
11
+
12
+
13
# CLI command: partition the source files in ``input_dir`` with a
# ``Partitioner`` and write the results to ``output_dir``.
#
# Every parameter is a command-line option; its meaning is given by the
# ``help`` text of that option. The command builds a ``Partitioner`` from the
# options and calls its ``translate`` method.
#
# Documented with comments rather than a docstring because Typer falls back to
# a command function's docstring for its help text when none is passed at
# registration, and the registration is not visible here.
def partition(
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be partitioned. ",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the partitioned code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ] = "gpt-4o",
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during translation",
        ),
    ] = None,
    max_prompts: Annotated[
        int,
        typer.Option(
            "--max-prompts",
            "-m",
            help="The maximum number of times to prompt a model on one functional block "
            "before exiting the application. This is to prevent wasting too much money.",
        ),
    ] = 10,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    temperature: Annotated[
        float,
        typer.Option("--temperature", "-t", help="Sampling temperature.", min=0, max=2),
    ] = 0.7,
    splitter_type: Annotated[
        str,
        typer.Option(
            "-S",
            "--splitter",
            help="Name of custom splitter to use",
            click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
        ),
    ] = "file",
    refiner_types: Annotated[
        list[str],
        typer.Option(
            "-r",
            "--refiner",
            # Implicit concatenation instead of a backslash continuation inside
            # the literal, which embedded the source indentation in the text.
            help="List of refiner types to use. Add -r for each refiner to use in "
            "refinement chain",
            click_type=click.Choice(list(REFINERS.keys())),
        ),
    ] = ["JanusRefiner"],
    max_tokens: Annotated[
        # Optional because the default is None.
        Optional[int],
        typer.Option(
            "--max-tokens",
            "-M",
            help="The maximum number of tokens the model will take in. "
            "If unspecificed, model's default max will be used.",
        ),
    ] = None,
    partition_token_limit: Annotated[
        int,
        typer.Option(
            "--partition-tokens",
            "-pt",
            help="The limit on the number of tokens per partition.",
        ),
    ] = 8192,
    use_janus_inputs: Annotated[
        bool,
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if converter should use janus inputs",
        ),
    ] = False,
):
    from janus.converter.partition import Partitioner

    # Map the refiner names chosen on the command line to their REFINERS
    # entries. A new list is built, so the default argument is never mutated.
    refiner_classes = [REFINERS[name] for name in refiner_types]
    partitioner = Partitioner(
        model=llm_name,
        model_arguments=dict(temperature=temperature),
        source_language=language,
        max_prompts=max_prompts,
        max_tokens=max_tokens,
        splitter_type=splitter_type,
        refiner_types=refiner_classes,
        partition_token_limit=partition_token_limit,
        use_janus_inputs=use_janus_inputs,
    )
    partitioner.translate(input_dir, output_dir, failure_dir, overwrite)
janus/cli/pipeline.py ADDED
@@ -0,0 +1,123 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ import click
6
+ import typer
7
+ from typing_extensions import Annotated
8
+
9
+ from janus.cli.constants import CONVERTERS
10
+ from janus.converter.chain import ConverterChain
11
+ from janus.utils.enums import LANGUAGES
12
+
13
+
14
def instiantiate(x):
    """Recursively turn a JSON-style spec into converter objects.

    A dict that has a ``"type"`` key is treated as a converter spec: its
    optional ``"args"`` list and ``"kwargs"`` dict are instantiated
    recursively, then ``CONVERTERS[x["type"]]`` is called with them. Any other
    dict or list is rebuilt with its values instantiated, and every other
    value is returned unchanged.

    The input is never modified, so the same spec can be instantiated more
    than once and yields fresh objects each time.

    Args:
        x: A spec dict, a plain dict, a list, or a leaf value.

    Returns:
        The instantiated counterpart of ``x``.

    Raises:
        ValueError: If a spec's ``"type"`` is not a key of ``CONVERTERS``.
    """
    if isinstance(x, list):
        return [instiantiate(item) for item in x]
    if not isinstance(x, dict):
        return x
    if "type" not in x:
        return {key: instiantiate(value) for key, value in x.items()}

    converter_name = x["type"]
    # Fail fast on an unknown type, before building any nested converters.
    if converter_name not in CONVERTERS:
        raise ValueError(f"Error: {converter_name} is not a Converter")
    args = [instiantiate(arg) for arg in x.get("args", [])]
    kwargs = {
        key: instiantiate(value) for key, value in x.get("kwargs", {}).items()
    }
    return CONVERTERS[converter_name](*args, **kwargs)
32
+
33
+
34
def instiantiate_pipeline(
    pipeline: list[dict],
    language: str = "text",
    model: str = "gpt-4o",
    use_janus_inputs: Optional[bool] = None,
):
    """Build a ``ConverterChain`` from a list of converter specs.

    Each stage is instantiated with ``model`` and a ``source_language`` added
    to its kwargs. The first stage uses ``language``; every later stage uses
    the ``target_language`` of the converter built just before it. The input
    specs are copied, not modified.

    Args:
        pipeline: Converter specs in execution order; each is a dict with a
            ``"type"`` key and optional ``"args"`` / ``"kwargs"``.
        language: Source language passed to the first stage.
        model: Model name passed to every stage.
        use_janus_inputs: When not ``None``, forwarded to the first stage.

    Returns:
        A ``ConverterChain`` wrapping the instantiated converters.

    Raises:
        ValueError: If ``pipeline`` is empty.
    """
    if not pipeline:
        raise ValueError("Pipeline must contain at least one converter")

    first_stage, *later_stages = pipeline
    first_kwargs = {
        **first_stage.get("kwargs", {}),
        "source_language": language,
        "model": model,
    }
    if use_janus_inputs is not None:
        # NOTE(review): the partition command passes this setting to its
        # converter as ``use_janus_inputs``; confirm ``janus_inputs`` is the
        # keyword the converters expect here.
        first_kwargs["janus_inputs"] = use_janus_inputs
    converters = [instiantiate({**first_stage, "kwargs": first_kwargs})]

    for stage in later_stages:
        # A stage may omit "kwargs" entirely, so default to an empty mapping.
        stage_kwargs = {
            **stage.get("kwargs", {}),
            "source_language": converters[-1].target_language,
            "model": model,
        }
        converters.append(instiantiate({**stage, "kwargs": stage_kwargs}))
    return ConverterChain(*converters)
51
+
52
+
53
# CLI command: load a converter pipeline description from a JSON file, build
# the corresponding converter chain and run it over ``input_dir``.
#
# Every parameter is a command-line option; its meaning is given by the
# ``help`` text of that option.
#
# Documented with comments rather than a docstring because Typer falls back to
# a command function's docstring for its help text when none is passed at
# registration, and the registration is not visible here.
def pipeline(
    pipeline_file: Annotated[
        Path, typer.Option("-p", "--pipeline", help="Name of pipeline file to use")
    ],
    input_dir: Annotated[
        Path,
        typer.Option(
            "--input",
            "-i",
            help="The directory containing the source code to be translated. "
            "The files should all be in one flat directory.",
        ),
    ],
    language: Annotated[
        str,
        typer.Option(
            "--language",
            "-l",
            help="The language of the source code.",
            click_type=click.Choice(sorted(LANGUAGES)),
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output", "-o", help="The directory to store the translated code in."
        ),
    ],
    llm_name: Annotated[
        str,
        typer.Option(
            "--llm",
            "-L",
            help="The custom name of the model set with 'janus llm add'.",
        ),
    ],
    failure_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--failure-directory",
            "-f",
            help="The directory to store failure files during documentation",
        ),
    ] = None,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite/--preserve",
            help="Whether to overwrite existing files in the output directory",
        ),
    ] = False,
    use_janus_inputs: Annotated[
        Optional[bool],
        typer.Option(
            "-j",
            "--use-janus-inputs",
            help="Present if converter chain should use janus input files",
        ),
    ] = None,
):
    with open(pipeline_file, "r", encoding="utf-8") as f:
        pipeline_spec = json.load(f)
    # The pipeline builder indexes the document as a list of stage specs.
    if not isinstance(pipeline_spec, list):
        raise ValueError(
            f"Pipeline file {pipeline_file} must contain a JSON list of converters"
        )

    # Named ``converter_chain`` so the local does not shadow this function.
    converter_chain = instiantiate_pipeline(
        pipeline_spec,
        language=language,
        model=llm_name,
        use_janus_inputs=use_janus_inputs,
    )
    converter_chain.translate(
        input_directory=input_dir,
        output_directory=output_dir,
        failure_directory=failure_dir,
        overwrite=overwrite,
    )