janus-llm 2.0.2__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. janus/__init__.py +2 -2
  2. janus/__main__.py +1 -1
  3. janus/_tests/test_cli.py +1 -2
  4. janus/cli.py +43 -51
  5. janus/converter/__init__.py +6 -0
  6. janus/converter/_tests/__init__.py +0 -0
  7. janus/{_tests → converter/_tests}/test_translate.py +11 -22
  8. janus/converter/converter.py +614 -0
  9. janus/converter/diagram.py +124 -0
  10. janus/converter/document.py +131 -0
  11. janus/converter/evaluate.py +15 -0
  12. janus/converter/requirements.py +50 -0
  13. janus/converter/translate.py +108 -0
  14. janus/embedding/_tests/test_collections.py +2 -2
  15. janus/language/_tests/test_splitter.py +1 -1
  16. janus/language/alc/__init__.py +1 -0
  17. janus/language/alc/_tests/__init__.py +0 -0
  18. janus/language/alc/_tests/test_alc.py +28 -0
  19. janus/language/alc/alc.py +87 -0
  20. janus/language/block.py +4 -2
  21. janus/language/combine.py +0 -1
  22. janus/language/mumps/mumps.py +2 -3
  23. janus/language/naive/__init__.py +1 -1
  24. janus/language/naive/basic_splitter.py +4 -4
  25. janus/language/naive/chunk_splitter.py +4 -4
  26. janus/language/naive/registry.py +1 -1
  27. janus/language/naive/simple_ast.py +23 -12
  28. janus/language/naive/tag_splitter.py +4 -4
  29. janus/language/splitter.py +10 -4
  30. janus/language/treesitter/treesitter.py +26 -8
  31. janus/llm/model_callbacks.py +34 -37
  32. janus/llm/models_info.py +16 -3
  33. janus/metrics/_tests/test_llm.py +2 -3
  34. janus/metrics/_tests/test_rouge_score.py +1 -1
  35. janus/metrics/_tests/test_similarity_score.py +1 -1
  36. janus/metrics/complexity_metrics.py +3 -4
  37. janus/metrics/metric.py +3 -4
  38. janus/metrics/reading.py +27 -5
  39. janus/prompts/prompt.py +67 -7
  40. janus/utils/enums.py +6 -5
  41. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/METADATA +1 -1
  42. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/RECORD +45 -35
  43. janus/converter.py +0 -158
  44. janus/translate.py +0 -981
  45. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/LICENSE +0 -0
  46. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/WHEEL +0 -0
  47. {janus_llm-2.0.2.dist-info → janus_llm-3.0.0.dist-info}/entry_points.txt +0 -0
janus/__init__.py CHANGED
@@ -2,10 +2,10 @@ import warnings
2
2
 
3
3
  from langchain_core._api.deprecation import LangChainDeprecationWarning
4
4
 
5
+ from .converter.translate import Translator
5
6
  from .metrics import * # noqa: F403
6
- from .translate import Translator
7
7
 
8
- __version__ = "2.0.2"
8
+ __version__ = "3.0.0"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
janus/__main__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .cli import app
1
+ from janus.cli import app
2
2
 
3
3
  if __name__ == "__main__":
4
4
  app()
janus/_tests/test_cli.py CHANGED
@@ -104,7 +104,7 @@ class TestCli(unittest.TestCase):
104
104
  result = self.runner.invoke(app, ["db", "rm", "test-db-name", "-y"])
105
105
  self.assertEqual(result.exit_code, 0)
106
106
 
107
- @patch("janus.translate.Translator.translate", autospec=True)
107
+ @patch("janus.converter.translate.Translator.translate", autospec=True)
108
108
  def test_translate(self, mock_translate):
109
109
  # Arrange
110
110
  mock_instance = mock_translate.return_value
@@ -119,7 +119,6 @@ class TestCli(unittest.TestCase):
119
119
  overwrite=True,
120
120
  temp=0.7,
121
121
  prompt_template="simple",
122
- parser_type="code",
123
122
  collection=None,
124
123
  )
125
124
 
janus/cli.py CHANGED
@@ -12,34 +12,32 @@ from rich.console import Console
12
12
  from rich.prompt import Confirm
13
13
  from typing_extensions import Annotated
14
14
 
15
- from janus.language.naive.registry import CUSTOM_SPLITTERS
16
-
17
- from .embedding.collections import Collections
18
- from .embedding.database import ChromaEmbeddingDatabase
19
- from .embedding.embedding_models_info import (
15
+ from janus.converter.diagram import DiagramGenerator
16
+ from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
17
+ from janus.converter.requirements import RequirementsDocumenter
18
+ from janus.converter.translate import Translator
19
+ from janus.embedding.collections import Collections
20
+ from janus.embedding.database import ChromaEmbeddingDatabase
21
+ from janus.embedding.embedding_models_info import (
20
22
  EMBEDDING_COST_PER_MODEL,
21
23
  EMBEDDING_MODEL_CONFIG_DIR,
22
24
  EMBEDDING_TOKEN_LIMITS,
23
25
  EmbeddingModelType,
24
26
  )
25
- from .embedding.vectorize import ChromaDBVectorizer
26
- from .language.binary import BinarySplitter
27
- from .language.mumps import MumpsSplitter
28
- from .language.treesitter import TreeSitterSplitter
29
- from .llm.model_callbacks import COST_PER_1K_TOKENS
30
- from .llm.models_info import MODEL_CONFIG_DIR, MODEL_TYPE_CONSTRUCTORS, TOKEN_LIMITS
31
- from .metrics.cli import evaluate
32
- from .translate import (
33
- PARSER_TYPES,
34
- DiagramGenerator,
35
- Documenter,
36
- MadLibsDocumenter,
37
- MultiDocumenter,
38
- RequirementsDocumenter,
39
- Translator,
27
+ from janus.embedding.vectorize import ChromaDBVectorizer
28
+ from janus.language.binary import BinarySplitter
29
+ from janus.language.mumps import MumpsSplitter
30
+ from janus.language.naive.registry import CUSTOM_SPLITTERS
31
+ from janus.language.treesitter import TreeSitterSplitter
32
+ from janus.llm.model_callbacks import COST_PER_1K_TOKENS
33
+ from janus.llm.models_info import (
34
+ MODEL_CONFIG_DIR,
35
+ MODEL_TYPE_CONSTRUCTORS,
36
+ TOKEN_LIMITS,
40
37
  )
41
- from .utils.enums import LANGUAGES
42
- from .utils.logger import create_logger
38
+ from janus.metrics.cli import evaluate
39
+ from janus.utils.enums import LANGUAGES
40
+ from janus.utils.logger import create_logger
43
41
 
44
42
  httpx_logger = logging.getLogger("httpx")
45
43
  httpx_logger.setLevel(logging.WARNING)
@@ -72,10 +70,14 @@ def get_collections_config():
72
70
 
73
71
 
74
72
  app = typer.Typer(
75
- help="Choose a command",
73
+ help=(
74
+ "[bold][dark_orange]Janus[/dark_orange] is a CLI for translating, "
75
+ "documenting, and diagramming code using large language models.[/bold]"
76
+ ),
76
77
  add_completion=False,
77
78
  no_args_is_help=True,
78
79
  context_settings={"help_option_names": ["-h", "--help"]},
80
+ rich_markup_mode="rich",
79
81
  )
80
82
 
81
83
 
@@ -207,15 +209,6 @@ def translate(
207
209
  "path to a directory containing those template files.",
208
210
  ),
209
211
  ] = "simple",
210
- parser_type: Annotated[
211
- str,
212
- typer.Option(
213
- "--parser",
214
- "-P",
215
- click_type=click.Choice(sorted(PARSER_TYPES)),
216
- help="The type of parser to use.",
217
- ),
218
- ] = "code",
219
212
  collection: Annotated[
220
213
  str,
221
214
  typer.Option(
@@ -225,15 +218,15 @@ def translate(
225
218
  "collection with the name provided.",
226
219
  ),
227
220
  ] = None,
228
- custom_splitter: Annotated[
229
- Optional[str],
221
+ splitter_type: Annotated[
222
+ str,
230
223
  typer.Option(
231
- "-cs",
232
- "--custom-splitter",
224
+ "-S",
225
+ "--splitter",
233
226
  help="Name of custom splitter to use",
234
227
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
235
228
  ),
236
- ] = None,
229
+ ] = "file",
237
230
  max_tokens: Annotated[
238
231
  int,
239
232
  typer.Option(
@@ -265,10 +258,9 @@ def translate(
265
258
  max_prompts=max_prompts,
266
259
  max_tokens=max_tokens,
267
260
  prompt_template=prompt_template,
268
- parser_type=parser_type,
269
261
  db_path=db_loc,
270
262
  db_config=collections_config,
271
- custom_splitter=custom_splitter,
263
+ splitter_type=splitter_type,
272
264
  )
273
265
  translator.translate(input_dir, output_dir, overwrite, collection)
274
266
 
@@ -364,15 +356,15 @@ def document(
364
356
  "collection with the name provided.",
365
357
  ),
366
358
  ] = None,
367
- custom_splitter: Annotated[
368
- Optional[str],
359
+ splitter_type: Annotated[
360
+ str,
369
361
  typer.Option(
370
- "-cs",
371
- "--custom-splitter",
362
+ "-S",
363
+ "--splitter",
372
364
  help="Name of custom splitter to use",
373
365
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
374
366
  ),
375
- ] = None,
367
+ ] = "file",
376
368
  max_tokens: Annotated[
377
369
  int,
378
370
  typer.Option(
@@ -393,7 +385,7 @@ def document(
393
385
  max_tokens=max_tokens,
394
386
  db_path=db_loc,
395
387
  db_config=collections_config,
396
- custom_splitter=custom_splitter,
388
+ splitter_type=splitter_type,
397
389
  )
398
390
  if doc_mode == "madlibs":
399
391
  documenter = MadLibsDocumenter(
@@ -489,15 +481,15 @@ def diagram(
489
481
  help="Whether to use documentation in generation",
490
482
  ),
491
483
  ] = False,
492
- custom_splitter: Annotated[
493
- Optional[str],
484
+ splitter_type: Annotated[
485
+ str,
494
486
  typer.Option(
495
- "-cs",
496
- "--custom-splitter",
487
+ "-S",
488
+ "--splitter",
497
489
  help="Name of custom splitter to use",
498
490
  click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),
499
491
  ),
500
- ] = None,
492
+ ] = "file",
501
493
  ):
502
494
  model_arguments = dict(temperature=temperature)
503
495
  collections_config = get_collections_config()
@@ -510,7 +502,7 @@ def diagram(
510
502
  db_config=collections_config,
511
503
  diagram_type=diagram_type,
512
504
  add_documentation=add_documentation,
513
- custom_splitter=custom_splitter,
505
+ splitter_type=splitter_type,
514
506
  )
515
507
  diagram_generator.translate(input_dir, output_dir, overwrite, collection)
516
508
 
@@ -0,0 +1,6 @@
1
+ from .converter import Converter
2
+ from .diagram import DiagramGenerator
3
+ from .document import Documenter, MadLibsDocumenter, MultiDocumenter
4
+ from .evaluate import Evaluator
5
+ from .requirements import RequirementsDocumenter
6
+ from .translate import Translator
File without changes
@@ -7,8 +7,11 @@ from langchain.schema import Document
7
7
  from langchain.schema.embeddings import Embeddings
8
8
  from langchain.schema.vectorstore import VST, VectorStore
9
9
 
10
- from ..language.block import CodeBlock, TranslatedCodeBlock
11
- from ..translate import DiagramGenerator, Translator
10
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
11
+
12
+ from ..diagram import DiagramGenerator
13
+ from ..requirements import RequirementsDocumenter
14
+ from ..translate import Translator
12
15
 
13
16
  # from langchain.vectorstores import Chroma
14
17
 
@@ -84,14 +87,10 @@ class TestTranslator(unittest.TestCase):
84
87
  self.test_file = Path("janus/language/treesitter/_tests/languages/fortran.f90")
85
88
  self.TEST_FILE_EMBEDDING_COUNT = 14
86
89
 
87
- self.req_translator = Translator(
90
+ self.req_translator = RequirementsDocumenter(
88
91
  model="gpt-3.5-turbo-0125",
89
- # embeddings_override=MockEmbeddingsFactory(),
90
92
  source_language="fortran",
91
- target_language="text",
92
- target_version="3.10",
93
93
  prompt_template="requirements",
94
- parser_type="text",
95
94
  )
96
95
 
97
96
  @pytest.mark.translate
@@ -273,11 +272,6 @@ class TestTranslator(unittest.TestCase):
273
272
  # print_query_results(QUERY_STRING, n_results)
274
273
  # self.assertTrue(len(n_results) == 1, "Was splitting changed?")
275
274
 
276
- def test_output_as_requirements(self):
277
- """Is output type requirements?"""
278
- self.assertFalse(self.translator.outputting_requirements())
279
- self.assertTrue(self.req_translator.outputting_requirements())
280
-
281
275
  # @pytest.mark.slow
282
276
  # def test_document_embeddings_added_by_translate(self):
283
277
  # vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
@@ -310,7 +304,6 @@ class TestTranslator(unittest.TestCase):
310
304
  self.assertRaises(
311
305
  ValueError, self.translator.set_target_language, "gobbledy", "goobledy"
312
306
  )
313
- self.assertRaises(ValueError, self.translator.set_parser_type, "blah")
314
307
  self.assertRaises(
315
308
  ValueError, self.translator.set_source_language, "scribbledy-doop"
316
309
  )
@@ -360,13 +353,12 @@ class TestDiagramGenerator(unittest.TestCase):
360
353
 
361
354
 
362
355
  @pytest.mark.parametrize(
363
- "source_language,prompt_template,expected_target_language,expected_target_version,"
364
- "parser_type",
356
+ "source_language,prompt_template,expected_target_language,expected_target_version,",
365
357
  [
366
- ("python", "document_inline", "python", "3.10", "code"),
367
- ("fortran", "document", "text", None, "text"),
368
- ("mumps", "requirements", "text", None, "text"),
369
- ("python", "simple", "javascript", "es6", "code"),
358
+ ("python", "document_inline", "python", "3.10"),
359
+ ("fortran", "document", "text", None),
360
+ ("mumps", "requirements", "text", None),
361
+ ("python", "simple", "javascript", "es6"),
370
362
  ],
371
363
  )
372
364
  def test_language_combinations(
@@ -374,7 +366,6 @@ def test_language_combinations(
374
366
  prompt_template: str,
375
367
  expected_target_language: str,
376
368
  expected_target_version: str,
377
- parser_type: str,
378
369
  ):
379
370
  """Tests that translator target language settings are consistent
380
371
  with prompt template expectations.
@@ -383,12 +374,10 @@ def test_language_combinations(
383
374
  translator.set_model("gpt-3.5-turbo-0125")
384
375
  translator.set_source_language(source_language)
385
376
  translator.set_target_language(expected_target_language, expected_target_version)
386
- translator.set_parser_type(parser_type)
387
377
  translator.set_prompt(prompt_template)
388
378
  translator._load_parameters()
389
379
  assert translator._target_language == expected_target_language # nosec
390
380
  assert translator._target_version == expected_target_version # nosec
391
- assert translator._parser_type == parser_type # nosec
392
381
  assert translator._splitter.language == source_language # nosec
393
382
  assert translator._splitter.model.model_name == "gpt-3.5-turbo-0125" # nosec
394
383
  assert translator._prompt_template_name == prompt_template # nosec