janus-llm 3.2.0__py3-none-any.whl → 3.3.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +3 -3
- janus/_tests/test_cli.py +3 -3
- janus/cli.py +1 -1
- janus/converter/__init__.py +6 -6
- janus/converter/_tests/test_translate.py +6 -233
- janus/converter/converter.py +49 -7
- janus/converter/diagram.py +68 -55
- janus/embedding/_tests/test_collections.py +2 -2
- janus/embedding/_tests/test_database.py +1 -1
- janus/embedding/_tests/test_vectorize.py +3 -3
- janus/embedding/collections.py +2 -2
- janus/embedding/database.py +1 -1
- janus/embedding/embedding_models_info.py +1 -1
- janus/embedding/vectorize.py +5 -5
- janus/language/_tests/test_combine.py +1 -1
- janus/language/_tests/test_splitter.py +1 -1
- janus/language/alc/_tests/test_alc.py +3 -3
- janus/language/alc/alc.py +5 -5
- janus/language/binary/_tests/test_binary.py +2 -2
- janus/language/binary/binary.py +5 -5
- janus/language/block.py +2 -2
- janus/language/combine.py +3 -3
- janus/language/file.py +2 -2
- janus/language/mumps/_tests/test_mumps.py +3 -3
- janus/language/mumps/mumps.py +5 -5
- janus/language/mumps/patterns.py +1 -1
- janus/language/naive/__init__.py +4 -4
- janus/language/naive/basic_splitter.py +4 -4
- janus/language/naive/chunk_splitter.py +4 -4
- janus/language/naive/registry.py +1 -1
- janus/language/naive/simple_ast.py +5 -5
- janus/language/naive/tag_splitter.py +4 -4
- janus/language/node.py +1 -1
- janus/language/splitter.py +4 -4
- janus/language/treesitter/_tests/test_treesitter.py +3 -3
- janus/language/treesitter/treesitter.py +4 -4
- janus/llm/__init__.py +1 -1
- janus/llm/model_callbacks.py +1 -1
- janus/llm/models_info.py +5 -3
- janus/metrics/_tests/test_bleu.py +1 -1
- janus/metrics/_tests/test_chrf.py +1 -1
- janus/metrics/_tests/test_file_pairing.py +1 -1
- janus/metrics/_tests/test_llm.py +2 -2
- janus/metrics/_tests/test_reading.py +1 -1
- janus/metrics/_tests/test_rouge_score.py +1 -1
- janus/metrics/_tests/test_similarity_score.py +1 -1
- janus/metrics/_tests/test_treesitter_metrics.py +2 -2
- janus/metrics/bleu.py +1 -1
- janus/metrics/chrf.py +1 -1
- janus/metrics/complexity_metrics.py +4 -4
- janus/metrics/file_pairing.py +5 -5
- janus/metrics/llm_metrics.py +1 -1
- janus/metrics/metric.py +7 -7
- janus/metrics/reading.py +1 -1
- janus/metrics/rouge_score.py +1 -1
- janus/metrics/similarity.py +2 -2
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/doc_parser.py +3 -3
- janus/parsers/eval_parser.py +2 -2
- janus/parsers/refiner_parser.py +49 -0
- janus/parsers/reqs_parser.py +3 -3
- janus/parsers/uml.py +1 -2
- janus/prompts/prompt.py +2 -2
- janus/refiners/refiner.py +63 -0
- janus/utils/_tests/test_logger.py +1 -1
- janus/utils/_tests/test_progress.py +1 -1
- janus/utils/progress.py +1 -1
- {janus_llm-3.2.0.dist-info → janus_llm-3.3.0.dist-info}/METADATA +1 -1
- janus_llm-3.3.0.dist-info/RECORD +107 -0
- janus_llm-3.2.0.dist-info/RECORD +0 -105
- {janus_llm-3.2.0.dist-info → janus_llm-3.3.0.dist-info}/LICENSE +0 -0
- {janus_llm-3.2.0.dist-info → janus_llm-3.3.0.dist-info}/WHEEL +0 -0
- {janus_llm-3.2.0.dist-info → janus_llm-3.3.0.dist-info}/entry_points.txt +0 -0
janus/embedding/vectorize.py
CHANGED
@@ -6,10 +6,10 @@ from typing import Any, Dict, Optional, Sequence
|
|
6
6
|
from chromadb import Client, Collection
|
7
7
|
from langchain_community.vectorstores import Chroma
|
8
8
|
|
9
|
-
from
|
10
|
-
from
|
11
|
-
from .
|
12
|
-
from .
|
9
|
+
from janus.embedding.collections import Collections
|
10
|
+
from janus.embedding.database import ChromaEmbeddingDatabase
|
11
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
12
|
+
from janus.utils.enums import EmbeddingType
|
13
13
|
|
14
14
|
|
15
15
|
class Vectorizer(object):
|
@@ -59,7 +59,7 @@ class Vectorizer(object):
|
|
59
59
|
self,
|
60
60
|
code_block: CodeBlock,
|
61
61
|
collection_name: EmbeddingType | str,
|
62
|
-
filename: str # perhaps this should be a relative path from the source, but for
|
62
|
+
filename: str, # perhaps this should be a relative path from the source, but for
|
63
63
|
# now we're all in 1 directory
|
64
64
|
) -> None:
|
65
65
|
"""Calculate `code_block` embedding, returning success & storing in `embedding_id`
|
@@ -1,9 +1,9 @@
|
|
1
1
|
import unittest
|
2
2
|
from pathlib import Path
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
4
|
+
from janus.language.alc import AlcSplitter
|
5
|
+
from janus.language.combine import Combiner
|
6
|
+
from janus.llm import load_model
|
7
7
|
|
8
8
|
|
9
9
|
class TestAlcSplitter(unittest.TestCase):
|
janus/language/alc/alc.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
from langchain.schema.language_model import BaseLanguageModel
|
2
2
|
|
3
|
-
from
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
3
|
+
from janus.language.block import CodeBlock
|
4
|
+
from janus.language.combine import Combiner
|
5
|
+
from janus.language.node import NodeType
|
6
|
+
from janus.language.treesitter import TreeSitterSplitter
|
7
|
+
from janus.utils.logger import create_logger
|
8
8
|
|
9
9
|
log = create_logger(__name__)
|
10
10
|
|
@@ -5,8 +5,8 @@ from unittest.mock import patch
|
|
5
5
|
|
6
6
|
import pytest
|
7
7
|
|
8
|
-
from
|
9
|
-
from
|
8
|
+
from janus.language.binary import BinarySplitter
|
9
|
+
from janus.llm import load_model
|
10
10
|
|
11
11
|
|
12
12
|
class TestBinarySplitter(unittest.TestCase):
|
janus/language/binary/binary.py
CHANGED
@@ -7,11 +7,11 @@ from pathlib import Path
|
|
7
7
|
import tree_sitter
|
8
8
|
from langchain.schema.language_model import BaseLanguageModel
|
9
9
|
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
10
|
+
from janus.language.block import CodeBlock
|
11
|
+
from janus.language.combine import Combiner
|
12
|
+
from janus.language.treesitter import TreeSitterSplitter
|
13
|
+
from janus.utils.enums import LANGUAGES
|
14
|
+
from janus.utils.logger import create_logger
|
15
15
|
|
16
16
|
log = create_logger(__name__)
|
17
17
|
|
janus/language/block.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
from functools import total_ordering
|
2
2
|
from typing import ForwardRef, Hashable, Optional, Tuple
|
3
3
|
|
4
|
-
from
|
5
|
-
from .
|
4
|
+
from janus.language.node import NodeType
|
5
|
+
from janus.utils.logger import create_logger
|
6
6
|
|
7
7
|
log = create_logger(__name__)
|
8
8
|
|
janus/language/combine.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
from
|
2
|
-
from .
|
3
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
2
|
+
from janus.language.file import FileManager
|
3
|
+
from janus.utils.logger import create_logger
|
4
4
|
|
5
5
|
log = create_logger(__name__)
|
6
6
|
|
janus/language/file.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
import unittest
|
2
2
|
from pathlib import Path
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
4
|
+
from janus.language.combine import Combiner
|
5
|
+
from janus.language.mumps import MumpsSplitter
|
6
|
+
from janus.llm import load_model
|
7
7
|
|
8
8
|
|
9
9
|
class TestMumpsSplitter(unittest.TestCase):
|
janus/language/mumps/mumps.py
CHANGED
@@ -2,11 +2,11 @@ import re
|
|
2
2
|
|
3
3
|
from langchain.schema.language_model import BaseLanguageModel
|
4
4
|
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
8
|
-
from
|
9
|
-
from
|
5
|
+
from janus.language.block import CodeBlock
|
6
|
+
from janus.language.combine import Combiner
|
7
|
+
from janus.language.node import NodeType
|
8
|
+
from janus.language.splitter import Splitter
|
9
|
+
from janus.utils.logger import create_logger
|
10
10
|
|
11
11
|
log = create_logger(__name__)
|
12
12
|
|
janus/language/mumps/patterns.py
CHANGED
janus/language/naive/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from .basic_splitter import FileSplitter
|
2
|
-
from .chunk_splitter import ChunkSplitter
|
3
|
-
from .simple_ast import get_flexible_ast, get_strict_ast
|
4
|
-
from .tag_splitter import TagSplitter
|
1
|
+
from janus.language.naive.basic_splitter import FileSplitter
|
2
|
+
from janus.language.naive.chunk_splitter import ChunkSplitter
|
3
|
+
from janus.language.naive.simple_ast import get_flexible_ast, get_strict_ast
|
4
|
+
from janus.language.naive.tag_splitter import TagSplitter
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.chunk_splitter import ChunkSplitter
|
3
|
+
from janus.language.naive.registry import register_splitter
|
4
|
+
from janus.language.splitter import FileSizeError
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("file")
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.registry import register_splitter
|
3
|
+
from janus.language.node import NodeType
|
4
|
+
from janus.language.splitter import Splitter
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("chunk")
|
janus/language/naive/registry.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from
|
5
|
-
from .
|
1
|
+
from janus.language.alc.alc import AlcSplitter
|
2
|
+
from janus.language.mumps.mumps import MumpsSplitter
|
3
|
+
from janus.language.naive.registry import register_splitter
|
4
|
+
from janus.language.treesitter import TreeSitterSplitter
|
5
|
+
from janus.utils.enums import LANGUAGES
|
6
6
|
|
7
7
|
|
8
8
|
@register_splitter("ast-flex")
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.registry import register_splitter
|
3
|
+
from janus.language.node import NodeType
|
4
|
+
from janus.language.splitter import Splitter
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("tag")
|
janus/language/node.py
CHANGED
janus/language/splitter.py
CHANGED
@@ -6,10 +6,10 @@ from typing import List
|
|
6
6
|
import tiktoken
|
7
7
|
from langchain.schema.language_model import BaseLanguageModel
|
8
8
|
|
9
|
-
from
|
10
|
-
from .
|
11
|
-
from .
|
12
|
-
from .
|
9
|
+
from janus.language.block import CodeBlock
|
10
|
+
from janus.language.file import FileManager
|
11
|
+
from janus.language.node import NodeType
|
12
|
+
from janus.utils.logger import create_logger
|
13
13
|
|
14
14
|
log = create_logger(__name__)
|
15
15
|
|
@@ -1,9 +1,9 @@
|
|
1
1
|
import unittest
|
2
2
|
from pathlib import Path
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
4
|
+
from janus.language.combine import Combiner
|
5
|
+
from janus.language.treesitter import TreeSitterSplitter
|
6
|
+
from janus.llm import load_model
|
7
7
|
|
8
8
|
|
9
9
|
class TestTreeSitterSplitter(unittest.TestCase):
|
@@ -9,10 +9,10 @@ import tree_sitter
|
|
9
9
|
from git import Repo
|
10
10
|
from langchain.schema.language_model import BaseLanguageModel
|
11
11
|
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
15
|
-
from
|
12
|
+
from janus.language.block import CodeBlock, NodeType
|
13
|
+
from janus.language.splitter import Splitter
|
14
|
+
from janus.utils.enums import LANGUAGES
|
15
|
+
from janus.utils.logger import create_logger
|
16
16
|
|
17
17
|
log = create_logger(__name__)
|
18
18
|
|
janus/llm/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
from .models_info import load_model
|
1
|
+
from janus.llm.models_info import load_model
|
janus/llm/model_callbacks.py
CHANGED
@@ -8,7 +8,7 @@ from langchain_core.messages import AIMessage
|
|
8
8
|
from langchain_core.outputs import ChatGeneration, LLMResult
|
9
9
|
from langchain_core.tracers.context import register_configure_hook
|
10
10
|
|
11
|
-
from
|
11
|
+
from janus.utils.logger import create_logger
|
12
12
|
|
13
13
|
log = create_logger(__name__)
|
14
14
|
|
janus/llm/models_info.py
CHANGED
@@ -8,7 +8,8 @@ from langchain_community.llms import HuggingFaceTextGenInference
|
|
8
8
|
from langchain_core.language_models import BaseLanguageModel
|
9
9
|
from langchain_openai import ChatOpenAI
|
10
10
|
|
11
|
-
from
|
11
|
+
from janus.llm.model_callbacks import COST_PER_1K_TOKENS
|
12
|
+
from janus.prompts.prompt import (
|
12
13
|
ChatGptPromptEngine,
|
13
14
|
ClaudePromptEngine,
|
14
15
|
CoherePromptEngine,
|
@@ -18,8 +19,7 @@ from ..prompts.prompt import (
|
|
18
19
|
PromptEngine,
|
19
20
|
TitanPromptEngine,
|
20
21
|
)
|
21
|
-
from
|
22
|
-
from .model_callbacks import COST_PER_1K_TOKENS
|
22
|
+
from janus.utils.logger import create_logger
|
23
23
|
|
24
24
|
log = create_logger(__name__)
|
25
25
|
|
@@ -47,6 +47,7 @@ load_dotenv()
|
|
47
47
|
|
48
48
|
openai_model_reroutes = {
|
49
49
|
"gpt-4o": "gpt-4o-2024-05-13",
|
50
|
+
"gpt-4o-mini": "gpt-4o-mini",
|
50
51
|
"gpt-4": "gpt-4-0613",
|
51
52
|
"gpt-4-turbo": "gpt-4-turbo-2024-04-09",
|
52
53
|
"gpt-4-turbo-preview": "gpt-4-0125-preview",
|
@@ -56,6 +57,7 @@ openai_model_reroutes = {
|
|
56
57
|
|
57
58
|
openai_models = [
|
58
59
|
"gpt-4o",
|
60
|
+
"gpt-4o-mini",
|
59
61
|
"gpt-4",
|
60
62
|
"gpt-4-turbo",
|
61
63
|
"gpt-4-turbo-preview",
|
janus/metrics/_tests/test_llm.py
CHANGED
@@ -3,8 +3,8 @@ from unittest.mock import patch
|
|
3
3
|
|
4
4
|
import pytest
|
5
5
|
|
6
|
-
from
|
7
|
-
from
|
6
|
+
from janus.llm.models_info import load_model
|
7
|
+
from janus.metrics.llm_metrics import llm_evaluate_option, llm_evaluate_ref_option
|
8
8
|
|
9
9
|
|
10
10
|
class TestLLMMetrics(unittest.TestCase):
|
@@ -3,8 +3,8 @@ from pathlib import Path
|
|
3
3
|
|
4
4
|
from typer.testing import CliRunner
|
5
5
|
|
6
|
-
from
|
7
|
-
from
|
6
|
+
from janus.cli import app
|
7
|
+
from janus.metrics.complexity_metrics import (
|
8
8
|
TreeSitterMetric,
|
9
9
|
cyclomatic_complexity,
|
10
10
|
difficulty,
|
janus/metrics/bleu.py
CHANGED
janus/metrics/chrf.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
import math
|
2
2
|
from typing import List, Optional
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from .
|
4
|
+
from janus.language.block import CodeBlock
|
5
|
+
from janus.language.treesitter import TreeSitterSplitter
|
6
|
+
from janus.metrics.metric import metric
|
7
|
+
from janus.utils.enums import LANGUAGES
|
8
8
|
|
9
9
|
|
10
10
|
class NodeException(Exception):
|
janus/metrics/file_pairing.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
from typing import Any, Callable
|
2
2
|
|
3
|
-
from
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
3
|
+
from janus.language.binary import BinarySplitter
|
4
|
+
from janus.language.mumps import MumpsSplitter
|
5
|
+
from janus.language.node import NodeType
|
6
|
+
from janus.language.treesitter import TreeSitterSplitter
|
7
|
+
from janus.utils.enums import CUSTOM_SPLITTERS
|
8
8
|
|
9
9
|
FILE_PAIRING_METHODS: dict[str, Callable[[str, str], list[tuple[str, str]]]] = {}
|
10
10
|
|
janus/metrics/llm_metrics.py
CHANGED
@@ -9,7 +9,7 @@ from langchain_core.prompts import PromptTemplate
|
|
9
9
|
from langchain_core.pydantic_v1 import BaseModel, Field
|
10
10
|
from typing_extensions import Annotated
|
11
11
|
|
12
|
-
from .metric import metric
|
12
|
+
from janus.metrics.metric import metric
|
13
13
|
|
14
14
|
|
15
15
|
class LLMMetricOutput(BaseModel):
|
janus/metrics/metric.py
CHANGED
@@ -7,13 +7,13 @@ import click
|
|
7
7
|
import typer
|
8
8
|
from typing_extensions import Annotated
|
9
9
|
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from .
|
15
|
-
from .
|
16
|
-
from .
|
10
|
+
from janus.llm import load_model
|
11
|
+
from janus.metrics.cli import evaluate
|
12
|
+
from janus.metrics.file_pairing import FILE_PAIRING_METHODS
|
13
|
+
from janus.metrics.splitting import SPLITTING_METHODS
|
14
|
+
from janus.utils.enums import LANGUAGES
|
15
|
+
from janus.utils.logger import create_logger
|
16
|
+
from janus.utils.progress import track
|
17
17
|
|
18
18
|
log = create_logger(__name__)
|
19
19
|
|
janus/metrics/reading.py
CHANGED
janus/metrics/rouge_score.py
CHANGED
janus/metrics/similarity.py
CHANGED
@@ -3,8 +3,8 @@ import typer
|
|
3
3
|
from langchain.evaluation import EmbeddingDistance, load_evaluator
|
4
4
|
from typing_extensions import Annotated
|
5
5
|
|
6
|
-
from
|
7
|
-
from .metric import metric
|
6
|
+
from janus.embedding.embedding_models_info import load_embedding_model
|
7
|
+
from janus.metrics.metric import metric
|
8
8
|
|
9
9
|
|
10
10
|
@metric(name="similarity-score", help="Distance between embeddings of strings.")
|
janus/parsers/code_parser.py
CHANGED
@@ -5,8 +5,8 @@ from langchain_core.exceptions import OutputParserException
|
|
5
5
|
from langchain_core.messages import BaseMessage
|
6
6
|
from langchain_core.output_parsers import StrOutputParser
|
7
7
|
|
8
|
-
from
|
9
|
-
from
|
8
|
+
from janus.language.block import CodeBlock
|
9
|
+
from janus.utils.logger import create_logger
|
10
10
|
|
11
11
|
log = create_logger(__name__)
|
12
12
|
|
janus/parsers/doc_parser.py
CHANGED
@@ -8,9 +8,9 @@ from langchain_core.exceptions import OutputParserException
|
|
8
8
|
from langchain_core.messages import AIMessage
|
9
9
|
from langchain_core.pydantic_v1 import BaseModel, Field
|
10
10
|
|
11
|
-
from
|
12
|
-
from
|
13
|
-
from .
|
11
|
+
from janus.language.block import CodeBlock
|
12
|
+
from janus.parsers.code_parser import JanusParser
|
13
|
+
from janus.utils.logger import create_logger
|
14
14
|
|
15
15
|
log = create_logger(__name__)
|
16
16
|
|
janus/parsers/eval_parser.py
CHANGED
@@ -3,8 +3,8 @@ import json
|
|
3
3
|
from langchain.output_parsers import PydanticOutputParser
|
4
4
|
from langchain_core.pydantic_v1 import BaseModel, Field, validator
|
5
5
|
|
6
|
-
from
|
7
|
-
from .
|
6
|
+
from janus.parsers.code_parser import JanusParser
|
7
|
+
from janus.utils.logger import create_logger
|
8
8
|
|
9
9
|
log = create_logger(__name__)
|
10
10
|
|