janus-llm 3.1.1__py3-none-any.whl → 3.2.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +3 -3
- janus/_tests/test_cli.py +3 -3
- janus/cli.py +65 -8
- janus/converter/__init__.py +6 -6
- janus/converter/_tests/test_translate.py +10 -238
- janus/converter/converter.py +6 -3
- janus/converter/translate.py +1 -1
- janus/embedding/_tests/test_collections.py +2 -2
- janus/embedding/_tests/test_database.py +1 -1
- janus/embedding/_tests/test_vectorize.py +3 -3
- janus/embedding/collections.py +2 -2
- janus/embedding/database.py +1 -1
- janus/embedding/embedding_models_info.py +1 -1
- janus/embedding/vectorize.py +5 -5
- janus/language/_tests/test_combine.py +1 -1
- janus/language/_tests/test_splitter.py +1 -1
- janus/language/alc/_tests/test_alc.py +6 -6
- janus/language/alc/alc.py +5 -5
- janus/language/binary/_tests/test_binary.py +4 -4
- janus/language/binary/binary.py +5 -5
- janus/language/block.py +2 -2
- janus/language/combine.py +3 -3
- janus/language/file.py +2 -2
- janus/language/mumps/_tests/test_mumps.py +5 -5
- janus/language/mumps/mumps.py +5 -5
- janus/language/mumps/patterns.py +1 -1
- janus/language/naive/__init__.py +4 -4
- janus/language/naive/basic_splitter.py +4 -4
- janus/language/naive/chunk_splitter.py +4 -4
- janus/language/naive/registry.py +1 -1
- janus/language/naive/simple_ast.py +5 -5
- janus/language/naive/tag_splitter.py +4 -4
- janus/language/node.py +1 -1
- janus/language/splitter.py +4 -4
- janus/language/treesitter/_tests/test_treesitter.py +5 -5
- janus/language/treesitter/treesitter.py +4 -4
- janus/llm/__init__.py +1 -1
- janus/llm/model_callbacks.py +1 -1
- janus/llm/models_info.py +45 -23
- janus/metrics/_tests/test_bleu.py +1 -1
- janus/metrics/_tests/test_chrf.py +1 -1
- janus/metrics/_tests/test_file_pairing.py +1 -1
- janus/metrics/_tests/test_llm.py +5 -5
- janus/metrics/_tests/test_reading.py +1 -1
- janus/metrics/_tests/test_rouge_score.py +1 -1
- janus/metrics/_tests/test_similarity_score.py +1 -1
- janus/metrics/_tests/test_treesitter_metrics.py +2 -2
- janus/metrics/bleu.py +1 -1
- janus/metrics/chrf.py +1 -1
- janus/metrics/complexity_metrics.py +4 -4
- janus/metrics/file_pairing.py +5 -5
- janus/metrics/llm_metrics.py +1 -1
- janus/metrics/metric.py +11 -11
- janus/metrics/reading.py +1 -1
- janus/metrics/rouge_score.py +1 -1
- janus/metrics/similarity.py +2 -2
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/doc_parser.py +3 -3
- janus/parsers/eval_parser.py +2 -2
- janus/parsers/reqs_parser.py +3 -3
- janus/parsers/uml.py +1 -2
- janus/prompts/prompt.py +2 -2
- janus/utils/_tests/test_logger.py +1 -1
- janus/utils/_tests/test_progress.py +1 -1
- janus/utils/progress.py +1 -1
- {janus_llm-3.1.1.dist-info → janus_llm-3.2.1.dist-info}/METADATA +1 -1
- janus_llm-3.2.1.dist-info/RECORD +105 -0
- janus_llm-3.1.1.dist-info/RECORD +0 -105
- {janus_llm-3.1.1.dist-info → janus_llm-3.2.1.dist-info}/LICENSE +0 -0
- {janus_llm-3.1.1.dist-info → janus_llm-3.2.1.dist-info}/WHEEL +0 -0
- {janus_llm-3.1.1.dist-info → janus_llm-3.2.1.dist-info}/entry_points.txt +0 -0
janus/language/alc/alc.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
from langchain.schema.language_model import BaseLanguageModel
|
2
2
|
|
3
|
-
from
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
3
|
+
from janus.language.block import CodeBlock
|
4
|
+
from janus.language.combine import Combiner
|
5
|
+
from janus.language.node import NodeType
|
6
|
+
from janus.language.treesitter import TreeSitterSplitter
|
7
|
+
from janus.utils.logger import create_logger
|
8
8
|
|
9
9
|
log = create_logger(__name__)
|
10
10
|
|
@@ -5,17 +5,17 @@ from unittest.mock import patch
|
|
5
5
|
|
6
6
|
import pytest
|
7
7
|
|
8
|
-
from
|
9
|
-
from
|
8
|
+
from janus.language.binary import BinarySplitter
|
9
|
+
from janus.llm import load_model
|
10
10
|
|
11
11
|
|
12
12
|
class TestBinarySplitter(unittest.TestCase):
|
13
13
|
"""Tests for the BinarySplitter class."""
|
14
14
|
|
15
15
|
def setUp(self):
|
16
|
-
model_name = "gpt-
|
16
|
+
model_name = "gpt-4o"
|
17
17
|
self.binary_file = Path("janus/language/binary/_tests/hello")
|
18
|
-
self.llm, _, _ = load_model(model_name)
|
18
|
+
self.llm, _, _, _ = load_model(model_name)
|
19
19
|
self.splitter = BinarySplitter(model=self.llm)
|
20
20
|
os.environ["GHIDRA_INSTALL_PATH"] = "~/programs/ghidra_10.4_PUBLIC"
|
21
21
|
|
janus/language/binary/binary.py
CHANGED
@@ -7,11 +7,11 @@ from pathlib import Path
|
|
7
7
|
import tree_sitter
|
8
8
|
from langchain.schema.language_model import BaseLanguageModel
|
9
9
|
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
10
|
+
from janus.language.block import CodeBlock
|
11
|
+
from janus.language.combine import Combiner
|
12
|
+
from janus.language.treesitter import TreeSitterSplitter
|
13
|
+
from janus.utils.enums import LANGUAGES
|
14
|
+
from janus.utils.logger import create_logger
|
15
15
|
|
16
16
|
log = create_logger(__name__)
|
17
17
|
|
janus/language/block.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
from functools import total_ordering
|
2
2
|
from typing import ForwardRef, Hashable, Optional, Tuple
|
3
3
|
|
4
|
-
from
|
5
|
-
from .
|
4
|
+
from janus.language.node import NodeType
|
5
|
+
from janus.utils.logger import create_logger
|
6
6
|
|
7
7
|
log = create_logger(__name__)
|
8
8
|
|
janus/language/combine.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
from
|
2
|
-
from .
|
3
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
2
|
+
from janus.language.file import FileManager
|
3
|
+
from janus.utils.logger import create_logger
|
4
4
|
|
5
5
|
log = create_logger(__name__)
|
6
6
|
|
janus/language/file.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
import unittest
|
2
2
|
from pathlib import Path
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
4
|
+
from janus.language.combine import Combiner
|
5
|
+
from janus.language.mumps import MumpsSplitter
|
6
|
+
from janus.llm import load_model
|
7
7
|
|
8
8
|
|
9
9
|
class TestMumpsSplitter(unittest.TestCase):
|
@@ -11,8 +11,8 @@ class TestMumpsSplitter(unittest.TestCase):
|
|
11
11
|
|
12
12
|
def setUp(self):
|
13
13
|
"""Set up the tests."""
|
14
|
-
model_name = "gpt-
|
15
|
-
llm, _, _ = load_model(model_name)
|
14
|
+
model_name = "gpt-4o"
|
15
|
+
llm, _, _, _ = load_model(model_name)
|
16
16
|
self.splitter = MumpsSplitter(model=llm)
|
17
17
|
self.combiner = Combiner(language="mumps")
|
18
18
|
self.test_file = Path("janus/language/mumps/_tests/mumps.m")
|
janus/language/mumps/mumps.py
CHANGED
@@ -2,11 +2,11 @@ import re
|
|
2
2
|
|
3
3
|
from langchain.schema.language_model import BaseLanguageModel
|
4
4
|
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
8
|
-
from
|
9
|
-
from
|
5
|
+
from janus.language.block import CodeBlock
|
6
|
+
from janus.language.combine import Combiner
|
7
|
+
from janus.language.node import NodeType
|
8
|
+
from janus.language.splitter import Splitter
|
9
|
+
from janus.utils.logger import create_logger
|
10
10
|
|
11
11
|
log = create_logger(__name__)
|
12
12
|
|
janus/language/mumps/patterns.py
CHANGED
janus/language/naive/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from .basic_splitter import FileSplitter
|
2
|
-
from .chunk_splitter import ChunkSplitter
|
3
|
-
from .simple_ast import get_flexible_ast, get_strict_ast
|
4
|
-
from .tag_splitter import TagSplitter
|
1
|
+
from janus.language.naive.basic_splitter import FileSplitter
|
2
|
+
from janus.language.naive.chunk_splitter import ChunkSplitter
|
3
|
+
from janus.language.naive.simple_ast import get_flexible_ast, get_strict_ast
|
4
|
+
from janus.language.naive.tag_splitter import TagSplitter
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.chunk_splitter import ChunkSplitter
|
3
|
+
from janus.language.naive.registry import register_splitter
|
4
|
+
from janus.language.splitter import FileSizeError
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("file")
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.registry import register_splitter
|
3
|
+
from janus.language.node import NodeType
|
4
|
+
from janus.language.splitter import Splitter
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("chunk")
|
janus/language/naive/registry.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from
|
5
|
-
from .
|
1
|
+
from janus.language.alc.alc import AlcSplitter
|
2
|
+
from janus.language.mumps.mumps import MumpsSplitter
|
3
|
+
from janus.language.naive.registry import register_splitter
|
4
|
+
from janus.language.treesitter import TreeSitterSplitter
|
5
|
+
from janus.utils.enums import LANGUAGES
|
6
6
|
|
7
7
|
|
8
8
|
@register_splitter("ast-flex")
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.registry import register_splitter
|
3
|
+
from janus.language.node import NodeType
|
4
|
+
from janus.language.splitter import Splitter
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("tag")
|
janus/language/node.py
CHANGED
janus/language/splitter.py
CHANGED
@@ -6,10 +6,10 @@ from typing import List
|
|
6
6
|
import tiktoken
|
7
7
|
from langchain.schema.language_model import BaseLanguageModel
|
8
8
|
|
9
|
-
from
|
10
|
-
from .
|
11
|
-
from .
|
12
|
-
from .
|
9
|
+
from janus.language.block import CodeBlock
|
10
|
+
from janus.language.file import FileManager
|
11
|
+
from janus.language.node import NodeType
|
12
|
+
from janus.utils.logger import create_logger
|
13
13
|
|
14
14
|
log = create_logger(__name__)
|
15
15
|
|
@@ -1,9 +1,9 @@
|
|
1
1
|
import unittest
|
2
2
|
from pathlib import Path
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
4
|
+
from janus.language.combine import Combiner
|
5
|
+
from janus.language.treesitter import TreeSitterSplitter
|
6
|
+
from janus.llm import load_model
|
7
7
|
|
8
8
|
|
9
9
|
class TestTreeSitterSplitter(unittest.TestCase):
|
@@ -11,9 +11,9 @@ class TestTreeSitterSplitter(unittest.TestCase):
|
|
11
11
|
|
12
12
|
def setUp(self):
|
13
13
|
"""Set up the tests."""
|
14
|
-
model_name = "gpt-
|
14
|
+
model_name = "gpt-4o"
|
15
15
|
self.maxDiff = None
|
16
|
-
self.llm, _, _ = load_model(model_name)
|
16
|
+
self.llm, _, _, _ = load_model(model_name)
|
17
17
|
|
18
18
|
def _split(self):
|
19
19
|
"""Split the test file."""
|
@@ -9,10 +9,10 @@ import tree_sitter
|
|
9
9
|
from git import Repo
|
10
10
|
from langchain.schema.language_model import BaseLanguageModel
|
11
11
|
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
15
|
-
from
|
12
|
+
from janus.language.block import CodeBlock, NodeType
|
13
|
+
from janus.language.splitter import Splitter
|
14
|
+
from janus.utils.enums import LANGUAGES
|
15
|
+
from janus.utils.logger import create_logger
|
16
16
|
|
17
17
|
log = create_logger(__name__)
|
18
18
|
|
janus/llm/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
from .models_info import load_model
|
1
|
+
from janus.llm.models_info import load_model
|
janus/llm/model_callbacks.py
CHANGED
@@ -8,7 +8,7 @@ from langchain_core.messages import AIMessage
|
|
8
8
|
from langchain_core.outputs import ChatGeneration, LLMResult
|
9
9
|
from langchain_core.tracers.context import register_configure_hook
|
10
10
|
|
11
|
-
from
|
11
|
+
from janus.utils.logger import create_logger
|
12
12
|
|
13
13
|
log = create_logger(__name__)
|
14
14
|
|
janus/llm/models_info.py
CHANGED
@@ -8,7 +8,8 @@ from langchain_community.llms import HuggingFaceTextGenInference
|
|
8
8
|
from langchain_core.language_models import BaseLanguageModel
|
9
9
|
from langchain_openai import ChatOpenAI
|
10
10
|
|
11
|
-
from
|
11
|
+
from janus.llm.model_callbacks import COST_PER_1K_TOKENS
|
12
|
+
from janus.prompts.prompt import (
|
12
13
|
ChatGptPromptEngine,
|
13
14
|
ClaudePromptEngine,
|
14
15
|
CoherePromptEngine,
|
@@ -18,8 +19,7 @@ from ..prompts.prompt import (
|
|
18
19
|
PromptEngine,
|
19
20
|
TitanPromptEngine,
|
20
21
|
)
|
21
|
-
from
|
22
|
-
from .model_callbacks import COST_PER_1K_TOKENS
|
22
|
+
from janus.utils.logger import create_logger
|
23
23
|
|
24
24
|
log = create_logger(__name__)
|
25
25
|
|
@@ -55,11 +55,12 @@ openai_model_reroutes = {
|
|
55
55
|
}
|
56
56
|
|
57
57
|
openai_models = [
|
58
|
-
"gpt-
|
59
|
-
"gpt-4
|
60
|
-
"gpt-4-
|
61
|
-
"gpt-
|
62
|
-
"gpt-3.5-turbo
|
58
|
+
"gpt-4o",
|
59
|
+
"gpt-4",
|
60
|
+
"gpt-4-turbo",
|
61
|
+
"gpt-4-turbo-preview",
|
62
|
+
"gpt-3.5-turbo",
|
63
|
+
"gpt-3.5-turbo-16k",
|
63
64
|
]
|
64
65
|
claude_models = [
|
65
66
|
"bedrock-claude-v2",
|
@@ -133,8 +134,8 @@ _open_ai_defaults: dict[str, str] = {
|
|
133
134
|
"openai_organization": os.getenv("OPENAI_ORG_ID"),
|
134
135
|
}
|
135
136
|
|
136
|
-
|
137
|
-
**{m:
|
137
|
+
MODEL_ID_TO_LONG_ID = {
|
138
|
+
**{m: mr for m, mr in openai_model_reroutes.items()},
|
138
139
|
"bedrock-claude-v2": "anthropic.claude-v2",
|
139
140
|
"bedrock-claude-instant-v1": "anthropic.claude-instant-v1",
|
140
141
|
"bedrock-claude-haiku": "anthropic.claude-3-haiku-20240307-v1:0",
|
@@ -157,7 +158,7 @@ model_identifiers = {
|
|
157
158
|
|
158
159
|
MODEL_DEFAULT_ARGUMENTS: dict[str, dict[str, str]] = {
|
159
160
|
k: (dict(model_name=k) if k in openai_models else dict(model_id=v))
|
160
|
-
for k, v in
|
161
|
+
for k, v in MODEL_ID_TO_LONG_ID.items()
|
161
162
|
}
|
162
163
|
|
163
164
|
DEFAULT_MODELS = list(MODEL_DEFAULT_ARGUMENTS.keys())
|
@@ -199,22 +200,38 @@ TOKEN_LIMITS: dict[str, int] = {
|
|
199
200
|
}
|
200
201
|
|
201
202
|
|
202
|
-
def
|
203
|
+
def get_available_model_names() -> list[str]:
|
204
|
+
avaialable_models = []
|
205
|
+
for file in MODEL_CONFIG_DIR.iterdir():
|
206
|
+
if file.is_file():
|
207
|
+
avaialable_models.append(MODEL_CONFIG_DIR.stem)
|
208
|
+
return avaialable_models
|
209
|
+
|
210
|
+
|
211
|
+
def load_model(user_model_name: str) -> tuple[BaseLanguageModel, int, dict[str, float]]:
|
203
212
|
if not MODEL_CONFIG_DIR.exists():
|
204
213
|
MODEL_CONFIG_DIR.mkdir(parents=True)
|
205
|
-
model_config_file = MODEL_CONFIG_DIR / f"{
|
214
|
+
model_config_file = MODEL_CONFIG_DIR / f"{user_model_name}.json"
|
206
215
|
if not model_config_file.exists():
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
216
|
+
log.warning(
|
217
|
+
f"Model {user_model_name} not found in user-defined models, searching "
|
218
|
+
f"default models for {user_model_name}."
|
219
|
+
)
|
220
|
+
model_id = user_model_name
|
221
|
+
if user_model_name not in DEFAULT_MODELS:
|
222
|
+
message = (
|
223
|
+
f"Model {user_model_name} not found in default models. Make sure to run "
|
224
|
+
"`janus llm add` first."
|
225
|
+
)
|
226
|
+
log.error(message)
|
227
|
+
raise ValueError(message)
|
212
228
|
model_config = {
|
213
|
-
"model_type": MODEL_TYPES[
|
214
|
-
"
|
215
|
-
"
|
229
|
+
"model_type": MODEL_TYPES[model_id],
|
230
|
+
"model_id": model_id,
|
231
|
+
"model_args": MODEL_DEFAULT_ARGUMENTS[model_id],
|
232
|
+
"token_limit": TOKEN_LIMITS.get(MODEL_ID_TO_LONG_ID[model_id], 4096),
|
216
233
|
"model_cost": COST_PER_1K_TOKENS.get(
|
217
|
-
|
234
|
+
MODEL_ID_TO_LONG_ID[model_id], {"input": 0, "output": 0}
|
218
235
|
),
|
219
236
|
}
|
220
237
|
with open(model_config_file, "w") as f:
|
@@ -227,4 +244,9 @@ def load_model(model_name: str) -> tuple[BaseLanguageModel, int, dict[str, float
|
|
227
244
|
if model_config["model_type"] == "OpenAI":
|
228
245
|
model_args.update(_open_ai_defaults)
|
229
246
|
model = model_constructor(**model_args)
|
230
|
-
return
|
247
|
+
return (
|
248
|
+
model,
|
249
|
+
model_config["model_id"],
|
250
|
+
model_config["token_limit"],
|
251
|
+
model_config["model_cost"],
|
252
|
+
)
|
janus/metrics/_tests/test_llm.py
CHANGED
@@ -3,8 +3,8 @@ from unittest.mock import patch
|
|
3
3
|
|
4
4
|
import pytest
|
5
5
|
|
6
|
-
from
|
7
|
-
from
|
6
|
+
from janus.llm.models_info import load_model
|
7
|
+
from janus.metrics.llm_metrics import llm_evaluate_option, llm_evaluate_ref_option
|
8
8
|
|
9
9
|
|
10
10
|
class TestLLMMetrics(unittest.TestCase):
|
@@ -53,7 +53,7 @@ class TestLLMMetrics(unittest.TestCase):
|
|
53
53
|
self.bad_code,
|
54
54
|
metric="quality",
|
55
55
|
language="python",
|
56
|
-
llm=load_model("gpt-
|
56
|
+
llm=load_model("gpt-4o")[0],
|
57
57
|
)
|
58
58
|
self.assertLess(bad_code_quality, 5)
|
59
59
|
|
@@ -63,7 +63,7 @@ class TestLLMMetrics(unittest.TestCase):
|
|
63
63
|
self.impressive_code,
|
64
64
|
metric="quality",
|
65
65
|
language="python",
|
66
|
-
llm=load_model("gpt-
|
66
|
+
llm=load_model("gpt-4o")[0],
|
67
67
|
)
|
68
68
|
self.assertGreater(impressive_code_quality, 5)
|
69
69
|
|
@@ -81,7 +81,7 @@ class TestLLMMetrics(unittest.TestCase):
|
|
81
81
|
self.impressive_code_reference,
|
82
82
|
metric="faithfulness",
|
83
83
|
language="python",
|
84
|
-
llm=load_model("gpt-
|
84
|
+
llm=load_model("gpt-4o")[0],
|
85
85
|
)
|
86
86
|
self.assertGreater(faithfulness, 8)
|
87
87
|
|
@@ -3,8 +3,8 @@ from pathlib import Path
|
|
3
3
|
|
4
4
|
from typer.testing import CliRunner
|
5
5
|
|
6
|
-
from
|
7
|
-
from
|
6
|
+
from janus.cli import app
|
7
|
+
from janus.metrics.complexity_metrics import (
|
8
8
|
TreeSitterMetric,
|
9
9
|
cyclomatic_complexity,
|
10
10
|
difficulty,
|
janus/metrics/bleu.py
CHANGED
janus/metrics/chrf.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
import math
|
2
2
|
from typing import List, Optional
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from .
|
4
|
+
from janus.language.block import CodeBlock
|
5
|
+
from janus.language.treesitter import TreeSitterSplitter
|
6
|
+
from janus.metrics.metric import metric
|
7
|
+
from janus.utils.enums import LANGUAGES
|
8
8
|
|
9
9
|
|
10
10
|
class NodeException(Exception):
|
janus/metrics/file_pairing.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
from typing import Any, Callable
|
2
2
|
|
3
|
-
from
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
3
|
+
from janus.language.binary import BinarySplitter
|
4
|
+
from janus.language.mumps import MumpsSplitter
|
5
|
+
from janus.language.node import NodeType
|
6
|
+
from janus.language.treesitter import TreeSitterSplitter
|
7
|
+
from janus.utils.enums import CUSTOM_SPLITTERS
|
8
8
|
|
9
9
|
FILE_PAIRING_METHODS: dict[str, Callable[[str, str], list[tuple[str, str]]]] = {}
|
10
10
|
|
janus/metrics/llm_metrics.py
CHANGED
@@ -9,7 +9,7 @@ from langchain_core.prompts import PromptTemplate
|
|
9
9
|
from langchain_core.pydantic_v1 import BaseModel, Field
|
10
10
|
from typing_extensions import Annotated
|
11
11
|
|
12
|
-
from .metric import metric
|
12
|
+
from janus.metrics.metric import metric
|
13
13
|
|
14
14
|
|
15
15
|
class LLMMetricOutput(BaseModel):
|
janus/metrics/metric.py
CHANGED
@@ -7,13 +7,13 @@ import click
|
|
7
7
|
import typer
|
8
8
|
from typing_extensions import Annotated
|
9
9
|
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from .
|
15
|
-
from .
|
16
|
-
from .
|
10
|
+
from janus.llm import load_model
|
11
|
+
from janus.metrics.cli import evaluate
|
12
|
+
from janus.metrics.file_pairing import FILE_PAIRING_METHODS
|
13
|
+
from janus.metrics.splitting import SPLITTING_METHODS
|
14
|
+
from janus.utils.enums import LANGUAGES
|
15
|
+
from janus.utils.logger import create_logger
|
16
|
+
from janus.utils.progress import track
|
17
17
|
|
18
18
|
log = create_logger(__name__)
|
19
19
|
|
@@ -112,7 +112,7 @@ def metric(
|
|
112
112
|
"-L",
|
113
113
|
help="The custom name of the model set with 'janus llm add'.",
|
114
114
|
),
|
115
|
-
] = "gpt-
|
115
|
+
] = "gpt-4o",
|
116
116
|
progress: Annotated[
|
117
117
|
bool,
|
118
118
|
typer.Option(
|
@@ -135,7 +135,7 @@ def metric(
|
|
135
135
|
**kwargs,
|
136
136
|
):
|
137
137
|
out = []
|
138
|
-
llm, token_limit, model_cost = load_model(llm_name)
|
138
|
+
llm, _, token_limit, model_cost = load_model(llm_name)
|
139
139
|
if json_file_name is not None:
|
140
140
|
with open(json_file_name, "r") as f:
|
141
141
|
json_obj = json.load(f)
|
@@ -274,7 +274,7 @@ def metric(
|
|
274
274
|
"-L",
|
275
275
|
help="The custom name of the model set with 'janus llm add'.",
|
276
276
|
),
|
277
|
-
] = "gpt-
|
277
|
+
] = "gpt-4o",
|
278
278
|
progress: Annotated[
|
279
279
|
bool,
|
280
280
|
typer.Option(
|
@@ -296,7 +296,7 @@ def metric(
|
|
296
296
|
*args,
|
297
297
|
**kwargs,
|
298
298
|
):
|
299
|
-
llm, token_limit, model_cost = load_model(llm_name)
|
299
|
+
llm, _, token_limit, model_cost = load_model(llm_name)
|
300
300
|
if json_file_name is not None:
|
301
301
|
with open(json_file_name, "r") as f:
|
302
302
|
json_obj = json.load(f)
|