janus-llm 1.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- janus/__init__.py +9 -1
- janus/__main__.py +4 -0
- janus/_tests/test_cli.py +128 -0
- janus/_tests/test_translate.py +49 -7
- janus/cli.py +530 -46
- janus/converter.py +50 -19
- janus/embedding/_tests/test_collections.py +2 -8
- janus/embedding/_tests/test_database.py +32 -0
- janus/embedding/_tests/test_vectorize.py +9 -4
- janus/embedding/collections.py +49 -6
- janus/embedding/embedding_models_info.py +130 -0
- janus/embedding/vectorize.py +53 -62
- janus/language/_tests/__init__.py +0 -0
- janus/language/_tests/test_combine.py +62 -0
- janus/language/_tests/test_splitter.py +16 -0
- janus/language/binary/_tests/test_binary.py +16 -1
- janus/language/binary/binary.py +10 -3
- janus/language/block.py +31 -30
- janus/language/combine.py +26 -34
- janus/language/mumps/_tests/test_mumps.py +2 -2
- janus/language/mumps/mumps.py +93 -9
- janus/language/naive/__init__.py +4 -0
- janus/language/naive/basic_splitter.py +14 -0
- janus/language/naive/chunk_splitter.py +26 -0
- janus/language/naive/registry.py +13 -0
- janus/language/naive/simple_ast.py +18 -0
- janus/language/naive/tag_splitter.py +61 -0
- janus/language/splitter.py +168 -74
- janus/language/treesitter/_tests/test_treesitter.py +19 -14
- janus/language/treesitter/treesitter.py +37 -13
- janus/llm/model_callbacks.py +177 -0
- janus/llm/models_info.py +165 -72
- janus/metrics/__init__.py +8 -0
- janus/metrics/_tests/__init__.py +0 -0
- janus/metrics/_tests/reference.py +2 -0
- janus/metrics/_tests/target.py +2 -0
- janus/metrics/_tests/test_bleu.py +56 -0
- janus/metrics/_tests/test_chrf.py +67 -0
- janus/metrics/_tests/test_file_pairing.py +59 -0
- janus/metrics/_tests/test_llm.py +91 -0
- janus/metrics/_tests/test_reading.py +28 -0
- janus/metrics/_tests/test_rouge_score.py +65 -0
- janus/metrics/_tests/test_similarity_score.py +23 -0
- janus/metrics/_tests/test_treesitter_metrics.py +110 -0
- janus/metrics/bleu.py +66 -0
- janus/metrics/chrf.py +55 -0
- janus/metrics/cli.py +7 -0
- janus/metrics/complexity_metrics.py +208 -0
- janus/metrics/file_pairing.py +113 -0
- janus/metrics/llm_metrics.py +202 -0
- janus/metrics/metric.py +466 -0
- janus/metrics/reading.py +70 -0
- janus/metrics/rouge_score.py +96 -0
- janus/metrics/similarity.py +53 -0
- janus/metrics/splitting.py +38 -0
- janus/parsers/_tests/__init__.py +0 -0
- janus/parsers/_tests/test_code_parser.py +32 -0
- janus/parsers/code_parser.py +24 -253
- janus/parsers/doc_parser.py +169 -0
- janus/parsers/eval_parser.py +80 -0
- janus/parsers/reqs_parser.py +72 -0
- janus/prompts/prompt.py +103 -30
- janus/translate.py +636 -111
- janus/utils/_tests/__init__.py +0 -0
- janus/utils/_tests/test_logger.py +67 -0
- janus/utils/_tests/test_progress.py +20 -0
- janus/utils/enums.py +56 -3
- janus/utils/progress.py +56 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/METADATA +27 -11
- janus_llm-2.0.1.dist-info/RECORD +94 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/WHEEL +1 -1
- janus_llm-1.0.0.dist-info/RECORD +0 -48
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/LICENSE +0 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,91 @@
|
|
1
|
+
import unittest
|
2
|
+
from unittest.mock import patch
|
3
|
+
|
4
|
+
import pytest
|
5
|
+
|
6
|
+
from janus.llm.models_info import load_model
|
7
|
+
|
8
|
+
from ..llm_metrics import llm_evaluate_option, llm_evaluate_ref_option
|
9
|
+
|
10
|
+
|
11
|
+
class TestLLMMetrics(unittest.TestCase):
    """Tests for the LLM-based metrics with the model and the LLM call mocked out."""

    def setUp(self):
        # Sample that the quality test expects to be scored below 5.
        self.bad_code = """
if __name__ == "__main__":
a1, a2, b3, b4 = 0, [1, 2000, "a"], 2, (1, 2)
for a in a2:
if b3:
elif not b3:
try:
pass
except:
raise ValueError
elif 1:
print(1)
else:
print(b4[0])
for (x, y) in range(a1, b3):
for i in range(003300):
for z in a2:
printf(b4[2])
"""
        # Sample that the quality test expects to be scored above 5.
        self.impressive_code = """
# This program prints out Hello, world!

print('Hello, world!')
"""
        # Near-identical reference used by the faithfulness test.
        self.impressive_code_reference = """
# An implementation of python Hello, world!

print("'Hello, world!")
"""

    # ``load_model`` is imported by name into this module, so the mock has to
    # replace the name here (where the test looks it up). Patching
    # ``janus.llm.models_info.load_model`` would leave this module's reference
    # pointing at the real loader.
    @patch(f"{__name__}.load_model")
    @patch("janus.metrics.llm_metrics.llm_evaluate")
    @pytest.mark.llm_eval
    def test_llm_self_eval_quality(self, mock_llm_evaluate, mock_load_model):
        """Test that the quality llm self eval recognizes bad_code as bad code
        (<5 on a scale of 1-10)"""
        mock_llm_evaluate.return_value = 4  # return a value less than 5
        mock_load_model.return_value = [None]  # return a dummy model

        bad_code_quality = llm_evaluate_option(
            self.bad_code,
            self.bad_code,
            metric="quality",
            language="python",
            llm=load_model("gpt-3.5-turbo-0125")[0],
        )
        self.assertLess(bad_code_quality, 5)

        mock_llm_evaluate.return_value = 6  # return a value greater than 5
        impressive_code_quality = llm_evaluate_option(
            self.impressive_code,
            self.impressive_code,
            metric="quality",
            language="python",
            llm=load_model("gpt-3.5-turbo-0125")[0],
        )
        self.assertGreater(impressive_code_quality, 5)

    # See the note on the quality test: patch the name this module uses.
    @patch(f"{__name__}.load_model")
    @patch("janus.metrics.llm_metrics.llm_evaluate")
    @pytest.mark.llm_eval
    def test_llm_self_eval_faithfulness(self, mock_llm_evaluate, mock_load_model):
        """The two Hello, world! samples are more or less the same,
        so the faithfulness score should be high"""
        mock_llm_evaluate.return_value = 9  # return a high value
        mock_load_model.return_value = [None]  # return a dummy model

        faithfulness = llm_evaluate_ref_option(
            self.impressive_code,
            self.impressive_code_reference,
            metric="faithfulness",
            language="python",
            llm=load_model("gpt-3.5-turbo-0125")[0],
        )
        self.assertGreater(faithfulness, 8)
|
88
|
+
|
89
|
+
|
90
|
+
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
@@ -0,0 +1,28 @@
|
|
1
|
+
import unittest
|
2
|
+
|
3
|
+
from ..reading import _repeat_text, flesch, gunning_fog
|
4
|
+
|
5
|
+
|
6
|
+
class TestReading(unittest.TestCase):
    """Checks the readability helpers against one fixed sample sentence."""

    def setUp(self):
        self.text = "This is a sample text for testing readability metrics"

    def test_repeat_text(self):
        """_repeat_text returns a string containing at least 100 words."""
        expanded = _repeat_text(self.text)
        self.assertIsInstance(expanded, str)
        word_count = len(expanded.split())
        self.assertGreaterEqual(word_count, 100)

    def test_flesch(self):
        """The Flesch score of the sample matches the expected value."""
        self.assertAlmostEqual(flesch(self.text), 47.3, places=2)

    def test_gunning_fog(self):
        """The Gunning-Fog score of the sample matches the expected value."""
        self.assertAlmostEqual(gunning_fog(self.text), 8.04, places=2)
|
25
|
+
|
26
|
+
|
27
|
+
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
@@ -0,0 +1,65 @@
|
|
1
|
+
import unittest
|
2
|
+
|
3
|
+
from janus.metrics.rouge_score import rouge
|
4
|
+
|
5
|
+
|
6
|
+
class TestRouge(unittest.TestCase):
    """Exercises ``rouge`` across its granularity and score-type options."""

    def setUp(self):
        self.target = "This is a test sentence."
        self.reference = "This is a reference sentence."

    def _rouge(self, granularity="n", score_type="f"):
        """Call ``rouge`` on the fixture pair with bigrams and the given options."""
        return rouge(
            self.target,
            self.reference,
            granularity=granularity,
            n_gram=2,
            score_type=score_type,
        )

    def test_rouge_with_granularity_n(self):
        self.assertIsInstance(self._rouge(granularity="n"), float)

    def test_rouge_with_granularity_l(self):
        self.assertIsInstance(self._rouge(granularity="l"), float)

    def test_rouge_with_granularity_w(self):
        self.assertIsInstance(self._rouge(granularity="w"), float)

    def test_rouge_with_invalid_granularity(self):
        # An unknown granularity must be rejected with a ValueError.
        with self.assertRaises(ValueError):
            self._rouge(granularity="invalid")

    def test_rouge_with_score_type_f(self):
        self.assertIsInstance(self._rouge(score_type="f"), float)

    def test_rouge_with_score_type_p(self):
        self.assertIsInstance(self._rouge(score_type="p"), float)

    def test_rouge_with_score_type_r(self):
        self.assertIsInstance(self._rouge(score_type="r"), float)

    def test_rouge_with_invalid_score_type(self):
        # An unknown score type must be rejected with a ValueError.
        with self.assertRaises(ValueError):
            self._rouge(score_type="invalid")
|
@@ -0,0 +1,23 @@
|
|
1
|
+
import unittest
|
2
|
+
|
3
|
+
from janus.metrics.similarity import similarity_score
|
4
|
+
|
5
|
+
|
6
|
+
class TestSimilarityScore(unittest.TestCase):
    """Checks that ``similarity_score`` returns a float for each option tried."""

    def setUp(self):
        self.target = "This is a test sentence."
        self.reference = "This is a reference sentence."

    def test_similarity_score(self):
        # Default model and distance metric.
        result = similarity_score(self.target, self.reference)
        self.assertIsInstance(result, float)

    def test_similarity_score_with_different_model(self):
        # Explicitly chosen embedding model.
        options = {"model_name": "text-embedding-ada-002"}
        result = similarity_score(self.target, self.reference, **options)
        self.assertIsInstance(result, float)

    def test_similarity_score_with_different_distance(self):
        # Explicitly chosen distance metric.
        options = {"distance_metric": "euclidean"}
        result = similarity_score(self.target, self.reference, **options)
        self.assertIsInstance(result, float)
|
@@ -0,0 +1,110 @@
|
|
1
|
+
import unittest
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
from typer.testing import CliRunner
|
5
|
+
|
6
|
+
from ...cli import app
|
7
|
+
from ..complexity_metrics import (
|
8
|
+
TreeSitterMetric,
|
9
|
+
cyclomatic_complexity,
|
10
|
+
difficulty,
|
11
|
+
effort,
|
12
|
+
maintainability,
|
13
|
+
volume,
|
14
|
+
)
|
15
|
+
|
16
|
+
|
17
|
+
class TestTreesitterMetrics(unittest.TestCase):
    """Tests for the tree-sitter based complexity metrics and their CLI entry."""

    def setUp(self):
        self.runner = CliRunner()
        # Fixture sources live next to this test module.
        asm_file = Path(__file__).parent.resolve() / "asm_test_file.asm"
        self.asm_target_text = asm_file.read_text()
        mumps_file = Path(__file__).parent.resolve() / "mumps_test_file.m"
        self.mumps_target_text = mumps_file.read_text()

    def test_cyclomatic_complexity(self):
        """Test the cyclomatic complexity function."""
        function_score = cyclomatic_complexity(self.asm_target_text, language="ibmhlasm")
        expected_score = 3
        self.assertEqual(function_score, expected_score)
        function_score = cyclomatic_complexity(self.mumps_target_text, language="mumps")
        expected_score = 2
        self.assertEqual(function_score, expected_score)

    def test_length(self):
        """Test the get_program_length function."""
        tsm_asm = TreeSitterMetric(code=self.asm_target_text, language="ibmhlasm")
        function_score = tsm_asm.get_program_length()
        expected_score = 18
        self.assertEqual(function_score, expected_score)
        tsm_mumps = TreeSitterMetric(code=self.mumps_target_text, language="mumps")
        function_score = tsm_mumps.get_program_length()
        expected_score = 11
        self.assertEqual(function_score, expected_score)

    def test_vocabulary(self):
        """Test the get_program_vocabulary function."""
        tsm_asm = TreeSitterMetric(code=self.asm_target_text, language="ibmhlasm")
        function_score = tsm_asm.get_program_vocabulary()
        expected_score = 9
        self.assertEqual(function_score, expected_score)
        tsm_mumps = TreeSitterMetric(code=self.mumps_target_text, language="mumps")
        function_score = tsm_mumps.get_program_vocabulary()
        expected_score = 7
        self.assertEqual(function_score, expected_score)

    def test_difficulty(self):
        """Test the halstead difficulty."""
        function_score = difficulty(self.asm_target_text, language="ibmhlasm")
        expected_score = 5
        self.assertEqual(function_score, expected_score)
        function_score = difficulty(self.mumps_target_text, language="mumps")
        expected_score = 2.625
        self.assertAlmostEqual(function_score, expected_score, places=2)

    def test_effort(self):
        """Test the halstead effort."""
        function_score = effort(self.asm_target_text, language="ibmhlasm")
        self.assertAlmostEqual(function_score, 285.29, places=2)
        function_score = effort(self.mumps_target_text, language="mumps")
        self.assertAlmostEqual(function_score, 81.06, places=2)

    def test_volume(self):
        """Test the halstead volume."""
        function_score = volume(self.asm_target_text, language="ibmhlasm")
        self.assertAlmostEqual(function_score, 57.06, places=2)
        function_score = volume(self.mumps_target_text, language="mumps")
        self.assertAlmostEqual(function_score, 30.88, places=2)

    def test_maintainability(self):
        """Test the maintainability score."""
        function_score = maintainability(self.asm_target_text, language="ibmhlasm")
        self.assertAlmostEqual(function_score, 65.48, places=2)
        function_score = maintainability(self.mumps_target_text, language="mumps")
        self.assertAlmostEqual(function_score, 72.326, places=2)

    def test_in_cli(self):
        """Test the function in the CLI."""
        output_path = Path("test.json")
        # Start from a clean slate so the existence check below is meaningful.
        if output_path.exists():
            output_path.unlink()
        # Registered before the command runs so the output file is removed even
        # when one of the assertions below fails.
        self.addCleanup(output_path.unlink, missing_ok=True)
        result = self.runner.invoke(
            app,
            [
                "evaluate",
                "cyclomatic-complexity",
                "-l",
                "ibmhlasm",
                "-t",
                "janus/language/treesitter/_tests/languages/ibmhlasm.asm",
                "-o",
                f"{output_path}",
            ],
        )
        self.assertEqual(result.exit_code, 0)
        self.assertTrue(output_path.exists())
|
107
|
+
|
108
|
+
|
109
|
+
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
janus/metrics/bleu.py
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
from typing import Annotated, Optional
|
2
|
+
|
3
|
+
import click
|
4
|
+
import typer
|
5
|
+
from sacrebleu import sentence_bleu
|
6
|
+
|
7
|
+
from .metric import metric
|
8
|
+
|
9
|
+
# from sacrebleu import sentence_chrf
|
10
|
+
|
11
|
+
|
12
|
+
@metric(help="BLEU score using sacrebleu")
def bleu(
    target: str,
    reference: str,
    smooth_method: Annotated[
        str,
        typer.Option(
            click_type=click.Choice(["exp", "floor", "add-k", "none"]),
            help="Smoothing method to use.",
        ),
    ] = "exp",
    smooth_value: Annotated[
        Optional[float],
        typer.Option(
            help="Smoothing value (only for 'floor' and 'add-k').",
        ),
    ] = None,
    lowercase: Annotated[
        bool,
        typer.Option(
            help="Whether to lowercase the data.",
        ),
    ] = False,
    use_effective_order: Annotated[
        bool,
        typer.Option(
            # The flag disables n-gram orders that have no match; the previous
            # help text described the opposite.
            help="Whether to ignore n-gram orders without any matches.",
        ),
    ] = True,
    **kwargs,
) -> float:
    """Computes BLEU score using sacrebleu

    Arguments:
        target: The target text.
        reference: The reference text.
        smooth_method: smoothing method to use.
        smooth_value: smoothing value (only for floor and add-k).
        lowercase: whether to lowercase the data.
        use_effective_order: Don't use n-gram orders without matches.
        **kwargs: Accepted and ignored by this function.

    Returns:
        The BLEU score as a float, rescaled from sacrebleu's percentage to [0, 1].
    """
    score = sentence_bleu(
        target,
        [reference],
        smooth_method=smooth_method,
        smooth_value=smooth_value,
        lowercase=lowercase,
        use_effective_order=use_effective_order,
    )
    # Dividing by 100 to get the score in the range [0, 1]
    # sacrebleu gives the score in percentage
    return float(score.score) / 100.0
|
janus/metrics/chrf.py
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
import typer
|
2
|
+
from sacrebleu import sentence_chrf
|
3
|
+
|
4
|
+
from .metric import metric
|
5
|
+
|
6
|
+
|
7
|
+
@metric(help="chrF score using sacrebleu")
def chrf(
    target: str,
    reference: str,
    n_char_order: int = typer.Option(
        default=6,
        help=(
            "A character n-gram order. If n_char_order=6, the metrics refers to the "
            "official chrF/chrF++."
        ),
    ),
    n_word_order: int = typer.Option(
        default=2,
        help=(
            "A word n-gram order. If n_word_order=2, the metric refers to the official "
            "chrF++. If n_word_order=0, the metric is equivalent to the original ChrF."
        ),
    ),
    beta: float = typer.Option(
        default=2.0,
        help=(
            "Determines importance of recall w.r.t. precision. If beta=1, their "
            "importance is equal."
        ),
    ),
    **kwargs,
) -> float:
    """Calculate the sentence-level chrF score using sacrebleu.

    Arguments:
        target: The target text.
        reference: The reference text.
        n_char_order: The character n-gram order.
        n_word_order: The word n-gram order.
        beta: The weight of recall relative to precision.
        **kwargs: Accepted and ignored by this function.

    Returns:
        The chrF score, rescaled from sacrebleu's percentage to [0, 1].
    """
    chrf_options = {
        "char_order": n_char_order,
        "word_order": n_word_order,
        "beta": beta,
    }
    # sacrebleu reports a percentage; the caller gets a fraction.
    percentage = sentence_chrf(target, [reference], **chrf_options).score
    return float(percentage) / 100.0
|
janus/metrics/cli.py
ADDED
(7 added lines not shown)
janus/metrics/complexity_metrics.py
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
import math
|
2
|
+
from typing import List, Optional
|
3
|
+
|
4
|
+
from janus.language.block import CodeBlock
|
5
|
+
from janus.language.treesitter.treesitter import TreeSitterSplitter
|
6
|
+
from janus.utils.enums import LANGUAGES
|
7
|
+
|
8
|
+
from .metric import metric
|
9
|
+
|
10
|
+
|
11
|
+
class NodeException(Exception):
    """Raised when a language has no node types configured for a metric."""


class TreeSitterMetric:
    """A class for calculating node-based complexity metrics of code."""

    def __init__(
        self,
        code: str,
        language: Optional[str],
    ):
        """
        Arguments:
            code: The code to get metrics on
            language: The language the code is written in

        Raises:
            ValueError: If ``language`` is None.
        """
        if language is None:
            raise ValueError("Error: must provide language for tree-sitter metrics")
        # Each lookup uses ``.get`` and may therefore be None when the language
        # entry does not define that key.
        self.branch_nodes: List[str] = LANGUAGES[language].get("branch_node_types")
        self.operation_nodes: List[str] = LANGUAGES[language].get("operation_node_types")
        self.operand_nodes: List[str] = LANGUAGES[language].get("operand_node_types")
        self.code = code
        self.language = language
        self.splitter = TreeSitterSplitter(
            language=language,
        )
        self.ast = self.splitter._get_ast(code)

    def _require_nodes(self, nodes: Optional[List[str]], kind: str) -> List[str]:
        """Return ``nodes``, raising NodeException if none are configured."""
        if not nodes:
            raise NodeException(f"No {kind} nodes are set for {self.language}")
        return nodes

    def get_cyclomatic_complexity(self) -> int:
        """Return the number of branch nodes in the AST plus one.

        Raises:
            NodeException: If no branch node types are set for the language.
        """
        branch_nodes = self._require_nodes(self.branch_nodes, "branch")
        return self._count_nodes_of_type(self.ast, branch_nodes) + 1

    def get_lines_of_code(self) -> int:
        """Return the number of newline characters in the code.

        A final line that is not newline-terminated is therefore not counted.
        """
        return self.code.count("\n")

    # The following metrics are based on Halstead complexity measures:
    # https://en.wikipedia.org/wiki/Halstead_complexity_measures

    def get_program_vocabulary(self) -> int:
        """Return the number of distinct operands plus distinct operations.

        Raises:
            NodeException: If operation or operand node types are not set.
        """
        operation_nodes = self._require_nodes(self.operation_nodes, "operation")
        operand_nodes = self._require_nodes(self.operand_nodes, "operand")
        return self._count_nodes_of_type(
            self.ast, operand_nodes, distinct=True
        ) + self._count_nodes_of_type(self.ast, operation_nodes, distinct=True)

    def get_program_length(self) -> int:
        """Return the total number of operation and operand nodes.

        Raises:
            NodeException: If operation or operand node types are not set.
        """
        operation_nodes = self._require_nodes(self.operation_nodes, "operation")
        operand_nodes = self._require_nodes(self.operand_nodes, "operand")
        return self._count_nodes_of_type(
            self.ast, operation_nodes
        ) + self._count_nodes_of_type(self.ast, operand_nodes)

    def get_volume(self) -> float:
        """Return program length times log2 of the program vocabulary.

        Raises:
            ValueError: If the program vocabulary is 0.
        """
        vocabulary = self.get_program_vocabulary()
        if not vocabulary:
            # Adjacent literals instead of a backslash continuation, which
            # embedded the source indentation in the message.
            raise ValueError(
                "Volume cannot be calculated because program vocabulary is 0. "
                "Confirm that your code is parsing properly."
            )
        return self.get_program_length() * math.log2(vocabulary)

    def get_difficulty(self) -> float:
        """Return (distinct operations / 2) * (total operands / distinct operands).

        Raises:
            NodeException: If operation or operand node types are not set.
            ValueError: If the code contains no operands of the configured types.
        """
        operation_nodes = self._require_nodes(self.operation_nodes, "operation")
        operand_nodes = self._require_nodes(self.operand_nodes, "operand")
        distinct_operands = self._count_nodes_of_type(
            self.ast, operand_nodes, distinct=True
        )
        if not distinct_operands:
            # Guard the division below instead of surfacing ZeroDivisionError.
            raise ValueError(
                "Difficulty cannot be calculated because no operands were found. "
                "Confirm that your code is parsing properly."
            )
        return (
            self._count_nodes_of_type(self.ast, operation_nodes, distinct=True)
            / 2
            * self._count_nodes_of_type(self.ast, operand_nodes, distinct=False)
            / distinct_operands
        )

    def get_effort(self) -> float:
        """Return volume multiplied by difficulty."""
        return self.get_volume() * self.get_difficulty()

    def get_time_to_program(self) -> float:
        """Return effort divided by 18."""
        return self.get_effort() / 18

    def get_num_bugs(self) -> float:
        """Return effort to the power 2/3, divided by 3000."""
        return self.get_effort() ** (2 / 3) / 3000

    def get_maintainability(self) -> float:
        """Return a maintainability score rescaled to a 0-100 range, floored at 0.

        Raises:
            ValueError: If the volume or the line count is 0.
        """
        volume = self.get_volume()
        cyclomatic_complexity = self.get_cyclomatic_complexity()
        lines_of_code = self.get_lines_of_code()
        if not (volume and lines_of_code):
            raise ValueError(
                "Maintainability cannot be calculated because volume or lines of code "
                "is 0. Confirm that your code is parsing properly."
            )
        return max(
            0,
            (
                171
                - (5.2 * math.log(volume))
                - (0.23 * cyclomatic_complexity)
                - (16.2 * math.log(lines_of_code))
            )
            * 100
            / 171,
        )

    def _count_nodes_of_type(
        self, code_block: "CodeBlock", nodes: List[str], distinct=False
    ) -> int:
        """Walk every node under ``code_block`` and count those of the given types.

        Arguments:
            code_block: Root of the tree to walk.
            nodes: Node type names to match against ``str(node.node_type)``.
            distinct: If True, count unique stripped node texts instead of
                every matching node.

        Returns:
            The number of matching nodes, or of distinct matching texts.
        """
        wanted = frozenset(nodes)  # constant-time membership per visited node
        seen_nodes = set()
        count = 0
        nodes_left = [code_block]
        while nodes_left:
            node = nodes_left.pop()
            if str(node.node_type) in wanted:
                if distinct:
                    seen_nodes.add(node.text.strip())
                else:
                    count += 1
            nodes_left.extend(node.children)
        return len(seen_nodes) if distinct else count
|
134
|
+
|
135
|
+
|
136
|
+
@metric(use_reference=False, help="Cyclomatic complexity score")
def cyclomatic_complexity(target: str, **kwargs) -> float:
    """Compute the cyclomatic complexity of the target code.

    Arguments:
        target: The target text.
        **kwargs: Must include ``language``, which is passed to TreeSitterMetric.

    Returns:
        The cyclomatic complexity.
    """
    analyzer = TreeSitterMetric(target, kwargs["language"])
    return analyzer.get_cyclomatic_complexity()
|
149
|
+
|
150
|
+
|
151
|
+
@metric(use_reference=False, help="Halstead effort score")
def effort(target: str, **kwargs) -> float:
    """Compute the Halstead effort of the target code.

    Arguments:
        target: The target text.
        **kwargs: Must include ``language``, which is passed to TreeSitterMetric.

    Returns:
        The Halstead effort.
    """
    analyzer = TreeSitterMetric(target, kwargs["language"])
    return analyzer.get_effort()
|
164
|
+
|
165
|
+
|
166
|
+
@metric(use_reference=False, help="Halstead volume score")
def volume(target: str, **kwargs) -> float:
    """Compute the Halstead volume of the target code.

    Arguments:
        target: The target text.
        **kwargs: Must include ``language``, which is passed to TreeSitterMetric.

    Returns:
        The Halstead volume.
    """
    analyzer = TreeSitterMetric(target, kwargs["language"])
    return analyzer.get_volume()
|
179
|
+
|
180
|
+
|
181
|
+
@metric(use_reference=False, help="Halstead difficulty score")
def difficulty(target: str, **kwargs) -> float:
    """Compute the Halstead difficulty of the target code.

    Arguments:
        target: The target text.
        **kwargs: Must include ``language``, which is passed to TreeSitterMetric.

    Returns:
        The Halstead difficulty.
    """
    analyzer = TreeSitterMetric(target, kwargs["language"])
    return analyzer.get_difficulty()
|
194
|
+
|
195
|
+
|
196
|
+
@metric(use_reference=False, help="Maintainability score")
def maintainability(target: str, **kwargs) -> float:
    """Compute the maintainability score of the target code.

    Arguments:
        target: The target text.
        **kwargs: Must include ``language``, which is passed to TreeSitterMetric.

    Returns:
        The maintainability score.
    """
    analyzer = TreeSitterMetric(target, kwargs["language"])
    return analyzer.get_maintainability()
|