janus-llm 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +9 -1
- janus/__main__.py +4 -0
- janus/_tests/test_cli.py +128 -0
- janus/_tests/test_translate.py +49 -7
- janus/cli.py +530 -46
- janus/converter.py +50 -19
- janus/embedding/_tests/test_collections.py +2 -8
- janus/embedding/_tests/test_database.py +32 -0
- janus/embedding/_tests/test_vectorize.py +9 -4
- janus/embedding/collections.py +49 -6
- janus/embedding/embedding_models_info.py +120 -0
- janus/embedding/vectorize.py +53 -62
- janus/language/_tests/__init__.py +0 -0
- janus/language/_tests/test_combine.py +62 -0
- janus/language/_tests/test_splitter.py +16 -0
- janus/language/binary/_tests/test_binary.py +16 -1
- janus/language/binary/binary.py +10 -3
- janus/language/block.py +31 -30
- janus/language/combine.py +26 -34
- janus/language/mumps/_tests/test_mumps.py +2 -2
- janus/language/mumps/mumps.py +93 -9
- janus/language/naive/__init__.py +4 -0
- janus/language/naive/basic_splitter.py +14 -0
- janus/language/naive/chunk_splitter.py +26 -0
- janus/language/naive/registry.py +13 -0
- janus/language/naive/simple_ast.py +18 -0
- janus/language/naive/tag_splitter.py +61 -0
- janus/language/splitter.py +168 -74
- janus/language/treesitter/_tests/test_treesitter.py +9 -6
- janus/language/treesitter/treesitter.py +37 -13
- janus/llm/model_callbacks.py +177 -0
- janus/llm/models_info.py +134 -70
- janus/metrics/__init__.py +8 -0
- janus/metrics/_tests/__init__.py +0 -0
- janus/metrics/_tests/reference.py +2 -0
- janus/metrics/_tests/target.py +2 -0
- janus/metrics/_tests/test_bleu.py +56 -0
- janus/metrics/_tests/test_chrf.py +67 -0
- janus/metrics/_tests/test_file_pairing.py +59 -0
- janus/metrics/_tests/test_llm.py +91 -0
- janus/metrics/_tests/test_reading.py +28 -0
- janus/metrics/_tests/test_rouge_score.py +65 -0
- janus/metrics/_tests/test_similarity_score.py +23 -0
- janus/metrics/_tests/test_treesitter_metrics.py +110 -0
- janus/metrics/bleu.py +66 -0
- janus/metrics/chrf.py +55 -0
- janus/metrics/cli.py +7 -0
- janus/metrics/complexity_metrics.py +208 -0
- janus/metrics/file_pairing.py +113 -0
- janus/metrics/llm_metrics.py +202 -0
- janus/metrics/metric.py +466 -0
- janus/metrics/reading.py +70 -0
- janus/metrics/rouge_score.py +96 -0
- janus/metrics/similarity.py +53 -0
- janus/metrics/splitting.py +38 -0
- janus/parsers/_tests/__init__.py +0 -0
- janus/parsers/_tests/test_code_parser.py +32 -0
- janus/parsers/code_parser.py +24 -253
- janus/parsers/doc_parser.py +169 -0
- janus/parsers/eval_parser.py +80 -0
- janus/parsers/reqs_parser.py +72 -0
- janus/prompts/prompt.py +103 -30
- janus/translate.py +636 -111
- janus/utils/_tests/__init__.py +0 -0
- janus/utils/_tests/test_logger.py +67 -0
- janus/utils/_tests/test_progress.py +20 -0
- janus/utils/enums.py +56 -3
- janus/utils/progress.py +56 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/METADATA +23 -10
- janus_llm-2.0.0.dist-info/RECORD +94 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/WHEEL +1 -1
- janus_llm-1.0.0.dist-info/RECORD +0 -48
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/LICENSE +0 -0
- {janus_llm-1.0.0.dist-info → janus_llm-2.0.0.dist-info}/entry_points.txt +0 -0
janus/metrics/_tests/test_llm.py
ADDED
@@ -0,0 +1,91 @@
import unittest
from unittest.mock import patch

import pytest

from janus.llm.models_info import load_model

from ..llm_metrics import llm_evaluate_option, llm_evaluate_ref_option


class TestLLMMetrics(unittest.TestCase):
    def setUp(self):
        self.bad_code = """
if __name__ == "__main__":
    a1, a2, b3, b4 = 0, [1, 2000, "a"], 2, (1, 2)
    for a in a2:
        if b3:
        elif not b3:
            try:
                pass
            except:
                raise ValueError
        elif 1:
            print(1)
        else:
            print(b4[0])
    for (x, y) in range(a1, b3):
        for i in range(003300):
            for z in a2:
                printf(b4[2])
"""
        self.impressive_code = """
# This program prints out Hello, world!

print('Hello, world!')
"""
        self.impressive_code_reference = """
# An implementation of python Hello, world!

print("'Hello, world!")
"""

    @patch("janus.llm.models_info.load_model")
    @patch("janus.metrics.llm_metrics.llm_evaluate")
    @pytest.mark.llm_eval
    def test_llm_self_eval_quality(self, mock_llm_evaluate, mock_load_model):
        """Test that the quality llm self eval recognizes bad_code as bad code
        (<5 on a scale of 1-10)"""
        mock_llm_evaluate.return_value = 4  # return a value less than 5
        mock_load_model.return_value = [None]  # return a dummy model

        bad_code_quality = llm_evaluate_option(
            self.bad_code,
            self.bad_code,
            metric="quality",
            language="python",
            llm=load_model("gpt-3.5-turbo-0125")[0],
        )
        self.assertLess(bad_code_quality, 5)

        mock_llm_evaluate.return_value = 6  # return a value greater than 5
        impressive_code_quality = llm_evaluate_option(
            self.impressive_code,
            self.impressive_code,
            metric="quality",
            language="python",
            llm=load_model("gpt-3.5-turbo-0125")[0],
        )
        self.assertGreater(impressive_code_quality, 5)

    @patch("janus.llm.models_info.load_model")
    @patch("janus.metrics.llm_metrics.llm_evaluate")
    @pytest.mark.llm_eval
    def test_llm_self_eval_faithfulness(self, mock_llm_evaluate, mock_load_model):
        """The two Hello, world! samples are more or less the same,
        so the faithfulness score should be high"""
        mock_llm_evaluate.return_value = 9  # return a high value
        mock_load_model.return_value = [None]  # return a dummy model

        faithfulness = llm_evaluate_ref_option(
            self.impressive_code,
            self.impressive_code_reference,
            metric="faithfulness",
            language="python",
            llm=load_model("gpt-3.5-turbo-0125")[0],
        )
        self.assertGreater(faithfulness, 8)


if __name__ == "__main__":
    unittest.main()
janus/metrics/_tests/test_reading.py
ADDED
@@ -0,0 +1,28 @@
import unittest

from ..reading import _repeat_text, flesch, gunning_fog


class TestReading(unittest.TestCase):
    def setUp(self):
        self.text = "This is a sample text for testing readability metrics"

    def test_repeat_text(self):
        """Test the _repeat_text function."""
        repeated_text = _repeat_text(self.text)
        self.assertIsInstance(repeated_text, str)
        self.assertTrue(len(repeated_text.split()) >= 100)

    def test_flesch(self):
        """Test the Flesch readability score."""
        score = flesch(self.text)
        self.assertAlmostEqual(score, 47.3, places=2)

    def test_gunning_fog(self):
        """Test the Gunning-Fog readability score."""
        score = gunning_fog(self.text)
        self.assertAlmostEqual(score, 8.04, places=2)


if __name__ == "__main__":
    unittest.main()
janus/metrics/_tests/test_rouge_score.py
ADDED
@@ -0,0 +1,65 @@
import unittest

from janus.metrics.rouge_score import rouge


class TestRouge(unittest.TestCase):
    def setUp(self):
        self.target = "This is a test sentence."
        self.reference = "This is a reference sentence."

    def test_rouge_with_granularity_n(self):
        score = rouge(
            self.target, self.reference, granularity="n", n_gram=2, score_type="f"
        )
        self.assertIsInstance(score, float)

    def test_rouge_with_granularity_l(self):
        score = rouge(
            self.target, self.reference, granularity="l", n_gram=2, score_type="f"
        )
        self.assertIsInstance(score, float)

    def test_rouge_with_granularity_w(self):
        score = rouge(
            self.target, self.reference, granularity="w", n_gram=2, score_type="f"
        )
        self.assertIsInstance(score, float)

    def test_rouge_with_invalid_granularity(self):
        with self.assertRaises(ValueError):
            rouge(
                self.target,
                self.reference,
                granularity="invalid",
                n_gram=2,
                score_type="f",
            )

    def test_rouge_with_score_type_f(self):
        score = rouge(
            self.target, self.reference, granularity="n", n_gram=2, score_type="f"
        )
        self.assertIsInstance(score, float)

    def test_rouge_with_score_type_p(self):
        score = rouge(
            self.target, self.reference, granularity="n", n_gram=2, score_type="p"
        )
        self.assertIsInstance(score, float)

    def test_rouge_with_score_type_r(self):
        score = rouge(
            self.target, self.reference, granularity="n", n_gram=2, score_type="r"
        )
        self.assertIsInstance(score, float)

    def test_rouge_with_invalid_score_type(self):
        with self.assertRaises(ValueError):
            rouge(
                self.target,
                self.reference,
                granularity="n",
                n_gram=2,
                score_type="invalid",
            )
janus/metrics/_tests/test_similarity_score.py
ADDED
@@ -0,0 +1,23 @@
import unittest

from janus.metrics.similarity import similarity_score


class TestSimilarityScore(unittest.TestCase):
    def setUp(self):
        self.target = "This is a test sentence."
        self.reference = "This is a reference sentence."

    def test_similarity_score(self):
        score = similarity_score(self.target, self.reference)
        self.assertIsInstance(score, float)

    def test_similarity_score_with_different_model(self):
        score = similarity_score(
            self.target, self.reference, model_name="text-embedding-ada-002"
        )
        self.assertIsInstance(score, float)

    def test_similarity_score_with_different_distance(self):
        score = similarity_score(self.target, self.reference, distance_metric="euclidean")
        self.assertIsInstance(score, float)
janus/metrics/_tests/test_treesitter_metrics.py
ADDED
@@ -0,0 +1,110 @@
import unittest
from pathlib import Path

from typer.testing import CliRunner

from ...cli import app
from ..complexity_metrics import (
    TreeSitterMetric,
    cyclomatic_complexity,
    difficulty,
    effort,
    maintainability,
    volume,
)


class TestTreesitterMetrics(unittest.TestCase):
    def setUp(self):
        self.runner = CliRunner()
        asm_file = Path(__file__).parent.resolve() / "asm_test_file.asm"
        self.asm_target_text = asm_file.read_text()
        mumps_file = Path(__file__).parent.resolve() / "mumps_test_file.m"
        self.mumps_target_text = mumps_file.read_text()

    def test_cyclomatic_complexity(self):
        """Test the cyclomatic complexity function."""
        function_score = cyclomatic_complexity(self.asm_target_text, language="ibmhlasm")
        expected_score = 3
        self.assertEqual(function_score, expected_score)
        function_score = cyclomatic_complexity(self.mumps_target_text, language="mumps")
        expected_score = 2
        self.assertEqual(function_score, expected_score)

    def test_length(self):
        """Test the get_program_vocabulary function."""
        tsm_asm = TreeSitterMetric(code=self.asm_target_text, language="ibmhlasm")
        function_score = tsm_asm.get_program_length()
        expected_score = 18
        self.assertEqual(function_score, expected_score)
        tsm_mumps = TreeSitterMetric(code=self.mumps_target_text, language="mumps")
        function_score = tsm_mumps.get_program_length()
        expected_score = 11
        self.assertEqual(function_score, expected_score)

    def test_vocabulary(self):
        """Test the get_program_vocabulary function."""
        tsm_asm = TreeSitterMetric(code=self.asm_target_text, language="ibmhlasm")
        function_score = tsm_asm.get_program_vocabulary()
        expected_score = 9
        self.assertEqual(function_score, expected_score)
        tsm_mumps = TreeSitterMetric(code=self.mumps_target_text, language="mumps")
        function_score = tsm_mumps.get_program_vocabulary()
        expected_score = 7
        self.assertEqual(function_score, expected_score)

    def test_difficulty(self):
        """Test the get_program_vocabulary function."""
        function_score = difficulty(self.asm_target_text, language="ibmhlasm")
        expected_score = 5
        self.assertEqual(function_score, expected_score)
        function_score = difficulty(self.mumps_target_text, language="mumps")
        expected_score = 2.625
        self.assertAlmostEqual(function_score, expected_score, places=2)

    def test_effort(self):
        """Test the halstead effort."""
        function_score = effort(self.asm_target_text, language="ibmhlasm")
        self.assertAlmostEqual(function_score, 285.29, places=2)
        function_score = effort(self.mumps_target_text, language="mumps")
        self.assertAlmostEqual(function_score, 81.06, places=2)

    def test_volume(self):
        """Test the halstead volume."""
        function_score = volume(self.asm_target_text, language="ibmhlasm")
        self.assertAlmostEqual(function_score, 57.06, places=2)
        function_score = volume(self.mumps_target_text, language="mumps")
        self.assertAlmostEqual(function_score, 30.88, places=2)

    def test_maintainability(self):
        """Test the halstead volume."""
        function_score = maintainability(self.asm_target_text, language="ibmhlasm")
        self.assertAlmostEqual(function_score, 65.48, places=2)
        function_score = maintainability(self.mumps_target_text, language="mumps")
        self.assertAlmostEqual(function_score, 72.326, places=2)

    def test_in_cli(self):
        """Test the function in the CLI."""
        output_path = Path("test.json")
        if output_path.exists():
            output_path.unlink()
        result = self.runner.invoke(
            app,
            [
                "evaluate",
                "cyclomatic-complexity",
                "-l",
                "ibmhlasm",
                "-t",
                "janus/language/treesitter/_tests/languages/ibmhlasm.asm",
                "-o",
                f"{output_path}",
            ],
        )
        self.assertEqual(result.exit_code, 0)
        self.assertTrue(output_path.exists())
        output_path.unlink()


if __name__ == "__main__":
    unittest.main()
janus/metrics/bleu.py
ADDED
@@ -0,0 +1,66 @@
from typing import Annotated, Optional

import click
import typer
from sacrebleu import sentence_bleu

from .metric import metric

# from sacrebleu import sentence_chrf


@metric(help="BLEU score using sacrebleu")
def bleu(
    target: str,
    reference: str,
    smooth_method: Annotated[
        str,
        typer.Option(
            click_type=click.Choice(["exp", "floor", "add-k", "none"]),
            help="Smoothing method to use.",
        ),
    ] = "exp",
    smooth_value: Annotated[
        Optional[float],
        typer.Option(
            help="Smoothing value (only for 'floor' and 'add-k').",
        ),
    ] = None,
    lowercase: Annotated[
        bool,
        typer.Option(
            help="Whether to lowercase the data.",
        ),
    ] = False,
    use_effective_order: Annotated[
        bool,
        typer.Option(
            help="Whether to use n-gram orders without matches.",
        ),
    ] = True,
    **kwargs,
) -> float:
    """Computes BLEU score using sacrebleu

    Arguments:
        target: The target text.
        reference: The reference text.
        smooth_method: smoothing method to use.
        smooth_value: smoothing value (only for floor and add-k).
        lowercase: whether to lowercase the data.
        use_effective_order: Don't use n-gram orders without matches.

    Returns:
        The BLEU score float.
    """
    score = sentence_bleu(
        target,
        [reference],
        smooth_method=smooth_method,
        smooth_value=smooth_value,
        lowercase=lowercase,
        use_effective_order=use_effective_order,
    )
    # Dividing by 100 to get the score in the range [0, 1]
    # sacrebleu gives the score in percentage
    return float(score.score) / 100.0
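For context, bleu() above leaves the scoring to sacrebleu and only rescales the result; the @metric decorator presumably registers it as an "evaluate" CLI command, as the evaluate subcommand used in test_treesitter_metrics.py suggests. A minimal sketch of the underlying call, assuming sacrebleu is installed (the candidate and reference strings are illustrative only):

    from sacrebleu import sentence_bleu

    result = sentence_bleu("print('hello')", ["print('hello, world')"], smooth_method="exp")
    score = result.score / 100.0  # sacrebleu reports a percentage; janus rescales to [0, 1]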
janus/metrics/chrf.py
ADDED
@@ -0,0 +1,55 @@
import typer
from sacrebleu import sentence_chrf

from .metric import metric


@metric(help="chrF score using sacrebleu")
def chrf(
    target: str,
    reference: str,
    n_char_order: int = typer.Option(
        default=6,
        help=(
            "A character n-gram order. If n_char_order=6, the metric refers to the "
            "official chrF/chrF++."
        ),
    ),
    n_word_order: int = typer.Option(
        default=2,
        help=(
            "A word n-gram order. If n_word_order=2, the metric refers to the official "
            "chrF++. If n_word_order=0, the metric is equivalent to the original ChrF."
        ),
    ),
    beta: float = typer.Option(
        default=2.0,
        help=(
            "Determines importance of recall w.r.t. precision. If beta=1, their "
            "importance is equal."
        ),
    ),
    **kwargs,
) -> float:
    """Calculate the chrF score using sacrebleu.

    Arguments:
        target: The target text.
        reference: The reference text.
        n_char_order: The character order.
        n_word_order: The word order.
        beta: The beta value.

    Returns:
        The chrF score.
    """
    score = sentence_chrf(
        target,
        [reference],
        char_order=n_char_order,
        word_order=n_word_order,
        beta=beta,
    )
    # Dividing by 100 to get the score in the range [0, 1]
    # sacrebleu gives the score in percentage
    return float(score.score) / 100.0
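chrf() follows the same pattern as bleu(): the heavy lifting is sacrebleu's sentence_chrf, rescaled from a percentage to [0, 1]. A minimal sketch, assuming sacrebleu is installed (strings illustrative only):

    from sacrebleu import sentence_chrf

    result = sentence_chrf("print('hello')", ["print('hello, world')"], char_order=6, word_order=2, beta=2.0)
    score = result.score / 100.0  # word_order=2 gives chrF++; word_order=0 gives the original chrF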
janus/metrics/complexity_metrics.py
ADDED
@@ -0,0 +1,208 @@
import math
from typing import List, Optional

from janus.language.block import CodeBlock
from janus.language.treesitter.treesitter import TreeSitterSplitter
from janus.utils.enums import LANGUAGES

from .metric import metric


class NodeException(Exception):
    pass


class TreeSitterMetric:
    """A class for calculating node-based complexity metrics of code."""

    def __init__(
        self,
        code: str,
        language: Optional[str],
    ):
        """
        Arguments:
            code: The code to get metrics on
            language: The language the code is written in
        """
        if language is None:
            raise ValueError("Error: must provide language for tree-sitter metrics")
        self.branch_nodes: List[str] = LANGUAGES[language].get("branch_node_types")
        self.operation_nodes: List[str] = LANGUAGES[language].get("operation_node_types")
        self.operand_nodes: List[str] = LANGUAGES[language].get("operand_node_types")
        self.code = code
        self.language = language
        self.splitter = TreeSitterSplitter(
            language=language,
        )
        self.ast = self.splitter._get_ast(code)

    def get_cyclomatic_complexity(self) -> int:
        if not self.branch_nodes:
            raise NodeException(f"No branch nodes are set for {self.language}")
        else:
            return self._count_nodes_of_type(self.ast, self.branch_nodes) + 1

    def get_lines_of_code(self) -> int:
        return self.code.count("\n")

    """
    The following metrics are based on Halstead complexity measures:
    https://en.wikipedia.org/wiki/Halstead_complexity_measures
    """

    def get_program_vocabulary(self) -> int:
        if not self.operation_nodes:
            raise NodeException(f"No operation nodes are set for {self.language}")
        else:
            return self._count_nodes_of_type(
                self.ast, self.operand_nodes, distinct=True
            ) + self._count_nodes_of_type(self.ast, self.operation_nodes, distinct=True)

    def get_program_length(self) -> int:
        if not self.operation_nodes:
            raise NodeException(f"No operation nodes are set for {self.language}")
        else:
            return self._count_nodes_of_type(
                self.ast, self.operation_nodes
            ) + self._count_nodes_of_type(self.ast, self.operand_nodes)

    def get_volume(self) -> float:
        vocabulary = self.get_program_vocabulary()
        if not vocabulary:
            raise ValueError(
                "Volume cannot be calculated because program vocabulary is 0. \
                Confirm that your code is parsing properly."
            )
        return self.get_program_length() * math.log2(vocabulary)

    def get_difficulty(self) -> float:
        return (
            self._count_nodes_of_type(self.ast, self.operation_nodes, distinct=True)
            / 2
            * self._count_nodes_of_type(self.ast, self.operand_nodes, distinct=False)
            / self._count_nodes_of_type(self.ast, self.operand_nodes, distinct=True)
        )

    def get_effort(self) -> float:
        return self.get_volume() * self.get_difficulty()

    def get_time_to_program(self) -> float:
        return self.get_effort() / 18

    def get_num_bugs(self) -> float:
        return self.get_effort() ** (2 / 3) / 3000

    def get_maintainability(self) -> float:
        volume = self.get_volume()
        cyclomatic_complexity = self.get_cyclomatic_complexity()
        lines_of_code = self.get_lines_of_code()
        if not (volume and lines_of_code):
            raise ValueError(
                "Maintainability cannot be calculated because volume or lines of code\
                is 0. Confirm that your code is parsing properly."
            )
        return max(
            0,
            (
                171
                - (5.2 * math.log(volume))
                - (0.23 * cyclomatic_complexity)
                - (16.2 * math.log(lines_of_code))
            )
            * 100
            / 171,
        )

    def _count_nodes_of_type(
        self, code_block: CodeBlock, nodes: List[str], distinct=False
    ) -> int:
        """Recurse through all nodes of a CodeBlock,
        take count of the number of nodes of a specified type"""
        seen_nodes = set()
        count = 0
        nodes_left = [code_block]
        while nodes_left:
            node = nodes_left.pop()
            if str(node.node_type) in nodes:
                if distinct:
                    seen_nodes.add(node.text.strip())
                else:
                    count += 1
            nodes_left.extend(node.children)
        return len(seen_nodes) if distinct else count


@metric(use_reference=False, help="Cyclomatic complexity score")
def cyclomatic_complexity(target: str, **kwargs) -> float:
    """Calculate the cyclomatic complexity score.

    Arguments:
        target: The target text.

    Returns:
        The cyclomatic complexity.
    """
    language = kwargs["language"]
    score = TreeSitterMetric(target, language).get_cyclomatic_complexity()
    return score


@metric(use_reference=False, help="Halstead effort score")
def effort(target: str, **kwargs) -> float:
    """Calculate the Halstead effort.

    Arguments:
        target: The target text.

    Returns:
        The Halstead effort.
    """
    language = kwargs["language"]
    score = TreeSitterMetric(target, language).get_effort()
    return score


@metric(use_reference=False, help="Halstead volume score")
def volume(target: str, **kwargs) -> float:
    """Calculate the Halstead volume.

    Arguments:
        target: The target text.

    Returns:
        The Halstead volume.
    """
    language = kwargs["language"]
    score = TreeSitterMetric(target, language).get_volume()
    return score


@metric(use_reference=False, help="Halstead difficulty score")
def difficulty(target: str, **kwargs) -> float:
    """Calculate the Halstead difficulty.

    Arguments:
        target: The target text.

    Returns:
        The Halstead difficulty.
    """
    language = kwargs["language"]
    score = TreeSitterMetric(target, language).get_difficulty()
    return score


@metric(use_reference=False, help="Maintainability score")
def maintainability(target: str, **kwargs) -> float:
    """Calculate the maintainability score.

    Arguments:
        target: The target text.

    Returns:
        The maintainability score.
    """
    language = kwargs["language"]
    score = TreeSitterMetric(target, language).get_maintainability()
    return score
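As exercised in janus/metrics/_tests/test_treesitter_metrics.py above, these metrics can also be called directly rather than through the CLI, passing the source language as a keyword argument. A minimal sketch, assuming the relevant tree-sitter grammar is available and using a placeholder source string (replace it with real code before running):

    from janus.metrics.complexity_metrics import TreeSitterMetric, cyclomatic_complexity, maintainability

    source = "..."  # placeholder: code in a supported language, e.g. mumps or ibmhlasm
    cc = cyclomatic_complexity(source, language="mumps")  # count of branch nodes + 1
    mi = maintainability(source, language="mumps")  # maintainability index in [0, 100]
    vocab = TreeSitterMetric(source, "mumps").get_program_vocabulary()  # distinct operators + operands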