logdetective 1.5.0__tar.gz → 1.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logdetective-1.5.0 → logdetective-1.7.0}/PKG-INFO +25 -2
- {logdetective-1.5.0 → logdetective-1.7.0}/README.md +24 -1
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/constants.py +1 -1
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/extractors.py +23 -10
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/logdetective.py +18 -2
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/models.py +32 -1
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/remote_log.py +1 -1
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/config.py +9 -1
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/database/models/__init__.py +12 -0
- logdetective-1.7.0/logdetective/server/database/models/exceptions.py +13 -0
- logdetective-1.7.0/logdetective/server/database/models/koji.py +126 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/database/models/merge_request_jobs.py +11 -10
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/database/models/metrics.py +1 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/emoji.py +22 -12
- logdetective-1.7.0/logdetective/server/exceptions.py +33 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/gitlab.py +1 -4
- logdetective-1.7.0/logdetective/server/koji.py +167 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/llm.py +11 -2
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/metric.py +10 -10
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/models.py +91 -2
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/plot.py +36 -35
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/server.py +192 -2
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/templates/gitlab_full_comment.md.j2 +3 -1
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/templates/gitlab_short_comment.md.j2 +3 -1
- logdetective-1.7.0/logdetective/skip_snippets.yml +12 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/utils.py +25 -1
- {logdetective-1.5.0 → logdetective-1.7.0}/pyproject.toml +1 -1
- {logdetective-1.5.0 → logdetective-1.7.0}/LICENSE +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/__init__.py +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/drain3.ini +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/prompts-summary-first.yml +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/prompts-summary-only.yml +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/prompts.yml +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/__init__.py +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/compressors.py +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/database/__init__.py +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/database/base.py +0 -0
- {logdetective-1.5.0 → logdetective-1.7.0}/logdetective.1.asciidoc +0 -0
{logdetective-1.5.0 → logdetective-1.7.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 1.5.0
+Version: 1.7.0
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -90,6 +90,7 @@ To analyze a log file, run the script with the following command line arguments:
 - `--summarizer` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only.(optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
 - `--n_lines` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only. (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
 - `--n_clusters` (optional, default 8): Number of clusters for Drain to organize log chunks into. This only makes sense when you are summarizing with Drain
+- `--skip_snippets` Path to patterns for skipping snippets.

 Example usage:

@@ -330,7 +331,7 @@ If the variable is not set, `./models` is mounted inside by default.

 Model can be downloaded from [our Hugging Space](https://huggingface.co/fedora-copr) by:
 ```
-$ curl -L -o models/mistral-7b-instruct-v0.
+$ curl -L -o models/mistral-7b-instruct-v0.3.Q4_K.gguf https://huggingface.co/fedora-copr/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/ggml-model-Q4_K.gguf
 ```

 Generate a new database revision with alembic
@@ -438,6 +439,28 @@ with spaces, or replacement fields marked with curly braces, `{}` left for inser
 Number of replacement fields in new prompts, must be the same as in originals.
 Although their position may be different.

+
+Skip Snippets
+-------------
+
+Certain log chunks may not contribute to the analysis of the problem under any circumstances.
+User can specify regular expressions, matching such log chunks, along with simple description,
+using Skip Snippets feature.
+
+Patterns to be skipped must be defined yaml file as a dictionary, where key is a description
+and value is a regular expression. For example:
+
+```
+child_exit_code_zero: "Child return code was: 0"
+```
+
+Special care must be taken not to write a regular expression which may match
+too many chunks, or which may be evaluated as data structure by the yaml parser.
+
+Example of a valid pattern definition file: `logdetective/skip_patterns.yml`,
+can be used as a starting point and is used as a default if no other definition is provided.
+
+
 License
 -------

{logdetective-1.5.0 → logdetective-1.7.0}/README.md
RENAMED
@@ -46,6 +46,7 @@ To analyze a log file, run the script with the following command line arguments:
 - `--summarizer` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only.(optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
 - `--n_lines` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only. (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
 - `--n_clusters` (optional, default 8): Number of clusters for Drain to organize log chunks into. This only makes sense when you are summarizing with Drain
+- `--skip_snippets` Path to patterns for skipping snippets.

 Example usage:

@@ -286,7 +287,7 @@ If the variable is not set, `./models` is mounted inside by default.

 Model can be downloaded from [our Hugging Space](https://huggingface.co/fedora-copr) by:
 ```
-$ curl -L -o models/mistral-7b-instruct-v0.
+$ curl -L -o models/mistral-7b-instruct-v0.3.Q4_K.gguf https://huggingface.co/fedora-copr/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/ggml-model-Q4_K.gguf
 ```

 Generate a new database revision with alembic
@@ -394,6 +395,28 @@ with spaces, or replacement fields marked with curly braces, `{}` left for inser
 Number of replacement fields in new prompts, must be the same as in originals.
 Although their position may be different.

+
+Skip Snippets
+-------------
+
+Certain log chunks may not contribute to the analysis of the problem under any circumstances.
+User can specify regular expressions, matching such log chunks, along with simple description,
+using Skip Snippets feature.
+
+Patterns to be skipped must be defined yaml file as a dictionary, where key is a description
+and value is a regular expression. For example:
+
+```
+child_exit_code_zero: "Child return code was: 0"
+```
+
+Special care must be taken not to write a regular expression which may match
+too many chunks, or which may be evaluated as data structure by the yaml parser.
+
+Example of a valid pattern definition file: `logdetective/skip_patterns.yml`,
+can be used as a starting point and is used as a default if no other definition is provided.
+
+
 License
 -------

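The documented format is a flat YAML mapping of description to regular expression. Below is a minimal sketch of how such a file could be loaded and applied outside of logdetective, using only PyYAML and `re`; the helper names here are illustrative and are not the package's own `load_skip_snippet_patterns`/`filter_snippet_patterns` API.

```python
import re
import yaml  # PyYAML

# Example pattern file content, matching the README's sample entry.
PATTERNS_YAML = """
child_exit_code_zero: "Child return code was: 0"
"""

def load_patterns(text: str) -> dict[str, re.Pattern]:
    """Compile the description -> regex mapping into re.Pattern objects."""
    raw = yaml.safe_load(text) or {}
    return {name: re.compile(expr) for name, expr in raw.items()}

def keep_chunk(chunk: str, patterns: dict[str, re.Pattern]) -> bool:
    """Return True if no skip pattern matches the chunk."""
    return not any(p.search(chunk) for p in patterns.values())

patterns = load_patterns(PATTERNS_YAML)
chunks = ["Child return code was: 0", "error: linker command failed"]
print([c for c in chunks if keep_chunk(c, patterns)])
# -> ['error: linker command failed']
```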
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/constants.py
RENAMED
@@ -4,7 +4,7 @@ in prompts.yaml instead.
 """

 # pylint: disable=line-too-long
-DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.
+DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.3-GGUF"

 PROMPT_TEMPLATE = """
 Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/extractors.py
RENAMED
@@ -5,7 +5,8 @@ from typing import Tuple
 import drain3
 from drain3.template_miner_config import TemplateMinerConfig

-from logdetective.utils import get_chunks
+from logdetective.utils import get_chunks, filter_snippet_patterns
+from logdetective.models import SkipSnippets

 LOG = logging.getLogger("logdetective")

@@ -13,7 +14,13 @@ LOG = logging.getLogger("logdetective")
 class DrainExtractor:
     """A class that extracts information from logs using a template miner algorithm."""

-    def __init__(
+    def __init__(
+        self,
+        verbose: bool = False,
+        context: bool = False,
+        max_clusters=8,
+        skip_snippets: SkipSnippets = SkipSnippets({}),
+    ):
         config = TemplateMinerConfig()
         config.load(f"{os.path.dirname(__file__)}/drain3.ini")
         config.profiling_enabled = verbose
@@ -21,22 +28,28 @@ class DrainExtractor:
         self.miner = drain3.TemplateMiner(config=config)
         self.verbose = verbose
         self.context = context
+        self.skip_snippets = skip_snippets

     def __call__(self, log: str) -> list[Tuple[int, str]]:
         out = []
+        # Create chunks
+        chunks = list(get_chunks(log))
+        # Keep only chunks that don't match any of the excluded patterns
+        chunks = [
+            (_, chunk)
+            for _, chunk in chunks
+            if not filter_snippet_patterns(chunk, self.skip_snippets)
+        ]
         # First pass create clusters
-        for _, chunk in
+        for _, chunk in chunks:
             processed_chunk = self.miner.add_log_message(chunk)
             LOG.debug(processed_chunk)
-
-        sorted_clusters = sorted(
-            self.miner.drain.clusters, key=lambda it: it.size, reverse=True
-        )
+        clusters = list(self.miner.drain.clusters)
         # Second pass, only matching lines with clusters,
         # to recover original text
-        for chunk_start, chunk in
+        for chunk_start, chunk in chunks:
             cluster = self.miner.match(chunk, "always")
-            if cluster in
+            if cluster in clusters:
                 out.append((chunk_start, chunk))
-
+                clusters.remove(cluster)
         return out
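Taken together, the widened constructor and the pre-filtering step mean the extractor can be driven as in the sketch below. This assumes the modules import exactly as shown in the hunks above; the pattern dictionary and log file name are examples, not shipped configuration.

```python
from logdetective.extractors import DrainExtractor
from logdetective.models import SkipSnippets

# Example pattern: drop chunks that only report a successful child exit.
skip = SkipSnippets({"child_exit_code_zero": "Child return code was: 0"})

extractor = DrainExtractor(
    verbose=False,
    context=True,
    max_clusters=8,
    skip_snippets=skip,
)

with open("build.log", encoding="utf-8") as f:
    # Returns a list of (chunk_start, chunk) tuples per the annotated return type.
    snippets = extractor(f.read())

for chunk_start, chunk in snippets:
    print(chunk_start, chunk[:80])
```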
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/logdetective.py
RENAMED
@@ -14,6 +14,7 @@ from logdetective.utils import (
     format_snippets,
     compute_certainty,
     load_prompts,
+    load_skip_snippet_patterns,
 )
 from logdetective.extractors import DrainExtractor

@@ -41,7 +42,7 @@ def setup_args():
         "--filename_suffix",
         help="Suffix of the model file name to be retrieved from Hugging Face.\
             Makes sense only if the model is specified with Hugging Face name.",
-        default="
+        default="Q4_K.gguf",
     )
     parser.add_argument("-n", "--no-stream", action="store_true")
     parser.add_argument(
@@ -82,6 +83,12 @@ def setup_args():
         default=DEFAULT_TEMPERATURE,
         help="Temperature for inference.",
     )
+    parser.add_argument(
+        "--skip_snippets",
+        type=str,
+        default=f"{os.path.dirname(__file__)}/skip_snippets.yml",
+        help="Path to patterns for skipping snippets.",
+    )
     return parser.parse_args()


@@ -120,9 +127,18 @@ async def run():  # pylint: disable=too-many-statements,too-many-locals
         LOG.error("You likely do not have enough memory to load the AI model")
         sys.exit(3)

+    try:
+        skip_snippets = load_skip_snippet_patterns(args.skip_snippets)
+    except OSError as e:
+        LOG.error(e)
+        sys.exit(5)
+
     # Log file summarizer initialization
     extractor = DrainExtractor(
-        args.verbose > 1,
+        args.verbose > 1,
+        context=True,
+        max_clusters=args.n_clusters,
+        skip_snippets=skip_snippets,
     )

     LOG.info("Getting summary")
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/models.py
RENAMED
@@ -1,5 +1,6 @@
+import re
 from typing import Optional
-from pydantic import BaseModel
+from pydantic import BaseModel, model_validator

 from logdetective.constants import (
     PROMPT_TEMPLATE,
@@ -40,3 +41,33 @@ class PromptConfig(BaseModel):
         self.staged_system_prompt = data.get(
             "staged_system_prompt", DEFAULT_SYSTEM_PROMPT
         )
+
+
+class SkipSnippets(BaseModel):
+    """Regular expressions defining snippets we should not analyze"""
+
+    snippet_patterns: dict[str, re.Pattern] = {}
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__(data=data)
+        if data is None:
+            return
+        self.snippet_patterns = {
+            key: re.compile(pattern) for key, pattern in data.items()
+        }
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_patterns(cls, data: dict):
+        """Check if all supplied patterns are valid regular expressions.
+        Techically replicating what is done in __init__ but with nicer error message."""
+        patterns = data["data"]
+        for key, pattern in patterns.items():
+            try:
+                re.compile(pattern=pattern)
+            except (TypeError, re.error) as ex:
+                raise ValueError(
+                    f"Invalid pattern `{pattern}` with name `{key}` supplied for skipping in logs."
+                ) from ex
+
+        return data
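The `check_patterns` validator runs in "before" mode, so an invalid regular expression should be rejected at construction time rather than at match time. A small sketch of the expected behaviour, assuming `SkipSnippets` behaves exactly as added in this diff (pydantic surfaces the `ValueError` raised by the validator as a `ValidationError`, which is itself a `ValueError` subclass):

```python
from pydantic import ValidationError
from logdetective.models import SkipSnippets  # as introduced in this release

# Valid patterns are compiled into re.Pattern objects keyed by description.
ok = SkipSnippets({"child_exit_code_zero": "Child return code was: 0"})
print(list(ok.snippet_patterns))  # ['child_exit_code_zero']

# An unbalanced parenthesis is not a valid regex; construction fails.
try:
    SkipSnippets({"broken": "unclosed ("})
except ValidationError as err:
    print(err)  # message includes the offending pattern and its name
```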
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/remote_log.py
RENAMED
@@ -53,7 +53,7 @@ class RemoteLog:
         LOG.debug("process url %s", self.url)
         try:
             response = await self._http_session.get(self.url, raise_for_status=True)
-        except aiohttp.ClientResponseError as ex:
+        except (aiohttp.ClientResponseError, aiohttp.ClientConnectorError) as ex:
             raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
         return await response.text()
         LOG.error("Invalid URL received ")
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/config.py
RENAMED
@@ -3,8 +3,9 @@ import logging
 import yaml
 from openai import AsyncOpenAI

-from logdetective.utils import load_prompts
+from logdetective.utils import load_prompts, load_skip_snippet_patterns
 from logdetective.server.models import Config, InferenceConfig
+import logdetective


 def load_server_config(path: str | None) -> Config:
@@ -60,9 +61,16 @@ def get_openai_api_client(ineference_config: InferenceConfig):

 SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
 SERVER_PROMPT_PATH = os.environ.get("LOGDETECTIVE_PROMPTS", None)
+# The default location for skip patterns is in the same directory
+# as logdetective __init__.py file.
+SERVER_SKIP_PATTERNS_PATH = os.environ.get(
+    "LOGDETECIVE_SKIP_PATTERNS",
+    f"{os.path.dirname(logdetective.__file__)}/skip_snippets.yml",
+)

 SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
 PROMPT_CONFIG = load_prompts(SERVER_PROMPT_PATH)
+SKIP_SNIPPETS_CONFIG = load_skip_snippet_patterns(SERVER_SKIP_PATTERNS_PATH)

 LOG = get_log(SERVER_CONFIG)

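Note that the environment variable spelled in the diff is `LOGDETECIVE_SKIP_PATTERNS` (without the second "T"), so any override must use that exact name. A minimal sketch of how the default path is resolved, mirroring the logic added above; the override path is an example, not a shipped file:

```python
import os
import logdetective

# Mirrors the default computed in logdetective/server/config.py: the pattern
# file ships next to the package's __init__.py unless the variable is set.
default_path = f"{os.path.dirname(logdetective.__file__)}/skip_snippets.yml"
skip_patterns_path = os.environ.get("LOGDETECIVE_SKIP_PATTERNS", default_path)
print(skip_patterns_path)
```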
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/database/models/__init__.py
RENAMED
@@ -5,10 +5,18 @@ from logdetective.server.database.models.merge_request_jobs import (
     Comments,
     Reactions,
 )
+from logdetective.server.database.models.koji import (
+    KojiTaskAnalysis,
+)
 from logdetective.server.database.models.metrics import (
     AnalyzeRequestMetrics,
     EndpointType,
 )
+from logdetective.server.database.models.exceptions import (
+    KojiTaskNotFoundError,
+    KojiTaskNotAnalyzedError,
+    KojiTaskAnalysisTimeoutError,
+)

 __all__ = [
     Base.__name__,
@@ -18,4 +26,8 @@ __all__ = [
     AnalyzeRequestMetrics.__name__,
     EndpointType.__name__,
     Forge.__name__,
+    KojiTaskAnalysis.__name__,
+    KojiTaskNotFoundError.__name__,
+    KojiTaskNotAnalyzedError.__name__,
+    KojiTaskAnalysisTimeoutError.__name__,
 ]
logdetective-1.7.0/logdetective/server/database/models/exceptions.py
ADDED
@@ -0,0 +1,13 @@
+"""Database model exceptions for logdetective."""
+
+
+class KojiTaskNotFoundError(Exception):
+    """Exception raised when a koji task is not found"""
+
+
+class KojiTaskNotAnalyzedError(Exception):
+    """Exception raised when a koji task analysis is still in progress"""
+
+
+class KojiTaskAnalysisTimeoutError(Exception):
+    """Exception raised when a koji task analysis has timed out"""
logdetective-1.7.0/logdetective/server/database/models/koji.py
ADDED
@@ -0,0 +1,126 @@
+from datetime import datetime, timedelta, timezone
+from sqlalchemy import Column, BigInteger, DateTime, ForeignKey, Integer, String
+from sqlalchemy.orm import relationship
+from sqlalchemy.exc import OperationalError
+import backoff
+
+from logdetective.server.config import SERVER_CONFIG
+from logdetective.server.compressors import LLMResponseCompressor
+from logdetective.server.database.models.metrics import AnalyzeRequestMetrics
+from logdetective.server.database.base import Base, transaction, DB_MAX_RETRIES
+from logdetective.server.database.models.exceptions import (
+    KojiTaskNotFoundError,
+    KojiTaskNotAnalyzedError,
+    KojiTaskAnalysisTimeoutError,
+)
+from logdetective.server.models import KojiStagedResponse
+
+
+class KojiTaskAnalysis(Base):
+    """Store details for the koji task analysis"""
+
+    __tablename__ = "koji_task_analysis"
+
+    id = Column(Integer, primary_key=True)
+    koji_instance = Column(String(255), nullable=False, index=True)
+    task_id = Column(BigInteger, nullable=False, index=True, unique=True)
+    log_file_name = Column(String(255), nullable=False, index=True)
+    request_received_at = Column(
+        DateTime,
+        nullable=False,
+        index=True,
+        default=datetime.now(timezone.utc),
+        comment="Timestamp when the request was received",
+    )
+    response_id = Column(
+        Integer,
+        ForeignKey("analyze_request_metrics.id"),
+        nullable=True,
+        index=False,
+        comment="The id of the analyze request metrics for this task",
+    )
+    response = relationship("AnalyzeRequestMetrics")
+
+    @classmethod
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+    def create_or_restart(cls, koji_instance: str, task_id: int, log_file_name: str):
+        """Create a new koji task analysis"""
+        with transaction(commit=True) as session:
+            # Check if the task analysis already exists
+            koji_task_analysis = (
+                session.query(cls)
+                .filter_by(koji_instance=koji_instance, task_id=task_id)
+                .first()
+            )
+            if koji_task_analysis:
+                # If it does, update the request_received_at timestamp
+                koji_task_analysis.request_received_at = datetime.now(timezone.utc)
+                session.add(koji_task_analysis)
+                session.flush()
+                return
+
+            # If it doesn't, create a new one
+            koji_task_analysis = KojiTaskAnalysis()
+            koji_task_analysis.koji_instance = koji_instance
+            koji_task_analysis.task_id = task_id
+            koji_task_analysis.log_file_name = log_file_name
+            session.add(koji_task_analysis)
+            session.flush()
+
+    @classmethod
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+    def add_response(cls, task_id: int, metric_id: int):
+        """Add a response to a koji task analysis"""
+        with transaction(commit=True) as session:
+            koji_task_analysis = session.query(cls).filter_by(task_id=task_id).first()
+            # Ensure that the task analysis doesn't already have a response
+            if koji_task_analysis.response:
+                # This is probably due to an analysis that took so long that
+                # a follow-up analysis was started before this one completed.
+                # We want to maintain consistency between the response we
+                # returned to the consumer, so we'll just drop this extra one
+                # on the floor and keep the one saved in the database.
+                return
+
+            metric = (
+                session.query(AnalyzeRequestMetrics).filter_by(id=metric_id).first()
+            )
+            koji_task_analysis.response = metric
+            session.add(koji_task_analysis)
+            session.flush()
+
+    @classmethod
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+    def get_response_by_task_id(cls, task_id: int) -> KojiStagedResponse:
+        """Get a koji task analysis by task id"""
+        with transaction(commit=False) as session:
+            koji_task_analysis = session.query(cls).filter_by(task_id=task_id).first()
+            if not koji_task_analysis:
+                raise KojiTaskNotFoundError(f"Task {task_id} not yet analyzed")
+
+            if not koji_task_analysis.response:
+                # Check if the task analysis has timed out
+                if koji_task_analysis.request_received_at.replace(
+                    tzinfo=timezone.utc
+                ) + timedelta(
+                    minutes=SERVER_CONFIG.koji.analysis_timeout
+                ) < datetime.now(timezone.utc):
+                    raise KojiTaskAnalysisTimeoutError(
+                        f"Task {task_id} analysis has timed out"
+                    )
+
+                # Task analysis is still in progress, so we need to let the
+                # consumer know
+                raise KojiTaskNotAnalyzedError(
+                    f"Task {task_id} analysis is still in progress"
+                )
+
+            # We need to decompress the response message and return it
+            response = LLMResponseCompressor.unzip(
+                koji_task_analysis.response.compressed_response
+            )
+            return KojiStagedResponse(
+                task_id=task_id,
+                log_file_name=koji_task_analysis.log_file_name,
+                response=response,
+            )
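The three exception classes let a caller distinguish "never requested", "still running", and "took too long" when polling for a result. Below is a hedged sketch of a poll loop built on `KojiTaskAnalysis.get_response_by_task_id` as defined above; it assumes a configured server environment with a reachable database, and the task id and sleep interval are placeholders.

```python
import time

from logdetective.server.database.models import (
    KojiTaskAnalysis,
    KojiTaskNotFoundError,
    KojiTaskNotAnalyzedError,
    KojiTaskAnalysisTimeoutError,
)

TASK_ID = 123456789  # placeholder koji task id

while True:
    try:
        staged = KojiTaskAnalysis.get_response_by_task_id(TASK_ID)
        print(staged.log_file_name, staged.response)
        break
    except KojiTaskNotFoundError:
        # No analysis row exists yet for this task.
        print("task not analyzed yet; submit an analysis request first")
        break
    except KojiTaskNotAnalyzedError:
        # Row exists but no response is stored yet; analysis still running.
        time.sleep(30)
    except KojiTaskAnalysisTimeoutError:
        print("analysis exceeded the configured koji analysis_timeout")
        break
```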
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/database/models/merge_request_jobs.py
RENAMED
@@ -1,6 +1,6 @@
 import enum
 import datetime
-from typing import Optional, List, Tuple
+from typing import Optional, List, Tuple, Self

 import backoff

@@ -15,6 +15,7 @@ from sqlalchemy import (
     desc,
 )
 from sqlalchemy.orm import relationship
+from sqlalchemy.engine import Row
 from sqlalchemy.exc import OperationalError
 from logdetective.server.database.base import Base, transaction, DB_MAX_RETRIES

@@ -134,7 +135,7 @@ class GitlabMergeRequestJobs(Base):
     @classmethod
     def get_by_mr_iid(
         cls, forge: Forge, project_id: int, mr_iid
-    ) ->
+    ) -> List[Self]:
         """Get all the mr jobs saved for the specified mr iid and project id."""
         with transaction(commit=False) as session:
             comments = (
@@ -262,7 +263,7 @@ class Comments(Base):
         cls,
         forge: Forge,
         comment_id: str,
-    ) -> Optional[
+    ) -> Optional[Self]:
         """Search for a detailed comment
         by its unique forge comment id.

@@ -324,7 +325,7 @@ class Comments(Base):
         forge: Forge,
         project_id: int,
         mr_iid: int,
-    ) ->
+    ) -> List[Self]:
         """Search for all merge request comments.

         Args:
@@ -358,7 +359,7 @@ class Comments(Base):
         mr_iid: int,
         job_id: int,
         comment_id: str,
-    ) ->
+    ) -> Self:
         """Search for a detailed comment
         or create a new one if not found.

@@ -372,11 +373,11 @@ class Comments(Base):
         comment = Comments.get_by_gitlab_id(forge, comment_id)
         if comment is None:
             id_ = Comments.create(forge, project_id, mr_iid, job_id, comment_id)
-            comment =
+            comment = Comments.get_by_id(id_)
         return comment

     @classmethod
-    def get_since(cls, time: datetime.datetime) ->
+    def get_since(cls, time: datetime.datetime) -> List[Self]:
         """Get all the comments created after the given time."""
         with transaction(commit=False) as session:
             comments = (
@@ -485,7 +486,7 @@ class Reactions(Base):
         mr_iid: int,
         job_id: int,
         comment_id: str,
-    ) ->
+    ) -> List[Self]:
         """Get all reactions for a comment

         Args:
@@ -524,7 +525,7 @@ class Reactions(Base):
         job_id: int,
         comment_id: str,
         reaction_type: str,
-    ) ->
+    ) -> Self | None:
         """Get reaction, of a given type,
         for a comment

@@ -589,7 +590,7 @@ class Reactions(Base):
     @classmethod
     def get_since(
         cls, time: datetime.datetime
-    ) -> List[Tuple[datetime.datetime,
+    ) -> List[Row[Tuple[datetime.datetime, Self]]]:
         """Get all the reactions on comments created after the given time
         and the comment creation time."""
         with transaction(commit=False) as session:
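Most of the changes in this file tighten return annotations to `typing.Self` (PEP 673, available from Python 3.11), which lets a classmethod declare that it returns instances of whichever subclass it is called on. A minimal standalone illustration of the idiom; nothing in this snippet is logdetective code.

```python
from typing import List, Self


class Record:
    registry: List["Record"] = []

    @classmethod
    def create(cls) -> Self:
        # `Self` resolves to the concrete class the method is called on,
        # so Comment.create() is typed as returning Comment, not Record.
        instance = cls()
        cls.registry.append(instance)
        return instance


class Comment(Record):
    pass


comment: Comment = Comment.create()  # type checks without a cast
```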
{logdetective-1.5.0 → logdetective-1.7.0}/logdetective/server/emoji.py
RENAMED
@@ -10,6 +10,7 @@ from logdetective.server.database.models import (
     Comments,
     Reactions,
     GitlabMergeRequestJobs,
+    Forge,
 )
 from logdetective.server.config import LOG

@@ -19,7 +20,7 @@ async def collect_emojis(gitlab_conn: gitlab.Gitlab, period: TimePeriod):
     Collect emoji feedback from logdetective comments saved in database.
     Check only comments created in the last given period of time.
     """
-    comments = Comments.get_since(period.get_period_start_time())
+    comments = Comments.get_since(period.get_period_start_time()) or []
     comments_for_gitlab_connection = [
         comment for comment in comments if comment.forge == gitlab_conn.url
     ]
@@ -32,7 +33,14 @@ async def collect_emojis_for_mr(
     """
     Collect emoji feedback from logdetective comments in the specified MR.
     """
-
+    comments = []
+    try:
+        url = Forge(gitlab_conn.url)
+    except ValueError as ex:
+        LOG.exception("Attempt to use unrecognized Forge `%s`", gitlab_conn.url)
+        raise ex
+    mr_jobs = GitlabMergeRequestJobs.get_by_mr_iid(url, project_id, mr_iid) or []
+
     comments = [Comments.get_by_mr_job(mr_job) for mr_job in mr_jobs]
     await collect_emojis_in_comments(comments, gitlab_conn)

@@ -63,38 +71,40 @@ async def collect_emojis_in_comments(  # pylint: disable=too-many-locals
     Collect emoji feedback from specified logdetective comments.
     """
     projects = {}
-
+    merge_requests = {}
     for comment in comments:
         mr_job_db = GitlabMergeRequestJobs.get_by_id(comment.merge_request_job_id)
         if not mr_job_db:
             continue
         if mr_job_db.id not in projects:
-
+            project = await _handle_gitlab_operation(
                 gitlab_conn.projects.get, mr_job_db.project_id
             )
             if not project:
                 continue
+            projects[mr_job_db.id] = project
         else:
             project = projects[mr_job_db.id]
-
-        if
-
-            project.mergerequests.get,
+        merge_request_iid = mr_job_db.mr_iid
+        if merge_request_iid not in merge_requests:
+            merge_request = await _handle_gitlab_operation(
+                project.mergerequests.get, merge_request_iid
            )
-            if not
+            if not merge_request:
                 continue
+            merge_requests[merge_request_iid] = merge_request
         else:
-
+            merge_request = merge_requests[merge_request_iid]

         discussion = await _handle_gitlab_operation(
-
+            merge_request.discussions.get, comment.comment_id
         )
         if not discussion:
             continue

         # Get the ID of the first note
         note_id = discussion.attributes["notes"][0]["id"]
-        note = await _handle_gitlab_operation(
+        note = await _handle_gitlab_operation(merge_request.notes.get, note_id)
         if not note:
             continue
