logdetective 2.13.0__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/constants.py +1 -1
- logdetective/logdetective.py +12 -24
- logdetective/models.py +1 -20
- logdetective/prompts/message_template.j2 +2 -0
- logdetective/prompts/snippet_message_template.j2 +2 -0
- logdetective/prompts/snippet_system_prompt.j2 +38 -0
- logdetective/prompts/staged_message_template.j2 +2 -0
- logdetective/prompts/staged_system_prompt.j2 +45 -0
- logdetective/prompts/system_prompt.j2 +57 -0
- logdetective/prompts.py +87 -0
- logdetective/prompts.yml +7 -0
- logdetective/server/config.py +3 -2
- logdetective/server/database/models/metrics.py +7 -4
- logdetective/server/metric.py +200 -7
- logdetective/server/models.py +12 -0
- logdetective/server/server.py +45 -75
- logdetective/utils.py +35 -26
- {logdetective-2.13.0.dist-info → logdetective-3.1.0.dist-info}/METADATA +54 -24
- {logdetective-2.13.0.dist-info → logdetective-3.1.0.dist-info}/RECORD +22 -16
- logdetective/server/plot.py +0 -432
- {logdetective-2.13.0.dist-info → logdetective-3.1.0.dist-info}/WHEEL +0 -0
- {logdetective-2.13.0.dist-info → logdetective-3.1.0.dist-info}/entry_points.txt +0 -0
- {logdetective-2.13.0.dist-info → logdetective-3.1.0.dist-info}/licenses/LICENSE +0 -0
logdetective/constants.py
CHANGED
@@ -4,7 +4,7 @@ in prompts.yaml instead.
 """
 
 # pylint: disable=line-too-long
-DEFAULT_ADVISOR = "fedora-copr/
+DEFAULT_ADVISOR = "fedora-copr/granite-3.2-8b-instruct-GGUF"
 
 PROMPT_TEMPLATE = """
 Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
logdetective/logdetective.py
CHANGED
@@ -41,31 +41,15 @@ def setup_args():
     )
     parser.add_argument(
         "-F",
-        "--
+        "--filename-suffix",
         help="Suffix of the model file name to be retrieved from Hugging Face.\
             Makes sense only if the model is specified with Hugging Face name.",
         default="Q4_K.gguf",
     )
     parser.add_argument("-n", "--no-stream", action="store_true")
-    parser.add_argument(
-        "-S",
-        "--summarizer",
-        type=str,
-        default="drain",
-        help="DISABLED: LLM summarization option was removed. \
-            Argument is kept for backward compatibility only.",
-    )
-    parser.add_argument(
-        "-N",
-        "--n_lines",
-        type=int,
-        default=None,
-        help="DISABLED: LLM summarization option was removed. \
-            Argument is kept for backward compatibility only.",
-    )
     parser.add_argument(
         "-C",
-        "--
+        "--n-clusters",
         type=int,
         default=8,
         help="Number of clusters for Drain to organize log chunks into.\
@@ -75,10 +59,18 @@ def setup_args():
     parser.add_argument("-q", "--quiet", action="store_true")
     parser.add_argument(
         "--prompts",
+        "--prompts-config",
         type=str,
         default=f"{os.path.dirname(__file__)}/prompts.yml",
         help="Path to prompt configuration file.",
     )
+    parser.add_argument(
+        "--prompt-templates",
+        type=str,
+        default=f"{os.path.dirname(__file__)}/prompts",
+        help="Path to prompt template dir. Prompts must be valid Jinja templates, \
+            and system prompts must include field `system_time`.",
+    )
     parser.add_argument(
         "--temperature",
         type=float,
@@ -86,7 +78,7 @@ def setup_args():
         help="Temperature for inference.",
     )
     parser.add_argument(
-        "--
+        "--skip-snippets",
         type=str,
         default=f"{os.path.dirname(__file__)}/skip_snippets.yml",
         help="Path to patterns for skipping snippets.",
@@ -105,10 +97,6 @@ async def run():  # pylint: disable=too-many-statements,too-many-locals,too-many
         sys.stderr.write("Error: --quiet and --verbose is mutually exclusive.\n")
         sys.exit(2)
 
-    # Emit warning about use of discontinued args
-    if args.n_lines or args.summarizer != "drain":
-        LOG.warning("LLM based summarization was removed. Drain will be used instead.")
-
     # Logging facility setup
     log_level = logging.INFO
     if args.verbose >= 1:
@@ -117,7 +105,7 @@ async def run():  # pylint: disable=too-many-statements,too-many-locals,too-many
         log_level = 0
 
     # Get prompts configuration
-    prompts_configuration = load_prompts(args.prompts)
+    prompts_configuration = load_prompts(args.prompts, args.prompt_templates)
 
     logging.basicConfig(stream=sys.stdout)
     LOG.setLevel(log_level)
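For orientation, a minimal sketch of how the renamed options feed into prompt loading. The paths are illustrative and the import location of `load_prompts` is an assumption (it is not shown in this diff):

```python
from logdetective.utils import load_prompts  # assumed module for load_prompts

# Mirrors the new call in run(): the first argument is --prompts/--prompts-config
# (the YAML prompt configuration), the second is the new --prompt-templates
# directory of Jinja templates.
prompts_configuration = load_prompts(
    "logdetective/prompts.yml",
    "logdetective/prompts",
)
```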
logdetective/models.py
CHANGED
@@ -21,26 +21,7 @@ class PromptConfig(BaseModel):
     snippet_system_prompt: str = DEFAULT_SYSTEM_PROMPT
     staged_system_prompt: str = DEFAULT_SYSTEM_PROMPT
 
-
-        super().__init__()
-        if data is None:
-            return
-        self.prompt_template = data.get("prompt_template", PROMPT_TEMPLATE)
-        self.snippet_prompt_template = data.get(
-            "snippet_prompt_template", SNIPPET_PROMPT_TEMPLATE
-        )
-        self.prompt_template_staged = data.get(
-            "prompt_template_staged", PROMPT_TEMPLATE_STAGED
-        )
-        self.default_system_prompt = data.get(
-            "default_system_prompt", DEFAULT_SYSTEM_PROMPT
-        )
-        self.snippet_system_prompt = data.get(
-            "snippet_system_prompt", DEFAULT_SYSTEM_PROMPT
-        )
-        self.staged_system_prompt = data.get(
-            "staged_system_prompt", DEFAULT_SYSTEM_PROMPT
-        )
+    references: Optional[list[dict[str, str]]] = None
 
 
 class SkipSnippets(BaseModel):
logdetective/prompts/snippet_system_prompt.j2
ADDED
@@ -0,0 +1,38 @@
+System time: {{ system_time }}
+
+You are a highly capable expert system specialized in packaging and delivery of software using RPM,
+within the RHEL ecosystem. Your purpose is to help package maintainers diagnose and resolve package build failures.
+You are truthful, concise, and helpful.
+
+## Input processing
+
+You will work with snippets of logs produced during package build.
+These snippets were extracted using data mining algorithm, and may not contain information
+useful for diagnosing the root cause. Snippets without useful information must be disregarded.
+
+## Analysis procedure
+
+1. Provide the snippet with a short explanation.
+2. If the snippet doesn't contain useful information, indicate the fact with a short sentence.
+
+## Examples:
+
+User: "Snippet: RPM build errors:"
+Assistant: "Errors occurred during package build.
+---
+User: "Snippet: Copr build error: Build failed"
+Assistant: "The build in Copr has failed."
+---
+User: "Snippet: /bin/tar: Removing leading `/' from member names"
+Assistant: "This snippet is irrelevant."
+---
+
+{% if references %}
+## References:
+
+When necessary, suggest resources that may be helpful to user.
+
+{% for reference in references %}
+* {{ reference.name }} : {{ reference.link }}
+{% endfor %}
+{% endif %}
logdetective/prompts/staged_system_prompt.j2
ADDED
@@ -0,0 +1,45 @@
+System time: {{ system_time }}
+
+You are a highly capable expert system specialized in packaging and delivery of software using RPM,
+within the RHEL ecosystem. Your purpose is to help package maintainers diagnose and resolve package build failures.
+You are truthful, concise, and helpful.
+
+## Input processing
+
+You will work with snippets of logs produced during package build.
+These snippets were extracted using data mining algorithm, and may not contain information
+useful for diagnosing the root cause. Snippets without useful information must be disregarded.
+
+## Analysis procedure
+
+Analyzed snippets are a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+Do not reanalyze the raw log [X].
+
+Snippets are delimited with '================'.
+
+1. Analyze individual snippets, unless they already have analysis attached.
+2. Disregard snippets that do not contain useful information.
+3. Using information from all snippets provide explanation of the issue.
+4. (Optional) Recommend a solution for the package maintainer, only if the cause is clear.
+
+## Examples:
+
+User: "
+Snippets:
+================
+Snippet No. 1 at line #452:
+[error: command 'gcc' failed: No such file or directory]: [`gcc` compiler is not available in the build environment]
+================
+Snippet No. 2 at line #452:
+[Copr build error: Build failed]: [Package build in Copr failed]"
+Assistant: "Package build in Copr failed due to missing `gcc` compiler. Ensure that all build requirements are correctly specified in the spec file."
+
+{% if references %}
+## References:
+
+When necessary, suggest resources that may be helpful to user.
+
+{% for reference in references %}
+* {{ reference.name }} : {{ reference.link }}
+{% endfor %}
+{% endif %}
logdetective/prompts/system_prompt.j2
ADDED
@@ -0,0 +1,57 @@
+System time: {{ system_time }}
+
+You are a highly capable expert system specialized in packaging and delivery of software using RPM,
+within the RHEL ecosystem. Your purpose is to help package maintainers diagnose and resolve package build failures.
+You are truthful, concise, and helpful.
+
+## Input processing
+
+You will work with snippets of logs produced during a failed package build.
+These snippets were extracted using data mining algorithm, and may not contain information
+useful for diagnosing the root cause. Snippets without useful information must be disregarded.
+General error messages, such as failure of commands used during build, are expected.
+
+## Temporal Logic and Causality
+
+Log snippets are typically provided in chronological order. When analyzing multiple snippets
+the first significant error in the log is usually the root cause.
+
+An error occurring at line #500 cannot be caused by an error occurring at line #1000.
+
+Subsequent errors are often side effects of the initial failure. Focus your diagnosis on the primary trigger.
+
+## Analysis procedure
+
+Snippets are provided in order of appearance in the original log, with attached line number,
+and are delimited with '================'.
+Avoid generic or boilerplate recommendations (e.g., "check the logs," "ensure dependencies are met").
+If a specific root cause is identified, the recommendation must directly address that cause.
+
+1. Analyze individual snippets. Do not quote analyzed snippets.
+2. Disregard snippets that do not contain useful information.
+3. Using information from all snippets provide explanation of the issue. Be as specific as possible.
+4. (Optional) Recommend a solution for the package maintainer, only if the cause is clear.
+
+## Examples:
+
+User: "
+Snippets:
+Snippet No. 1 at line #452:
+
+error: command 'gcc' failed: No such file or directory
+================
+Snippet No. 2 at line #560:
+
+Copr build error: Build failed
+================"
+Assistant: "Package build in Copr failed due to missing `gcc` compiler. Ensure that all build requirements are correctly specified in the spec file."
+
+{% if references %}
+## References:
+
+When necessary, suggest resources that may be helpful to user.
+
+{% for reference in references %}
+* {{ reference.name }} : {{ reference.link }}
+{% endfor %}
+{% endif %}
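A minimal sketch of rendering one of these templates directly with jinja2. The template directory path and the `references` entry are illustrative (the entry mirrors the commented example in prompts.yml):

```python
from datetime import datetime, timezone
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("logdetective/prompts"))
prompt = env.get_template("system_prompt.j2").render(
    # `system_time` is the field every system prompt must include.
    system_time=datetime.now(timezone.utc),
    references=[
        {
            "name": "Fedora Packaging Guidelines",
            "link": "https://docs.fedoraproject.org/en-US/packaging-guidelines/",
        }
    ],
)
print(prompt)
```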
logdetective/prompts.py
ADDED
@@ -0,0 +1,87 @@
+from datetime import datetime, timezone
+from typing import Optional
+from jinja2 import Environment, FileSystemLoader, Template
+
+from logdetective.models import PromptConfig
+
+
+class PromptManager:  # pylint: disable=too-many-instance-attributes
+    """Manages prompts defined as jinja templates"""
+    _tmp_env: Environment
+
+    # Templates for system prompts
+    _default_system_prompt_template: Template
+    _snippet_system_prompt_template: Template
+    _staged_system_prompt_template: Template
+
+    # Templates for messages
+    default_message_template: Template
+    snippet_message_template: Template
+    staged_message_template: Template
+
+    _references: Optional[list[dict[str, str]]] = None
+
+    def __init__(
+        self, prompts_path: str, prompts_configuration: Optional[PromptConfig] = None
+    ) -> None:
+        self._tmp_env = Environment(loader=FileSystemLoader(prompts_path))
+
+        self._default_system_prompt_template = self._tmp_env.get_template(
+            "system_prompt.j2"
+        )
+        self._snippet_system_prompt_template = self._tmp_env.get_template(
+            "snippet_system_prompt.j2"
+        )
+        self._staged_system_prompt_template = self._tmp_env.get_template(
+            "staged_system_prompt.j2"
+        )
+
+        self.default_message_template = self._tmp_env.get_template(
+            "message_template.j2"
+        )
+        self.snippet_message_template = self._tmp_env.get_template(
+            "snippet_message_template.j2"
+        )
+        self.staged_message_template = self._tmp_env.get_template(
+            "staged_message_template.j2"
+        )
+
+        if prompts_configuration:
+            self._references = prompts_configuration.references
+
+    # To maintain backward compatibility with `logdetective.models.PromptConfig`
+    @property
+    def default_system_prompt(self) -> str:
+        """Render system prompt from a template"""
+        return self._default_system_prompt_template.render(
+            system_time=datetime.now(timezone.utc), references=self._references
+        )
+
+    @property
+    def snippet_system_prompt(self) -> str:
+        """Render system prompt from a template"""
+        return self._snippet_system_prompt_template.render(
+            system_time=datetime.now(timezone.utc), references=self._references
+        )
+
+    @property
+    def staged_system_prompt(self) -> str:
+        """Render system prompt from a template"""
+        return self._staged_system_prompt_template.render(
+            system_time=datetime.now(timezone.utc), references=self._references
+        )
+
+    @property
+    def prompt_template(self) -> str:
+        """Render message prompt from the template"""
+        return self.default_message_template.render()
+
+    @property
+    def snippet_prompt_template(self) -> str:
+        """Render message prompt from the template"""
+        return self.snippet_message_template.render()
+
+    @property
+    def prompt_template_staged(self) -> str:
+        """Render message prompt from the template"""
+        return self.staged_message_template.render()
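A minimal usage sketch of the new `PromptManager`. The prompts directory path is an assumption based on the package layout, and the reference entry mirrors the commented example in prompts.yml:

```python
from logdetective.models import PromptConfig
from logdetective.prompts import PromptManager

# PromptConfig.references feeds the optional "## References" section
# of the rendered system prompts.
config = PromptConfig(
    references=[
        {
            "name": "Mock user documentation",
            "link": "https://rpm-software-management.github.io/mock/",
        }
    ]
)
manager = PromptManager("logdetective/prompts", prompts_configuration=config)

print(manager.default_system_prompt)  # rendered system_prompt.j2
print(manager.prompt_template)        # rendered message_template.j2
```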
logdetective/prompts.yml
CHANGED
@@ -88,3 +88,10 @@ staged_system_prompt: |
   You never speculate about package being built or fabricate information.
   If you do not know the answer, you acknowledge the fact and end your response.
   Your responses must be as short as possible.
+
+# Optional references, to be used when constructing prompt from Jinja template
+# references:
+#   - name: Fedora Packaging Guidelines
+#     link: https://docs.fedoraproject.org/en-US/packaging-guidelines/
+#   - name: Mock user documentation
+#     link: https://rpm-software-management.github.io/mock/
logdetective/server/config.py
CHANGED
@@ -61,7 +61,8 @@ def get_openai_api_client(inference_config: InferenceConfig):
 
 
 SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
-
+SERVER_PROMPT_CONF_PATH = os.environ.get("LOGDETECTIVE_PROMPTS", None)
+SERVER_PROMPT_PATH = os.environ.get("LOGDETECTIVE_PROMPT_TEMPLATES", None)
 # The default location for skip patterns is in the same directory
 # as logdetective __init__.py file.
 SERVER_SKIP_PATTERNS_PATH = os.environ.get(
@@ -70,7 +71,7 @@ SERVER_SKIP_PATTERNS_PATH = os.environ.get(
 )
 
 SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
-PROMPT_CONFIG = load_prompts(SERVER_PROMPT_PATH)
+PROMPT_CONFIG = load_prompts(SERVER_PROMPT_CONF_PATH, SERVER_PROMPT_PATH)
 SKIP_SNIPPETS_CONFIG = load_skip_snippet_patterns(SERVER_SKIP_PATTERNS_PATH)
 
 LOG = get_log(SERVER_CONFIG)
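A sketch of pointing the server at custom prompt files through the two environment variables read above (the paths are illustrative):

```python
import os

# LOGDETECTIVE_PROMPTS selects the prompt configuration file;
# LOGDETECTIVE_PROMPT_TEMPLATES selects the Jinja template directory.
# Both must be set before importing logdetective.server.config,
# since the module reads them at import time.
os.environ["LOGDETECTIVE_PROMPTS"] = "/etc/logdetective/prompts.yml"
os.environ["LOGDETECTIVE_PROMPT_TEMPLATES"] = "/etc/logdetective/prompts"
```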
logdetective/server/database/models/metrics.py
CHANGED
@@ -314,10 +314,13 @@ class AnalyzeRequestMetrics(Base):
                 "time_range"
             ),
             (
-                func.
-                func.
-
-
+                func.coalesce(
+                    func.avg(
+                        func.extract(  # pylint: disable=not-callable
+                            "epoch", cls.response_sent_at - cls.request_received_at
+                        )
+                    ),
+                    0
                 )
             ).label("average_response_seconds"),
         )
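With the added `coalesce`, time buckets that contain no responses now report 0 instead of NULL; the expression corresponds roughly to `COALESCE(AVG(EXTRACT(epoch FROM response_sent_at - request_received_at)), 0)` in SQL.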
logdetective/server/metric.py
CHANGED
@@ -1,17 +1,24 @@
 import inspect
+from collections import defaultdict
 import datetime
-
-from typing import Optional, Union
+from typing import Optional, Union, Dict
 from functools import wraps
 
 import aiohttp
-
+import numpy
 from starlette.responses import StreamingResponse
-
+
 from logdetective.remote_log import RemoteLog
 from logdetective.server.config import LOG
 from logdetective.server.compressors import LLMResponseCompressor, RemoteLogCompressor
-from logdetective.server.
+from logdetective.server.models import (
+    TimePeriod,
+    MetricTimeSeries,
+    StagedResponse,
+    Response,
+    Explanation,
+)
+from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics, Reactions
 from logdetective.server.exceptions import LogDetectiveMetricsError
 
 
@@ -47,7 +54,7 @@ async def add_new_metrics(
 
 async def update_metrics(
     metrics_id: int,
-    response: Union[
+    response: Union[Response, StagedResponse, StreamingResponse],
     sent_at: Optional[datetime.datetime] = None,
 ) -> None:
     """Update a database metric entry for a received request,
@@ -71,7 +78,7 @@ async def update_metrics(
     )
     response_length = None
     if hasattr(response, "explanation") and isinstance(
-        response.explanation,
+        response.explanation, Explanation
    ):
        response_length = len(response.explanation.text)
    response_certainty = (
@@ -125,3 +132,189 @@ def track_request(name=None):
         raise NotImplementedError("An async coroutine is needed")
 
     return decorator
+
+
+# TODO: Refactor aggregation to use database operations, instead of timestamp formatting # pylint: disable=fixme
+class TimeDefinition:
+    """Define time format details, given a time period."""
+
+    def __init__(self, time_period: TimePeriod):
+        self.time_period = time_period
+        self.days_diff = time_period.get_time_period().days
+        if self.time_period.hours:
+            self._time_format = "%Y-%m-%d %H"
+            self._time_delta = datetime.timedelta(hours=1)
+        elif self.time_period.days:
+            self._time_format = "%Y-%m-%d"
+            self._time_delta = datetime.timedelta(days=1)
+        elif self.time_period.weeks:
+            self._time_format = "%Y-%m-%d"
+            self._time_delta = datetime.timedelta(weeks=1)
+
+    @property
+    def time_format(self):
+        # pylint: disable=missing-function-docstring
+        return self._time_format
+
+    @property
+    def time_delta(self):
+        # pylint: disable=missing-function-docstring
+        return self._time_delta
+
+
+def create_time_series_arrays(
+    values_dict: dict[datetime.datetime, int],
+) -> tuple[list, list]:
+    """Create time series arrays from a dictionary of values.
+
+    This function generates two aligned lists:
+    1. An array of timestamps from start_time to end_time
+    2. A corresponding array of values for each timestamp
+
+    Args:
+        values_dict: Dictionary mapping timestamps to their respective values
+    Returns:
+        A tuple containing:
+        - list: Array of timestamps
+        - list: Array of corresponding values
+    """
+
+    timestamps = []
+    values = []
+
+    for timestamp, count in values_dict.items():
+        timestamps.append(timestamp)
+        values.append(count)
+
+    return timestamps, numpy.nan_to_num(values).tolist()
+
+
+async def requests_per_time(
+    period_of_time: TimePeriod,
+    endpoint: EndpointType = EndpointType.ANALYZE,
+    end_time: Optional[datetime.datetime] = None,
+) -> MetricTimeSeries:
+    """
+    Get request counts over a specified time period.
+
+    The time intervals are determined by the provided TimePeriod object, which defines
+    the granularity.
+
+    Args:
+        period_of_time: A TimePeriod object that defines the time period and interval
+            for the analysis (e.g., hourly, daily, weekly)
+        endpoint: One of the API endpoints
+        end_time: The end time for the analysis period. If None, defaults to the current
+            UTC time
+
+    Returns:
+        A dictionary with timestamps and associated number of requests
+    """
+    end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
+    start_time = period_of_time.get_period_start_time(end_time)
+    time_def = TimeDefinition(period_of_time)
+    requests_counts = await AnalyzeRequestMetrics.get_requests_in_period(
+        start_time, end_time, time_def.time_format, endpoint
+    )
+    timestamps, counts = create_time_series_arrays(requests_counts)
+
+    return MetricTimeSeries(metric="requests", timestamps=timestamps, values=counts)
+
+
+async def average_time_per_responses(
+    period_of_time: TimePeriod,
+    endpoint: EndpointType = EndpointType.ANALYZE,
+    end_time: Optional[datetime.datetime] = None,
+) -> MetricTimeSeries:
+    """
+    Get average response time and length over a specified time period.
+
+    The time intervals are determined by the provided TimePeriod object, which defines
+    the granularity.
+
+    Args:
+        period_of_time: A TimePeriod object that defines the time period and interval
+            for the analysis (e.g., hourly, daily, weekly)
+        endpoint: One of the API endpoints
+        end_time: The end time for the analysis period. If None, defaults to the current
+            UTC time
+
+    Returns:
+        A dictionary of timestamps and average response times
+    """
+    end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
+    start_time = period_of_time.get_period_start_time(end_time)
+    time_def = TimeDefinition(period_of_time)
+    responses_average_time = (
+        await AnalyzeRequestMetrics.get_responses_average_time_in_period(
+            start_time, end_time, time_def.time_format, endpoint
+        )
+    )
+    timestamps, average_time = create_time_series_arrays(
+        responses_average_time,
+    )
+
+    return MetricTimeSeries(metric="avg_response_time", timestamps=timestamps, values=average_time)
+
+
+async def _collect_emoji_data(
+    start_time: datetime.datetime, time_def: TimeDefinition
+) -> Dict[str, Dict[str, list]]:
+    """Collect and organize emoji feedback data
+
+    For each reaction type, a dictionary is created with time stamps
+    as keys, and aggregate counts as values.
+    """
+    reactions = await Reactions.get_since(start_time)
+    reaction_values: defaultdict[str, Dict] = defaultdict(lambda: defaultdict(int))
+
+    for comment_timestamp, reaction in reactions:
+        formatted_timestamp = comment_timestamp.strptime(
+            comment_timestamp.strftime(time_def.time_format), time_def.time_format
+        )
+
+        reaction_values[reaction.reaction_type][formatted_timestamp] += reaction.count
+
+    reaction_time_series = {
+        reaction_type: {
+            "timestamps": reaction_data.keys(),
+            "values": reaction_data.values(),
+        }
+        for reaction_type, reaction_data in reaction_values.items()
+    }
+
+    return reaction_time_series
+
+
+async def emojis_per_time(
+    period_of_time: TimePeriod,
+    end_time: Optional[datetime.datetime] = None,
+) -> list[MetricTimeSeries]:
+    """
+    Retrieve data of emoji feedback over time.
+
+    The time intervals are determined by the provided TimePeriod object, which defines
+    the granularity.
+
+    Args:
+        period_of_time: A TimePeriod object that defines the time period and interval
+            for the analysis (e.g., hourly, daily, weekly)
+        end_time: The end time for the analysis period. If None, defaults to the current
+            UTC time
+
+    Returns:
+        A list of `MetricTimeSeries` objects
+    """
+    time_def = TimeDefinition(period_of_time)
+    end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
+    start_time = period_of_time.get_period_start_time(end_time)
+    reactions_values_dict = await _collect_emoji_data(start_time, time_def)
+
+    reaction_values: list[MetricTimeSeries] = []
+    for reaction, time_series in reactions_values_dict.items():
+        reaction_values.append(
+            MetricTimeSeries(
+                metric=f"emoji_{reaction}",
+                timestamps=time_series["timestamps"],
+                values=time_series["values"]))
+    return reaction_values
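A hypothetical call into the new aggregation helpers (assuming `TimePeriod` accepts a `days` field, as the `TimeDefinition` checks above suggest; a configured database is required in practice):

```python
import asyncio

from logdetective.server.database.models import EndpointType
from logdetective.server.metric import requests_per_time
from logdetective.server.models import TimePeriod


async def main():
    # Request counts over the last week, bucketed daily.
    series = await requests_per_time(TimePeriod(days=7), endpoint=EndpointType.ANALYZE)
    print(series.metric, list(zip(series.timestamps, series.values)))


asyncio.run(main())
```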
logdetective/server/models.py
CHANGED
@@ -401,3 +401,15 @@ class TimePeriod(BaseModel):
         if time.tzinfo is None:
             time = time.replace(tzinfo=datetime.timezone.utc)
         return time - self.get_time_period()
+
+
+class MetricTimeSeries(BaseModel):
+    """Recorded values of given metric"""
+    metric: str
+    timestamps: List[datetime.datetime]
+    values: List[float]
+
+
+class MetricResponse(BaseModel):
+    """Requested metrics"""
+    time_series: List[MetricTimeSeries]