logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. logdetective/constants.py +33 -12
  2. logdetective/extractors.py +137 -68
  3. logdetective/logdetective.py +102 -33
  4. logdetective/models.py +99 -0
  5. logdetective/prompts-summary-first.yml +20 -0
  6. logdetective/prompts-summary-only.yml +13 -0
  7. logdetective/prompts.yml +90 -0
  8. logdetective/remote_log.py +67 -0
  9. logdetective/server/compressors.py +186 -0
  10. logdetective/server/config.py +78 -0
  11. logdetective/server/database/base.py +34 -26
  12. logdetective/server/database/models/__init__.py +33 -0
  13. logdetective/server/database/models/exceptions.py +17 -0
  14. logdetective/server/database/models/koji.py +143 -0
  15. logdetective/server/database/models/merge_request_jobs.py +623 -0
  16. logdetective/server/database/models/metrics.py +427 -0
  17. logdetective/server/emoji.py +148 -0
  18. logdetective/server/exceptions.py +37 -0
  19. logdetective/server/gitlab.py +451 -0
  20. logdetective/server/koji.py +159 -0
  21. logdetective/server/llm.py +309 -0
  22. logdetective/server/metric.py +75 -30
  23. logdetective/server/models.py +426 -23
  24. logdetective/server/plot.py +432 -0
  25. logdetective/server/server.py +580 -468
  26. logdetective/server/templates/base_response.html.j2 +59 -0
  27. logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
  28. logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
  29. logdetective/server/utils.py +98 -32
  30. logdetective/skip_snippets.yml +12 -0
  31. logdetective/utils.py +187 -73
  32. logdetective-2.11.0.dist-info/METADATA +568 -0
  33. logdetective-2.11.0.dist-info/RECORD +40 -0
  34. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  35. logdetective/server/database/models.py +0 -88
  36. logdetective-0.4.0.dist-info/METADATA +0 -333
  37. logdetective-0.4.0.dist-info/RECORD +0 -19
  38. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  39. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,88 +0,0 @@
1
- import enum
2
- import datetime
3
-
4
- from typing import Optional
5
- from sqlalchemy import Column, Integer, Float, DateTime, String, Enum
6
-
7
- from logdetective.server.database.base import Base, transaction
8
-
9
-
10
- class EndpointType(enum.Enum):
11
- """Different analyze endpoints"""
12
-
13
- ANALYZE = "analyze_log"
14
- ANALYZE_STAGED = "analyze_log_staged"
15
- ANALYZE_STREAM = "analyze_log_stream"
16
-
17
-
18
- class AnalyzeRequestMetrics(Base):
19
- """Store data related to received requests and given responses"""
20
-
21
- __tablename__ = "analyze_request_metrics"
22
-
23
- id = Column(Integer, primary_key=True)
24
- endpoint = Column(
25
- Enum(EndpointType),
26
- nullable=False,
27
- index=True,
28
- comment="The service endpoint that was called",
29
- )
30
- request_received_at = Column(
31
- DateTime,
32
- nullable=False,
33
- index=True,
34
- default=datetime.datetime.now(datetime.timezone.utc),
35
- comment="Timestamp when the request was received",
36
- )
37
- log_url = Column(
38
- String,
39
- nullable=False,
40
- index=False,
41
- comment="Log url for which analysis was requested",
42
- )
43
- response_sent_at = Column(
44
- DateTime, nullable=True, comment="Timestamp when the response was sent back"
45
- )
46
- response_length = Column(
47
- Integer, nullable=True, comment="Length of the response in chars"
48
- )
49
- response_certainty = Column(
50
- Float, nullable=True, comment="Certainty for generated response"
51
- )
52
-
53
- @classmethod
54
- def create(
55
- cls,
56
- endpoint: EndpointType,
57
- log_url: str,
58
- request_received_at: Optional[datetime.datetime] = None,
59
- ) -> int:
60
- """Create AnalyzeRequestMetrics new line
61
- with data related to a received request"""
62
- with transaction(commit=True) as session:
63
- metrics = AnalyzeRequestMetrics()
64
- metrics.endpoint = endpoint
65
- metrics.request_received_at = request_received_at or datetime.datetime.now(
66
- datetime.timezone.utc
67
- )
68
- metrics.log_url = log_url
69
- session.add(metrics)
70
- session.flush()
71
- return metrics.id
72
-
73
- @classmethod
74
- def update(
75
- cls,
76
- id_: int,
77
- response_sent_at: datetime,
78
- response_length: int,
79
- response_certainty: float,
80
- ) -> None:
81
- """Update an AnalyzeRequestMetrics line
82
- with data related to the given response"""
83
- with transaction(commit=True) as session:
84
- metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
85
- metrics.response_sent_at = response_sent_at
86
- metrics.response_length = response_length
87
- metrics.response_certainty = response_certainty
88
- session.add(metrics)
@@ -1,333 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: logdetective
3
- Version: 0.4.0
4
- Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
5
- License: Apache-2.0
6
- Author: Jiri Podivin
7
- Author-email: jpodivin@gmail.com
8
- Requires-Python: >=3.11,<4.0
9
- Classifier: Development Status :: 4 - Beta
10
- Classifier: Environment :: Console
11
- Classifier: Intended Audience :: Developers
12
- Classifier: License :: OSI Approved :: Apache Software License
13
- Classifier: Natural Language :: English
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.13
18
- Classifier: Topic :: Internet :: Log Analysis
19
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
- Classifier: Topic :: Software Development :: Debuggers
21
- Provides-Extra: server
22
- Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server"
23
- Requires-Dist: drain3 (>=0.9.11,<0.10.0)
24
- Requires-Dist: fastapi (>=0.111.1) ; extra == "server"
25
- Requires-Dist: huggingface-hub (>0.23.2)
26
- Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
27
- Requires-Dist: numpy (>=1.26.0)
28
- Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
29
- Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server"
30
- Requires-Dist: python-gitlab (>=4.4.0)
31
- Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server"
32
- Requires-Dist: requests (>0.2.31)
33
- Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server"
34
- Project-URL: homepage, https://github.com/fedora-copr/logdetective
35
- Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
36
- Description-Content-Type: text/markdown
37
-
38
- Log Detective
39
- =============
40
-
41
- [![PyPI - Version](https://img.shields.io/pypi/v/logdetective?color=blue)][PyPI Releases]
42
-
43
- [PyPI Releases]: https://pypi.org/project/logdetective/#history
44
-
45
- A Python tool to analyze logs using a Language Model (LLM) and Drain template miner.
46
-
47
- Installation
48
- ------------
49
-
50
- **Fedora 40+**
51
-
52
- dnf install logdetective
53
-
54
- **From Pypi repository**
55
-
56
- The logdetective project is published on the [PyPI repository](https://pypi.org/project/logdetective/). The `pip` tool can be used for installation.
57
-
58
- First, ensure that the necessary dependencies for the `llama-cpp-python` project are installed. For Fedora, install `gcc-c++`:
59
-
60
- # for Fedora it will be:
61
- dnf install gcc-c++
62
-
63
- Then, install the `logdetective` project using pip:
64
-
65
- # then install logdetective project
66
- pip install logdetective
67
-
68
- **Local repository install**
69
-
70
- pip install .
71
-
72
- Usage
73
- -----
74
-
75
- To analyze a log file, run the script with the following command line arguments:
76
- - `url` (required): The URL of the log file to be analyzed.
77
- - `--model` (optional, default: "Mistral-7B-Instruct-v0.2-GGUF"): The path or URL of the language model for analysis. As we are using LLama.cpp we want this to be in the `gguf` format. You can include the download link to the model here. If the model is already on your machine it will skip the download.
78
- - `--summarizer` (optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
79
- - `--n_lines` (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
80
- - `--n_clusters` (optional, default 8): Number of clusters for Drain to organize log chunks into. This only makes sense when you are summarizing with Drain
81
-
82
- Example usage:
83
-
84
- logdetective https://example.com/logs.txt
85
-
86
- Or if the log file is stored locally:
87
-
88
- logdetective ./data/logs.txt
89
-
90
- Example you want to use a different model:
91
-
92
- logdetective https://example.com/logs.txt --model https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true
93
- logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF
94
-
95
- Note that streaming with some models (notably Meta-Llama-3) is broken and can be worked around with the `no-stream` option:
96
-
97
- logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF --no-stream
98
-
99
-
100
- Real Example
101
- ------------
102
- Let's have a look at a real world example. Log Detective can work with any logs though we optimize it for build logs.
103
-
104
- We're going to analyze a failed build of a python-based library that happened in Fedora Koji buildsystem:
105
- ```
106
- $ logdetective https://kojipkgs.fedoraproject.org//work/tasks/8157/117788157/build.log
107
- Explanation:
108
- [Child return code was: 0] : The rpm build process executed successfully without any errors until the 'check' phase.
109
-
110
- [wamp/test/test_wamp_component_aio.py::test_asyncio_component] : Pytest found
111
- two tests marked with '@pytest.mark.asyncio' but they are not async functions.
112
- This warning can be ignored unless the tests are intended to be run
113
- asynchronously.
114
-
115
- [wamp/test/test_wamp_component_aio.py::test_asyncio_component_404] : Another
116
- Pytest warning for the same issue as test_asyncio_component.
117
-
118
- [-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html] :
119
- This line is not related to the error, but it is a reminder to refer to Pytest
120
- documentation for handling warnings.
121
-
122
- [=========================== short test summary info
123
- ============================] : This section shows the summary of tests that
124
- were executed. It shows the number of tests passed, failed, skipped,
125
- deselected, and warnings.
126
-
127
- [FAILED wamp/test/test_wamp_cryptosign.py::TestSigVectors::test_vectors] : A
128
- failed test is reported with the name of the test file, the name of the test
129
- method, and the name of the test case that failed. In this case,
130
- TestSigVectors::test_vectors failed.
131
-
132
- [FAILED
133
- websocket/test/test_websocket_protocol.py::WebSocketClientProtocolTests::test_auto_ping]
134
- : Another failed test is reported with the same format as the previous test. In
135
- this case, it is WebSocketClientProtocolTests::test_auto_ping that failed.
136
-
137
- [FAILED websocket/test/test_websocket_protocol.py::WebSocketServerProtocolTests::test_interpolate_server_status_template]
138
- : A third failed test is reported with the same format as the previous tests.
139
- In this case, it is
140
- WebSocketServerProtocolTests::test_interpolate_server_status_template that
141
- failed.
142
-
143
- [FAILED websocket/test/test_websocket_protocol.py::WebSocketServerProtocolTests::test_sendClose_reason_with_no_code]
144
- : Another failed test is reported. This time it is
145
- WebSocketServerProtocolTests::test_sendClose_reason_with_no_code.
146
-
147
- [FAILED websocket/test/test_websocket_protocol.py::WebSocketServerProtocolTests::test_sendClose_str_reason]
148
- : Another failed test is reported with the same test file and test method name,
149
- but a different test case name: test_sendClose_str_reason.
150
-
151
- [==== 13 failed, 195 passed, 64 skipped, 13 deselected, 2 warnings in 6.55s
152
- =====] : This is the summary of all tests that were executed, including the
153
- number of tests that passed, failed, were skipped, deselected, or produced
154
- warnings. In this case, there were 13 failed tests among a total of 211 tests.
155
-
156
- [error: Bad exit status from /var/tmp/rpm-tmp.8C0L25 (%check)] : An error
157
- message is reported indicating that the 'check' phase of the rpm build process
158
- failed with a bad exit status.
159
- ```
160
-
161
- It looks like a wall of text. Similar to any log. The main difference is that here we have the most significant lines of a logfile wrapped in `[ ] : ` and followed by textual explanation of the log text done by mistral 7b.
162
-
163
-
164
- Contributing
165
- ------------
166
-
167
- Contributions are welcome! Please submit a pull request if you have any improvements or new features to add. Make sure your changes pass all existing tests before submitting.
168
-
169
- To develop logdetective, you should fork this repository, clone your fork, and install dependencies using pip:
170
-
171
- git clone https://github.com/yourusername/logdetective.git
172
- cd logdetective
173
- pip install .
174
-
175
- Make changes to the code as needed and run pre-commit.
176
-
177
- Tests
178
- -----
179
-
180
- The [tox](https://github.com/tox-dev/tox) is used to manage tests. Please install `tox` package into your distribution and run:
181
-
182
- tox
183
-
184
- This will create a virtual environment with dependencies and run all the tests. For more information follow the tox help.
185
-
186
- To run only a specific test execute this:
187
-
188
- tox run -e style # to run flake8
189
-
190
- or
191
-
192
- tox run -e lint # to run pylint
193
-
194
- Visual Studio Code testing with podman/docker-compose
195
- -----------------------------------------------------
196
-
197
- - In `Containerfile`, add `debugpy` as a dependency
198
-
199
- ```diff
200
- -RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context \
201
- +RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context debugpy\
202
- ```
203
-
204
- - Rebuild server image with new dependencies
205
-
206
- ```
207
- make rebuild-server
208
- ```
209
-
210
- - Forward debugging port in `docker-compose.yaml` for `server` service.
211
-
212
- ```diff
213
- ports:
214
- - "${LOGDETECTIVE_SERVER_PORT:-8080}:${LOGDETECTIVE_SERVER_PORT:-8080}"
215
- + - "${VSCODE_DEBUG_PORT:-5678}:${VSCODE_DEBUG_PORT:-5678}"
216
- ```
217
-
218
- - Add `debugpy` code in a logdetective file where you want to stop at first.
219
-
220
- ```diff
221
- +import debugpy
222
- +debugpy.listen(("0.0.0.0", 5678))
223
- +debugpy.wait_for_client()
224
- ```
225
-
226
- Prepare `.vscode/launch.json` configuration for Visual Studio Code (at least the following configuration is needed)
227
-
228
- ```json
229
- {
230
- "version": "0.2.0",
231
- "configurations": [
232
- {
233
- "name": "Python Debugger: Remote Attach",
234
- "type": "debugpy",
235
- "request": "attach",
236
- "connect": {
237
- "host": "localhost",
238
- "port": 5678
239
- },
240
- "pathMappings": [
241
- {
242
- "localRoot": "${workspaceFolder}",
243
- "remoteRoot": "/src"
244
- }
245
- ]
246
- }
247
- ]
248
- }
249
- ```
250
-
251
- - Run the server
252
-
253
- ```
254
- podman-compose up server
255
- ```
256
-
257
- Run the Visual Studio Code debug configuration named *Python Debugger: Remote Attach*
258
-
259
- Server
260
- ------
261
-
262
- FastApi based server is implemented in `logdetective/server.py`. In order to run it in a development mode,
263
- simply start llama-cpp-python server with your chosen model as described in llama-cpp-python [docs](https://llama-cpp-python.readthedocs.io/en/latest/server/#running-the-server).
264
-
265
- Afterwards, start the logdetective server with `fastapi dev logdetective/server.py --port 8080`.
266
- Requests can then be made with post requests, for example:
267
-
268
- curl --header "Content-Type: application/json" --request POST --data '{"url":"<YOUR_URL_HERE>"}' http://localhost:8080/analyze
269
-
270
- For more accurate responses, you can use `/analyze/staged` endpoint. This will submit snippets to model for individual analysis first.
271
- Afterwards the model outputs are used to construct final prompt. This will take substantially longer, compared to plain `/analyze`
272
-
273
- curl --header "Content-Type: application/json" --request POST --data '{"url":"<YOUR_URL_HERE>"}' http://localhost:8080/analyze/staged
274
-
275
- We also have a Containerfile and composefile to run the logdetective server and llama server in containers.
276
-
277
- Before doing `podman-compose up`, make sure to set `MODELS_PATH` environment variable and point to a directory with your local model files:
278
- ```
279
- $ export MODELS_PATH=/path/to/models/
280
- $ ll $MODELS_PATH
281
- -rw-r--r--. 1 tt tt 3.9G apr 10 17:18 mistral-7b-instruct-v0.2.Q4_K_S.gguf
282
- ```
283
-
284
- If the variable is not set, `./models` is mounted inside by default.
285
-
286
- Model can be downloaded from [our Hugging Space](https://huggingface.co/fedora-copr) by:
287
- ```
288
- $ curl -L -o models/mistral-7b-instruct-v0.2.Q4_K_S.gguf https://huggingface.co/fedora-copr/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/ggml-model-Q4_K_S.gguf
289
- ```
290
-
291
- Generate a new database revision with alembic
292
- ---------------------------------------------
293
-
294
- Modify the database models (`logdetective/server/database/models.py`).
295
-
296
- Generate a new database revision with the command:
297
-
298
- **Warning**: this command will start up a new server
299
- and shut it down when the operation completes.
300
-
301
- ```
302
- CHANGE="A change comment" make alembic-generate-revision
303
- ```
304
-
305
- Our production instance
306
- -----------------------
307
-
308
- Our FastAPI server and model inference server run through `podman-compose` on an
309
- Amazon AWS instance. The VM is provisioned by an
310
- [ansible playbook](https://pagure.io/fedora-infra/ansible/blob/main/f/roles/logdetective/tasks/main.yml).
311
-
312
- You can control the server through:
313
-
314
- ```
315
- cd /root/logdetective
316
- podman-compose -f docker-compose-prod.yaml ...
317
- ```
318
-
319
- The `/root` directory contains valuable data. If moving to a new instance,
320
- please backup the whole directory and transfer it to the new instance.
321
-
322
- For some reason, we need to manually run this command after every reboot:
323
-
324
- ```
325
- nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
326
- ```
327
-
328
-
329
- License
330
- -------
331
-
332
- This project is licensed under the Apache-2.0 License - see the LICENSE file for details.
333
-
@@ -1,19 +0,0 @@
1
- logdetective/__init__.py,sha256=VqRngDcuFT7JWms8Qc_MsOvajoXVOKPr-S1kqY3Pqhc,59
2
- logdetective/constants.py,sha256=SPSs1Bq6zPms3RsFTmsADwgrnFTn4fefNHzrB-M3RAE,1383
3
- logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
4
- logdetective/extractors.py,sha256=cjxndfJaQur54GXksIQXL7YTxkOng8I8UnQZMN2t5_w,3388
5
- logdetective/logdetective.py,sha256=KN0KASW63VAnrjVeXK5AO0ob-vSexutTyeg1fd4uj70,4884
6
- logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- logdetective/server/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- logdetective/server/database/base.py,sha256=oMJUvbWeapIUP-8Cf_DR9ptFg8CsYeaBAIjOVEzx8SM,1668
9
- logdetective/server/database/models.py,sha256=8jW4k03Kny_3ld35214hcjYoJqlBvQIr4LH9mfQukXw,2750
10
- logdetective/server/metric.py,sha256=VYMifrfIhcqgyu6YYN0c1nt8fC1iJ2_LCB7Bh2AheoE,2679
11
- logdetective/server/models.py,sha256=cTmNJ-w_WT3meD_x0A92wCZ5f6UiSeYpH1f01PNCOy4,5533
12
- logdetective/server/server.py,sha256=fKGN6ddlLW25V5X0O-NBGHTBlDszRU6R8FpJs0xEYe8,21179
13
- logdetective/server/utils.py,sha256=osW5-VXxJAxRt7Wd3t1wF7PyW89FE9g4gSZLZCShlLc,1216
14
- logdetective/utils.py,sha256=eudens1_T6iTtYhyzoYCpwuWgFHUMDSt6eWnrAB-mAI,6188
15
- logdetective-0.4.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
16
- logdetective-0.4.0.dist-info/METADATA,sha256=92vGsbePzgQKz1PHOVmdysHvSkgV0Yj0Owtjdzv0G2w,12714
17
- logdetective-0.4.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
18
- logdetective-0.4.0.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
19
- logdetective-0.4.0.dist-info/RECORD,,