gurrt 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gurrt-0.1.0/LICENSE +18 -0
- gurrt-0.1.0/PKG-INFO +157 -0
- gurrt-0.1.0/README.md +117 -0
- gurrt-0.1.0/pyproject.toml +71 -0
- gurrt-0.1.0/setup.cfg +4 -0
- gurrt-0.1.0/src/gurrt/__init__.py +0 -0
- gurrt-0.1.0/src/gurrt/api/server.py +105 -0
- gurrt-0.1.0/src/gurrt/cli/main.py +171 -0
- gurrt-0.1.0/src/gurrt/config/config.py +28 -0
- gurrt-0.1.0/src/gurrt/core/__init__.py +0 -0
- gurrt-0.1.0/src/gurrt/core/asr.py +33 -0
- gurrt-0.1.0/src/gurrt/core/embedding.py +31 -0
- gurrt-0.1.0/src/gurrt/core/llm.py +65 -0
- gurrt-0.1.0/src/gurrt/core/models.py +101 -0
- gurrt-0.1.0/src/gurrt/core/pipeline.py +58 -0
- gurrt-0.1.0/src/gurrt/core/prompts.py +26 -0
- gurrt-0.1.0/src/gurrt/core/search.py +48 -0
- gurrt-0.1.0/src/gurrt/core/vectordb.py +41 -0
- gurrt-0.1.0/src/gurrt/utils/utils.py +340 -0
- gurrt-0.1.0/src/gurrt.egg-info/PKG-INFO +157 -0
- gurrt-0.1.0/src/gurrt.egg-info/SOURCES.txt +23 -0
- gurrt-0.1.0/src/gurrt.egg-info/dependency_links.txt +1 -0
- gurrt-0.1.0/src/gurrt.egg-info/entry_points.txt +2 -0
- gurrt-0.1.0/src/gurrt.egg-info/requires.txt +29 -0
- gurrt-0.1.0/src/gurrt.egg-info/top_level.txt +1 -0
gurrt-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
echo "The MIT License
|
|
2
|
+
Copyright (c) `date +%Y` Arthur L Piepmeier III <http://treypiepmeier.com/>
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
\"Software\"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
The above copyright notice and this permission notice shall be
|
|
11
|
+
included in all copies or substantial portions of the Software.
|
|
12
|
+
THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,
|
|
13
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
14
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
15
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
16
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
17
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
18
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE." > MIT-Licence.txt
|
gurrt-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gurrt
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An Intelligent Open-Source Video Understanding System — a different path from traditional Large Video Language Models (LVLMs). Built for modularity, openness, and real-world usability.
|
|
5
|
+
Author-email: Mohammad Owais <owaismohammad2515@gmail.com>, Fareha Aslam <farehaaslam57@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: torch
|
|
11
|
+
Requires-Dist: torchvision
|
|
12
|
+
Requires-Dist: torchaudio
|
|
13
|
+
Requires-Dist: opencv-python>=4.13.0.92
|
|
14
|
+
Requires-Dist: python-dotenv>=1.2.1
|
|
15
|
+
Requires-Dist: transformers>=5.1.0
|
|
16
|
+
Requires-Dist: accelerate>=1.12.0
|
|
17
|
+
Requires-Dist: pillow>=12.1.0
|
|
18
|
+
Requires-Dist: chromadb>=1.4.1
|
|
19
|
+
Requires-Dist: ollama>=0.6.1
|
|
20
|
+
Requires-Dist: langchain>=1.2.9
|
|
21
|
+
Requires-Dist: langchain-groq>=1.1.2
|
|
22
|
+
Requires-Dist: moviepy>=1.0.3
|
|
23
|
+
Requires-Dist: sentence-transformers>=5.2.2
|
|
24
|
+
Requires-Dist: tqdm>=4.67.3
|
|
25
|
+
Requires-Dist: scenedetect>=0.6.7.1
|
|
26
|
+
Requires-Dist: scikit-image>=0.26.0
|
|
27
|
+
Requires-Dist: ipykernel>=7.2.0
|
|
28
|
+
Requires-Dist: easyocr>=1.7.2
|
|
29
|
+
Requires-Dist: yt-dlp>=2026.2.4
|
|
30
|
+
Requires-Dist: openai-whisper>=20250625
|
|
31
|
+
Requires-Dist: faster-whisper>=1.2.1
|
|
32
|
+
Requires-Dist: langchain-text-splitters>=1.1.0
|
|
33
|
+
Requires-Dist: fastapi>=0.128.7
|
|
34
|
+
Requires-Dist: pydantic>=2.12.5
|
|
35
|
+
Requires-Dist: supermemory>=3.24.0
|
|
36
|
+
Requires-Dist: platformdirs>=4.5.1
|
|
37
|
+
Requires-Dist: typer>=0.21.1
|
|
38
|
+
Requires-Dist: alive-progress>=3.3.0
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# gurrt
|
|
42
|
+
An intelligent video understanding system designed as an open-source alternative to monolithic Large Video Language Models
|
|
43
|
+
|
|
44
|
+
I built gurrt out of frustration.
|
|
45
|
+
|
|
46
|
+
Working with Large Video Language Models locally is:
|
|
47
|
+
|
|
48
|
+
- Expensive to set up
|
|
49
|
+
- GPU intensive
|
|
50
|
+
- Slow to experiment with
|
|
51
|
+
- Difficult to run on consumer hardware
|
|
52
|
+
- Often closed or partially restricted
|
|
53
|
+
|
|
54
|
+
Most state-of-the-art video models require massive compute clusters and large-scale infrastructure.
|
|
55
|
+
They are impressive — but they are not accessible.
|
|
56
|
+
|
|
57
|
+
If meaningful video intelligence requires:
|
|
58
|
+
|
|
59
|
+
- Multiple high-end GPUs
|
|
60
|
+
- Hours of inference time
|
|
61
|
+
- Proprietary model access
|
|
62
|
+
|
|
63
|
+
Then it stops feeling truly open.
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
### A Different Philosophy
|
|
68
|
+
|
|
69
|
+
gurrt is not an attempt to compete with systems like YouTube’s internal models or other large-scale industrial LVLMs trained on massive GPU clusters.
|
|
70
|
+
|
|
71
|
+
It is an attempt to rethink the approach.
|
|
72
|
+
|
|
73
|
+
Instead of asking how to build a larger end-to-end video transformer, it explores a different path:
|
|
74
|
+
|
|
75
|
+
- Smarter frame sampling techniques
|
|
76
|
+
- Stronger and more modular vision models
|
|
77
|
+
- Better structured embedding strategies
|
|
78
|
+
- More efficient and grounded RAG pipelines
|
|
79
|
+
- Persistent memory-driven reasoning
|
|
80
|
+
|
|
81
|
+
It represents a belief that meaningful video understanding can emerge from:
|
|
82
|
+
|
|
83
|
+
- Thoughtful engineering
|
|
84
|
+
- Smart sampling
|
|
85
|
+
- Strong modular components
|
|
86
|
+
- Memory-augmented retrieval
|
|
87
|
+
|
|
88
|
+
Not just from massive GPU clusters and billion-parameter models.
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
## Architecture Overview
|
|
92
|
+
```bash
|
|
93
|
+
Video
|
|
94
|
+
│
|
|
95
|
+
├── Smart Frame Extraction
|
|
96
|
+
│ └── Captioning + Embeddings
|
|
97
|
+
│
|
|
98
|
+
├── Audio Extraction
|
|
99
|
+
│ └── Speech-to-Text + Embeddings
|
|
100
|
+
│
|
|
101
|
+
├── Vector Memory Store
|
|
102
|
+
│
|
|
103
|
+
├── Supermemory (Persistent Conversation Layer)
|
|
104
|
+
│
|
|
105
|
+
└── LLM Reasoning Engine
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Project Setup (using uv)
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# Install uv if you haven't already
|
|
112
|
+
pip install uv
|
|
113
|
+
|
|
114
|
+
# Sync dependencies
|
|
115
|
+
uv sync
|
|
116
|
+
|
|
117
|
+
# Activate environment
|
|
118
|
+
.venv\Scripts\activate
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## File Structure
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
gurrt/
|
|
125
|
+
├── src/
|
|
126
|
+
│ |
|
|
127
|
+
│ │
|
|
128
|
+
│ └── videorag/ # Core Video-RAG application package
|
|
129
|
+
│ │
|
|
130
|
+
│ ├── api/
|
|
131
|
+
│ │ └── server.py # API server (exposes endpoints for querying, ingestion, etc.)
|
|
132
|
+
│ │
|
|
133
|
+
│ ├── cli/
|
|
134
|
+
│ │ └── main.py # CLI entry point (init, ingest, query commands)
|
|
135
|
+
│ │
|
|
136
|
+
│ ├── config/
|
|
137
|
+
│ │ └── config.py # Configuration management (API keys, paths, environment setup)
|
|
138
|
+
│ │
|
|
139
|
+
│ ├── core/ # Core intelligence pipeline
|
|
140
|
+
│ │ ├── __init__.py
|
|
141
|
+
│ │ ├── asr.py # Audio extraction + speech-to-text processing
|
|
142
|
+
│ │ ├── embedding.py # Embedding generation for captions & transcripts
|
|
143
|
+
│ │ ├── llm.py # LLM interaction and reasoning logic
|
|
144
|
+
│ │ ├── models.py # Model loading and management utilities
|
|
145
|
+
│ │ ├── pipeline.py # End-to-end ingestion + query pipeline orchestration
|
|
146
|
+
│ │ ├── prompts.py # Prompt templates and structured context injection
|
|
147
|
+
│ │ ├── search.py # Retrieval logic (semantic search over stored embeddings)
|
|
148
|
+
│ │ └── vectordb.py # Vector database interface and storage abstraction
|
|
149
|
+
│ │
|
|
150
|
+
│ └── utils/
|
|
151
|
+
│ └── utils.py # Shared utility functions and helpers
|
|
152
|
+
│
|
|
153
|
+
└── README.md # Project documentation
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
gurrt-0.1.0/README.md
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# gurrt
|
|
2
|
+
An intelligent video understanding system designed as an open-source alternative to monolithic Large Video Language Models
|
|
3
|
+
|
|
4
|
+
I built gurrt out of frustration.
|
|
5
|
+
|
|
6
|
+
Working with Large Video Language Models locally is:
|
|
7
|
+
|
|
8
|
+
- Expensive to set up
|
|
9
|
+
- GPU intensive
|
|
10
|
+
- Slow to experiment with
|
|
11
|
+
- Difficult to run on consumer hardware
|
|
12
|
+
- Often closed or partially restricted
|
|
13
|
+
|
|
14
|
+
Most state-of-the-art video models require massive compute clusters and large-scale infrastructure.
|
|
15
|
+
They are impressive — but they are not accessible.
|
|
16
|
+
|
|
17
|
+
If meaningful video intelligence requires:
|
|
18
|
+
|
|
19
|
+
- Multiple high-end GPUs
|
|
20
|
+
- Hours of inference time
|
|
21
|
+
- Proprietary model access
|
|
22
|
+
|
|
23
|
+
Then it stops feeling truly open.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
### A Different Philosophy
|
|
28
|
+
|
|
29
|
+
gurrt is not an attempt to compete with systems like YouTube’s internal models or other large-scale industrial LVLMs trained on massive GPU clusters.
|
|
30
|
+
|
|
31
|
+
It is an attempt to rethink the approach.
|
|
32
|
+
|
|
33
|
+
Instead of asking how to build a larger end-to-end video transformer, it explores a different path:
|
|
34
|
+
|
|
35
|
+
- Smarter frame sampling techniques
|
|
36
|
+
- Stronger and more modular vision models
|
|
37
|
+
- Better structured embedding strategies
|
|
38
|
+
- More efficient and grounded RAG pipelines
|
|
39
|
+
- Persistent memory-driven reasoning
|
|
40
|
+
|
|
41
|
+
It represents a belief that meaningful video understanding can emerge from:
|
|
42
|
+
|
|
43
|
+
- Thoughtful engineering
|
|
44
|
+
- Smart sampling
|
|
45
|
+
- Strong modular components
|
|
46
|
+
- Memory-augmented retrieval
|
|
47
|
+
|
|
48
|
+
Not just from massive GPU clusters and billion-parameter models.
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
## Architecture Overview
|
|
52
|
+
```bash
|
|
53
|
+
Video
|
|
54
|
+
│
|
|
55
|
+
├── Smart Frame Extraction
|
|
56
|
+
│ └── Captioning + Embeddings
|
|
57
|
+
│
|
|
58
|
+
├── Audio Extraction
|
|
59
|
+
│ └── Speech-to-Text + Embeddings
|
|
60
|
+
│
|
|
61
|
+
├── Vector Memory Store
|
|
62
|
+
│
|
|
63
|
+
├── Supermemory (Persistent Conversation Layer)
|
|
64
|
+
│
|
|
65
|
+
└── LLM Reasoning Engine
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Project Setup (using uv)
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
# Install uv if you haven't already
|
|
72
|
+
pip install uv
|
|
73
|
+
|
|
74
|
+
# Sync dependencies
|
|
75
|
+
uv sync
|
|
76
|
+
|
|
77
|
+
# Activate environment
|
|
78
|
+
.venv\Scripts\activate
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## File Structure
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
gurrt/
|
|
85
|
+
├── src/
|
|
86
|
+
│ |
|
|
87
|
+
│ │
|
|
88
|
+
│ └── videorag/ # Core Video-RAG application package
|
|
89
|
+
│ │
|
|
90
|
+
│ ├── api/
|
|
91
|
+
│ │ └── server.py # API server (exposes endpoints for querying, ingestion, etc.)
|
|
92
|
+
│ │
|
|
93
|
+
│ ├── cli/
|
|
94
|
+
│ │ └── main.py # CLI entry point (init, ingest, query commands)
|
|
95
|
+
│ │
|
|
96
|
+
│ ├── config/
|
|
97
|
+
│ │ └── config.py # Configuration management (API keys, paths, environment setup)
|
|
98
|
+
│ │
|
|
99
|
+
│ ├── core/ # Core intelligence pipeline
|
|
100
|
+
│ │ ├── __init__.py
|
|
101
|
+
│ │ ├── asr.py # Audio extraction + speech-to-text processing
|
|
102
|
+
│ │ ├── embedding.py # Embedding generation for captions & transcripts
|
|
103
|
+
│ │ ├── llm.py # LLM interaction and reasoning logic
|
|
104
|
+
│ │ ├── models.py # Model loading and management utilities
|
|
105
|
+
│ │ ├── pipeline.py # End-to-end ingestion + query pipeline orchestration
|
|
106
|
+
│ │ ├── prompts.py # Prompt templates and structured context injection
|
|
107
|
+
│ │ ├── search.py # Retrieval logic (semantic search over stored embeddings)
|
|
108
|
+
│ │ └── vectordb.py # Vector database interface and storage abstraction
|
|
109
|
+
│ │
|
|
110
|
+
│ └── utils/
|
|
111
|
+
│ └── utils.py # Shared utility functions and helpers
|
|
112
|
+
│
|
|
113
|
+
└── README.md # Project documentation
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "gurrt"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "An Intelligent Open-Source Video Understanding System A different path from traditional Large Video Language Models (LVLMs). Built for modularity, openness, and real-world usability."
|
|
5
|
+
authors = [
|
|
6
|
+
{name="Mohammad Owais", email = "owaismohammad2515@gmail.com"},
|
|
7
|
+
{name= "Fareha Aslam", email = "farehaaslam57@gmail.com"}
|
|
8
|
+
]
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
dependencies = [
|
|
13
|
+
"torch",
|
|
14
|
+
"torchvision",
|
|
15
|
+
"torchaudio",
|
|
16
|
+
"opencv-python>=4.13.0.92",
|
|
17
|
+
"python-dotenv>=1.2.1",
|
|
18
|
+
"transformers>=5.1.0",
|
|
19
|
+
"accelerate>=1.12.0",
|
|
20
|
+
"pillow>=12.1.0",
|
|
21
|
+
"chromadb>=1.4.1",
|
|
22
|
+
"ollama>=0.6.1",
|
|
23
|
+
"langchain>=1.2.9",
|
|
24
|
+
"langchain-groq>=1.1.2",
|
|
25
|
+
"moviepy>=1.0.3",
|
|
26
|
+
"sentence-transformers>=5.2.2",
|
|
27
|
+
"tqdm>=4.67.3",
|
|
28
|
+
"scenedetect>=0.6.7.1",
|
|
29
|
+
"scikit-image>=0.26.0",
|
|
30
|
+
"ipykernel>=7.2.0",
|
|
31
|
+
"easyocr>=1.7.2",
|
|
32
|
+
"yt-dlp>=2026.2.4",
|
|
33
|
+
"openai-whisper>=20250625",
|
|
34
|
+
"faster-whisper>=1.2.1",
|
|
35
|
+
"langchain-text-splitters>=1.1.0",
|
|
36
|
+
"fastapi>=0.128.7",
|
|
37
|
+
"pydantic>=2.12.5",
|
|
38
|
+
"supermemory>=3.24.0",
|
|
39
|
+
"platformdirs>=4.5.1",
|
|
40
|
+
"typer>=0.21.1",
|
|
41
|
+
"alive-progress>=3.3.0",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[build-system]
|
|
45
|
+
requires = ["setuptools>=61.0"]
|
|
46
|
+
build-backend = "setuptools.build_meta"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
[[tool.uv.index]]
|
|
50
|
+
name = "pytorch-cu121"
|
|
51
|
+
url = "https://download.pytorch.org/whl/cu121"
|
|
52
|
+
explicit = true
|
|
53
|
+
|
|
54
|
+
[tool.uv.sources]
|
|
55
|
+
torch = { index = "pytorch-cu121" }
|
|
56
|
+
torchvision = { index = "pytorch-cu121" }
|
|
57
|
+
torchaudio = { index = "pytorch-cu121" }
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.packages.find]
|
|
61
|
+
where = ["src"]
|
|
62
|
+
|
|
63
|
+
[dependency-groups]
|
|
64
|
+
dev = [
|
|
65
|
+
"build>=1.4.0",
|
|
66
|
+
"twine>=6.2.0",
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
[project.scripts]
|
|
70
|
+
gurrt = "gurrt.cli.main:app"
|
|
71
|
+
|
gurrt-0.1.0/setup.cfg
ADDED
|
File without changes
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
from fastapi import FastAPI, status
|
|
3
|
+
from fastapi.responses import JSONResponse
|
|
4
|
+
from gurrt.core.llm import query_llm, delete
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
import subprocess
|
|
7
|
+
|
|
8
|
+
app = FastAPI(title= "Video-Amigo")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@app.get('/save_models')
async def save_models() -> JSONResponse:
    """Run the model-saving script (core\\models.py) in a subprocess.

    Returns a 200 JSON payload with the script's stdout on success, or a
    500 payload carrying the script's stderr if it exits non-zero.
    """
    try:
        result = subprocess.run(
            ["python", r"core\models.py"],
            capture_output=True,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        # check=True raised: surface the failing script's stderr to the client.
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={
                "message": "Script execution failed",
                "error": e.stderr
            }
        )
    return JSONResponse(
        status_code=status.HTTP_200_OK,
        content={"message": "Resource Saved Successfully!",
                 "output": result.stdout}
    )
|
|
30
|
+
@app.get('/create_vectordb')
async def vectordb_creation() -> JSONResponse:
    """Build the vector database by running core\\vectordb.py as a subprocess.

    On success returns the script's stdout under "frame_output"; on a
    non-zero exit returns a 500 payload with the script's stderr.
    """
    try:
        vector_db_creation = subprocess.run(
            ["python", r"core\vectordb.py"],
            capture_output=True,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={
                "message": "Script execution failed",
                "error": e.stderr
            }
        )
    return JSONResponse(
        status_code=status.HTTP_200_OK,
        content={"message": "Resource Saved Successfully!",
                 "frame_output": vector_db_creation.stdout}
    )
|
|
50
|
+
@app.get('/upload_video')
async def video_save_caption_emb(video: Annotated[str, Field(description= "Receive Video Path")]) -> JSONResponse:
    """Index an uploaded video: frame/caption embeddings, then speech-to-text.

    Runs core\\embedding.py followed by core\\asr.py as subprocesses and
    returns both scripts' stdout in a 200 payload, or a 500 payload with
    the stderr of whichever script failed first.

    Fix: corrected the user-facing parameter description typo
    ("Recieve" -> "Receive").

    NOTE(review): the `video` path parameter is never forwarded to either
    subprocess — both scripts appear to locate their input some other way;
    confirm this is intentional.
    """
    # Step 1: frame extraction + caption embeddings.
    try:
        result_frame = subprocess.run(
            ["python", r"core\embedding.py"],
            capture_output=True,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={
                "message": "Script execution failed",
                "error": e.stderr
            }
        )

    # Step 2: audio extraction + speech-to-text.
    try:
        result_audio = subprocess.run(
            ["python", r"core\asr.py"],
            capture_output=True,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={
                "message": "Script execution failed",
                "error": e.stderr
            }
        )

    return JSONResponse(
        status_code=status.HTTP_200_OK,
        content={"message": "Resource Saved Successfully!",
                 "frame_output": result_frame.stdout,
                 "audio_output": result_audio.stdout}
    )
|
|
87
|
+
@app.get("/llm_chat")
|
|
88
|
+
async def chat(query: str) -> str:
|
|
89
|
+
answer = await query_llm(query= query)
|
|
90
|
+
return answer
|
|
91
|
+
|
|
92
|
+
@app.get("/delete_chat")
|
|
93
|
+
async def delete_chat() -> JSONResponse:
|
|
94
|
+
try:
|
|
95
|
+
delete()
|
|
96
|
+
return JSONResponse(status_code=status.HTTP_200_OK,
|
|
97
|
+
content={"message": "Memory Deleted Successfully!"} )
|
|
98
|
+
except Exception as e:
|
|
99
|
+
return JSONResponse(
|
|
100
|
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
101
|
+
content={
|
|
102
|
+
"message": "Script execution failed",
|
|
103
|
+
"error": e
|
|
104
|
+
}
|
|
105
|
+
)
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
5
|
+
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
|
|
6
|
+
|
|
7
|
+
logging.disable(logging.WARNING)
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from platformdirs import user_config_dir
|
|
12
|
+
import json
|
|
13
|
+
import asyncio
|
|
14
|
+
from gurrt.core.pipeline import VideoRag
|
|
15
|
+
|
|
16
|
+
from rich.theme import Theme
|
|
17
|
+
from rich.console import Console
|
|
18
|
+
from rich.prompt import Prompt
|
|
19
|
+
from rich.text import Text
|
|
20
|
+
from rich.rule import Rule
|
|
21
|
+
from rich.panel import Panel
|
|
22
|
+
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn
|
|
23
|
+
|
|
24
|
+
custom_theme = Theme({
|
|
25
|
+
"primary": "bold green",
|
|
26
|
+
"success": "bold bright_green",
|
|
27
|
+
"error": "bold red",
|
|
28
|
+
"info": "green",
|
|
29
|
+
"warning": "yellow"
|
|
30
|
+
})
|
|
31
|
+
|
|
32
|
+
console = Console(theme= custom_theme)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
app = typer.Typer(help= "🌿 gurrt: A Video Understanding Tool")
|
|
36
|
+
|
|
37
|
+
config_dir = Path(user_config_dir("gurrt"))
|
|
38
|
+
config_dir.mkdir(exist_ok= True, parents= True)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@app.callback()
def main():
    """Typer callback: print the gurrt banner before every subcommand."""
    banner = Text("🌿 Gurrt: A Video Understanding Tool", style="bold bright_green")
    console.print(Rule(banner, style="green"))
@app.command()
def init():
    """
    Initialize VideoRag by saving required API keys.
    """
    # Documentation links shown to the user before each prompt.
    groq_link = "https://console.groq.com/docs/models"
    ollama_link = "https://docs.ollama.com/api/introduction"
    supermemory_link = "https://supermemory.ai/docs/integrations/supermemory-sdk"
    # Keys are written as plaintext JSON under the per-user config dir
    # (config_dir is created at module import time via platformdirs).
    config_file = config_dir / "config.json"
    console.print(
        Panel(
            "[info]Get your Groq API Key here:\n[/info]"
            f"[bold green]{groq_link}[/bold green]",
            title="Groq",
            border_style="green"
        )
    )
    # password=True hides the key while typing.
    groq = Prompt.ask("[info]Enter Groq API Key[/info]", password=True)

    console.print(
        Panel(
            "[info]Get your Supermemory API Key here:\n[/info]"
            f"[bold green]{supermemory_link}[/bold green]",
            title="Supermemory",
            border_style="green"
        )
    )
    supermemory = Prompt.ask("[primary]Enter Supermemory API Key[/primary]", password=True)

    console.print(
        Panel(
            "[info]Ollama Setup Guide:\n[/info]"
            f"[bold green]{ollama_link}[/bold green]",
            title="Ollama",
            border_style="green"
        )
    )
    ollama = Prompt.ask("[info]Enter Ollama API Key[/info]", password=True)

    # Overwrites any existing config.json wholesale.
    # NOTE(review): OLLAMA_API_KEY is saved here but config.Settings only
    # reads GROQ_API_KEY and SUPERMEMORY_API_KEY — confirm the Ollama key
    # is consumed somewhere else.
    with open(config_file, "w") as f:
        json.dump({
            "GROQ_API_KEY": groq,
            "SUPERMEMORY_API_KEY": supermemory,
            "OLLAMA_API_KEY": ollama
        }, f, indent= 2)

    # NOTE(review): the two adjacent strings below concatenate with no
    # separator, so "successfully!saved at ..." renders on one line —
    # consider inserting "\n" between them.
    console.print(
        Panel(
            "[success]✔ Configuration saved successfully![/success]"
            f"[success]saved at {config_file} [/success]",
            border_style= "green"
        ))
+
|
|
99
|
+
|
|
100
|
+
@app.command()
def models_download():
    """
    Download and cache all required AI models locally.
    """
    models_dir = config_dir /"models"
    models_dir.mkdir(exist_ok= True, parents= True)

    console.print(
        Panel(
            "[primary]Downloading Models[/primary]",
            border_style="green"
        )
    )

    # Imported lazily so the heavy model stack only loads for this command.
    from gurrt.core.models import download_models

    columns = (
        SpinnerColumn(style="green"),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(bar_width=None, style="green"),
    )
    with Progress(*columns, console=console) as progress:
        task = progress.add_task("[info]Downloading models...", total=100)
        download_models(models_dir)
        progress.update(task, completed=100)

    console.print("[success]✔ Models cached successfully![/success]")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@app.command()
def index(video_path):
    """
    Index a video by extracting frames and audio for retrieval.
    """
    console.print(
        Panel(
            f"[primary]Indexing Video[/primary]\n[info]{video_path}[/info]",
            border_style="green"
        )
    )

    pipeline = VideoRag()
    # Frames/captions first, then audio transcription with a spinner.
    pipeline.index_video(video_path=video_path)
    with console.status("[info]Processing audio transcription...[/info]", spinner="dots"):
        pipeline.index_audio(video_path=video_path)

    console.print(Panel(
        "[success]✔ Video indexed successfully![/success]"
        "[success]You may start asking your queries![/success]",
        border_style="green"
    ))
+
|
|
152
|
+
@app.command(help = "Ask a question about an indexed video.")
|
|
153
|
+
def ask(query:str):
|
|
154
|
+
"""
|
|
155
|
+
Ask a question about an indexed video.
|
|
156
|
+
"""
|
|
157
|
+
rag = VideoRag()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
with console.status("[info]Thinking...[/info]", spinner="dots"):
|
|
161
|
+
response = asyncio.run(rag.ask(query= query))
|
|
162
|
+
console.print(
|
|
163
|
+
Panel(
|
|
164
|
+
response,
|
|
165
|
+
title="[success]Answer[/success]",
|
|
166
|
+
border_style="green"
|
|
167
|
+
)
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
if __name__ == "__main__":
|
|
171
|
+
app()
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from platformdirs import user_config_dir
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
class Settings:
    """Runtime configuration for gurrt.

    Loads API keys from config.json in the per-user config directory and
    defines model identifiers and filesystem paths used across the pipeline.
    Raises RuntimeError if the config file has not been created yet
    (i.e. `gurrt init` was never run).
    """

    def __init__(self):
        config_home = Path(user_config_dir("gurrt"))
        config_home.mkdir(exist_ok=True, parents= True)

        config_file = config_home / "config.json"

        # Guard clause: without a saved config there are no API keys to load.
        if not config_file.exists():
            raise RuntimeError("API Keys not found")

        with open(config_file) as fh:
            keys = json.load(fh)
        self.GROQ_API_KEY = keys.get('GROQ_API_KEY')
        self.SUPERMEMORY_API_KEY = keys.get("SUPERMEMORY_API_KEY")

        # Model identifiers (HuggingFace hub names / Groq / Whisper sizes).
        self.CLIP_MODEL = "openai/clip-vit-base-patch32"
        self.LLM_MODEL="llama-3.1-8b-instant"
        self.RERANKER_MODEL = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
        self.BLIP_MODEL = "Salesforce/blip-image-captioning-large"
        self.WHISPER_MODEL = "large-v2"

        # Filesystem layout rooted at the user config dir.
        self.MODEL_CACHE_DIR = config_home / "models"
        self.CHROMA_DB_PATH= config_home / "chroma_db"
        self.AUDIO_PATH = config_home / "output.mp3"
|
File without changes
|