jetson-examples 0.1.8-py3-none-any.whl → 0.1.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {jetson_examples-0.1.8.dist-info → jetson_examples-0.1.9.dist-info}/LICENSE +21 -21
  2. {jetson_examples-0.1.8.dist-info → jetson_examples-0.1.9.dist-info}/METADATA +1 -1
  3. jetson_examples-0.1.9.dist-info/RECORD +109 -0
  4. reComputer/__init__.py +1 -1
  5. reComputer/main.py +60 -60
  6. reComputer/scripts/MoveNet-Lightning/clean.sh +8 -8
  7. reComputer/scripts/MoveNet-Lightning/getVersion.sh +59 -59
  8. reComputer/scripts/MoveNet-Lightning/init.sh +6 -6
  9. reComputer/scripts/MoveNet-Lightning/readme.md +30 -30
  10. reComputer/scripts/MoveNet-Lightning/run.sh +19 -19
  11. reComputer/scripts/MoveNet-Thunder/clean.sh +7 -7
  12. reComputer/scripts/MoveNet-Thunder/getVersion.sh +59 -59
  13. reComputer/scripts/MoveNet-Thunder/init.sh +6 -6
  14. reComputer/scripts/MoveNet-Thunder/readme.md +31 -31
  15. reComputer/scripts/MoveNet-Thunder/run.sh +18 -18
  16. reComputer/scripts/MoveNetJS/clean.sh +4 -4
  17. reComputer/scripts/MoveNetJS/readme.md +56 -56
  18. reComputer/scripts/MoveNetJS/run.sh +13 -13
  19. reComputer/scripts/Sheared-LLaMA-2.7B-ShareGPT/init.sh +16 -16
  20. reComputer/scripts/Sheared-LLaMA-2.7B-ShareGPT/run.sh +8 -8
  21. reComputer/scripts/audiocraft/README.md +35 -35
  22. reComputer/scripts/audiocraft/clean.sh +5 -5
  23. reComputer/scripts/audiocraft/init.sh +16 -16
  24. reComputer/scripts/audiocraft/run.sh +7 -7
  25. reComputer/scripts/check.sh +4 -4
  26. reComputer/scripts/clean.sh +33 -33
  27. reComputer/scripts/comfyui/LICENSE +21 -21
  28. reComputer/scripts/comfyui/README.md +127 -127
  29. reComputer/scripts/comfyui/clean.sh +9 -7
  30. reComputer/scripts/comfyui/config.yaml +30 -29
  31. reComputer/scripts/comfyui/init.sh +9 -163
  32. reComputer/scripts/comfyui/run.sh +30 -30
  33. reComputer/scripts/depth-anything/Dockerfile +5 -5
  34. reComputer/scripts/depth-anything/LICENSE +21 -21
  35. reComputer/scripts/depth-anything/README.md +135 -135
  36. reComputer/scripts/depth-anything/clean.sh +7 -7
  37. reComputer/scripts/depth-anything/config.yaml +31 -31
  38. reComputer/scripts/depth-anything/init.sh +164 -164
  39. reComputer/scripts/depth-anything/run.sh +22 -22
  40. reComputer/scripts/depth-anything-v2/Dockerfile +5 -5
  41. reComputer/scripts/depth-anything-v2/LICENSE +21 -21
  42. reComputer/scripts/depth-anything-v2/README.md +135 -135
  43. reComputer/scripts/depth-anything-v2/clean.sh +7 -7
  44. reComputer/scripts/depth-anything-v2/config.yaml +31 -31
  45. reComputer/scripts/depth-anything-v2/init.sh +164 -164
  46. reComputer/scripts/depth-anything-v2/run.sh +22 -22
  47. reComputer/scripts/live-llava/init.sh +16 -16
  48. reComputer/scripts/live-llava/run.sh +278 -278
  49. reComputer/scripts/llama-factory/README.md +68 -68
  50. reComputer/scripts/llama-factory/clean.sh +4 -4
  51. reComputer/scripts/llama-factory/init.sh +52 -52
  52. reComputer/scripts/llama-factory/run.sh +10 -10
  53. reComputer/scripts/llama3/clean.sh +22 -22
  54. reComputer/scripts/llama3/config.yaml +31 -0
  55. reComputer/scripts/llama3/init.sh +19 -16
  56. reComputer/scripts/llama3/run.sh +13 -13
  57. reComputer/scripts/llava/clean.sh +3 -3
  58. reComputer/scripts/llava/init.sh +16 -16
  59. reComputer/scripts/llava/run.sh +9 -9
  60. reComputer/scripts/llava-v1.5-7b/init.sh +16 -16
  61. reComputer/scripts/llava-v1.5-7b/run.sh +9 -9
  62. reComputer/scripts/llava-v1.6-vicuna-7b/init.sh +16 -16
  63. reComputer/scripts/llava-v1.6-vicuna-7b/run.sh +10 -10
  64. reComputer/scripts/nanodb/init.sh +16 -16
  65. reComputer/scripts/nanodb/readme.md +10 -10
  66. reComputer/scripts/nanodb/run.sh +90 -90
  67. reComputer/scripts/nanoowl/init.sh +16 -16
  68. reComputer/scripts/nanoowl/run.sh +7 -7
  69. reComputer/scripts/ollama/clean.sh +22 -22
  70. reComputer/scripts/ollama/config.yaml +31 -0
  71. reComputer/scripts/ollama/init.sh +19 -16
  72. reComputer/scripts/ollama/run.sh +10 -10
  73. reComputer/scripts/parler-tts/clean.sh +7 -7
  74. reComputer/scripts/parler-tts/getVersion.sh +59 -59
  75. reComputer/scripts/parler-tts/init.sh +8 -8
  76. reComputer/scripts/parler-tts/readme.md +63 -63
  77. reComputer/scripts/parler-tts/run.sh +17 -17
  78. reComputer/scripts/run.sh +48 -48
  79. reComputer/scripts/stable-diffusion-webui/init.sh +16 -16
  80. reComputer/scripts/stable-diffusion-webui/run.sh +6 -6
  81. reComputer/scripts/text-generation-webui/init.sh +16 -16
  82. reComputer/scripts/text-generation-webui/run.sh +11 -11
  83. reComputer/scripts/ultralytics-yolo/LICENSE +21 -21
  84. reComputer/scripts/ultralytics-yolo/README.md +124 -124
  85. reComputer/scripts/ultralytics-yolo/clean.sh +6 -6
  86. reComputer/scripts/ultralytics-yolo/config.yaml +31 -31
  87. reComputer/scripts/ultralytics-yolo/init.sh +4 -4
  88. reComputer/scripts/ultralytics-yolo/run.sh +26 -26
  89. reComputer/scripts/update.sh +26 -26
  90. reComputer/scripts/utils.sh +168 -166
  91. reComputer/scripts/whisper/init.sh +16 -16
  92. reComputer/scripts/whisper/run.sh +7 -7
  93. reComputer/scripts/yolov10/Dockerfile +13 -13
  94. reComputer/scripts/yolov10/README.md +71 -71
  95. reComputer/scripts/yolov10/clean.sh +4 -4
  96. reComputer/scripts/yolov10/config.yaml +31 -31
  97. reComputer/scripts/yolov10/init.sh +20 -20
  98. reComputer/scripts/yolov10/run.sh +7 -7
  99. reComputer/scripts/yolov8-rail-inspection/config.yaml +31 -31
  100. reComputer/scripts/yolov8-rail-inspection/init.sh +5 -5
  101. reComputer/scripts/yolov8-rail-inspection/readme.md +35 -35
  102. reComputer/scripts/yolov8-rail-inspection/run.sh +21 -21
  103. jetson_examples-0.1.8.dist-info/RECORD +0 -107
  104. {jetson_examples-0.1.8.dist-info → jetson_examples-0.1.9.dist-info}/WHEEL +0 -0
  105. {jetson_examples-0.1.8.dist-info → jetson_examples-0.1.9.dist-info}/entry_points.txt +0 -0
  106. {jetson_examples-0.1.8.dist-info → jetson_examples-0.1.9.dist-info}/top_level.txt +0 -0
@@ -1,278 +1,278 @@
1
- #!/bin/bash
2
-
3
- SUPPORT_L4T_LIST="35.3.1"
4
- BASE_PATH=/home/$USER/reComputer
5
- JETSON_REPO_PATH="$BASE_PATH/jetson-containers"
6
-
7
- get_l4t_version() {
8
- ARCH=$(uname -i)
9
- echo "ARCH: $ARCH"
10
-
11
- if [ $ARCH = "aarch64" ]; then
12
- L4T_VERSION_STRING=$(head -n 1 /etc/nv_tegra_release)
13
-
14
- if [ -z "$L4T_VERSION_STRING" ]; then
15
- echo "reading L4T version from \"dpkg-query --show nvidia-l4t-core\""
16
- L4T_VERSION_STRING=$(dpkg-query --showformat='${Version}' --show nvidia-l4t-core)
17
- L4T_VERSION_ARRAY=(${L4T_VERSION_STRING//./ })
18
- L4T_RELEASE=${L4T_VERSION_ARRAY[0]}
19
- L4T_REVISION=${L4T_VERSION_ARRAY[1]}
20
- else
21
- echo "reading L4T version from /etc/nv_tegra_release"
22
- L4T_RELEASE=$(echo $L4T_VERSION_STRING | cut -f 2 -d ' ' | grep -Po '(?<=R)[^;]+')
23
- L4T_REVISION=$(echo $L4T_VERSION_STRING | cut -f 2 -d ',' | grep -Po '(?<=REVISION: )[^;]+')
24
- fi
25
-
26
- L4T_REVISION_MAJOR=${L4T_REVISION:0:1}
27
- L4T_REVISION_MINOR=${L4T_REVISION:2:1}
28
- L4T_VERSION="$L4T_RELEASE.$L4T_REVISION"
29
-
30
- echo "L4T_VERSION: $L4T_VERSION"
31
-
32
- elif [ $ARCH != "x86_64" ]; then
33
- echo "unsupported architecture: $ARCH" # show in red color
34
- exit 1
35
- fi
36
- }
37
-
38
- # 1. Check L4T version
39
- get_l4t_version
40
- CHECK_L4T_VERSION=0
41
- for item in $SUPPORT_L4T_LIST; do
42
- if [ "$item" = "$L4T_VERSION" ]; then
43
- CHECK_L4T_VERSION=1
44
- break
45
- fi
46
- done
47
-
48
- if [ $CHECK_L4T_VERSION -eq 1 ]; then
49
- echo "pass the version check"
50
- else
51
- echo "currently supported versions of jetpack are $SUPPORT_L4T_LIST" # show in red color
52
- exit 1
53
- fi
54
-
55
- # 2. Check Google Chrome
56
- if dpkg -s chromium-browser &>/dev/null; then
57
- echo "Chrome is installed."
58
- else
59
- echo "install Google Chrome ..." # show in red color
60
- sudo apt install chromium-browser
61
- echo "Google Chrome installed successfully" # show in red color
62
- fi
63
-
64
- # 3. Generate Google browser key
65
- FILE_NAME="key.pem"
66
- FILE_PATH="$JETSON_REPO_PATH/data"
67
- if [ -f "$FILE_PATH/$FILE_NAME" ]; then
68
- echo "key file '$FILE_PATH/$FILE_NAME' exists."
69
- else
70
- cd $FILE_PATH
71
- openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 365 -nodes -subj '/CN=localhost'
72
- cd ..
73
- fi
74
-
75
- # 4. edit source code
76
- cat >"$JETSON_REPO_PATH/packages/llm/local_llm/agents/video_query.py" <<'EOF'
77
- #!/usr/bin/env python3
78
- import time
79
- import logging
80
- import threading
81
-
82
- from local_llm import Agent
83
-
84
- from local_llm.plugins import (
85
- VideoSource,
86
- VideoOutput,
87
- ChatQuery,
88
- PrintStream,
89
- ProcessProxy,
90
- )
91
- from local_llm.utils import ArgParser, print_table
92
-
93
- from termcolor import cprint
94
- from jetson_utils import cudaFont, cudaMemcpy, cudaToNumpy, cudaDeviceSynchronize
95
-
96
- from flask import Flask, request
97
-
98
-
99
- class VideoQuery(Agent):
100
- """
101
- Perpetual always-on closed-loop visual agent that applies prompts to a video stream.
102
- """
103
-
104
- def __init__(self, model="liuhaotian/llava-v1.5-7b", **kwargs):
105
- super().__init__()
106
- self.lock = threading.Lock()
107
-
108
- # load model in another process for smooth streaming
109
- # self.llm = ProcessProxy((lambda **kwargs: ChatQuery(model, drop_inputs=True, **kwargs)), **kwargs)
110
- self.llm = ChatQuery(model, drop_inputs=True, **kwargs)
111
- self.llm.add(PrintStream(color="green", relay=True).add(self.on_text))
112
- self.llm.start()
113
-
114
- # test / warm-up query
115
- self.warmup = True
116
- self.text = ""
117
- self.eos = False
118
-
119
- self.llm("What is 2+2?")
120
-
121
- while self.warmup:
122
- time.sleep(0.25)
123
-
124
- # create video streams
125
- self.video_source = VideoSource(**kwargs)
126
- self.video_output = VideoOutput(**kwargs)
127
-
128
- self.video_source.add(self.on_video, threaded=False)
129
- self.video_output.start()
130
-
131
- self.font = cudaFont()
132
-
133
- # setup prompts
134
- self.prompt = "Describe the image concisely and briefly."
135
-
136
- # entry node
137
- self.pipeline = [self.video_source]
138
-
139
- def on_video(self, image):
140
- np_image = cudaToNumpy(image)
141
- cudaDeviceSynchronize()
142
-
143
- self.llm(
144
- [
145
- "reset",
146
- np_image,
147
- self.prompt,
148
- ]
149
- )
150
-
151
- text = self.text.replace("\n", "").replace("</s>", "").strip()
152
-
153
- if text:
154
- worlds = text.split()
155
- line_counter = len(worlds) // 10
156
- if len(worlds) % 10 != 0:
157
- line_counter += 1
158
- for l in range(line_counter):
159
- line_text = " ".join(worlds[l * 10 : (l + 1) * 10])
160
- self.font.OverlayText(
161
- image,
162
- text=line_text,
163
- x=5,
164
- y=int(79 + l * 37),
165
- color=self.font.White,
166
- background=self.font.Gray40,
167
- )
168
- self.font.OverlayText(
169
- image,
170
- text="Prompt: " + self.prompt,
171
- x=5,
172
- y=42,
173
- color=(120, 215, 21),
174
- background=self.font.Gray40,
175
- )
176
- self.video_output(image)
177
-
178
- def on_text(self, text):
179
- if self.eos:
180
- self.text = text # new query response
181
- self.eos = False
182
- elif not self.warmup: # don't view warmup response
183
- self.text = self.text + text
184
-
185
- if text.endswith("</s>") or text.endswith("###") or text.endswith("<|im_end|>"):
186
- self.print_stats()
187
- self.warmup = False
188
- self.eos = True
189
-
190
- def update_switch(self, on_off):
191
- self.video_source.switch(on_off)
192
-
193
- def update_prompts(self, new_prompt):
194
- with self.lock:
195
- if new_prompt:
196
- self.prompt = new_prompt
197
-
198
- def print_stats(self):
199
- # print_table(self.llm.model.stats)
200
- curr_time = time.perf_counter()
201
-
202
- if not hasattr(self, "start_time"):
203
- self.start_time = curr_time
204
- else:
205
- frame_time = curr_time - self.start_time
206
- self.start_time = curr_time
207
- logging.info(
208
- f"refresh rate: {1.0 / frame_time:.2f} FPS ({frame_time*1000:.1f} ms)"
209
- )
210
-
211
-
212
- if __name__ == "__main__":
213
- parser = ArgParser(extras=ArgParser.Defaults + ["video_input", "video_output"])
214
- args = parser.parse_args()
215
- # 独立线程运行
216
- agent = VideoQuery(**vars(args))
217
-
218
- def run_video_query():
219
- agent.run()
220
-
221
- video_query_thread = threading.Thread(target=run_video_query)
222
- video_query_thread.start()
223
-
224
- # 启动web服务
225
- app = Flask(__name__)
226
-
227
- @app.route("/update_prompt", methods=["POST"])
228
- def update_prompts():
229
- prompt = request.json.get("prompt")
230
- if prompt:
231
- agent.update_prompts(prompt)
232
- return "Prompts updated successfully."
233
- else:
234
- return "Invalid prompts data."
235
-
236
- @app.route("/update_switch", methods=["POST"])
237
- def update_switch():
238
- infer_or_not = True if request.json.get("switch") == "on" else False
239
- agent.update_switch(infer_or_not)
240
- return "stop" if not infer_or_not else "start"
241
-
242
- @app.route("/update_params", methods=["POST"])
243
- def update_params():
244
- try:
245
- agent.llm.max_new_tokens = request.json.get("max_new_tokens") or 128
246
- agent.llm.min_new_tokens = request.json.get("min_new_tokens") or -1
247
- agent.llm.do_sample = request.json.get("do_sample") or False
248
- agent.llm.repetition_penalty = request.json.get("repetition_penalty") or 1.0
249
- agent.llm.temperature = request.json.get("temperature") or 0.7
250
- agent.llm.top_p = request.json.get("top_p") or 0.95
251
- if request.json.get("system_prompt"):
252
- agent.llm.chat_history.template["system_prompt"] = request.json.get(
253
- "system_prompt"
254
- )
255
- return "params updated."
256
- except Exception as e:
257
- print(e)
258
- return "update failure"
259
-
260
- app.run(host="0.0.0.0", port=5555)
261
-
262
-
263
- EOF
264
-
265
- sed -i 's/from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, SiglipImageProcessor, SiglipVisionModel/from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection # , SiglipImageProcessor, SiglipVisionModel/' "$JETSON_REPO_PATH/packages/llm/local_llm/vision/clip_hf.py"
266
- sed -i "s/'siglip': dict(preprocessor=SiglipImageProcessor, model=SiglipVisionModel),/# 'siglip': dict(preprocessor=SiglipImageProcessor, model=SiglipVisionModel),/" "$JETSON_REPO_PATH/packages/llm/local_llm/vision/clip_hf.py"
267
-
268
- sed -i 's/from .audio import */# from .audio import */' "$JETSON_REPO_PATH/packages/llm/local_llm/plugins/__init__.py"
269
- sed -i 's/from .nanodb import NanoDB/# from .nanodb import NanoDB/' "$JETSON_REPO_PATH/packages/llm/local_llm/plugins/__init__.py"
270
-
271
- sed -i 's/import onnxruntime as ort/# import onnxruntime as ort/' "$JETSON_REPO_PATH/packages/llm/local_llm/utils/model.py"
272
-
273
- echo "The script has been modified."
274
-
275
- gnome-terminal -- /bin/bash -c "chromium-browser --disable-features=WebRtcHideLocalIpsWithMdns https://localhost:8554/; exec /bin/bash"
276
-
277
- cd $JETSON_REPO_PATH
278
- sudo docker run --runtime nvidia -it --rm --network host --volume /tmp/argus_socket:/tmp/argus_socket --volume /etc/enctune.conf:/etc/enctune.conf --volume /etc/nv_tegra_release:/etc/nv_tegra_release --volume /proc/device-tree/model:/tmp/nv_jetson_model --volume /var/run/dbus:/var/run/dbus --volume /var/run/avahi-daemon/socket:/var/run/avahi-daemon/socket --volume /var/run/docker.sock:/var/run/docker.sock --volume $JETSON_REPO_PATH/data:/data --device /dev/snd --device /dev/bus/usb -e DISPLAY=:0 -v /tmp/.X11-unix/:/tmp/.X11-unix -v /tmp/.docker.xauth:/tmp/.docker.xauth -e XAUTHORITY=/tmp/.docker.xauth --device /dev/video0 --device /dev/video1 -v $JETSON_REPO_PATH/packages/llm/local_llm:/opt/local_llm/local_llm -e SSL_KEY=/data/key.pem -e SSL_CERT=/data/cert.pem dustynv/local_llm:r35.3.1 python3 -m local_llm.agents.video_query --api=mlc --verbose --model liuhaotian/llava-v1.5-7b --max-new-tokens 32 --video-input /dev/video0 --video-output webrtc://@:8554/output
1
+ #!/bin/bash
2
+
3
+ SUPPORT_L4T_LIST="35.3.1"
4
+ BASE_PATH=/home/$USER/reComputer
5
+ JETSON_REPO_PATH="$BASE_PATH/jetson-containers"
6
+
7
+ get_l4t_version() {
8
+ ARCH=$(uname -i)
9
+ echo "ARCH: $ARCH"
10
+
11
+ if [ $ARCH = "aarch64" ]; then
12
+ L4T_VERSION_STRING=$(head -n 1 /etc/nv_tegra_release)
13
+
14
+ if [ -z "$L4T_VERSION_STRING" ]; then
15
+ echo "reading L4T version from \"dpkg-query --show nvidia-l4t-core\""
16
+ L4T_VERSION_STRING=$(dpkg-query --showformat='${Version}' --show nvidia-l4t-core)
17
+ L4T_VERSION_ARRAY=(${L4T_VERSION_STRING//./ })
18
+ L4T_RELEASE=${L4T_VERSION_ARRAY[0]}
19
+ L4T_REVISION=${L4T_VERSION_ARRAY[1]}
20
+ else
21
+ echo "reading L4T version from /etc/nv_tegra_release"
22
+ L4T_RELEASE=$(echo $L4T_VERSION_STRING | cut -f 2 -d ' ' | grep -Po '(?<=R)[^;]+')
23
+ L4T_REVISION=$(echo $L4T_VERSION_STRING | cut -f 2 -d ',' | grep -Po '(?<=REVISION: )[^;]+')
24
+ fi
25
+
26
+ L4T_REVISION_MAJOR=${L4T_REVISION:0:1}
27
+ L4T_REVISION_MINOR=${L4T_REVISION:2:1}
28
+ L4T_VERSION="$L4T_RELEASE.$L4T_REVISION"
29
+
30
+ echo "L4T_VERSION: $L4T_VERSION"
31
+
32
+ elif [ $ARCH != "x86_64" ]; then
33
+ echo "unsupported architecture: $ARCH" # show in red color
34
+ exit 1
35
+ fi
36
+ }
37
+
38
+ # 1. Check L4T version
39
+ get_l4t_version
40
+ CHECK_L4T_VERSION=0
41
+ for item in $SUPPORT_L4T_LIST; do
42
+ if [ "$item" = "$L4T_VERSION" ]; then
43
+ CHECK_L4T_VERSION=1
44
+ break
45
+ fi
46
+ done
47
+
48
+ if [ $CHECK_L4T_VERSION -eq 1 ]; then
49
+ echo "pass the version check"
50
+ else
51
+ echo "currently supported versions of jetpack are $SUPPORT_L4T_LIST" # show in red color
52
+ exit 1
53
+ fi
54
+
55
+ # 2. Check Google Chrome
56
+ if dpkg -s chromium-browser &>/dev/null; then
57
+ echo "Chrome is installed."
58
+ else
59
+ echo "install Google Chrome ..." # show in red color
60
+ sudo apt install chromium-browser
61
+ echo "Google Chrome installed successfully" # show in red color
62
+ fi
63
+
64
+ # 3. Generate Google browser key
65
+ FILE_NAME="key.pem"
66
+ FILE_PATH="$JETSON_REPO_PATH/data"
67
+ if [ -f "$FILE_PATH/$FILE_NAME" ]; then
68
+ echo "key file '$FILE_PATH/$FILE_NAME' exists."
69
+ else
70
+ cd $FILE_PATH
71
+ openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 365 -nodes -subj '/CN=localhost'
72
+ cd ..
73
+ fi
74
+
75
+ # 4. edit source code
76
+ cat >"$JETSON_REPO_PATH/packages/llm/local_llm/agents/video_query.py" <<'EOF'
77
+ #!/usr/bin/env python3
78
+ import time
79
+ import logging
80
+ import threading
81
+
82
+ from local_llm import Agent
83
+
84
+ from local_llm.plugins import (
85
+ VideoSource,
86
+ VideoOutput,
87
+ ChatQuery,
88
+ PrintStream,
89
+ ProcessProxy,
90
+ )
91
+ from local_llm.utils import ArgParser, print_table
92
+
93
+ from termcolor import cprint
94
+ from jetson_utils import cudaFont, cudaMemcpy, cudaToNumpy, cudaDeviceSynchronize
95
+
96
+ from flask import Flask, request
97
+
98
+
99
+ class VideoQuery(Agent):
100
+ """
101
+ Perpetual always-on closed-loop visual agent that applies prompts to a video stream.
102
+ """
103
+
104
+ def __init__(self, model="liuhaotian/llava-v1.5-7b", **kwargs):
105
+ super().__init__()
106
+ self.lock = threading.Lock()
107
+
108
+ # load model in another process for smooth streaming
109
+ # self.llm = ProcessProxy((lambda **kwargs: ChatQuery(model, drop_inputs=True, **kwargs)), **kwargs)
110
+ self.llm = ChatQuery(model, drop_inputs=True, **kwargs)
111
+ self.llm.add(PrintStream(color="green", relay=True).add(self.on_text))
112
+ self.llm.start()
113
+
114
+ # test / warm-up query
115
+ self.warmup = True
116
+ self.text = ""
117
+ self.eos = False
118
+
119
+ self.llm("What is 2+2?")
120
+
121
+ while self.warmup:
122
+ time.sleep(0.25)
123
+
124
+ # create video streams
125
+ self.video_source = VideoSource(**kwargs)
126
+ self.video_output = VideoOutput(**kwargs)
127
+
128
+ self.video_source.add(self.on_video, threaded=False)
129
+ self.video_output.start()
130
+
131
+ self.font = cudaFont()
132
+
133
+ # setup prompts
134
+ self.prompt = "Describe the image concisely and briefly."
135
+
136
+ # entry node
137
+ self.pipeline = [self.video_source]
138
+
139
+ def on_video(self, image):
140
+ np_image = cudaToNumpy(image)
141
+ cudaDeviceSynchronize()
142
+
143
+ self.llm(
144
+ [
145
+ "reset",
146
+ np_image,
147
+ self.prompt,
148
+ ]
149
+ )
150
+
151
+ text = self.text.replace("\n", "").replace("</s>", "").strip()
152
+
153
+ if text:
154
+ worlds = text.split()
155
+ line_counter = len(worlds) // 10
156
+ if len(worlds) % 10 != 0:
157
+ line_counter += 1
158
+ for l in range(line_counter):
159
+ line_text = " ".join(worlds[l * 10 : (l + 1) * 10])
160
+ self.font.OverlayText(
161
+ image,
162
+ text=line_text,
163
+ x=5,
164
+ y=int(79 + l * 37),
165
+ color=self.font.White,
166
+ background=self.font.Gray40,
167
+ )
168
+ self.font.OverlayText(
169
+ image,
170
+ text="Prompt: " + self.prompt,
171
+ x=5,
172
+ y=42,
173
+ color=(120, 215, 21),
174
+ background=self.font.Gray40,
175
+ )
176
+ self.video_output(image)
177
+
178
+ def on_text(self, text):
179
+ if self.eos:
180
+ self.text = text # new query response
181
+ self.eos = False
182
+ elif not self.warmup: # don't view warmup response
183
+ self.text = self.text + text
184
+
185
+ if text.endswith("</s>") or text.endswith("###") or text.endswith("<|im_end|>"):
186
+ self.print_stats()
187
+ self.warmup = False
188
+ self.eos = True
189
+
190
+ def update_switch(self, on_off):
191
+ self.video_source.switch(on_off)
192
+
193
+ def update_prompts(self, new_prompt):
194
+ with self.lock:
195
+ if new_prompt:
196
+ self.prompt = new_prompt
197
+
198
+ def print_stats(self):
199
+ # print_table(self.llm.model.stats)
200
+ curr_time = time.perf_counter()
201
+
202
+ if not hasattr(self, "start_time"):
203
+ self.start_time = curr_time
204
+ else:
205
+ frame_time = curr_time - self.start_time
206
+ self.start_time = curr_time
207
+ logging.info(
208
+ f"refresh rate: {1.0 / frame_time:.2f} FPS ({frame_time*1000:.1f} ms)"
209
+ )
210
+
211
+
212
+ if __name__ == "__main__":
213
+ parser = ArgParser(extras=ArgParser.Defaults + ["video_input", "video_output"])
214
+ args = parser.parse_args()
215
+ # 独立线程运行
216
+ agent = VideoQuery(**vars(args))
217
+
218
+ def run_video_query():
219
+ agent.run()
220
+
221
+ video_query_thread = threading.Thread(target=run_video_query)
222
+ video_query_thread.start()
223
+
224
+ # 启动web服务
225
+ app = Flask(__name__)
226
+
227
+ @app.route("/update_prompt", methods=["POST"])
228
+ def update_prompts():
229
+ prompt = request.json.get("prompt")
230
+ if prompt:
231
+ agent.update_prompts(prompt)
232
+ return "Prompts updated successfully."
233
+ else:
234
+ return "Invalid prompts data."
235
+
236
+ @app.route("/update_switch", methods=["POST"])
237
+ def update_switch():
238
+ infer_or_not = True if request.json.get("switch") == "on" else False
239
+ agent.update_switch(infer_or_not)
240
+ return "stop" if not infer_or_not else "start"
241
+
242
+ @app.route("/update_params", methods=["POST"])
243
+ def update_params():
244
+ try:
245
+ agent.llm.max_new_tokens = request.json.get("max_new_tokens") or 128
246
+ agent.llm.min_new_tokens = request.json.get("min_new_tokens") or -1
247
+ agent.llm.do_sample = request.json.get("do_sample") or False
248
+ agent.llm.repetition_penalty = request.json.get("repetition_penalty") or 1.0
249
+ agent.llm.temperature = request.json.get("temperature") or 0.7
250
+ agent.llm.top_p = request.json.get("top_p") or 0.95
251
+ if request.json.get("system_prompt"):
252
+ agent.llm.chat_history.template["system_prompt"] = request.json.get(
253
+ "system_prompt"
254
+ )
255
+ return "params updated."
256
+ except Exception as e:
257
+ print(e)
258
+ return "update failure"
259
+
260
+ app.run(host="0.0.0.0", port=5555)
261
+
262
+
263
+ EOF
264
+
265
+ sed -i 's/from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, SiglipImageProcessor, SiglipVisionModel/from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection # , SiglipImageProcessor, SiglipVisionModel/' "$JETSON_REPO_PATH/packages/llm/local_llm/vision/clip_hf.py"
266
+ sed -i "s/'siglip': dict(preprocessor=SiglipImageProcessor, model=SiglipVisionModel),/# 'siglip': dict(preprocessor=SiglipImageProcessor, model=SiglipVisionModel),/" "$JETSON_REPO_PATH/packages/llm/local_llm/vision/clip_hf.py"
267
+
268
+ sed -i 's/from .audio import */# from .audio import */' "$JETSON_REPO_PATH/packages/llm/local_llm/plugins/__init__.py"
269
+ sed -i 's/from .nanodb import NanoDB/# from .nanodb import NanoDB/' "$JETSON_REPO_PATH/packages/llm/local_llm/plugins/__init__.py"
270
+
271
+ sed -i 's/import onnxruntime as ort/# import onnxruntime as ort/' "$JETSON_REPO_PATH/packages/llm/local_llm/utils/model.py"
272
+
273
+ echo "The script has been modified."
274
+
275
+ gnome-terminal -- /bin/bash -c "chromium-browser --disable-features=WebRtcHideLocalIpsWithMdns https://localhost:8554/; exec /bin/bash"
276
+
277
+ cd $JETSON_REPO_PATH
278
+ sudo docker run --runtime nvidia -it --rm --network host --volume /tmp/argus_socket:/tmp/argus_socket --volume /etc/enctune.conf:/etc/enctune.conf --volume /etc/nv_tegra_release:/etc/nv_tegra_release --volume /proc/device-tree/model:/tmp/nv_jetson_model --volume /var/run/dbus:/var/run/dbus --volume /var/run/avahi-daemon/socket:/var/run/avahi-daemon/socket --volume /var/run/docker.sock:/var/run/docker.sock --volume $JETSON_REPO_PATH/data:/data --device /dev/snd --device /dev/bus/usb -e DISPLAY=:0 -v /tmp/.X11-unix/:/tmp/.X11-unix -v /tmp/.docker.xauth:/tmp/.docker.xauth -e XAUTHORITY=/tmp/.docker.xauth --device /dev/video0 --device /dev/video1 -v $JETSON_REPO_PATH/packages/llm/local_llm:/opt/local_llm/local_llm -e SSL_KEY=/data/key.pem -e SSL_CERT=/data/cert.pem dustynv/local_llm:r35.3.1 python3 -m local_llm.agents.video_query --api=mlc --verbose --model liuhaotian/llava-v1.5-7b --max-new-tokens 32 --video-input /dev/video0 --video-output webrtc://@:8554/output