hjxdl 0.3.46__py3-none-any.whl → 0.3.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdl/_version.py +2 -2
- hdl/utils/vis_tools/__init__.py +0 -0
- hdl/utils/vis_tools/scene_detect.py +198 -0
- {hjxdl-0.3.46.dist-info → hjxdl-0.3.48.dist-info}/METADATA +1 -1
- {hjxdl-0.3.46.dist-info → hjxdl-0.3.48.dist-info}/RECORD +8 -6
- {hjxdl-0.3.46.dist-info → hjxdl-0.3.48.dist-info}/WHEEL +0 -0
- {hjxdl-0.3.46.dist-info → hjxdl-0.3.48.dist-info}/licenses/LICENSE +0 -0
- {hjxdl-0.3.46.dist-info → hjxdl-0.3.48.dist-info}/top_level.txt +0 -0
hdl/_version.py
CHANGED
File without changes
|
@@ -0,0 +1,198 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import base64
|
4
|
+
from pathlib import Path
|
5
|
+
# from PIL import Image
|
6
|
+
# import openai
|
7
|
+
# from scenedetect import VideoManager, SceneManager
|
8
|
+
# from scenedetect.detectors import ContentDetector
|
9
|
+
import cv2
|
10
|
+
from tqdm import tqdm
|
11
|
+
import pandas as pd
|
12
|
+
import subprocess
|
13
|
+
|
14
|
+
|
15
|
+
def detect_scenes_cli(video_path, output_dir, threshold=20):
    """Run the ``scenedetect`` command-line tool on a video.

    Executes ``scenedetect -i <video> detect-content --threshold=<threshold>
    list-scenes -o <output_dir>`` and blocks until the command finishes.

    Args:
        video_path: Location of the video (``str`` or ``Path``).
        output_dir: Directory passed to ``list-scenes -o``.
        threshold: Value forwarded to ``detect-content --threshold``.

    Returns:
        ``Path`` pointing at ``<output_dir>/<video stem>-Scenes.csv``. The
        path is derived from the naming pattern only; this function does not
        verify that the file was actually written.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    video = Path(video_path)
    target_dir = Path(output_dir)

    # Passed as an argument list (no shell), so paths need no quoting.
    command = [
        "scenedetect",
        "-i", str(video),
        "detect-content",
        f"--threshold={threshold}",
        "list-scenes",
        "-o", str(target_dir),
    ]
    subprocess.run(command, check=True)

    return target_dir / f"{video.stem}-Scenes.csv"
|
32
|
+
|
33
|
+
|
34
|
+
def read_start_frames_from_csv(csv_path):
    """Return the ``Start Frame`` column of a scene-list CSV as a list of ints.

    The first line of the file is skipped, so the second line is treated as
    the header row.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The values of the ``Start Frame`` column, cast to ``int``, in file
        order.
    """
    scene_table = pd.read_csv(csv_path, skiprows=1)
    start_column = scene_table["Start Frame"].astype(int)
    return start_column.tolist()
|
37
|
+
|
38
|
+
|
39
|
+
def extract_frames_with_cv(video_path, frame_numbers, output_dir, grid_size=(3, 3)):
    """Write one grid image per scene, tiled from evenly spaced frames.

    Scene ``i`` spans from ``frame_numbers[i]`` to ``frame_numbers[i + 1]``
    (or to the last frame of the video for the final scene). From each scene,
    ``grid_w * grid_h`` frames are sampled at equal intervals between
    ``start + 1`` and ``end - 1``, tiled row by row, and saved as
    ``<output_dir>/scene_<i:03d>.jpg``.

    Scenes that are too short, or from which a frame cannot be read, are
    reported with ``print`` and skipped; no image is written for them.

    Args:
        video_path: Video file to read (``str`` or ``Path``).
        frame_numbers: Start frame of every scene, in ascending order.
        output_dir: Directory for the generated images; created if missing.
        grid_size: ``(columns, rows)`` of the tiled image.

    Raises:
        ValueError: If either grid dimension is smaller than 1.
        OSError: If the video cannot be opened.
    """
    grid_w, grid_h = grid_size
    if grid_w < 1 or grid_h < 1:
        raise ValueError(f"grid_size must be positive, got {grid_size!r}")
    num_required = grid_w * grid_h

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Convert Path objects to str before handing them to OpenCV.
    source = str(video_path) if isinstance(video_path, Path) else video_path
    cap = cv2.VideoCapture(source)
    try:
        if not cap.isOpened():
            raise OSError(f"cannot open video: {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        scene_count = len(frame_numbers)

        for i, start_frame in tqdm(enumerate(frame_numbers), total=scene_count, desc="抽取关键帧"):
            end_frame = frame_numbers[i + 1] if i < scene_count - 1 else total_frames - 1

            available_range = end_frame - start_frame
            if available_range <= num_required + 2:
                print(f"⚠️ 场景 {i} 太短(帧数不足 {num_required + 2}),跳过")
                continue

            # Sample num_required equally spaced frames from start+1 to end-1.
            # A 1x1 grid has a single sample, so the step is irrelevant (and
            # the general formula would divide by zero).
            if num_required > 1:
                step = (available_range - 2) / (num_required - 1)
            else:
                step = 0.0
            selected_frames = [int(start_frame + 1 + round(j * step)) for j in range(num_required)]

            frames = []
            for frame_num in selected_frames:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret, frame = cap.read()
                if not ret:
                    print(f"⚠️ 无法读取帧 {frame_num},跳过场景 {i}")
                    frames = []
                    break
                frames.append(frame)

            if len(frames) != num_required:
                print(f"⚠️ 场景 {i} 抽帧失败,不完整,跳过")
                continue

            # Tile the frames into a grid_w x grid_h image, row by row.
            output_img = str(out_dir / f"scene_{i:03d}.jpg")
            try:
                rows = [cv2.hconcat(frames[y * grid_w:(y + 1) * grid_w]) for y in range(grid_h)]
                grid_image = cv2.vconcat(rows)
                written = cv2.imwrite(output_img, grid_image)
            except cv2.error as e:
                print(f"❌ 拼图失败 场景 {i}:{e}")
                continue

            # imwrite reports failure through its return value.
            if not written:
                print(f"❌ 拼图失败 场景 {i}:无法写入 {output_img}")
    finally:
        # Release the capture even when an exception escapes the loop.
        cap.release()
|
83
|
+
|
84
|
+
|
85
|
+
def generate_json_template(frame_numbers, output_dir, output_path):
    """Write a JSON list with one placeholder entry per scene.

    Each entry holds the scene's ``start_frame``, the ``image`` file name
    ``scene_<index:03d>.jpg`` and a placeholder ``description``.

    Args:
        frame_numbers: Start frame of every scene; list position is the
            scene index used in the image file name.
        output_dir: Not used by this function; accepted for interface
            compatibility.
        output_path: File to write, UTF-8 encoded with 4-space indentation.
    """
    placeholder = "请描述这张图代表的场景"
    entries = [
        {
            "start_frame": start,
            "image": f"scene_{index:03d}.jpg",
            "description": placeholder,
        }
        for index, start in enumerate(frame_numbers)
    ]
    with open(output_path, "w", encoding="utf-8") as handle:
        json.dump(entries, handle, indent=4, ensure_ascii=False)
|
95
|
+
|
96
|
+
|
97
|
+
def describe_image(
    image_path,
    client,
    model: str = "default_model",
    sys_info: str = (
        "你是一个擅长视频场景理解的 AI,请根据提供的拼图图像,生成统一格式的视频场景描述。\n"
        "请包括:\n"
        "1. 场景中发生的主要事件或动作。\n"
        "2. 出现的人物(如有)及其行为。\n"
        "3. 场景的背景和气氛。\n"
        "请用简洁、客观、第三人称的方式描述,不要加入主观感受。\n"
        "输出格式如下:\n"
        "场景描述:XXX。"
    ),
    max_tokens: int = 200,
):
    """Ask a chat-completions model to describe a scene image.

    The image file is read, base64-encoded and sent as a ``data:image/jpeg``
    URL together with a fixed user instruction and the ``sys_info`` system
    prompt.

    Args:
        image_path: Path of the image file to describe.
        client: Object exposing ``chat.completions.create(...)``
            (presumably an OpenAI-compatible client; verify against caller).
        model: Model name forwarded to the API call.
        sys_info: System prompt for the request.
        max_tokens: Upper bound on generated tokens forwarded to the API call.

    Returns:
        The stripped text of the first choice, or the fixed string
        ``"描述生成失败"`` if anything goes wrong (unreadable file, request
        failure, unexpected response shape).
    """
    # Deliberately best-effort: one bad image must not abort a whole batch,
    # so every failure is reported and turned into a sentinel description.
    try:
        with open(image_path, "rb") as f:
            b64_img = base64.b64encode(f.read()).decode("utf-8")

        user_content = [
            {"type": "text", "text": "请根据图像生成一段客观的视频场景描述,内容包括人物、动作、背景、氛围。输出请以“场景描述:”开头。"},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64_img}"}},
        ]
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": sys_info},
                {"role": "user", "content": user_content},
            ],
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"❌ 处理图片 {image_path} 时出错: {e}")
        return "描述生成失败"
|
135
|
+
|
136
|
+
|
137
|
+
def fill_descriptions(
    client,
    input_json,
    output_json,
    output_dir,
    model=None,
):
    """Replace each scene's placeholder description with a generated one.

    Loads the scene list from ``input_json``, calls ``describe_image`` on
    ``<output_dir>/<scene["image"]>`` for every entry, stores the result in
    ``scene["description"]``, drops the ``image`` key, and writes the list to
    ``output_json``.

    Args:
        client: Client object forwarded to ``describe_image``.
        input_json: Path of the JSON template to read.
        output_json: Path of the JSON file to write.
        output_dir: Directory containing the scene images.
        model: Optional model name forwarded to ``describe_image``. When
            ``None`` (the default), ``describe_image`` uses its own default.
    """
    with open(input_json, "r", encoding="utf-8") as f:
        scenes = json.load(f)

    # Only override the model when the caller asked for one, so the default
    # stays defined in a single place (describe_image).
    extra_kwargs = {} if model is None else {"model": model}

    for scene in tqdm(scenes, desc="生成场景描述"):
        img_path = os.path.join(output_dir, scene["image"])
        scene["description"] = describe_image(img_path, client=client, **extra_kwargs)
        # The image file name is internal bookkeeping; omit it from the result.
        scene.pop("image", None)

    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(scenes, f, indent=4, ensure_ascii=False)

    print(f"✅ 完成:{output_json}")
|
155
|
+
|
156
|
+
|
157
|
+
class SceneDetector(object):
    """Run the scene pipeline for one video: detect, tile frames, describe.

    Attributes are plain values set in ``__init__``:
        client: Client object handed to ``fill_descriptions``.
        video_file: Path of the video to process.
        pre_processing: Whether ``pre_process`` is called at construction.
        temp_json: Path of the intermediate JSON template.
        final_json: Path of the final JSON with descriptions.
    """

    def __init__(
        self,
        client,
        video_file,
        pre_processing: bool = False,
        temp_json: str = None,
        final_json: str = None
    ):
        """Store the configuration and derive default JSON paths.

        Args:
            client: Client object used later to describe images.
            video_file: Path of the video (``str`` or ``Path``).
            pre_processing: If true, ``pre_process`` runs immediately.
            temp_json: Template JSON path; defaults to
                ``<video_file>.tmp.json`` when empty or ``None``.
            final_json: Result JSON path; defaults to
                ``<video_file>.final.json`` when empty or ``None``.
        """
        self.client = client
        self.video_file = video_file
        self.pre_processing = pre_processing
        # Legacy alias: the attribute was originally spelled with three s's.
        self.pre_processsing = pre_processing
        # f-strings instead of ``+`` so a Path video_file also works.
        self.temp_json = temp_json or f"{video_file}.tmp.json"
        self.final_json = final_json or f"{video_file}.final.json"

        if self.pre_processing:
            self.pre_process()

    def pre_process(self):
        """Hook for video pre-processing; currently does nothing."""
        pass

    def detect(
        self,
        out_dir
    ):
        """Run the full pipeline and write the final JSON.

        Steps: detect scenes with the CLI, read scene start frames from the
        CSV, write one grid image per scene into ``out_dir``, write the JSON
        template, then fill in the descriptions.

        Args:
            out_dir: Directory for the scene CSV and the grid images.

        Returns:
            The path of the final JSON file (``self.final_json``).
        """
        output_csv = detect_scenes_cli(self.video_file, out_dir)
        starts = read_start_frames_from_csv(output_csv)
        extract_frames_with_cv(self.video_file, starts, out_dir)
        generate_json_template(starts, out_dir, self.temp_json)
        # Keyword arguments keep the call aligned with the
        # (client, input_json, output_json, output_dir) signature.
        fill_descriptions(
            client=self.client,
            input_json=self.temp_json,
            output_json=self.final_json,
            output_dir=out_dir
        )
        return self.final_json
|
198
|
+
|
@@ -1,5 +1,5 @@
|
|
1
1
|
hdl/__init__.py,sha256=GffnD0jLJdhkd-vo989v40N90sQbofkayRBwxc6TVhQ,72
|
2
|
-
hdl/_version.py,sha256=
|
2
|
+
hdl/_version.py,sha256=HakMWBDNRn2Lw-dDW5Jqd_KgJ7xBCMwyZo_L2WGE1yM,513
|
3
3
|
hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
|
5
5
|
hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -142,10 +142,12 @@ hdl/utils/llm/vis.py,sha256=KuypafAuU4MEzJ5ywvopI0RcYjnyF_WKM_0oDUKbr8A,28808
|
|
142
142
|
hdl/utils/llm/visrag.py,sha256=0i-VrxqgiV-J7R3VPshu9oc7-rKjFJOldYik3HDXj6M,10176
|
143
143
|
hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
144
144
|
hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
|
145
|
+
hdl/utils/vis_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
146
|
+
hdl/utils/vis_tools/scene_detect.py,sha256=T9mQbxbZoeZZIqm8scXSg9-jvtKPzqyiVNtc_ykhxZE,6471
|
145
147
|
hdl/utils/weather/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
146
148
|
hdl/utils/weather/weather.py,sha256=k11o6wM15kF8b9NMlEfrg68ak-SfSYLN3nOOflFUv-I,4381
|
147
|
-
hjxdl-0.3.
|
148
|
-
hjxdl-0.3.
|
149
|
-
hjxdl-0.3.
|
150
|
-
hjxdl-0.3.
|
151
|
-
hjxdl-0.3.
|
149
|
+
hjxdl-0.3.48.dist-info/licenses/LICENSE,sha256=lkMiSbeZHBQLB9LJEkS9-L3Z-LBC4yGnKrzHSG8RkPM,2599
|
150
|
+
hjxdl-0.3.48.dist-info/METADATA,sha256=s9uYj4yBltF7eBP1yy3kgcyJmDUEB2Nm-CQSzWsqkBY,1332
|
151
|
+
hjxdl-0.3.48.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
152
|
+
hjxdl-0.3.48.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
|
153
|
+
hjxdl-0.3.48.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|