hjxdl 0.3.47__py3-none-any.whl → 0.3.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hdl/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.3.47'
21
- __version_tuple__ = version_tuple = (0, 3, 47)
20
+ __version__ = version = '0.3.49'
21
+ __version_tuple__ = version_tuple = (0, 3, 49)
File without changes
@@ -0,0 +1,198 @@
1
+ import os
2
+ import json
3
+ import base64
4
+ from pathlib import Path
5
+ # from PIL import Image
6
+ # import openai
7
+ # from scenedetect import VideoManager, SceneManager
8
+ # from scenedetect.detectors import ContentDetector
9
+ import cv2
10
+ from tqdm import tqdm
11
+ import pandas as pd
12
+ import subprocess
13
+
14
+
15
def detect_scenes_cli(video_path, output_dir, threshold=20):
    """Run the ``scenedetect`` command-line tool and return the scene CSV path.

    The command executed is
    ``scenedetect -i <video> detect-content --threshold=<threshold>
    list-scenes -o <output_dir>``.

    Args:
        video_path: Location of the input video (``str`` or path-like).
        output_dir: Directory handed to ``list-scenes -o``.
        threshold: Value forwarded to ``detect-content --threshold``.

    Returns:
        ``pathlib.Path`` pointing at ``<output_dir>/<video stem>-Scenes.csv``.
        The path is computed, not checked for existence.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    source = Path(video_path)
    target_dir = Path(output_dir)

    command = ["scenedetect", "-i", str(source)]
    command += ["detect-content", f"--threshold={threshold}"]
    command += ["list-scenes", "-o", str(target_dir)]
    subprocess.run(command, check=True)

    return target_dir / f"{source.stem}-Scenes.csv"
32
+
33
+
34
def read_start_frames_from_csv(csv_path):
    """Return the ``Start Frame`` column of a scene-list CSV as a list of ints.

    The first line of the file is skipped; the second line is used as the
    header row.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        List of Python ``int`` values, one per data row.
    """
    scene_table = pd.read_csv(csv_path, skiprows=1)
    start_column = scene_table["Start Frame"]
    return start_column.astype(int).tolist()
37
+
38
+
39
def extract_frames_with_cv(video_path, frame_numbers, output_dir, grid_size=(3, 3)):
    """Write one tiled preview image per scene, built from evenly spaced frames.

    Scene ``i`` spans from ``frame_numbers[i]`` to the next entry of
    ``frame_numbers`` (or to ``total_frames - 1`` for the last scene).
    ``grid_w * grid_h`` frames are sampled between ``start + 1`` and
    ``end - 1``, concatenated row by row, and saved as
    ``<output_dir>/scene_<i:03d>.jpg``. Scenes that are too short, or whose
    frames cannot be read, are skipped with a printed warning; tiling or
    writing failures are printed and do not stop the loop.

    Args:
        video_path: Video to read (``str`` or path-like).
        frame_numbers: Sequence of scene start frames, in order.
        output_dir: Directory for the images; created (with parents) if
            missing.
        grid_size: ``(grid_w, grid_h)`` — tiles per row and number of rows.

    Raises:
        ValueError: If ``grid_size`` describes fewer than one tile.
    """
    grid_w, grid_h = grid_size
    num_required = grid_w * grid_h
    if num_required < 1:
        raise ValueError(f"grid_size must describe at least one tile, got {grid_size!r}")

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # str() so that pathlib.Path inputs are accepted as well as plain strings.
    cap = cv2.VideoCapture(str(video_path))
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        for i, start_frame in tqdm(enumerate(frame_numbers), total=len(frame_numbers), desc="抽取关键帧"):
            end_frame = frame_numbers[i + 1] if i < len(frame_numbers) - 1 else total_frames - 1

            available_range = end_frame - start_frame
            if available_range <= num_required + 2:
                print(f"⚠️ 场景 {i} 太短(帧数不足 {num_required + 2}),跳过")
                continue

            # Sample num_required evenly spaced frames from start+1 to end-1.
            # A single-tile grid has no spacing; guarding it avoids the
            # division by zero the plain formula would hit.
            if num_required > 1:
                step = (end_frame - start_frame - 2) / (num_required - 1)
            else:
                step = 0
            selected_frames = [int(start_frame + 1 + round(j * step)) for j in range(num_required)]

            frames = []
            for frame_num in selected_frames:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret, frame = cap.read()
                if not ret:
                    print(f"⚠️ 无法读取帧 {frame_num},跳过场景 {i}")
                    frames = []
                    break
                frames.append(frame)

            if len(frames) != num_required:
                print(f"⚠️ 场景 {i} 抽帧失败,不完整,跳过")
                continue

            # Tile the frames into a grid_w x grid_h image.
            try:
                rows = [cv2.hconcat(frames[y * grid_w:(y + 1) * grid_w]) for y in range(grid_h)]
                grid_image = cv2.vconcat(rows)
                output_img = str(out_dir / f"scene_{i:03d}.jpg")
                # imwrite reports failure through its return value rather
                # than by raising, so surface it via the handler below.
                if not cv2.imwrite(output_img, grid_image):
                    raise OSError(f"cv2.imwrite could not write {output_img}")
            except Exception as e:
                print(f"❌ 拼图失败 场景 {i}:{e}")
    finally:
        # Release the capture even if something above raised.
        cap.release()
83
+
84
+
85
def generate_json_template(frame_numbers, output_dir, output_path):
    """Write a JSON list holding one placeholder record per scene.

    Record ``i`` contains ``start_frame`` (the i-th entry of
    ``frame_numbers``), ``image`` (``scene_<i:03d>.jpg``) and a placeholder
    ``description`` string.

    Args:
        frame_numbers: Iterable of scene start frames.
        output_dir: Accepted for interface compatibility; not used here.
        output_path: Destination JSON file (UTF-8, 4-space indent,
            non-ASCII characters written as-is).
    """
    template = [
        {
            "start_frame": start,
            "image": f"scene_{index:03d}.jpg",
            "description": "请描述这张图代表的场景",
        }
        for index, start in enumerate(frame_numbers)
    ]
    with open(output_path, "w", encoding="utf-8") as handle:
        json.dump(template, handle, indent=4, ensure_ascii=False)
95
+
96
+
97
def describe_image(
    image_path,
    client,
    model: str = "default_model",
    sys_info: str = (
        "你是一个擅长视频场景理解的 AI,请根据提供的拼图图像,生成统一格式的视频场景描述。\n"
        "请包括:\n"
        "1. 场景中发生的主要事件或动作。\n"
        "2. 出现的人物(如有)及其行为。\n"
        "3. 场景的背景和气氛。\n"
        "请用简洁、客观、第三人称的方式描述,不要加入主观感受。\n"
        "输出格式如下:\n"
        "场景描述:XXX。"
    )
):
    """Ask a chat-completions client to describe the image at ``image_path``.

    The file is read, base64-encoded and sent as a ``data:image/jpeg`` URL
    together with a fixed user instruction and the ``sys_info`` system
    prompt, with ``max_tokens=200``.

    Args:
        image_path: Path of the image file to send.
        client: Object exposing ``chat.completions.create(...)``.
        model: Model name forwarded to the client.
        sys_info: System prompt text.

    Returns:
        The stripped text of the first choice, or the fixed failure string
        when any exception occurs (the error is printed, not raised).
    """
    try:
        with open(image_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

        user_parts = [
            {"type": "text", "text": "请根据图像生成一段客观的视频场景描述,内容包括人物、动作、背景、氛围。输出请以“场景描述:”开头。"},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
        ]
        conversation = [
            {"role": "system", "content": sys_info},
            {"role": "user", "content": user_parts},
        ]
        reply = client.chat.completions.create(
            model=model,
            messages=conversation,
            max_tokens=200,
        )
        first_message = reply.choices[0].message
        return first_message.content.strip()
    except Exception as e:
        print(f"❌ 处理图片 {image_path} 时出错: {e}")
        return "描述生成失败"
135
+
136
+
137
def fill_descriptions(client, input_json, output_json, output_dir):
    """Replace each scene's placeholder description with a generated one.

    Loads the scene list from ``input_json``; for every entry, builds the
    image path from ``output_dir`` and the entry's ``image`` value, stores
    the result of ``describe_image`` under ``description`` and removes the
    ``image`` key. The updated list is written to ``output_json``.

    Args:
        client: Client object forwarded to ``describe_image``.
        input_json: Path of the JSON file to read.
        output_json: Path of the JSON file to write.
        output_dir: Directory containing the scene images.
    """
    with open(input_json, "r", encoding="utf-8") as source:
        scenes = json.load(source)

    progress = tqdm(scenes, desc="生成场景描述")
    for entry in progress:
        image_file = os.path.join(output_dir, entry["image"])
        entry["description"] = describe_image(image_file, client=client)
        # The image name is only needed to locate the file; drop it from the output.
        entry.pop("image", None)

    with open(output_json, "w", encoding="utf-8") as sink:
        json.dump(scenes, sink, indent=4, ensure_ascii=False)

    print(f"✅ 完成:{output_json}")
155
+
156
+
157
class SceneDetector(object):
    """Run scene detection on a video and describe each scene via a client.

    ``detect`` chains the module-level helpers: ``detect_scenes_cli`` ->
    ``read_start_frames_from_csv`` -> ``extract_frames_with_cv`` ->
    ``generate_json_template`` -> ``fill_descriptions``.
    """

    def __init__(
        self,
        client,
        video_file,
        pre_processing: bool = False,
        temp_json: str = None,
        final_json: str = None
    ):
        """Store configuration and optionally run ``pre_process``.

        Args:
            client: Client object later passed to ``fill_descriptions``.
            video_file: Video to analyse (``str`` or path-like).
            pre_processing: When true, ``pre_process()`` is called at the
                end of construction.
            temp_json: Path for the intermediate JSON template; defaults to
                ``<video_file>.tmp.json``.
            final_json: Path for the final JSON; defaults to
                ``<video_file>.final.json``.
        """
        self.client = client
        self.video_file = video_file
        # `pre_processsing` (three s) is the historical spelling; it is kept
        # so existing readers of the attribute keep working.
        self.pre_processsing = pre_processing
        self.pre_processing = pre_processing

        # f-strings rather than `+` so a pathlib.Path video_file also works.
        self.temp_json = temp_json if temp_json else f"{video_file}.tmp.json"
        self.final_json = final_json if final_json else f"{video_file}.final.json"

        if self.pre_processsing:
            self.pre_process()

    def pre_process(self):
        """Hook for pre-processing; currently does nothing."""
        pass

    def detect(
        self,
        out_dir
    ):
        """Run the full pipeline, writing all artefacts under ``out_dir``.

        Args:
            out_dir: Directory for the scene CSV and the scene images. The
                JSON outputs go to ``self.temp_json`` / ``self.final_json``.
        """
        output_csv = detect_scenes_cli(self.video_file, out_dir)
        starts = read_start_frames_from_csv(output_csv)
        extract_frames_with_cv(self.video_file, starts, out_dir)
        generate_json_template(starts, out_dir, self.temp_json)
        # `client` is the first parameter of fill_descriptions; omitting it
        # shifts every argument and raises TypeError for the missing one.
        fill_descriptions(
            self.client,
            self.temp_json,
            self.final_json,
            out_dir
        )
198
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hjxdl
3
- Version: 0.3.47
3
+ Version: 0.3.49
4
4
  Summary: A collection of functions for Jupyter notebooks
5
5
  Home-page: https://github.com/huluxiaohuowa/hdl
6
6
  Author: Jianxing Hu
@@ -1,5 +1,5 @@
1
1
  hdl/__init__.py,sha256=GffnD0jLJdhkd-vo989v40N90sQbofkayRBwxc6TVhQ,72
2
- hdl/_version.py,sha256=ZwCtutJtozNZEa2Aq9ioQzQ4aWWrdXTsJpA1hSOyA5w,513
2
+ hdl/_version.py,sha256=PSVxW_w7win90ZOWca_JJ9Wafx0nL_ycCXwHfJER84E,513
3
3
  hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
5
5
  hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -142,10 +142,12 @@ hdl/utils/llm/vis.py,sha256=KuypafAuU4MEzJ5ywvopI0RcYjnyF_WKM_0oDUKbr8A,28808
142
142
  hdl/utils/llm/visrag.py,sha256=0i-VrxqgiV-J7R3VPshu9oc7-rKjFJOldYik3HDXj6M,10176
143
143
  hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
144
144
  hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
145
+ hdl/utils/vis_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
+ hdl/utils/vis_tools/scene_detect.py,sha256=xKovQmiuaAC4jYJoxtpjmv3px64yMHTXzWR42NRgrPk,6472
145
147
  hdl/utils/weather/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
148
  hdl/utils/weather/weather.py,sha256=k11o6wM15kF8b9NMlEfrg68ak-SfSYLN3nOOflFUv-I,4381
147
- hjxdl-0.3.47.dist-info/licenses/LICENSE,sha256=lkMiSbeZHBQLB9LJEkS9-L3Z-LBC4yGnKrzHSG8RkPM,2599
148
- hjxdl-0.3.47.dist-info/METADATA,sha256=lsWSSFO97xUftu98at1NJLgF4ALfH3d2vgw9HBNA4UM,1332
149
- hjxdl-0.3.47.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
150
- hjxdl-0.3.47.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
151
- hjxdl-0.3.47.dist-info/RECORD,,
149
+ hjxdl-0.3.49.dist-info/licenses/LICENSE,sha256=lkMiSbeZHBQLB9LJEkS9-L3Z-LBC4yGnKrzHSG8RkPM,2599
150
+ hjxdl-0.3.49.dist-info/METADATA,sha256=A5zl11614EcsVaTIxKto1qBZXyaCsEAfsbYywIKpvGQ,1332
151
+ hjxdl-0.3.49.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
152
+ hjxdl-0.3.49.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
153
+ hjxdl-0.3.49.dist-info/RECORD,,
File without changes