smartpi 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. smartpi/__init__.py +1 -1
  2. smartpi/onnx_text_workflow.pyc +0 -0
  3. smartpi/posenet_utils.pyc +0 -0
  4. smartpi/rknn_text_workflow.pyc +0 -0
  5. {smartpi-1.1.5.dist-info → smartpi-1.1.6.dist-info}/METADATA +2 -3
  6. {smartpi-1.1.5.dist-info → smartpi-1.1.6.dist-info}/RECORD +8 -68
  7. smartpi/__init__.pyc +0 -0
  8. smartpi/_gui.py +0 -66
  9. smartpi/ai_asr.py +0 -1037
  10. smartpi/ai_llm.py +0 -934
  11. smartpi/ai_tts.py +0 -938
  12. smartpi/ai_vad.py +0 -83
  13. smartpi/audio.py +0 -125
  14. smartpi/base_driver.py +0 -618
  15. smartpi/camera.py +0 -84
  16. smartpi/color_sensor.py +0 -18
  17. smartpi/cw2015.py +0 -179
  18. smartpi/flash.py +0 -130
  19. smartpi/humidity.py +0 -20
  20. smartpi/led.py +0 -19
  21. smartpi/light_sensor.py +0 -72
  22. smartpi/local_model.py +0 -432
  23. smartpi/mcp_client.py +0 -100
  24. smartpi/mcp_fastmcp.py +0 -322
  25. smartpi/mcp_intent_recognizer.py +0 -408
  26. smartpi/models/__init__.pyc +0 -0
  27. smartpi/models/snakers4_silero-vad/__init__.pyc +0 -0
  28. smartpi/models/snakers4_silero-vad/hubconf.pyc +0 -0
  29. smartpi/motor.py +0 -177
  30. smartpi/move.py +0 -218
  31. smartpi/onnx_hand_workflow.py +0 -201
  32. smartpi/onnx_image_workflow.py +0 -176
  33. smartpi/onnx_pose_workflow.py +0 -482
  34. smartpi/onnx_text_workflow.py +0 -173
  35. smartpi/onnx_voice_workflow.py +0 -437
  36. smartpi/posemodel/__init__.pyc +0 -0
  37. smartpi/posenet_utils.py +0 -222
  38. smartpi/rknn_hand_workflow.py +0 -245
  39. smartpi/rknn_image_workflow.py +0 -405
  40. smartpi/rknn_pose_workflow.py +0 -592
  41. smartpi/rknn_text_workflow.py +0 -240
  42. smartpi/rknn_voice_workflow.py +0 -394
  43. smartpi/servo.py +0 -178
  44. smartpi/temperature.py +0 -18
  45. smartpi/tencentcloud-speech-sdk-python/__init__.pyc +0 -0
  46. smartpi/tencentcloud-speech-sdk-python/asr/__init__.pyc +0 -0
  47. smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.pyc +0 -0
  48. smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.pyc +0 -0
  49. smartpi/tencentcloud-speech-sdk-python/common/__init__.pyc +0 -0
  50. smartpi/tencentcloud-speech-sdk-python/common/credential.pyc +0 -0
  51. smartpi/tencentcloud-speech-sdk-python/common/log.pyc +0 -0
  52. smartpi/tencentcloud-speech-sdk-python/common/utils.pyc +0 -0
  53. smartpi/tencentcloud-speech-sdk-python/soe/__init__.pyc +0 -0
  54. smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.pyc +0 -0
  55. smartpi/tencentcloud-speech-sdk-python/tts/__init__.pyc +0 -0
  56. smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.pyc +0 -0
  57. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.pyc +0 -0
  58. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.pyc +0 -0
  59. smartpi/tencentcloud-speech-sdk-python/vc/__init__.pyc +0 -0
  60. smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.pyc +0 -0
  61. smartpi/text_gte_model/__init__.pyc +0 -0
  62. smartpi/text_gte_model/config/__init__.pyc +0 -0
  63. smartpi/text_gte_model/gte/__init__.pyc +0 -0
  64. smartpi/touch_sensor.py +0 -16
  65. smartpi/trace.py +0 -120
  66. smartpi/ultrasonic.py +0 -20
  67. {smartpi-1.1.5.dist-info → smartpi-1.1.6.dist-info}/WHEEL +0 -0
  68. {smartpi-1.1.5.dist-info → smartpi-1.1.6.dist-info}/top_level.txt +0 -0
@@ -1,240 +0,0 @@
1
- import numpy as np
2
- import onnxruntime as ort
3
- import onnx
4
- import json
5
- import os
6
- import time
7
- from transformers import AutoTokenizer
8
- from rknnlite.api import RKNNLite
9
-
10
# Absolute directory containing this file; used to resolve the bundled assets.
current_dir = os.path.dirname(os.path.abspath(__file__))
# Default paths for the bundled GTE feature model (ONNX) and its tokenizer config.
default_feature_model = os.path.join(current_dir, 'text_gte_model', 'gte', 'gte_model.onnx')
default_tokenizer_path = os.path.join(current_dir, 'text_gte_model', 'config')
15
-
16
-
17
class TextClassificationWorkflow:
    """Text classification pipeline: ONNX GTE embeddings + RKNN classifier head.

    The GTE feature extractor runs on CPU through onnxruntime; the
    classification head runs on the NPU through RKNNLite. ``predict`` returns
    both the raw probability vectors and per-text formatted result dicts
    (including timing information).
    """

    def __init__(self, class_model_path, feature_model_path=None, tokenizer_path=None):
        """
        Args:
            class_model_path: path to the RKNN classification model (.rknn).
            feature_model_path: optional ONNX GTE model path; the bundled
                default is used when None.
            tokenizer_path: optional tokenizer config directory; the bundled
                default is used when None.
        """
        # Fall back to the bundled defaults when explicit paths are not given.
        self.feature_model_path = feature_model_path or default_feature_model
        self.tokenizer_path = tokenizer_path or default_tokenizer_path
        self.class_model_path = class_model_path
        # Load the tokenizer strictly from local files (no network access).
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.tokenizer_path,
            local_files_only=True
        )

        # Feature-extraction model (kept as ONNX).
        self.feature_session = ort.InferenceSession(self.feature_model_path)
        # FIX: loop variable renamed from `input` so the builtin is not shadowed.
        self.feature_input_names = [inp.name for inp in self.feature_session.get_inputs()]

        # Classification model (RKNN on the NPU).
        self.class_rknn = RKNNLite()
        self._load_rknn_class_model(class_model_path)

        # Class labels from metadata.
        self.label_names = self._load_metadata(class_model_path)
        print(f"分类模型加载成功,共 {len(self.label_names)} 个类别: {self.label_names}")

    def _load_rknn_class_model(self, model_path):
        """Load the RKNN classification model and initialise its NPU runtime.

        Raises:
            RuntimeError: when loading the model or initialising the runtime fails.
        """
        try:
            ret = self.class_rknn.load_rknn(model_path)
            if ret != 0:
                raise RuntimeError(f'加载分类RKNN模型失败 ({model_path}), 错误码: {ret}')

            ret = self.class_rknn.init_runtime()
            if ret != 0:
                raise RuntimeError(f'初始化分类模型NPU运行时失败, 错误码: {ret}')

            print(f"分类RKNN模型加载成功: {os.path.basename(model_path)}")

        except Exception as e:
            print(f"分类模型加载失败: {e}")
            raise

    def _get_metadata_path(self, model_path):
        """Return '<model>_metadata.json' next to the model, falling back to 'rknn_metadata.json'."""
        base_dir = os.path.dirname(model_path)
        base_name = os.path.basename(model_path)
        metadata_name = os.path.splitext(base_name)[0] + '_metadata.json'
        metadata_path = os.path.join(base_dir, metadata_name)

        if not os.path.exists(metadata_path):
            metadata_path = os.path.join(base_dir, 'rknn_metadata.json')

        return metadata_path

    def _load_metadata(self, model_path):
        """Load class labels for the classifier.

        Lookup order: metadata JSON file next to the model, then a sibling ONNX
        model's embedded metadata, then auto-generated ``Class_<i>`` names.
        """
        try:
            metadata_path = self._get_metadata_path(model_path)
            if os.path.exists(metadata_path):
                with open(metadata_path, 'r', encoding='utf-8') as f:
                    metadata = json.load(f)
                    if 'classes' in metadata:
                        return metadata['classes']
                    else:
                        print(f"元数据文件中未找到类别信息")

            # Fall back to metadata embedded in a sibling ONNX model, if any.
            onnx_model_path = os.path.splitext(model_path)[0] + '.onnx'
            if os.path.exists(onnx_model_path):
                onnx_model = onnx.load(onnx_model_path)
                if onnx_model.metadata_props:
                    for prop in onnx_model.metadata_props:
                        if prop.key == 'classes':
                            try:
                                return json.loads(prop.value)
                            except json.JSONDecodeError:
                                # Not JSON — treat as a comma-separated list.
                                return prop.value.split(',')

        except Exception as e:
            print(f"元数据读取错误: {e}")

        # Last resort: derive the class count from the model's output shape.
        num_classes = 10
        try:
            output_shapes = self.class_rknn.get_output_shape()
            if output_shapes and len(output_shapes) > 0:
                num_classes = output_shapes[0][-1]
        except Exception:
            # FIX: was a bare `except:`, which would also swallow
            # KeyboardInterrupt/SystemExit; shape probing is best-effort only.
            pass

        label_names = [f"Class_{i}" for i in range(num_classes)]
        print(f"警告: 未找到类别信息,使用自动生成的名称: {label_names}")
        return label_names

    def _extract_features(self, texts):
        """Tokenize `texts` and run the ONNX GTE model.

        Returns:
            float32 array of first-token embeddings, one row per input text.
        """
        inputs = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="np"
        )

        onnx_inputs = {name: inputs[name].astype(np.int64) for name in self.feature_input_names}
        onnx_outputs = self.feature_session.run(None, onnx_inputs)
        last_hidden_state = onnx_outputs[0]
        # Use the first ([CLS]-position) token embedding as the sentence feature.
        return last_hidden_state[:, 0, :].astype(np.float32)

    def _classify(self, embeddings):
        """Run the RKNN classifier on a batch of embeddings.

        Uses one batched inference when the model's input shape allows it;
        otherwise falls back to a per-sample loop.
        """
        embeddings = embeddings.astype(np.float32)
        batch_size = embeddings.shape[0]
        all_results = []

        # Probe the model input shape: a first dim of -1 (dynamic) or one that
        # matches the batch size means the whole batch can go in at once.
        try:
            input_shapes = self.class_rknn.get_input_shape()
            if input_shapes and len(input_shapes) > 0:
                if input_shapes[0][0] in (-1, batch_size):
                    class_results = self.class_rknn.inference(inputs=[embeddings])[0]
                    return class_results
        except Exception:
            # FIX: was a bare `except:`; shape probing is best-effort only.
            pass

        # Batch input unsupported: run one sample at a time.
        for i in range(batch_size):
            single_embedding = embeddings[i:i+1]  # keep shape [1, feature_dim]
            result = self.class_rknn.inference(inputs=[single_embedding])[0]
            all_results.append(result[0])  # unwrap the single-sample result

        return np.array(all_results)  # merge back into a batched result

    def predict(self, texts):
        """Classify a list of texts, measuring per-stage timings.

        Returns:
            (raw_results, formatted_results): raw per-text probability lists and
            per-text result dicts with class, confidence and timing fields.
            Both empty when `texts` is empty.
        """
        if not texts:
            return [], []

        total_start_time = time.time()

        # Feature-extraction timing.
        feature_start_time = time.time()
        embeddings = self._extract_features(texts)
        feature_time = time.time() - feature_start_time

        # Classification timing.
        classify_start_time = time.time()
        probs = self._classify(embeddings)
        classify_time = time.time() - classify_start_time

        total_time = time.time() - total_start_time

        predicted_indices = np.argmax(probs, axis=1)

        # Debug: confirm prediction count matches the input batch size.
        print(f"处理文本数量: {len(texts)}, 预测结果数量: {len(predicted_indices)}")

        raw_results = []
        formatted_results = []

        for i, (text, idx, prob_vec) in enumerate(zip(texts, predicted_indices, probs)):
            label = self.label_names[idx] if idx < len(self.label_names) else f"未知类别 {idx}"
            confidence = float(prob_vec[idx])

            raw_results.append(prob_vec.tolist())
            formatted_results.append({
                'text': text,
                'class': label,
                'confidence': confidence,
                'class_id': int(idx),
                'probabilities': prob_vec.tolist(),
                # Timing info; text has no image-style preprocessing step.
                'preprocess_time': 0.0,
                'feature_extract_time': feature_time / len(texts),  # averaged per text
                'inference_time': classify_time / len(texts),  # averaged per text
                'total_time': total_time / len(texts)  # averaged per text
            })

        return raw_results, formatted_results

    def release(self):
        """Release the RKNN NPU resources held by the classifier."""
        if hasattr(self, 'class_rknn') and self.class_rknn:
            self.class_rknn.release()
            print("RKNN分类模型资源已释放")

    def __del__(self):
        # Best-effort cleanup; release() guards against a partially built instance.
        self.release()
207
-
208
-
209
# Usage example
if __name__ == "__main__":
    # Only the classifier path is required; the GTE model and tokenizer
    # fall back to the bundled defaults. Replace with a real model path.
    classifier = TextClassificationWorkflow(class_model_path="./model.rknn")

    # Two sample texts to classify.
    sample_texts = ["强大", "再见"]

    raw_results, predictions = classifier.predict(sample_texts)

    # Report every prediction.
    print("\n所有预测结果:")
    for i, result in enumerate(predictions):
        print(f"样本 {i+1}:")
        print(f"  文本: {result['text']}")
        print(f"  分类: {result['class']}")
        print(f"  置信度: {result['confidence']:.4f}")
        print(f"  类别ID: {result['class_id']}")
        print(f"  概率分布: {result['probabilities']}")
        print("---")

    # Free the NPU resources.
    classifier.release()
@@ -1,394 +0,0 @@
1
- import numpy as np
2
- import librosa
3
- import os
4
- import json
5
- import time
6
- from rknnlite.api import RKNNLite
7
-
8
-
9
class Workflow:
    """Audio classification workflow running a .rknn model on the NPU.

    Audio is converted to a log-magnitude STFT spectrogram, split into
    [232 features x 43 frames] blocks, normalised per block, classified block
    by block, and the per-block results are aggregated into one final label.
    """

    def __init__(self, model_path=None, smoothing_time_constant=0, step_size=43):
        """
        Args:
            model_path: optional path to the .rknn model; loaded immediately
                when given.
            smoothing_time_constant: exponential smoothing factor used by
                _apply_temporal_smoothing (0 disables smoothing).
            step_size: hop, in frames, between consecutive analysis blocks.
        """
        self.rknn_lite = None
        self.classes = []  # class labels only
        self.metadata = {}
        self.model_params = {
            'fft_size': 2048,
            'sample_rate': 44100,
            'num_frames': 43,  # frames per block
            'spec_features': 232  # number of spectral features
        }
        self.smoothing_time_constant = smoothing_time_constant
        self.step_size = step_size
        self.frame_duration = None
        self.hop_length = 735  # 44100/60=735 (frame duration ~16.67ms)
        self.previous_spec = None
        self.input_shape = [1, 232, 1, 43]  # RKNN model input shape

        if model_path:
            self.load_model(model_path)

        # Derived frame/block timing information.
        self.frame_duration = self.hop_length / self.model_params['sample_rate']
        self.block_duration = self.model_params['num_frames'] * self.frame_duration

    def load_model(self, model_path):
        """Load the RKNN model and parse only the `classes` metadata.

        NOTE(review): failures are printed, not re-raised — callers must check
        ``self.rknn_lite`` before running inference.
        """
        try:
            self.rknn_lite = RKNNLite()

            # Load the RKNN model.
            ret = self.rknn_lite.load_rknn(model_path)
            if ret != 0:
                raise RuntimeError(f'加载RKNN模型失败, 错误码: {ret}')

            # Initialise the NPU runtime.
            ret = self.rknn_lite.init_runtime()
            if ret != 0:
                raise RuntimeError(f'初始化NPU运行时失败, 错误码: {ret}')

            # Load metadata (classes only).
            metadata_path = self._get_metadata_path(model_path)
            self._load_metadata(metadata_path)

            print(f"使用指定输入形状: {self.input_shape}")

        except Exception as e:
            print(f"模型加载失败: {e}")

    def _get_metadata_path(self, model_path):
        """Return '<model>_metadata.json' next to the model, else 'rknn_metadata.json'."""
        base_dir = os.path.dirname(model_path)
        base_name = os.path.basename(model_path)
        metadata_name = os.path.splitext(base_name)[0] + '_metadata.json'
        metadata_path = os.path.join(base_dir, metadata_name)

        if not os.path.exists(metadata_path):
            metadata_path = os.path.join(base_dir, 'rknn_metadata.json')

        return metadata_path

    def _load_metadata(self, metadata_path):
        """Load only the `classes` list from the metadata JSON file."""
        self.classes = []  # start from an empty label list
        try:
            if os.path.exists(metadata_path):
                with open(metadata_path, 'r', encoding='utf-8') as f:
                    metadata = json.load(f)
                    # Extract classes only; ignore any other metadata.
                    self.classes = metadata.get('classes', [])
                    print(f"从 {metadata_path} 加载类别标签成功,共 {len(self.classes)} 个类别")
            else:
                print(f"元数据文件 {metadata_path} 不存在,将使用空类别标签")
        except Exception as e:
            print(f"加载元数据失败: {e},将使用空类别标签")

    def _apply_hann_window(self, frame):
        """Apply a Hann window to one analysis frame."""
        return frame * np.hanning(len(frame))

    def _apply_temporal_smoothing(self, current_spec):
        """Exponentially smooth the spectrum over time using `smoothing_time_constant`."""
        if self.previous_spec is None:
            self.previous_spec = current_spec
            return current_spec

        smoothed = (self.smoothing_time_constant * self.previous_spec
                    + (1 - self.smoothing_time_constant) * current_spec)

        self.previous_spec = smoothed.copy()
        return smoothed

    def _load_audio(self, audio_path):
        """Load an audio file (.wav or .webm) as mono samples at the model sample rate.

        Returns:
            (samples, sample_rate) with samples as float values in roughly [-1, 1].

        Raises:
            ImportError: webm requested but pydub is not installed.
            ValueError: unsupported file extension.
        """
        ext = os.path.splitext(audio_path)[1].lower()

        if ext == '.wav':
            audio, sr = librosa.load(audio_path, sr=self.model_params['sample_rate'])
            return audio, sr

        elif ext == '.webm':
            try:
                from pydub import AudioSegment
            except ImportError:
                raise ImportError("处理webm格式需要pydub库,请先安装:pip install pydub")

            try:
                audio_segment = AudioSegment.from_file(audio_path, format='webm')
                audio_segment = audio_segment.set_channels(1).set_frame_rate(self.model_params['sample_rate'])
                samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
                return samples / 32768.0, self.model_params['sample_rate']  # normalise to [-1, 1]
            except FileNotFoundError as e:
                # pydub raises FileNotFoundError when ffmpeg/avconv is missing.
                if 'ffmpeg' in str(e).lower() or 'avconv' in str(e).lower():
                    print("\n" + "="*60)
                    print("检测到错误:缺少ffmpeg支持,无法处理webm格式音频")
                    print("="*60)
                    print("请安装ffmpeg后重试(参考之前的安装指南)")
                    print("="*60 + "\n")
                    raise
                else:
                    raise
            except Exception as e:
                print(f"处理webm音频时发生错误:{str(e)}")
                raise

        else:
            raise ValueError(f"不支持的音频格式: {ext},目前支持 .wav 和 .webm")

    def _preprocess_audio(self, audio_path):
        """Preprocess an audio file into a [232, time_frames] decibel spectrogram."""
        audio, sr = self._load_audio(audio_path)
        assert sr == self.model_params['sample_rate'], f"采样率不匹配,需要 {self.model_params['sample_rate']}Hz"

        # STFT parameters.
        hop_length = self.hop_length
        win_length = self.model_params['fft_size']
        n_fft = self.model_params['fft_size']

        # Frame the signal and apply the Hann window frame by frame.
        frames = librosa.util.frame(audio, frame_length=win_length, hop_length=hop_length)
        windowed_frames = np.zeros_like(frames)
        for i in range(frames.shape[1]):
            windowed_frames[:, i] = self._apply_hann_window(frames[:, i])

        # FFT -> magnitude -> decibels.
        D = np.fft.rfft(windowed_frames, n=n_fft, axis=0)
        magnitude = np.abs(D)
        db = 20 * np.log10(np.maximum(1e-5, magnitude))  # avoid log(0)

        # Keep only the first `spec_features` bins: [232, time_frames].
        return db[:self.model_params['spec_features'], :]

    def preprocess_audio_segment(self, audio_segment):
        """Preprocess an in-memory audio segment into a [232, time_frames] dB spectrogram."""
        # Same STFT logic as _preprocess_audio.
        hop_length = self.hop_length
        win_length = self.model_params['fft_size']
        n_fft = self.model_params['fft_size']

        frames = librosa.util.frame(audio_segment, frame_length=win_length, hop_length=hop_length)
        windowed_frames = np.zeros_like(frames)
        for i in range(frames.shape[1]):
            windowed_frames[:, i] = self._apply_hann_window(frames[:, i])

        D = np.fft.rfft(windowed_frames, n=n_fft, axis=0)
        magnitude = np.abs(D)
        db = 20 * np.log10(np.maximum(1e-5, magnitude))

        return db[:self.model_params['spec_features'], :]

    def _extract_blocks(self, full_spec):
        """Slice the full spectrogram into [232, 43] blocks every `step_size` frames.

        Returns:
            (blocks, start_indices): block arrays and their starting frame indices.
        """
        total_time_frames = full_spec.shape[1]
        blocks = []
        start_indices = []

        num_blocks = (total_time_frames - self.model_params['num_frames']) // self.step_size + 1

        for i in range(num_blocks):
            start = i * self.step_size
            end = start + self.model_params['num_frames']
            block = full_spec[:, start:end]  # [232, 43] slice

            # Zero-pad blocks shorter than 43 frames.
            if block.shape[1] < self.model_params['num_frames']:
                padded = np.zeros((self.model_params['spec_features'], self.model_params['num_frames']))
                padded[:, :block.shape[1]] = block
                block = padded

            blocks.append(block)
            start_indices.append(start)

        return blocks, start_indices

    def _normalize(self, spec):
        """Normalise a block using only its own mean and standard deviation."""
        epsilon = 1e-8
        mean = np.mean(spec)
        variance = np.var(spec)
        std = np.sqrt(variance)
        return ((spec - mean) / (std + epsilon)).astype(np.float32)

    def inference(self, audio_path, model_path=None):
        """Classify an audio file block by block.

        Returns:
            (block_results, final_result): per-block dicts and the aggregated result.
        """
        if model_path and not self.rknn_lite:
            self.load_model(model_path)

        full_spec = self._preprocess_audio(audio_path)  # shape [232, total_frames]
        blocks, start_indices = self._extract_blocks(full_spec)

        block_results = []

        print(f"开始处理音频: {audio_path}")
        print(f"总帧数: {full_spec.shape[1]}, 总时长: {full_spec.shape[1] * self.frame_duration:.2f}秒")
        print(f"将处理 {len(blocks)} 个块 (每块 {self.model_params['num_frames']}帧 = {self.block_duration:.3f}秒)")
        print("=" * 60)

        for i, block in enumerate(blocks):
            start_time = time.time()

            # Normalise and reshape to the model input [1, 232, 1, 43].
            normalized_block = self._normalize(block)
            input_tensor = normalized_block[:, np.newaxis, :]  # [232, 1, 43]
            input_tensor = input_tensor[np.newaxis, ...]  # [1, 232, 1, 43]

            # RKNN inference.
            outputs = self.rknn_lite.inference(inputs=[input_tensor])
            raw_output = outputs[0][0]
            result = self._format_output(raw_output)

            # Record the per-block result and timing.
            process_time = time.time() - start_time
            start_frame = start_indices[i]
            end_frame = start_frame + self.model_params['num_frames']
            start_time_sec = start_frame * self.frame_duration
            end_time_sec = end_frame * self.frame_duration

            block_results.append({
                'block_index': i,
                'start_time': start_time_sec,
                'end_time': end_time_sec,
                'process_time': process_time,
                'result': result,
                'raw_output': raw_output
            })

            print(f"块 #{i+1} [时间: {start_time_sec:.2f}-{end_time_sec:.2f}s]")
            print(f"  分类: {result['class']}, 置信度: {result['confidence']}%")
            print(f"  处理时间: {process_time * 1000:.2f}ms")
            print("-" * 50)

        final_result = self._aggregate_results(block_results)
        return block_results, final_result

    def process_audio_segment(self, audio_segment):
        """Process an in-memory audio segment (real-time path) with timing measurement.

        Returns:
            (block_results, final_result); timing fields are attached to the
            final result when one exists.

        Raises:
            ValueError: no model has been loaded yet.
        """
        if not self.rknn_lite:
            raise ValueError("请先加载模型")

        total_start_time = time.time()

        # Preprocessing timing.
        preprocess_start_time = time.time()
        full_spec = self.preprocess_audio_segment(audio_segment)
        blocks, start_indices = self._extract_blocks(full_spec)
        preprocess_time = time.time() - preprocess_start_time

        block_results = []
        inference_time = 0.0

        for i, block in enumerate(blocks):
            # Normalisation timing.
            normalize_start_time = time.time()
            normalized_block = self._normalize(block)
            input_tensor = normalized_block[:, np.newaxis, :][np.newaxis, ...]  # [1, 232, 1, 43]
            normalize_time = time.time() - normalize_start_time

            # Inference timing.
            inference_start_time = time.time()
            outputs = self.rknn_lite.inference(inputs=[input_tensor])
            block_inference_time = time.time() - inference_start_time
            inference_time += block_inference_time

            raw_output = outputs[0][0]
            result = self._format_output(raw_output)

            start_frame = start_indices[i]
            end_frame = start_frame + self.model_params['num_frames']
            start_time_sec = start_frame * self.frame_duration
            end_time_sec = end_frame * self.frame_duration

            block_results.append({
                'block_index': i,
                'start_time': start_time_sec,
                'end_time': end_time_sec,
                'result': result,
                'raw_output': raw_output,
                'normalize_time': normalize_time,
                'inference_time': block_inference_time
            })

        final_result = self._aggregate_results(block_results)

        # Total wall-clock time.
        total_time = time.time() - total_start_time

        # Attach timing info when a final result exists.
        if final_result:
            final_result['preprocess_time'] = preprocess_time
            final_result['inference_time'] = inference_time
            final_result['total_time'] = total_time

        return block_results, final_result

    def _format_output(self, predictions):
        """Format a raw prediction vector into {class, confidence, probabilities}."""
        class_idx = np.argmax(predictions)
        confidence = int(predictions[class_idx] * 100)
        # Fall back to the class index when no label list is available.
        label = self.classes[class_idx] if (self.classes and class_idx < len(self.classes)) else f"类别{class_idx}"
        return {
            'class': label,
            'confidence': confidence,
            'probabilities': predictions.tolist()
        }

    def _aggregate_results(self, block_results):
        """Aggregate per-block results into a single final classification.

        Exactly two blocks: highest confidence wins; otherwise majority vote.
        Returns None when there are no results.
        """
        if len(block_results) == 2:
            # With exactly two blocks, take the higher-confidence one.
            best_result = max(block_results, key=lambda x: x['result']['confidence'])
            return {
                'class': best_result['result']['class'],
                'confidence': best_result['result']['confidence'],
                'occurrence_percentage': 100.0,
                'total_blocks': len(block_results),
                'class_distribution': {best_result['result']['class']: 1},
                'aggregation_method': 'highest_confidence'
            }

        # Count occurrences and track the max confidence per class.
        class_counts = {}
        max_confidence = {}
        for result in block_results:
            cls = result['result']['class']
            conf = result['result']['confidence']
            class_counts[cls] = class_counts.get(cls, 0) + 1
            if cls not in max_confidence or conf > max_confidence[cls]:
                max_confidence[cls] = conf

        if not class_counts:
            return None

        # Majority vote decides the final class.
        most_common_cls = max(class_counts.items(), key=lambda x: x[1])[0]
        count = class_counts[most_common_cls]
        return {
            'class': most_common_cls,
            'confidence': max_confidence[most_common_cls],
            'occurrence_percentage': (count / len(block_results)) * 100,
            'total_blocks': len(block_results),
            'class_distribution': class_counts,
            'aggregation_method': 'majority_vote'
        }

    def release(self):
        """Release RKNN NPU resources."""
        if hasattr(self, 'rknn_lite') and self.rknn_lite:
            self.rknn_lite.release()
            print("RKNN资源已释放")

    def __del__(self):
        """Destructor: release resources automatically."""
        self.release()
- self.release()
383
-
384
-
385
# Usage example
if __name__ == "__main__":
    # Build the workflow from a .rknn model (replace with a real path).
    audio_workflow = Workflow("audio_model.rknn")

    # Run inference on an audio file (replace with a real path).
    block_results, result = audio_workflow.inference("test_audio.wav")

    # Report the aggregated classification.
    print("\n最终结果:")
    print(f"分类: {result['class']}, 置信度: {result['confidence']}%")
    print(f"在 {result['total_blocks']} 个块中出现比例: {result['occurrence_percentage']:.2f}%")