npcpy 1.2.34__py3-none-any.whl → 1.2.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
npcpy/data/audio.py CHANGED
@@ -175,6 +175,41 @@ def run_transcription(audio_np):
175
175
  return None
176
176
 
177
177
 
178
def transcribe_audio_file(file_path: str, language=None) -> str:
    """
    Transcribe an audio file to text using whichever local backend imports:
    faster-whisper first, then openai/whisper. Returns "" when no backend
    is available or nothing was transcribed.
    """
    # Backend 1: faster-whisper, on CUDA when torch reports a usable GPU.
    try:
        from faster_whisper import WhisperModel  # type: ignore

        compute_device = "cpu"
        try:
            import torch  # type: ignore
            if torch.cuda.is_available():
                compute_device = "cuda"
        except Exception:
            pass

        fw_model = WhisperModel("small", device=compute_device)
        fw_segments, _ = fw_model.transcribe(file_path, language=language, beam_size=5)
        pieces = [seg.text.strip() for seg in fw_segments if seg.text]
        joined = " ".join(pieces).strip()
        if joined:
            return joined
    except Exception:
        pass

    # Backend 2: the reference openai/whisper implementation.
    try:
        import whisper  # type: ignore

        ow_model = whisper.load_model("small")
        ow_result = ow_model.transcribe(file_path, language=language)
        joined = ow_result.get("text", "").strip()
        if joined:
            return joined
    except Exception:
        pass

    # No backend produced text.
    return ""
211
+
212
+
178
213
 
179
214
  def load_history():
180
215
  global history
@@ -431,4 +466,3 @@ def process_text_for_tts(text):
431
466
  text = re.sub(r"([.!?])(\w)", r"\1 \2", text)
432
467
  return text
433
468
 
434
-
npcpy/data/load.py CHANGED
@@ -4,8 +4,10 @@ import json
4
4
  import io
5
5
  from PIL import Image
6
6
  import numpy as np
7
- from typing import Optional
7
+ from typing import Optional, List
8
8
  import os
9
+ import tempfile
10
+ import subprocess
9
11
 
10
12
  try:
11
13
  from docx import Document
@@ -90,12 +92,17 @@ extension_map = {
90
92
  "JPEG": "images",
91
93
  "GIF": "images",
92
94
  "SVG": "images",
95
+ "WEBP": "images",
96
+ "BMP": "images",
97
+ "TIFF": "images",
93
98
  "MP4": "videos",
94
99
  "AVI": "videos",
95
100
  "MOV": "videos",
96
101
  "WMV": "videos",
97
102
  "MPG": "videos",
98
103
  "MPEG": "videos",
104
+ "WEBM": "videos",
105
+ "MKV": "videos",
99
106
  "DOCX": "documents",
100
107
  "PPTX": "documents",
101
108
  "PDF": "documents",
@@ -105,6 +112,12 @@ extension_map = {
105
112
  "MD": "documents",
106
113
  "HTML": "documents",
107
114
  "HTM": "documents",
115
+ "MP3": "audio",
116
+ "WAV": "audio",
117
+ "M4A": "audio",
118
+ "AAC": "audio",
119
+ "FLAC": "audio",
120
+ "OGG": "audio",
108
121
  "ZIP": "archives",
109
122
  "RAR": "archives",
110
123
  "7Z": "archives",
@@ -112,6 +125,136 @@ extension_map = {
112
125
  "GZ": "archives",
113
126
  }
114
127
 
128
+ def _chunk_text(full_content: str, chunk_size: int) -> List[str]:
129
+ """Split long content into reasonably sized chunks for model input."""
130
+ chunks = []
131
+ for i in range(0, len(full_content), chunk_size):
132
+ chunk = full_content[i:i+chunk_size].strip()
133
+ if chunk:
134
+ chunks.append(chunk)
135
+ return chunks
136
+
137
+ def _transcribe_audio(file_path: str, language: Optional[str] = None) -> str:
138
+ """
139
+ Best-effort audio transcription using optional dependencies.
140
+ Tries faster-whisper, then openai/whisper. Falls back to metadata only.
141
+ """
142
+ # Prefer the existing audio module helper if present
143
+ try:
144
+ from npcpy.data.audio import transcribe_audio_file # type: ignore
145
+ text = transcribe_audio_file(file_path, language=language)
146
+ if text:
147
+ return text
148
+ except Exception:
149
+ pass
150
+
151
+ # Try faster-whisper first
152
+ try:
153
+ from faster_whisper import WhisperModel
154
+ try:
155
+ import torch
156
+ device = "cuda" if torch.cuda.is_available() else "cpu"
157
+ except Exception:
158
+ device = "cpu"
159
+ model = WhisperModel("small", device=device)
160
+ segments, _ = model.transcribe(file_path, language=language, beam_size=5)
161
+ return " ".join(seg.text.strip() for seg in segments if seg.text).strip()
162
+ except Exception:
163
+ pass
164
+
165
+ # Fallback: openai/whisper
166
+ try:
167
+ import whisper
168
+ model = whisper.load_model("small")
169
+ result = model.transcribe(file_path, language=language)
170
+ return result.get("text", "").strip()
171
+ except Exception:
172
+ pass
173
+
174
+ # Last resort metadata message
175
+ return f"[Audio file at {file_path}; install faster-whisper or whisper for transcription]"
176
+
177
def load_audio(file_path: str, language: Optional[str] = None) -> str:
    """
    Return the transcript of the audio file at *file_path*, or a bracketed
    placeholder string when transcription produced no text.
    """
    transcript = _transcribe_audio(file_path, language=language)
    # An empty transcript is falsy, so `or` selects the placeholder.
    return transcript or f"[Audio file at {file_path}; no transcript available]"
183
+
184
+ def _extract_audio_from_video(file_path: str, max_duration: int = 600) -> Optional[str]:
185
+ """
186
+ Use ffmpeg to dump the audio track from a video into a temp wav for transcription.
187
+ Returns the temp path or None.
188
+ """
189
+ try:
190
+ temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
191
+ temp_audio.close()
192
+ cmd = [
193
+ "ffmpeg",
194
+ "-y",
195
+ "-i",
196
+ file_path,
197
+ "-vn",
198
+ "-ac",
199
+ "1",
200
+ "-ar",
201
+ "16000",
202
+ "-t",
203
+ str(max_duration),
204
+ temp_audio.name,
205
+ ]
206
+ subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
207
+ return temp_audio.name
208
+ except Exception:
209
+ return None
210
+
211
def load_video(file_path: str, language: Optional[str] = None, max_audio_seconds: int = 600) -> str:
    """
    Describe a video as text: delegate to npcpy.data.video when available,
    otherwise report basic metadata plus a best-effort audio transcript.
    """
    # Preferred path: the dedicated helper module, when importable and working.
    try:
        from npcpy.data.video import summarize_video_file  # type: ignore
        return summarize_video_file(file_path, language=language, max_audio_seconds=max_audio_seconds)
    except Exception:
        pass

    lines = []

    # Metadata via OpenCV when available; otherwise just the file name.
    try:
        import cv2

        cap = cv2.VideoCapture(file_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        duration = frame_count / fps if fps else 0
        lines.append(
            f"Video file: {os.path.basename(file_path)} | {width}x{height} | {fps:.2f} fps | {frame_count} frames | ~{duration:.1f}s"
        )
        cap.release()
    except Exception:
        lines.append(f"Video file: {os.path.basename(file_path)}")

    # Best-effort transcript of the audio track; always delete the temp wav.
    transcript = ""
    wav_path = _extract_audio_from_video(file_path, max_duration=max_audio_seconds)
    if wav_path:
        try:
            transcript = _transcribe_audio(wav_path, language=language)
        finally:
            try:
                os.remove(wav_path)
            except Exception:
                pass

    if transcript:
        lines.append("Audio transcript:")
        lines.append(transcript)
    else:
        lines.append("[No transcript extracted; ensure ffmpeg and faster-whisper/whisper are installed]")

    return "\n".join(lines)
257
+
115
258
  def load_file_contents(file_path, chunk_size=None):
116
259
  file_ext = os.path.splitext(file_path)[1].upper().lstrip('.')
117
260
  full_content = ""
@@ -137,18 +280,17 @@ def load_file_contents(file_path, chunk_size=None):
137
280
  elif file_ext == 'JSON':
138
281
  data = load_json(file_path)
139
282
  full_content = json.dumps(data, indent=2)
283
+ elif file_ext in ['MP3', 'WAV', 'M4A', 'AAC', 'FLAC', 'OGG']:
284
+ full_content = load_audio(file_path)
285
+ elif file_ext in ['MP4', 'AVI', 'MOV', 'WMV', 'MPG', 'MPEG', 'WEBM', 'MKV']:
286
+ full_content = load_video(file_path)
140
287
  else:
141
288
  return [f"Unsupported file format for content loading: {file_ext}"]
142
289
 
143
290
  if not full_content:
144
291
  return []
145
292
 
146
- chunks = []
147
- for i in range(0, len(full_content), chunk_size):
148
- chunk = full_content[i:i+chunk_size].strip()
149
- if chunk:
150
- chunks.append(chunk)
151
- return chunks
293
+ return _chunk_text(full_content, chunk_size)
152
294
 
153
295
  except Exception as e:
154
296
  return [f"Error loading file {file_path}: {str(e)}"]
npcpy/data/video.py CHANGED
@@ -1,4 +1,7 @@
1
1
 
2
+ import os
3
+ import tempfile
4
+ import subprocess
2
5
 
3
6
 
4
7
  def process_video(file_path, table_name):
@@ -26,3 +29,72 @@ def process_video(file_path, table_name):
26
29
  except Exception as e:
27
30
  print(f"Error processing video: {e}")
28
31
  return [], []
32
+
33
+
34
def summarize_video_file(file_path: str, language: str = None, max_audio_seconds: int = 600) -> str:
    """
    Summarize a video as text: stream metadata (via OpenCV, when installed)
    plus a best-effort transcript of the audio track.

    The audio track is extracted with ffmpeg into a temporary mono 16 kHz
    WAV, capped at *max_audio_seconds* seconds, and transcribed with
    npcpy.data.audio.transcribe_audio_file when available. The temp file
    is always removed — including when ffmpeg itself fails, since
    NamedTemporaryFile(delete=False) has already created it on disk and
    the previous code leaked it in that branch.
    """
    meta_bits = []

    # Metadata via OpenCV; fall back to just the file name if cv2 is absent/fails.
    try:
        import cv2  # type: ignore

        video = cv2.VideoCapture(file_path)
        fps = video.get(cv2.CAP_PROP_FPS)
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        duration = frame_count / fps if fps else 0
        meta_bits.append(
            f"Video file: {os.path.basename(file_path)} | {width}x{height} | {fps:.2f} fps | {frame_count} frames | ~{duration:.1f}s"
        )
        video.release()
    except Exception:
        meta_bits.append(f"Video file: {os.path.basename(file_path)}")

    # Extract the audio track with ffmpeg if available.
    audio_path = None
    temp_name = None
    try:
        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_audio.close()
        temp_name = temp_audio.name
        cmd = [
            "ffmpeg",
            "-y",             # overwrite the already-created temp file
            "-i", file_path,
            "-vn",            # drop the video stream
            "-ac", "1",       # mono
            "-ar", "16000",   # 16 kHz sample rate
            "-t", str(max_audio_seconds),
            temp_name,
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        audio_path = temp_name
    except Exception:
        # ffmpeg missing or failed: remove the pre-created temp file so it
        # does not leak, then proceed without a transcript.
        if temp_name:
            try:
                os.remove(temp_name)
            except OSError:
                pass
        audio_path = None

    transcript = ""
    if audio_path:
        try:
            try:
                from npcpy.data.audio import transcribe_audio_file
                transcript = transcribe_audio_file(audio_path, language=language)  # type: ignore
            except Exception:
                transcript = ""
        finally:
            try:
                os.remove(audio_path)
            except Exception:
                pass

    if transcript:
        meta_bits.append("Audio transcript:")
        meta_bits.append(transcript)
    else:
        meta_bits.append("[No transcript extracted; ensure ffmpeg and a transcription backend are installed]")

    return "\n".join(meta_bits)