@umituz/react-native-ai-pruna-provider 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@umituz/react-native-ai-pruna-provider",
3
- "version": "1.0.6",
3
+ "version": "1.0.7",
4
4
  "description": "Pruna AI provider for React Native - implements IAIProvider interface for unified AI generation",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -80,6 +80,8 @@ export interface PrunaPredictionInput {
80
80
  readonly image?: string;
81
81
  readonly images?: readonly string[];
82
82
  readonly reference_image?: string;
83
+ /** Audio file for p-video (base64 or URL). When provided, duration is determined by audio length. */
84
+ readonly audio?: string;
83
85
  readonly duration?: number;
84
86
  readonly resolution?: PrunaResolution;
85
87
  readonly fps?: number;
@@ -18,22 +18,58 @@ import { generationLogCollector } from "../utils/log-collector";
18
18
  const TAG = 'pruna-api';
19
19
 
20
20
  /**
21
- * Upload a base64 image to Pruna's file storage.
22
- * p-video requires a file URL (not raw base64).
21
+ * Detect MIME type from raw binary bytes.
22
+ * Supports image (png, jpeg, webp) and audio (mp3, wav, flac, m4a/aac) formats.
23
+ */
24
/**
 * Report whether `signature` occurs in `bytes` starting at `offset`.
 * Returns false instead of reading out of range when the buffer is too short.
 */
function hasSignature(bytes: Uint8Array, offset: number, signature: readonly number[]): boolean {
  if (bytes.length < offset + signature.length) return false;
  return signature.every((value, i) => bytes[offset + i] === value);
}

/**
 * ISO-BMFF (`ftyp`) major brands that identify still images rather than audio.
 * Without this table every `ftyp` file (including HEIC photos) would be
 * reported as `audio/mp4`.
 */
const ISO_BMFF_IMAGE_BRANDS: ReadonlyMap<string, string> = new Map([
  ['heic', 'image/heic'],
  ['heix', 'image/heic'],
  ['mif1', 'image/heif'],
  ['avif', 'image/avif'],
]);

/**
 * Detect a MIME type by sniffing the leading magic bytes of a file.
 *
 * Recognizes images (jpeg, png, webp, heic/heif, avif) and audio
 * (wav, mp3, raw ADTS aac, flac, m4a). At most the first 12 bytes are inspected.
 *
 * @param bytes Raw file content.
 * @returns The detected MIME type, or `image/png` when nothing matches
 *          (including empty input).
 */
function detectMimeType(bytes: Uint8Array): string {
  // JPEG: FF D8
  if (hasSignature(bytes, 0, [0xFF, 0xD8])) return 'image/jpeg';
  // PNG: 89 'P'
  if (hasSignature(bytes, 0, [0x89, 0x50])) return 'image/png';
  // RIFF container — 'WAVE' form type at offset 8 means WAV, anything else is treated as WebP
  if (hasSignature(bytes, 0, [0x52, 0x49, 0x46, 0x46])) {
    return hasSignature(bytes, 8, [0x57, 0x41, 0x56, 0x45]) ? 'audio/wav' : 'image/webp';
  }
  // MP3 with ID3 tag: 'ID3'
  if (hasSignature(bytes, 0, [0x49, 0x44, 0x33])) return 'audio/mpeg';

  // MPEG audio frame sync: 0xFF followed by 0xE0-0xFE (0xFF is excluded as padding/invalid)
  const first = bytes[0] ?? 0;
  const second = bytes[1] ?? 0;
  if (first === 0xFF && (second & 0xE0) === 0xE0 && second !== 0xFF) {
    // ADTS AAC uses a 12-bit sync word with layer bits 00 (FF F0/F1/F8/F9);
    // MPEG-1/2 audio layers never use layer 00, so this separates AAC from MP3.
    return (second & 0xF6) === 0xF0 ? 'audio/aac' : 'audio/mpeg';
  }

  // FLAC: 'fLaC'
  if (hasSignature(bytes, 0, [0x66, 0x4C, 0x61, 0x43])) return 'audio/flac';

  // ISO-BMFF container — 'ftyp' box at offset 4, major brand at offset 8
  if (hasSignature(bytes, 4, [0x66, 0x74, 0x79, 0x70])) {
    const brand = String.fromCharCode(...bytes.subarray(8, 12));
    // Image brands are reported as images; every other brand keeps the M4A/AAC default.
    return ISO_BMFF_IMAGE_BRANDS.get(brand) ?? 'audio/mp4';
  }

  // Default
  return 'image/png';
}

/** File extension used for the upload filename, keyed by detected MIME type. */
const MIME_EXTENSION_MAP: Record<string, string> = {
  'image/png': 'png',
  'image/jpeg': 'jpg',
  'image/webp': 'webp',
  'image/heic': 'heic',
  'image/heif': 'heif',
  'image/avif': 'avif',
  'audio/mpeg': 'mp3',
  'audio/wav': 'wav',
  'audio/flac': 'flac',
  'audio/mp4': 'm4a',
  'audio/aac': 'aac',
};
55
+
56
+ /**
57
+ * Upload a base64 file (image or audio) to Pruna's file storage.
58
+ * p-video requires file URLs (not raw base64).
23
59
  * Returns the HTTPS file URL to use in predictions.
24
60
  */
25
- export async function uploadImageToFiles(
61
+ export async function uploadFileToStorage(
26
62
  base64Data: string,
27
63
  apiKey: string,
28
64
  sessionId: string,
29
65
  ): Promise<string> {
30
66
  // Already a URL — return as-is
31
67
  if (base64Data.startsWith('http')) {
32
- generationLogCollector.log(sessionId, TAG, 'Image already a URL, skipping upload');
68
+ generationLogCollector.log(sessionId, TAG, 'File already a URL, skipping upload');
33
69
  return base64Data;
34
70
  }
35
71
 
36
- generationLogCollector.log(sessionId, TAG, 'Uploading image to Pruna file storage...');
72
+ generationLogCollector.log(sessionId, TAG, 'Uploading file to Pruna storage...');
37
73
 
38
74
  // Strip data URI prefix if present
39
75
  const raw = base64Data.includes('base64,') ? base64Data.split('base64,')[1] : base64Data;
@@ -42,7 +78,7 @@ export async function uploadImageToFiles(
42
78
  try {
43
79
  binaryStr = atob(raw);
44
80
  } catch {
45
- throw new Error("Invalid image format. Please provide base64 or a valid URL.");
81
+ throw new Error("Invalid file format. Please provide base64 or a valid URL.");
46
82
  }
47
83
 
48
84
  const bytes = new Uint8Array(binaryStr.length);
@@ -50,13 +86,9 @@ export async function uploadImageToFiles(
50
86
  bytes[i] = binaryStr.charCodeAt(i);
51
87
  }
52
88
 
53
- // Detect MIME from first bytes
54
- let mime = 'image/png';
55
- if (bytes[0] === 0xFF && bytes[1] === 0xD8) mime = 'image/jpeg';
56
- else if (bytes[0] === 0x52 && bytes[1] === 0x49) mime = 'image/webp';
57
-
89
+ const mime = detectMimeType(bytes);
90
+ const ext = MIME_EXTENSION_MAP[mime] || mime.split('/')[1];
58
91
  const blob = new Blob([bytes], { type: mime });
59
- const ext = mime.split('/')[1];
60
92
  const formData = new FormData();
61
93
  formData.append('content', blob, `upload.${ext}`);
62
94
 
@@ -84,6 +116,9 @@ export async function uploadImageToFiles(
84
116
  return fileUrl;
85
117
  }
86
118
 
119
+ /** @deprecated Use uploadFileToStorage instead. This is an alias of uploadFileToStorage kept under the previous export name. */
120
+ export const uploadImageToFiles = uploadFileToStorage;
121
+
87
122
  /**
88
123
  * Strip base64 data URI prefix, returning raw base64 string.
89
124
  * If input is already a URL, returns it unchanged.
@@ -5,12 +5,12 @@
5
5
  * Each Pruna model has strict schema requirements:
6
6
  * p-image: { prompt, aspect_ratio? }
7
7
  * p-image-edit: { images: string[], prompt, aspect_ratio? }
8
- * p-video: { image: string (URL), prompt, duration, resolution, fps, draft, aspect_ratio, prompt_upsampling }
8
+ * p-video: { image: string (URL), prompt, duration, resolution, fps, draft, aspect_ratio, prompt_upsampling, audio? }
9
9
  */
10
10
 
11
11
  import type { PrunaModelId, PrunaAspectRatio, PrunaResolution } from "../../domain/entities/pruna.types";
12
12
  import { P_VIDEO_DEFAULTS, DEFAULT_ASPECT_RATIO } from "./pruna-provider.constants";
13
- import { uploadImageToFiles, stripBase64Prefix } from "./pruna-api-client";
13
+ import { uploadFileToStorage, stripBase64Prefix } from "./pruna-api-client";
14
14
  import { generationLogCollector } from "../utils/log-collector";
15
15
 
16
16
  const TAG = 'pruna-input-builder';
@@ -115,7 +115,7 @@ async function buildVideoInput(
115
115
 
116
116
  // Upload base64 to file storage if needed (p-video requires HTTPS URL)
117
117
  generationLogCollector.log(sessionId, TAG, 'p-video: preparing image for video generation...');
118
- const fileUrl = await uploadImageToFiles(rawImage, apiKey, sessionId);
118
+ const fileUrl = await uploadFileToStorage(rawImage, apiKey, sessionId);
119
119
 
120
120
  const duration = (input.duration as number) ?? P_VIDEO_DEFAULTS.duration;
121
121
  const resolution = (input.resolution as PrunaResolution) ?? P_VIDEO_DEFAULTS.resolution;
@@ -135,6 +135,15 @@ async function buildVideoInput(
135
135
  prompt_upsampling: promptUpsampling,
136
136
  };
137
137
 
138
+ // Handle audio input — upload to file storage if base64, pass URL if already remote
139
+ const rawAudio = input.audio as string | undefined;
140
+ if (rawAudio) {
141
+ generationLogCollector.log(sessionId, TAG, 'p-video: preparing audio for video generation...');
142
+ const audioUrl = await uploadFileToStorage(rawAudio, apiKey, sessionId);
143
+ payload.audio = audioUrl;
144
+ generationLogCollector.log(sessionId, TAG, 'p-video: audio attached — duration will be determined by audio length');
145
+ }
146
+
138
147
  if (input.disable_safety_checker !== undefined) payload.disable_safety_checker = input.disable_safety_checker;
139
148
 
140
149
  return payload;