@pipedream/openai 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,18 +1,28 @@
|
|
|
1
|
+
import axios from "axios";
|
|
2
|
+
import Bottleneck from "bottleneck";
|
|
1
3
|
import fs from "fs";
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
+
import {
|
|
5
|
+
join, extname,
|
|
6
|
+
} from "path";
|
|
4
7
|
import FormData from "form-data";
|
|
5
8
|
import { ConfigurationError } from "@pipedream/platform";
|
|
6
9
|
import common from "../common/common.mjs";
|
|
7
10
|
import constants from "../common/constants.mjs";
|
|
8
11
|
import lang from "../common/lang.mjs";
|
|
9
12
|
import openai from "../../app/openai.app.mjs";
|
|
13
|
+
import { promisify } from "util";
|
|
14
|
+
import stream from "stream";
|
|
15
|
+
import { exec } from "child_process";
|
|
16
|
+
import ffmpegInstaller from "@ffmpeg-installer/ffmpeg";
|
|
10
17
|
|
|
11
18
|
// Human-readable reminder of the accepted audio container formats.
// It is not referenced in the visible part of this file; its consumers are outside this view.
const COMMON_AUDIO_FORMATS_TEXT = "Your audio file must be in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.";
|
|
12
19
|
|
|
20
|
+
// Promise-returning form of child_process.exec, so shell commands can be awaited.
// Callers in this file read `stdout` from the resolved value.
const execAsync = promisify(exec);
|
|
21
|
+
// Promise-returning form of stream.pipeline; used to await the download-to-disk copy in run().
const pipelineAsync = promisify(stream.pipeline);
|
|
22
|
+
|
|
13
23
|
export default {
|
|
14
24
|
name: "Create Transcription",
|
|
15
|
-
version: "0.0.
|
|
25
|
+
version: "0.0.5",
|
|
16
26
|
key: "openai-create-transcription",
|
|
17
27
|
description: "Transcribes audio into the input language. [See docs here](https://platform.openai.com/docs/api-reference/audio/create).",
|
|
18
28
|
type: "action",
|
|
@@ -80,6 +90,102 @@ export default {
|
|
|
80
90
|
|
|
81
91
|
return props;
|
|
82
92
|
},
|
|
93
|
+
methods: {
|
|
94
|
+
createForm({
|
|
95
|
+
file, outputDir,
|
|
96
|
+
}) {
|
|
97
|
+
const form = new FormData();
|
|
98
|
+
form.append("model", "whisper-1");
|
|
99
|
+
if (this.prompt) form.append("prompt", this.prompt);
|
|
100
|
+
if (this.temperature) form.append("temperature", this.temperature);
|
|
101
|
+
if (this.language) form.append("language", this.language);
|
|
102
|
+
if (this.responseFormat) form.append("response_format", this.responseFormat);
|
|
103
|
+
const readStream = fs.createReadStream(join(outputDir, file));
|
|
104
|
+
form.append("file", readStream);
|
|
105
|
+
return form;
|
|
106
|
+
},
|
|
107
|
+
async chunkFileAndTranscribe({
|
|
108
|
+
file, $,
|
|
109
|
+
}) {
|
|
110
|
+
const outputDir = join("/tmp", "chunks");
|
|
111
|
+
await execAsync(`mkdir -p ${outputDir}`);
|
|
112
|
+
await execAsync(`rm -f ${outputDir}/*`);
|
|
113
|
+
|
|
114
|
+
await this.chunkFile({
|
|
115
|
+
file,
|
|
116
|
+
outputDir,
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
const files = await fs.promises.readdir(outputDir);
|
|
120
|
+
const transcription = await this.transcribeFiles({
|
|
121
|
+
files,
|
|
122
|
+
outputDir,
|
|
123
|
+
$,
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
return {
|
|
127
|
+
transcription,
|
|
128
|
+
};
|
|
129
|
+
},
|
|
130
|
+
async chunkFile({
|
|
131
|
+
file, outputDir,
|
|
132
|
+
}) {
|
|
133
|
+
const ffmpegPath = ffmpegInstaller.path;
|
|
134
|
+
const ext = extname(file);
|
|
135
|
+
|
|
136
|
+
const fileSizeInMB = fs.statSync(file).size / (1024 * 1024);
|
|
137
|
+
const numberOfChunks = Math.ceil(fileSizeInMB / 24);
|
|
138
|
+
|
|
139
|
+
if (numberOfChunks === 1) {
|
|
140
|
+
await execAsync(`cp ${file} ${outputDir}/chunk-000${ext}`);
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
const { stdout } = await execAsync(`${ffmpegPath} -i ${file} 2>&1 | grep "Duration"`);
|
|
145
|
+
const duration = stdout.match(/\d{2}:\d{2}:\d{2}\.\d{2}/s)[0];
|
|
146
|
+
const [
|
|
147
|
+
hours,
|
|
148
|
+
minutes,
|
|
149
|
+
seconds,
|
|
150
|
+
] = duration.split(":").map(parseFloat);
|
|
151
|
+
|
|
152
|
+
const totalSeconds = (hours * 60 * 60) + (minutes * 60) + seconds;
|
|
153
|
+
const segmentTime = Math.ceil(totalSeconds / numberOfChunks);
|
|
154
|
+
|
|
155
|
+
const command = `${ffmpegPath} -i ${file} -f segment -segment_time ${segmentTime} -c copy ${outputDir}/chunk-%03d${ext}`;
|
|
156
|
+
await execAsync(command);
|
|
157
|
+
},
|
|
158
|
+
async transcribeFiles({
|
|
159
|
+
files, outputDir, $,
|
|
160
|
+
}) {
|
|
161
|
+
const limiter = new Bottleneck({
|
|
162
|
+
maxConcurrent: 1,
|
|
163
|
+
minTime: 1000 / 59,
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
const transcriptions = await Promise.all(files.map((file) => {
|
|
167
|
+
return limiter.schedule(() => this.transcribe({
|
|
168
|
+
file,
|
|
169
|
+
outputDir,
|
|
170
|
+
$,
|
|
171
|
+
}));
|
|
172
|
+
}));
|
|
173
|
+
return transcriptions.join(" ");
|
|
174
|
+
},
|
|
175
|
+
async transcribe({
|
|
176
|
+
file, outputDir, $,
|
|
177
|
+
}) {
|
|
178
|
+
const form = this.createForm({
|
|
179
|
+
file,
|
|
180
|
+
outputDir,
|
|
181
|
+
});
|
|
182
|
+
const response = await this.openai.createTranscription({
|
|
183
|
+
$,
|
|
184
|
+
form,
|
|
185
|
+
});
|
|
186
|
+
return response.text;
|
|
187
|
+
},
|
|
188
|
+
},
|
|
83
189
|
async run({ $ }) {
|
|
84
190
|
const {
|
|
85
191
|
url,
|
|
@@ -90,38 +196,38 @@ export default {
|
|
|
90
196
|
throw new Error("Must specify either File URL or File Path");
|
|
91
197
|
}
|
|
92
198
|
|
|
93
|
-
|
|
94
|
-
form.append("model", "whisper-1");
|
|
95
|
-
if (this.prompt) form.append("prompt", this.prompt);
|
|
96
|
-
if (this.temperature) form.append("temperature", this.temperature);
|
|
97
|
-
if (this.language) form.append("language", this.language);
|
|
98
|
-
if (this.responseFormat) form.append("response_format", this.responseFormat);
|
|
199
|
+
let file;
|
|
99
200
|
|
|
100
201
|
if (path) {
|
|
101
202
|
if (!fs.existsSync(path)) {
|
|
102
203
|
throw new Error(`${path} does not exist`);
|
|
103
204
|
}
|
|
104
|
-
|
|
105
|
-
|
|
205
|
+
|
|
206
|
+
file = path;
|
|
106
207
|
} else if (url) {
|
|
107
208
|
const ext = extname(url);
|
|
108
|
-
|
|
109
|
-
const
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
await new Promise((resolve, reject) => {
|
|
115
|
-
writeStream.on("finish", resolve);
|
|
116
|
-
writeStream.on("error", reject);
|
|
117
|
-
responseStream.on("error", reject);
|
|
209
|
+
|
|
210
|
+
const response = await axios({
|
|
211
|
+
method: "GET",
|
|
212
|
+
url,
|
|
213
|
+
responseType: "stream",
|
|
214
|
+
timeout: 250000,
|
|
118
215
|
});
|
|
119
|
-
|
|
120
|
-
|
|
216
|
+
|
|
217
|
+
const bufferStream = new stream.PassThrough();
|
|
218
|
+
response.data.pipe(bufferStream);
|
|
219
|
+
|
|
220
|
+
const downloadPath = join("/tmp", `audio${ext}`);
|
|
221
|
+
const writeStream = fs.createWriteStream(downloadPath);
|
|
222
|
+
|
|
223
|
+
await pipelineAsync(bufferStream, writeStream);
|
|
224
|
+
|
|
225
|
+
file = downloadPath;
|
|
121
226
|
}
|
|
122
|
-
|
|
227
|
+
|
|
228
|
+
const response = await this.chunkFileAndTranscribe({
|
|
229
|
+
file,
|
|
123
230
|
$,
|
|
124
|
-
form,
|
|
125
231
|
});
|
|
126
232
|
|
|
127
233
|
if (response) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pipedream/openai",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"description": "Pipedream OpenAI Components",
|
|
5
5
|
"main": "app/openai.app.mjs",
|
|
6
6
|
"keywords": [
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
"access": "public"
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
|
+
"@ffmpeg-installer/ffmpeg": "^1.1.0",
|
|
17
18
|
"@pipedream/platform": "^1.2.1",
|
|
18
19
|
"@pipedream/types": "^0.1.4",
|
|
19
20
|
"got": "^12.6.0",
|