@pipedream/openai 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,28 @@
1
+ import axios from "axios";
2
+ import Bottleneck from "bottleneck";
1
3
  import fs from "fs";
2
- import got from "got";
3
- import { extname } from "path";
4
+ import {
5
+ join, extname,
6
+ } from "path";
4
7
  import FormData from "form-data";
5
8
  import { ConfigurationError } from "@pipedream/platform";
6
9
  import common from "../common/common.mjs";
7
10
  import constants from "../common/constants.mjs";
8
11
  import lang from "../common/lang.mjs";
9
12
  import openai from "../../app/openai.app.mjs";
13
+ import { promisify } from "util";
14
+ import stream from "stream";
15
+ import { exec } from "child_process";
16
+ import ffmpegInstaller from "@ffmpeg-installer/ffmpeg";
10
17
 
11
18
  const COMMON_AUDIO_FORMATS_TEXT = "Your audio file must be in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.";
12
19
 
20
// Promise-returning wrappers around the callback-style Node APIs used below,
// so shell commands and stream pipelines can be awaited.
const pipelineAsync = promisify(stream.pipeline);
const execAsync = promisify(exec);
22
+
13
23
  export default {
14
24
  name: "Create Transcription",
15
- version: "0.0.2",
25
+ version: "0.0.5",
16
26
  key: "openai-create-transcription",
17
27
  description: "Transcribes audio into the input language. [See docs here](https://platform.openai.com/docs/api-reference/audio/create).",
18
28
  type: "action",
@@ -80,6 +90,102 @@ export default {
80
90
 
81
91
  return props;
82
92
  },
93
+ methods: {
94
+ createForm({
95
+ file, outputDir,
96
+ }) {
97
+ const form = new FormData();
98
+ form.append("model", "whisper-1");
99
+ if (this.prompt) form.append("prompt", this.prompt);
100
+ if (this.temperature) form.append("temperature", this.temperature);
101
+ if (this.language) form.append("language", this.language);
102
+ if (this.responseFormat) form.append("response_format", this.responseFormat);
103
+ const readStream = fs.createReadStream(join(outputDir, file));
104
+ form.append("file", readStream);
105
+ return form;
106
+ },
107
+ async chunkFileAndTranscribe({
108
+ file, $,
109
+ }) {
110
+ const outputDir = join("/tmp", "chunks");
111
+ await execAsync(`mkdir -p ${outputDir}`);
112
+ await execAsync(`rm -f ${outputDir}/*`);
113
+
114
+ await this.chunkFile({
115
+ file,
116
+ outputDir,
117
+ });
118
+
119
+ const files = await fs.promises.readdir(outputDir);
120
+ const transcription = await this.transcribeFiles({
121
+ files,
122
+ outputDir,
123
+ $,
124
+ });
125
+
126
+ return {
127
+ transcription,
128
+ };
129
+ },
130
+ async chunkFile({
131
+ file, outputDir,
132
+ }) {
133
+ const ffmpegPath = ffmpegInstaller.path;
134
+ const ext = extname(file);
135
+
136
+ const fileSizeInMB = fs.statSync(file).size / (1024 * 1024);
137
+ const numberOfChunks = Math.ceil(fileSizeInMB / 24);
138
+
139
+ if (numberOfChunks === 1) {
140
+ await execAsync(`cp ${file} ${outputDir}/chunk-000${ext}`);
141
+ return;
142
+ }
143
+
144
+ const { stdout } = await execAsync(`${ffmpegPath} -i ${file} 2>&1 | grep "Duration"`);
145
+ const duration = stdout.match(/\d{2}:\d{2}:\d{2}\.\d{2}/s)[0];
146
+ const [
147
+ hours,
148
+ minutes,
149
+ seconds,
150
+ ] = duration.split(":").map(parseFloat);
151
+
152
+ const totalSeconds = (hours * 60 * 60) + (minutes * 60) + seconds;
153
+ const segmentTime = Math.ceil(totalSeconds / numberOfChunks);
154
+
155
+ const command = `${ffmpegPath} -i ${file} -f segment -segment_time ${segmentTime} -c copy ${outputDir}/chunk-%03d${ext}`;
156
+ await execAsync(command);
157
+ },
158
+ async transcribeFiles({
159
+ files, outputDir, $,
160
+ }) {
161
+ const limiter = new Bottleneck({
162
+ maxConcurrent: 1,
163
+ minTime: 1000 / 59,
164
+ });
165
+
166
+ const transcriptions = await Promise.all(files.map((file) => {
167
+ return limiter.schedule(() => this.transcribe({
168
+ file,
169
+ outputDir,
170
+ $,
171
+ }));
172
+ }));
173
+ return transcriptions.join(" ");
174
+ },
175
+ async transcribe({
176
+ file, outputDir, $,
177
+ }) {
178
+ const form = this.createForm({
179
+ file,
180
+ outputDir,
181
+ });
182
+ const response = await this.openai.createTranscription({
183
+ $,
184
+ form,
185
+ });
186
+ return response.text;
187
+ },
188
+ },
83
189
  async run({ $ }) {
84
190
  const {
85
191
  url,
@@ -90,38 +196,38 @@ export default {
90
196
  throw new Error("Must specify either File URL or File Path");
91
197
  }
92
198
 
93
- const form = new FormData();
94
- form.append("model", "whisper-1");
95
- if (this.prompt) form.append("prompt", this.prompt);
96
- if (this.temperature) form.append("temperature", this.temperature);
97
- if (this.language) form.append("language", this.language);
98
- if (this.responseFormat) form.append("response_format", this.responseFormat);
199
+ let file;
99
200
 
100
201
  if (path) {
101
202
  if (!fs.existsSync(path)) {
102
203
  throw new Error(`${path} does not exist`);
103
204
  }
104
- const readStream = fs.createReadStream(path);
105
- form.append("file", readStream);
205
+
206
+ file = path;
106
207
  } else if (url) {
107
208
  const ext = extname(url);
108
- // OpenAI only supports a few audio formats and uses the extension to determine the format
109
- const tempFilePath = `/tmp/audioFile${ext}`;
110
-
111
- const writeStream = fs.createWriteStream(tempFilePath);
112
- const responseStream = got.stream(url);
113
- responseStream.pipe(writeStream);
114
- await new Promise((resolve, reject) => {
115
- writeStream.on("finish", resolve);
116
- writeStream.on("error", reject);
117
- responseStream.on("error", reject);
209
+
210
+ const response = await axios({
211
+ method: "GET",
212
+ url,
213
+ responseType: "stream",
214
+ timeout: 250000,
118
215
  });
119
- const readStream = fs.createReadStream(tempFilePath);
120
- form.append("file", readStream);
216
+
217
+ const bufferStream = new stream.PassThrough();
218
+ response.data.pipe(bufferStream);
219
+
220
+ const downloadPath = join("/tmp", `audio${ext}`);
221
+ const writeStream = fs.createWriteStream(downloadPath);
222
+
223
+ await pipelineAsync(bufferStream, writeStream);
224
+
225
+ file = downloadPath;
121
226
  }
122
- const response = await this.openai.createTranscription({
227
+
228
+ const response = await this.chunkFileAndTranscribe({
229
+ file,
123
230
  $,
124
- form,
125
231
  });
126
232
 
127
233
  if (response) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pipedream/openai",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "Pipedream OpenAI Components",
5
5
  "main": "app/openai.app.mjs",
6
6
  "keywords": [
@@ -14,6 +14,7 @@
14
14
  "access": "public"
15
15
  },
16
16
  "dependencies": {
17
+ "@ffmpeg-installer/ffmpeg": "^1.1.0",
17
18
  "@pipedream/platform": "^1.2.1",
18
19
  "@pipedream/types": "^0.1.4",
19
20
  "got": "^12.6.0",