apexify.js 4.7.6 → 4.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/ai/ApexAI.ts CHANGED
@@ -1,25 +1,25 @@
- import {
- typeWriter,
- groqAnalyzer,
- readFile,
- ApexImagine,
- ApexText2Speech,
- ApexChat
+ import {
+ groqAnalyzer, readFile, ApexImagine, ApexText2Speech, ApexListener, ApexChat
  } from "./utils";
  import {
- ModalBuilder,
- TextInputBuilder,
- TextInputStyle,
- ActionRowBuilder,
- Message,
- PermissionResolvable,
- TextChannel,
- EmbedBuilder,
- AttachmentBuilder
+ ModalBuilder, TextInputBuilder, TextInputStyle, ActionRowBuilder, Message, PermissionResolvable, TextChannel,
+ EmbedBuilder, AttachmentBuilder, MessageActionRowComponentBuilder, ButtonStyle, ButtonBuilder, CommandInteraction,
+ PermissionFlagsBits
  } from "discord.js";
+ import {
+ joinVoiceChannel, createAudioPlayer, createAudioResource, EndBehaviorType,
+ VoiceConnection, DiscordGatewayAdapterCreator, AudioPlayerStatus
+ } from "@discordjs/voice";
+
  import { filters } from "./buttons/tools";
  import { imageTools } from "./buttons/drawMenu";
  import { whisper } from "./modals-chat/groq/whisper";
+ import { pipeline, Readable } from "stream";
+ import prism from "prism-media"
+ import path from "path";
+ import fs from "fs";
+ import { execSync } from "child_process";
+

  export interface Options {
  /**
@@ -106,6 +106,17 @@ export interface Options {
  delay?: number;
  };
  };
+
+
+ voiceChannel?: {
+ enable?: boolean;
+ channelId?: string;
+ chatModel?: string;
+ voiceModel?: "elevenlabs" | "myshell-tts" | "deepinfra-tts" | "whisper-large-v3" | "distil-large-v3" | string;
+ joinOn?: {
+ triggeredWords?: string[];
+ }
+ };
  /**
  * Additional configuration options.
  * @param others.messageType Configuration options for message types.
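
For orientation, the voiceChannel block added in the hunk above is the main new option in this release. A minimal configuration sketch follows; the field names come from the interface shown above, every value is hypothetical, and all other ApexAI options are omitted.

  // Hypothetical usage sketch (not from the package docs); `message` is assumed to be a discord.js Message.
  await ApexAI(message, {
    voiceChannel: {
      enable: true,
      channelId: "123456789012345678",   // hypothetical voice channel ID
      chatModel: "gpt-4-turbo",          // optional chat model name
      voiceModel: "elevenlabs",          // one of the names in the union above
      joinOn: {
        triggeredWords: ["hey bot"]      // prompt prefixes that start the voice flow
      }
    }
  });

Per the handler added later in this diff, a trigger matches when the incoming prompt starts with one of triggeredWords (a case-insensitive prefix match), not on any substring.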
@@ -155,6 +166,39 @@ export interface Options {
  }


+ function createButtonRows(buttons: any[]): ActionRowBuilder<MessageActionRowComponentBuilder>[] {
+ if (!Array.isArray(buttons) || buttons.length === 0) return [];
+
+ const buttonRows: ActionRowBuilder<MessageActionRowComponentBuilder>[] = [];
+ let currentRow = new ActionRowBuilder<MessageActionRowComponentBuilder>();
+
+ for (const button of buttons) {
+ if (!button.label || !button.style || !button.custom_id) continue;
+
+ const buttonComponent = new ButtonBuilder()
+ .setLabel(button.label)
+ .setStyle(button.style as ButtonStyle)
+ .setCustomId(button.custom_id);
+
+ if (button.emoji) buttonComponent.setEmoji(button.emoji);
+ if (button.url && button.style === ButtonStyle.Link) buttonComponent.setURL(button.url);
+
+ currentRow.addComponents(buttonComponent);
+
+ if (currentRow.components.length === 5) {
+ buttonRows.push(currentRow);
+ currentRow = new ActionRowBuilder<MessageActionRowComponentBuilder>();
+ }
+ }
+
+ if (currentRow.components.length > 0) {
+ buttonRows.push(currentRow);
+ }
+
+ return buttonRows.slice(0, 5);
+ }
+
+
  export async function ApexAI(message: Message, ApexOptions: Options) {
  if (!message.guild || message.author.bot) return;

@@ -168,7 +212,7 @@ export async function ApexAI(message: Message, ApexOptions: Options) {
  await filters(message.client);

  let prompt = message.content || "";
- const { others, chat, voice, imagine } = ApexOptions;
+ const { others, chat, voice, imagine, voiceChannel } = ApexOptions;
  const { permissions, channel, messageType, onMention, loader, buttons } = others ?? {};
  const { Api_Keys, typeWriting } = chat ?? {};
  const { nsfw } = imagine ?? {};
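
The buttons value destructured from others above is what the new createButtonRows helper (added earlier in this diff) consumes; it skips any entry missing label, style, or custom_id, packs five buttons per row, and caps the output at five rows, in line with Discord's component limits. A hypothetical entry shape, for illustration only:

  // Hypothetical others.buttons entries; field names follow the checks inside createButtonRows.
  const buttons = [
    { label: "Regenerate", style: ButtonStyle.Primary, custom_id: "regenerate", emoji: "🔁" },
    { label: "Delete", style: ButtonStyle.Danger, custom_id: "delete" }
  ];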
@@ -291,6 +335,18 @@ export async function ApexAI(message: Message, ApexOptions: Options) {
  }
  }
  }
+
+ if (voiceChannel?.enable && voiceChannel.channelId && voiceChannel.joinOn?.triggeredWords) {
+ const { channelId, joinOn, chatModel } = voiceChannel;
+
+ const triggerDetected = joinOn.triggeredWords?.some(word =>
+ prompt.toLowerCase().startsWith(word.toLowerCase())
+ );
+
+ if (triggerDetected) {
+ return handleVoiceAI(message, channelId, chat);
+ }
+ }


  let aiResponse: string | Buffer = "";
@@ -322,7 +378,7 @@ export async function ApexAI(message: Message, ApexOptions: Options) {

  if (enhancer) {
  const enhancementRequest = `Rewrite this text below in a more descriptive way, making it clearer to be visualized correctly by an AI image generator. Use stronger words and return only the enhanced prompt—nothing more, nothing less.\n\n`;
- tempEnhancer = await ApexChat("v3-32k", prompt, { instruction: enhancementRequest });
+ tempEnhancer = await ApexChat("gpt-4-turbo", prompt, { instruction: enhancementRequest });
  }

  genImage = await ApexImagine(imagine?.imageModel as string, tempEnhancer || prompt,
@@ -345,7 +401,7 @@ export async function ApexAI(message: Message, ApexOptions: Options) {

  if (enhancer) {
  const enhancementRequest = `Rewrite this text below in a more descriptive way, making it clearer to be visualized correctly by an AI image generator. Use stronger words and return only the enhanced prompt—nothing more, nothing less.\n\n`;
- tempEnhancer = await ApexChat("v3-32k", prompt, { instruction: enhancementRequest });
+ tempEnhancer = await ApexChat("gpt-4-turbo", prompt, { instruction: enhancementRequest });
  }

  genImage = await ApexImagine(imagine?.imageModel as string, tempEnhancer || prompt,
@@ -367,7 +423,7 @@ export async function ApexAI(message: Message, ApexOptions: Options) {

  if (enhancer) {
  const enhancementRequest = `Rewrite this text below in a more descriptive way, making it clearer to be visualized correctly by an AI image generator. Use stronger words and return only the enhanced prompt—nothing more, nothing less.\n\n${prompt}`;
- tempEnhancer = await ApexChat("v3-32k", prompt, { instruction: enhancementRequest });
+ tempEnhancer = await ApexChat("gpt-4-turbo", prompt, { instruction: enhancementRequest });
  }

  genImage = await ApexImagine(imagine?.imageModel as string, tempEnhancer || prompt,
@@ -384,6 +440,8 @@ export async function ApexAI(message: Message, ApexOptions: Options) {
  }


+ const buttonRows = createButtonRows(buttons || []);
+
  const imageAttachments: AttachmentBuilder[] = [];
  if (genImage.length > 0) {
  for (const imageUrl of genImage) {
@@ -413,30 +471,32 @@ export async function ApexAI(message: Message, ApexOptions: Options) {
  const finalText = typeof aiResponse === "string" ? `${initialContent}${aiResponse}` : "";
  const messageChunks = chunkMessage(finalText);

- const sendMessage = async (content: string) => {
- if (!content.trim()) return;
- if (messageType?.sendAs === "embed") {
- const embed = new EmbedBuilder()
- .setColor(0x0099ff)
- .setDescription(content)
- .setFooter({ text: "AI Response" });
- if (messageType?.type === "send") {
- return (message.channel as TextChannel).send({ embeds: [embed] });
- } else {
- return message.reply({ embeds: [embed], allowedMentions: { repliedUser: false } });
- }
- } else {
- if (messageType?.type === "send") {
- return (message.channel as TextChannel).send({ content });
- } else {
- return message.reply({ content, allowedMentions: { repliedUser: false } });
- }
- }
+ const sendMessage = async (content: string, components?: ActionRowBuilder<MessageActionRowComponentBuilder>[]) => {
+ if (!content.trim()) return;
+
+ if (messageType?.sendAs === "embed") {
+ const embed = new EmbedBuilder()
+ .setColor(0x0099ff)
+ .setDescription(content)
+ .setFooter({ text: "AI Response" });
+
+ if (messageType?.type === "send") {
+ return (message.channel as TextChannel).send({ embeds: [embed], components });
+ } else {
+ return message.reply({ embeds: [embed], allowedMentions: { repliedUser: false }, components });
+ }
+ } else {
+ if (messageType?.type === "send") {
+ return (message.channel as TextChannel).send({ content, components });
+ } else {
+ return message.reply({ content, allowedMentions: { repliedUser: false }, components });
+ }
+ }
  };
-
- const sendTypeWritingMessage = async (content: string) => {
+
+ const sendTypeWritingMessage = async (content: string, components?: ActionRowBuilder<MessageActionRowComponentBuilder>[]) => {
  if (!typeWriting?.enable) {
- return sendMessage(content);
+ return sendMessage(content, components);
  }

  content = content.trimStart();
@@ -450,30 +510,237 @@ export async function ApexAI(message: Message, ApexOptions: Options) {
  await typingMessage?.edit({ content: typedSentence });
  i += getStepCount(typeWriting.speed ?? 50);
  }
+
+ if (components && components.length > 0) {
+ await typingMessage?.edit({ components });
+ }
+
+ return typingMessage;
  };

- function getStepCount(speed: number): number {
+
+ function getStepCount(speed: number): number {
  const maxSteps = 120;
  const steps = Math.min(Math.ceil(speed), maxSteps);
  return steps > 0 ? steps : 1;
- }
+ }

- function sleep(ms: number): Promise<void> {
+ function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms));
- }
+ }


  (async () => {
- if (imageAttachments.length > 0) {
- await (message.channel as TextChannel).send({ files: imageAttachments });
- }
- for (const chunk of messageChunks) {
+ if (imageAttachments.length > 0) {
+ await (message.channel as TextChannel).send({ files: imageAttachments });
+ }
+
+ let lastSentMessage: Message<boolean> | null = null;
+
+ for (let i = 0; i < messageChunks.length; i++) {
+ const chunk = messageChunks[i].trim();
+ if (!chunk) continue;
+
+ const isLastChunk = i === messageChunks.length - 1;
+
+ if (isLastChunk) {
+ const response = await sendTypeWritingMessage(chunk, buttonRows.length > 0 ? buttonRows : undefined);
+ if (response) lastSentMessage = response as Message<boolean>;
+ } else {
  await sendTypeWritingMessage(chunk);
+ }
+
+ if (!isLastChunk) {
  await new Promise((resolve) => setTimeout(resolve, chat?.typeWriting?.delay ?? 500));
  }
+ }
+
+ if (voiceAttachment) {
+ await (message.channel as TextChannel).send({ files: [voiceAttachment] });
+ }
+
+
+ if (lastSentMessage && buttonRows.length > 0) {
+ await lastSentMessage.edit({ components: buttonRows }).catch(() => null);
+ }
+ })();
+
+
+ };
+
+ const voiceQueue: { userId: string; text: string }[] = [];
+ let isProcessing = false;
+ let voiceConnection: VoiceConnection | null = null;
+ let activeUser: string | null = null;
+ let isRecording = false;
+
+ const recordingsDir = path.join(process.cwd(), "recordings");
+ if (!fs.existsSync(recordingsDir)) {
+ fs.mkdirSync(recordingsDir, { recursive: true });
+ }
+
+ try {
+ execSync("ffmpeg -version > nul 2>&1");
+ } catch (err) {
+ console.error("🚨 FFmpeg is NOT installed or not in PATH! Install it first.");
+ }
+
+ export async function handleVoiceAI(message: any, voiceChannelId: string, chat: any) {
+ const guild = message.guild;
+ if (!guild) return;
+
+ const channel = guild.channels.cache.get(voiceChannelId);
+ if (!channel || channel.type !== 2) {
+ return await message.reply(`🚫 Invalid voice channel ID: ${voiceChannelId}`);
+ }
+
+ const botMember = guild.members.me;
+ if (!botMember) return;
+ const permissions = channel.permissionsFor(botMember);
+
+ if (
+ !permissions?.has(PermissionFlagsBits.Connect) ||
+ !permissions.has(PermissionFlagsBits.Speak) ||
+ !permissions.has(PermissionFlagsBits.Stream) ||
+ !permissions.has(PermissionFlagsBits.UseVAD)
+ ) {
+ return await message.reply("🚫 I don't have the required permissions to join and speak in this voice channel.");
+ }
+
+ if (voiceConnection) {
+ return await message.reply("⚠️ AI is already in a voice channel.");
+ }
+
+ voiceConnection = joinVoiceChannel({
+ channelId: channel.id,
+ guildId: guild.id,
+ adapterCreator: guild.voiceAdapterCreator as any,
+ selfMute: false,
+ selfDeaf: false
+ });
+
+ activeUser = message.author.id;
+
+ captureAudio(voiceConnection, chat);
+ }
+
+ function captureAudio(connection: VoiceConnection, chat: any) {
+ const receiver = connection.receiver;
+
+ receiver.speaking.on("start", async (userId) => {
+ if (userId !== activeUser || isRecording) return;
+ isRecording = true;
+
+
+ const rawFilePath = path.join(recordingsDir, `${userId}.pcm`);
+ const wavFilePath = path.join(recordingsDir, `${userId}.wav`);
+
+ const opusStream = receiver.subscribe(userId, {
+ end: { behavior: EndBehaviorType.AfterSilence, duration: 2000 }
+ });
+
+ const pcmStream = new prism.opus.Decoder({
+ frameSize: 960,
+ channels: 1,
+ rate: 48000
+ });
+
+ const writeStream = fs.createWriteStream(rawFilePath);
+ pipeline(opusStream, pcmStream, writeStream, (err) => {
+ isRecording = false;
+ if (err) {
+ console.error("❌ Error writing PCM file:", err);
+ return;
+ }
+
+ convertPCMtoWAV(rawFilePath, wavFilePath, chat);
+ });
+ });
+ }
+
+ function convertPCMtoWAV(inputPCM: string, outputWAV: string, chat: any) {
+ if (!fs.existsSync(inputPCM) || fs.statSync(inputPCM).size === 0) {
+ return;
+ }
+
+ try {
+ execSync(`ffmpeg -y -f s16le -ar 48000 -ac 1 -i "${inputPCM}" -acodec pcm_s16le "${outputWAV}" > nul 2>&1`);
+
+ if (fs.existsSync(outputWAV)) {
+ transcribeAudio(outputWAV, chat);
+ }
+
+ } catch (error) {
+ console.error("❌ FFmpeg failed:", error);
+ }
+ }

- if (voiceAttachment) {
- await (message.channel as TextChannel).send({ files: [voiceAttachment] });
+ // 🛠 **Transcribe Audio using ApexListener**
+ async function transcribeAudio(filePath: string, chat: any) {
+ try {
+ const transcribedText = await ApexListener({
+ filepath: filePath,
+ prompt: "Transcribe what the user said in English.",
+ lang: "en"
+ });
+
+ if (transcribedText.transcribe) {
+ voiceQueue.push({ userId: activeUser || "unknown", text: transcribedText.transcribe });
+ processQueue(chat);
  }
- })();
- };
+
+ fs.unlinkSync(filePath.replace(".wav", ".pcm"));
+ } catch (error) {
+ console.error("❌ Error in transcription:", error);
+ }
+ }
+
+ async function processQueue(chat: any) {
+ if (isProcessing || voiceQueue.length === 0) {
+ if (voiceQueue.length === 0) {
+ leaveVoiceChannel();
+ }
+ return;
+ }
+
+ isProcessing = true;
+ const { userId, text } = voiceQueue.shift()!;
+
+ try {
+
+ const aiResponse = await ApexChat(chat?.chatModel as string, text, {
+ instruction: chat.instruction,
+ memory: chat?.memory?.memoryOn,
+ userId: userId,
+ limit: chat?.memory?.limit,
+ threshold: chat?.memory?.threshold
+ });
+ const audioBuffer = await ApexText2Speech({ inputText: aiResponse, modelName: "elevenlabs" });
+
+ if (voiceConnection) {
+ const player = createAudioPlayer();
+ const audioStream = Readable.from(audioBuffer);
+ const resource = createAudioResource(audioStream);
+ voiceConnection.subscribe(player);
+ player.play(resource);
+
+ player.on(AudioPlayerStatus.Idle, () => {
+ isProcessing = false;
+ processQueue(chat);
+ });
+ }
+ } catch (error) {
+ console.error("❌ Error processing AI response:", error);
+ isProcessing = false;
+ }
+ }
+
+ // 🔄 **Leave Voice Channel When Done**
+ function leaveVoiceChannel() {
+ if (voiceConnection) {
+ console.log("👋 AI is leaving the voice channel...");
+ voiceConnection.destroy();
+ voiceConnection = null;
+ activeUser = null;
+ }
+ }
@@ -795,7 +795,7 @@ async function processChunk(
  }


- async function ApexListener(options: { filepath: string, model: string, prompt?: string, lang?: string, apiKey?: string }) {
+ async function ApexListener(options: { filepath: string, model?: string, prompt?: string, lang?: string, apiKey?: string }) {

  const { filepath, model = 'v3', prompt = '', lang = 'en', apiKey = undefined } = options;

@@ -807,11 +807,14 @@ async function ApexListener(options: { filepath: string, model: string, prompt?:

  const transcribe = await whisper(prompt, filepath, validatedLang, apiKey);

- const enhancer = `Below is the the speech/text that has been said in the voice/audio file.\n\n${transcribe}`
- const response = await ApexChat(model, enhancer, {});
+ const enhancer = `Below is the the speech/text that has been said in the voice/audio file.\n\n${transcribe}`;
+
+ let response = "";
+ if (model) response = await ApexChat(model, enhancer, {});
+

  return { response, transcribe };
- }
+ };

  function validateLanguageInput(lang: string): string | null {
  const supportedLanguages = [
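
Although model is now optional in the ApexListener signature above, the destructuring still defaults it to 'v3', so an omitted model does not skip the ApexChat step; only an explicitly falsy value (such as an empty string) does. The new transcribeAudio helper in ApexAI.ts omits the field and therefore runs with that default. A rough call sketch, with a hypothetical file path:

  // Sketch only; the file path is hypothetical. Omitting `model` falls back to the 'v3' default above.
  const { transcribe, response } = await ApexListener({
    filepath: "./recordings/123456789012345678.wav",
    prompt: "Transcribe what the user said in English.",
    lang: "en"
  });
  // `transcribe` holds the whisper output; `response` holds the follow-up ApexChat reply.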
@@ -40,7 +40,7 @@ function convertBufferToDataUrl(buffer: Buffer): string {
  export async function groqAnalyzer({ img, ApiKey, prompt }: GroqAnalyzerOptions): Promise<string> {
  try {
  const groq = new Groq({
- apiKey: ApiKey || "your-api-key-here",
+ apiKey: ApiKey || "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",
  });

  let imageDataUrl: string;
package/lib/index.ts CHANGED
@@ -86,4 +86,4 @@ const Apexify = {
  ApexFileReader, ApexImageAnalyzer, ApexVideo, ApexText2Speech, resetHistory
  };

- export default Apexify;
+ export default Apexify;
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "apexify.js",
- "version": "4.7.6",
+ "version": "4.7.8",
  "description": "Unlimited AI models and Canvas library. Supports ts & js (supports front/back end).",
  "author": "zenith-79",
  "license": "MIT",
@@ -225,9 +225,11 @@
  "apexify.js"
  ],
  "dependencies": {
+ "@discordjs/opus": "^0.10.0",
+ "@discordjs/voice": "^0.18.0",
  "@google/generative-ai": "^0.22.0",
  "@napi-rs/canvas": "^0.1.53",
- "apexify.js": "^4.7.5",
+ "apexify.js": "^4.7.7",
  "axios": "^1.7.7",
  "discord.js": "^14.18.0",
  "fluent-ffmpeg": "^2.1.3",
@@ -236,6 +238,7 @@
  "hercai": "^12.4.0",
  "imgur": "^2.4.2",
  "openai": "^4.71.1",
+ "opusscript": "^0.0.8",
  "pdf-parse": "^1.1.1",
  "sharp": "^0.33.5",
  "verse.db": "^2.2.15"