@absolutejs/rag 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31820,6 +31820,175 @@ ${tag} OK`) || chunk.startsWith(`${tag} OK`));
31820
31820
  return { messages };
31821
31821
  }
31822
31822
  });
31823
+ // src/ai/rag/socialProviders.ts
31824
/**
 * Fallback fetch implementation: forwards every argument to the global
 * `fetch` and mirrors its `preconnect` property on the wrapper.
 */
var defaultFetch2 = Object.assign(
  (...requestArgs) => fetch(...requestArgs),
  { preconnect: fetch.preconnect }
);

/** Base URL (including API version) prepended to Graph request paths. */
var META_GRAPH_BASE_URL = "https://graph.facebook.com/v22.0";

/** Value sent as the `limit` query parameter when no limit is supplied. */
var DEFAULT_META_PAGE_SIZE = 25;

/** Scopes requested when reading posts from a Facebook Page. */
var FACEBOOK_PAGE_READ_SCOPES = ["pages_show_list", "pages_read_engagement"];

/** Scopes requested when reading media from an Instagram business account. */
var INSTAGRAM_BUSINESS_READ_SCOPES = ["pages_show_list", "instagram_basic"];
31835
/**
 * Builds (does not throw) an Error describing a failed HTTP response.
 *
 * The message is `<label>: <status> <statusText>` followed, when available,
 * by a parenthesised detail: the JSON body's `error.message` if it is a
 * non-empty string, or the trimmed raw body when the body is not valid JSON.
 *
 * @param response Response-like object exposing `clone()`, `status` and `statusText`.
 * @param label Prefix identifying the failed operation.
 * @returns A promise resolving to the Error; it never rejects because of the body.
 */
var toErrorMessage3 = async (response, label) => {
  let detailMessage;
  try {
    // Read the body once from a clone so the caller's response stays unconsumed.
    const rawBody = (await response.clone().text()).trim();
    if (rawBody.length > 0) {
      try {
        const message = JSON.parse(rawBody)?.error?.message;
        // Only a real string is useful; objects would render as "[object Object]".
        detailMessage = typeof message === "string" && message.length > 0 ? message : undefined;
      } catch {
        // Not JSON: surface the raw text instead.
        detailMessage = rawBody;
      }
    }
  } catch {
    // clone()/text() can throw (e.g. body already consumed); the detail is
    // best-effort and must never mask the HTTP status being reported.
    detailMessage = undefined;
  }
  return new Error(`${label}: ${response.status} ${response.statusText}${detailMessage ? ` (${detailMessage})` : ""}`);
};
31846
/**
 * Requests one page of a list endpoint and returns the parsed JSON body.
 *
 * The URL is `baseUrl + path` (falling back to META_GRAPH_BASE_URL) with
 * `access_token`, comma-joined `fields`, `limit` (falling back to
 * DEFAULT_META_PAGE_SIZE) and, when a non-empty string is supplied, `after`.
 *
 * @param input Request options: accessToken, fields, label, path and optional
 *   after, baseUrl, fetch, limit.
 * @returns The response body parsed as JSON.
 * @throws The Error built by toErrorMessage3 when the response is not OK.
 */
var fetchGraphList = async (input) => {
  const { accessToken, after, fields, label, path } = input;
  const doFetch = input.fetch ?? defaultFetch2;
  const requestUrl = new URL(`${input.baseUrl ?? META_GRAPH_BASE_URL}${path}`);
  const query = requestUrl.searchParams;
  query.set("access_token", accessToken);
  query.set("fields", fields.join(","));
  query.set("limit", String(input.limit ?? DEFAULT_META_PAGE_SIZE));
  const hasCursor = typeof after === "string" && after.length > 0;
  if (hasCursor) {
    query.set("after", after);
  }
  const response = await doFetch(requestUrl);
  if (response.ok) {
    return await response.json();
  }
  throw await toErrorMessage3(response, label);
};
31861
/**
 * Reads the `after` cursor from a checkpoint.
 *
 * @param checkpoint Optional checkpoint object.
 * @returns The trimmed cursor, or undefined when it is missing, not a string,
 *   or blank.
 */
var getCheckpointAfter = (checkpoint) => {
  const cursor = checkpoint?.after;
  if (typeof cursor !== "string") {
    return undefined;
  }
  const trimmedCursor = cursor.trim();
  return trimmedCursor.length > 0 ? trimmedCursor : undefined;
};
31865
/**
 * Looks up a string entry in credential metadata.
 *
 * @param metadata Optional metadata object.
 * @param key Property name to read.
 * @returns The trimmed value, or undefined when it is missing, not a string,
 *   or blank.
 */
var getCredentialMetadataString = (metadata, key) => {
  const entry = metadata?.[key];
  if (typeof entry !== "string") {
    return undefined;
  }
  const trimmedEntry = entry.trim();
  return trimmedEntry.length > 0 ? trimmedEntry : undefined;
};
31869
/**
 * Converts one post object from a Facebook Page listing into a connector item.
 *
 * @param credential Linked credential; `externalAccountId` is recorded as the
 *   page id and `metadata` supplies the page name (`label`, then `pageName`,
 *   then `parentPageName`).
 * @param post Post object as returned by the listing.
 * @returns The item, or null when the post has no usable string id.
 */
var toFacebookPostItem = (credential, post) => {
  if (typeof post.id !== "string" || post.id.trim().length === 0) {
    return null;
  }
  const pageName = getCredentialMetadataString(credential.metadata, "label") ?? getCredentialMetadataString(credential.metadata, "pageName") ?? getCredentialMetadataString(credential.metadata, "parentPageName");
  // Treat anything that is not a string as "no message" instead of throwing on trim().
  const message = typeof post.message === "string" ? post.message.trim() : "";
  // Titles are capped at 120 UTF-16 code units. If the cut would land between
  // the two halves of a surrogate pair (e.g. an emoji), back off by one unit so
  // the title never ends in a lone surrogate.
  let titleEnd = Math.min(message.length, 120);
  if (titleEnd < message.length) {
    const lastUnit = message.charCodeAt(titleEnd - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      titleEnd -= 1;
    }
  }
  return {
    createdAt: post.created_time,
    html: undefined,
    id: post.id,
    kind: "facebook_post",
    metadata: {
      facebookPageId: credential.externalAccountId,
      facebookPageName: pageName,
      facebookPictureUrl: post.full_picture,
      facebookStatusType: post.status_type,
      provider: "facebook",
      providerAuthorId: post.from?.id,
      providerAuthorName: post.from?.name
    },
    // Posts without a message fall back to their permalink as body text.
    text: message.length > 0 ? message : post.permalink_url,
    title: message.length > 0 ? message.slice(0, titleEnd) : `Facebook post ${post.id}`,
    updatedAt: post.updated_time,
    url: post.permalink_url
  };
};
31895
/**
 * Converts one media object from an Instagram listing into a connector item.
 *
 * @param credential Linked credential; `externalAccountId` is recorded as the
 *   provider account id and `metadata.instagramUsername` is the fallback
 *   username when the media object carries none.
 * @param media Media object as returned by the listing.
 * @returns The item, or null when the media has no usable string id.
 */
var toInstagramMediaItem = (credential, media) => {
  if (typeof media.id !== "string" || media.id.trim().length === 0) {
    return null;
  }
  // Treat anything that is not a string as "no caption" instead of throwing on trim().
  const caption = typeof media.caption === "string" ? media.caption.trim() : "";
  const username = getCredentialMetadataString(credential.metadata, "instagramUsername");
  // Titles are capped at 120 UTF-16 code units. If the cut would land between
  // the two halves of a surrogate pair (e.g. an emoji), back off by one unit so
  // the title never ends in a lone surrogate.
  let titleEnd = Math.min(caption.length, 120);
  if (titleEnd < caption.length) {
    const lastUnit = caption.charCodeAt(titleEnd - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      titleEnd -= 1;
    }
  }
  return {
    createdAt: media.timestamp,
    id: media.id,
    kind: "instagram_media",
    metadata: {
      instagramMediaType: media.media_type,
      instagramThumbnailUrl: media.thumbnail_url,
      instagramUsername: media.username ?? username,
      provider: "instagram",
      providerAccountId: credential.externalAccountId,
      providerMediaUrl: media.media_url
    },
    // Media without a caption falls back to its permalink as body text.
    text: caption.length > 0 ? caption : media.permalink,
    title: caption.length > 0 ? caption.slice(0, titleEnd) : `Instagram media ${media.id}`,
    url: media.permalink
  };
};
31918
/**
 * Creates a connector runtime that reads one page of posts from a Facebook Page.
 *
 * @param input Optional overrides forwarded to fetchGraphList: baseUrl, fetch, limit.
 * @returns An object with `provider`, `requiredScopes()` and `sync()`.
 *   `sync` resolves to the mapped items, a `nextCheckpoint` carrying the
 *   `after` cursor when the response reports one, and listing diagnostics.
 */
var createRAGFacebookPageConnector = (input) => {
  const postFields = [
    "id",
    "message",
    "created_time",
    "updated_time",
    "permalink_url",
    "full_picture",
    "status_type",
    "from{id,name}"
  ];
  const sync = async ({ checkpoint, credential, resolver }) => {
    const lease = await resolver.getAccessToken(credential, {
      requiredScopes: FACEBOOK_PAGE_READ_SCOPES
    });
    // A page token stored on the credential takes precedence over the leased token.
    const storedPageToken = getCredentialMetadataString(credential.metadata, "pageAccessToken");
    const page = await fetchGraphList({
      accessToken: storedPageToken ?? lease.accessToken,
      after: getCheckpointAfter(checkpoint),
      baseUrl: input?.baseUrl,
      fetch: input?.fetch,
      fields: postFields,
      label: `Facebook Page sync failed for ${credential.externalAccountId}`,
      limit: input?.limit,
      path: `/${encodeURIComponent(credential.externalAccountId)}/posts`
    });
    const nextAfter = page.paging?.cursors?.after;
    const posts = page.data ?? [];
    return {
      // Posts without a usable id map to null and are dropped here.
      items: posts.map((post) => toFacebookPostItem(credential, post)).filter(Boolean),
      nextCheckpoint: nextAfter ? { after: nextAfter } : undefined,
      diagnostics: {
        listedCount: page.data?.length ?? 0,
        nextAfter
      }
    };
  };
  return {
    provider: "facebook",
    requiredScopes: () => FACEBOOK_PAGE_READ_SCOPES,
    sync
  };
};
31955
/**
 * Creates a connector runtime that reads one page of media from an Instagram
 * business account.
 *
 * @param input Optional overrides forwarded to fetchGraphList: baseUrl, fetch, limit.
 * @returns An object with `provider`, `requiredScopes()` and `sync()`.
 *   `sync` resolves to the mapped items, a `nextCheckpoint` carrying the
 *   `after` cursor when the response reports one, and listing diagnostics.
 */
var createRAGInstagramBusinessConnector = (input) => {
  const mediaFields = [
    "id",
    "caption",
    "media_type",
    "media_url",
    "permalink",
    "thumbnail_url",
    "timestamp",
    "username"
  ];
  const sync = async ({ checkpoint, credential, resolver }) => {
    const lease = await resolver.getAccessToken(credential, {
      requiredScopes: INSTAGRAM_BUSINESS_READ_SCOPES
    });
    // A parent-page token stored on the credential takes precedence over the leased token.
    const storedParentToken = getCredentialMetadataString(credential.metadata, "parentPageAccessToken");
    const accessToken = storedParentToken ?? lease.accessToken;
    const page = await fetchGraphList({
      accessToken,
      after: getCheckpointAfter(checkpoint),
      baseUrl: input?.baseUrl,
      fetch: input?.fetch,
      fields: mediaFields,
      label: `Instagram business sync failed for ${credential.externalAccountId}`,
      limit: input?.limit,
      path: `/${encodeURIComponent(credential.externalAccountId)}/media`
    });
    const nextAfter = page.paging?.cursors?.after;
    const mediaList = page.data ?? [];
    return {
      // Media without a usable id map to null and are dropped here.
      items: mediaList.map((media) => toInstagramMediaItem(credential, media)).filter(Boolean),
      nextCheckpoint: nextAfter ? { after: nextAfter } : undefined,
      diagnostics: {
        listedCount: page.data?.length ?? 0,
        nextAfter
      }
    };
  };
  return {
    provider: "instagram",
    requiredScopes: () => INSTAGRAM_BUSINESS_READ_SCOPES,
    sync
  };
};
31823
31992
  // src/ai/rag/sync.ts
31824
31993
  var {S3Client } = globalThis.Bun;
31825
31994
  import { createHash } from "crypto";
@@ -31834,6 +32003,10 @@ var wait = async (delayMs) => {
31834
32003
  };
31835
32004
  var getSyncMetadataString = (metadata, key) => typeof metadata?.[key] === "string" ? metadata[key] : undefined;
31836
32005
  var getSyncMetadataBoolean = (metadata, key) => metadata?.[key] === true;
32006
/**
 * Reads a metadata entry that is expected to be a non-array object.
 *
 * @param metadata Optional metadata object.
 * @param key Property name to read.
 * @returns The entry when it is a truthy, non-array object; otherwise undefined.
 */
var getSyncMetadataRecord = (metadata, key) => {
  const candidate = metadata?.[key];
  const isRecord = Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
  return isRecord ? candidate : undefined;
};
31837
32010
  var DEFAULT_DIRECTORY_EXTENSIONS2 = [
31838
32011
  ".txt",
31839
32012
  ".md",
@@ -32172,12 +32345,30 @@ var toManagedSyncDocument = (sourceId, document, syncKey) => ({
32172
32345
  }
32173
32346
  });
32174
32347
  var encodeAttachmentContent = (attachment) => typeof attachment.content === "string" ? {
32175
- content: attachment.content,
32348
+ content: sanitizeSyncString(attachment.content),
32176
32349
  encoding: attachment.encoding ?? "utf8"
32177
32350
  } : {
32178
32351
  content: Buffer.from(attachment.content).toString("base64"),
32179
32352
  encoding: "base64"
32180
32353
  };
32354
/** Removes every NUL (U+0000) character from a string. */
var sanitizeSyncString = (value) => value.replace(/\u0000/g, "");

/** Sanitizes strings; returns any non-string value unchanged. */
var sanitizeOptionalSyncString = (value) => typeof value === "string" ? sanitizeSyncString(value) : value;

/**
 * Sanitizes a list of strings.
 *
 * Non-string entries are dropped BEFORE sanitizing: sanitizing first would
 * call `.replace` on them and throw before they could be filtered out.
 *
 * @param value Optional array of entries.
 * @returns A new array of sanitized strings, or undefined for a nullish input.
 */
var sanitizeSyncStringList = (value) => value?.filter((entry) => typeof entry === "string").map((entry) => sanitizeSyncString(entry));

/**
 * Recursively strips NUL characters from every string inside a metadata value.
 *
 * Arrays and objects are copied; Date instances are returned as-is because
 * rebuilding them from their (empty) own enumerable entries would turn them
 * into `{}`. All other values are returned unchanged.
 *
 * @param value Any metadata value.
 * @returns The sanitized copy (or the original primitive/Date).
 */
var sanitizeSyncMetadataValue = (value) => {
  if (typeof value === "string") {
    return sanitizeSyncString(value);
  }
  if (Array.isArray(value)) {
    return value.map((entry) => sanitizeSyncMetadataValue(entry));
  }
  if (value instanceof Date) {
    return value;
  }
  if (value && typeof value === "object") {
    return Object.fromEntries(Object.entries(value).map(([key, entry]) => [
      key,
      sanitizeSyncMetadataValue(entry)
    ]));
  }
  return value;
};
32181
32372
  var toTimestamp = (value) => {
32182
32373
  if (typeof value === "number" && Number.isFinite(value)) {
32183
32374
  return value;
@@ -33893,21 +34084,21 @@ var createRAGEmailSyncSource = (options) => ({
33893
34084
  id: `email-${message.id}`,
33894
34085
  metadata: {
33895
34086
  ...options.baseMetadata ?? {},
33896
- ...message.metadata ?? {},
34087
+ ...sanitizeSyncMetadataValue(message.metadata ?? {}) ?? {},
33897
34088
  emailKind: "message",
33898
- from: message.from,
34089
+ from: sanitizeOptionalSyncString(message.from),
33899
34090
  hasAttachments: (message.attachments?.length ?? 0) > 0,
33900
34091
  messageId: message.id,
33901
34092
  receivedAt: toTimestamp(message.receivedAt),
33902
34093
  sentAt: toTimestamp(message.sentAt),
33903
- threadId: message.threadId,
33904
- threadTopic: message.subject,
33905
- to: message.to,
33906
- cc: message.cc
34094
+ threadId: sanitizeOptionalSyncString(message.threadId),
34095
+ threadTopic: sanitizeOptionalSyncString(message.subject),
34096
+ to: sanitizeSyncStringList(message.to),
34097
+ cc: sanitizeSyncStringList(message.cc)
33907
34098
  },
33908
- source: `email/${message.threadId ?? message.id}`,
33909
- text: message.bodyText,
33910
- title: message.subject ?? message.id
34099
+ source: `email/${sanitizeOptionalSyncString(message.threadId) ?? message.id}`,
34100
+ text: sanitizeOptionalSyncString(message.bodyText) ?? "",
34101
+ title: sanitizeOptionalSyncString(message.subject) ?? message.id
33911
34102
  }));
33912
34103
  const attachmentUploads = listed.messages.flatMap((message) => (message.attachments ?? []).map((attachment, index) => ({
33913
34104
  ...encodeAttachmentContent(attachment),
@@ -33916,18 +34107,18 @@ var createRAGEmailSyncSource = (options) => ({
33916
34107
  format: attachment.format,
33917
34108
  metadata: {
33918
34109
  ...options.baseMetadata ?? {},
33919
- ...attachment.metadata ?? {},
34110
+ ...sanitizeSyncMetadataValue(attachment.metadata ?? {}) ?? {},
33920
34111
  attachmentId: attachment.id ?? `${message.id}-attachment-${index + 1}`,
33921
34112
  emailKind: "attachment",
33922
- from: message.from,
34113
+ from: sanitizeOptionalSyncString(message.from),
33923
34114
  messageId: message.id,
33924
34115
  sentAt: toTimestamp(message.sentAt),
33925
- threadId: message.threadId,
33926
- threadTopic: message.subject
34116
+ threadId: sanitizeOptionalSyncString(message.threadId),
34117
+ threadTopic: sanitizeOptionalSyncString(message.subject)
33927
34118
  },
33928
- name: attachment.name,
33929
- source: attachment.source ?? `email/${message.threadId ?? message.id}/attachments/${attachment.name}`,
33930
- title: attachment.title ?? `${message.subject ?? message.id} \xB7 ${attachment.name}`
34119
+ name: sanitizeOptionalSyncString(attachment.name) ?? `attachment-${index + 1}`,
34120
+ source: sanitizeOptionalSyncString(attachment.source) ?? `email/${sanitizeOptionalSyncString(message.threadId) ?? message.id}/attachments/${sanitizeOptionalSyncString(attachment.name) ?? `attachment-${index + 1}`}`,
34121
+ title: sanitizeOptionalSyncString(attachment.title) ?? `${sanitizeOptionalSyncString(message.subject) ?? message.id} \xB7 ${sanitizeOptionalSyncString(attachment.name) ?? `attachment-${index + 1}`}`
33931
34122
  })));
33932
34123
  const extractionFailures = [];
33933
34124
  const loadedAttachments = attachmentUploads.length > 0 ? (await Promise.all(attachmentUploads.map(async (upload) => {
@@ -33987,6 +34178,81 @@ var createRAGEmailSyncSource = (options) => ({
33987
34178
  };
33988
34179
  }
33989
34180
  });
34181
/**
 * Creates a "custom" sync source backed by a linked connector runtime.
 *
 * Each sync run resolves a credential through `options.resolver`, asks
 * `options.runtime.sync` for items (resuming from the `connectorCheckpoint`
 * stored in the source record's metadata), converts the items into managed
 * documents and reconciles them. Deletions are only allowed when the runtime
 * reports no `nextCheckpoint`.
 *
 * @param options Source configuration: id, label, runtime, resolver and
 *   optional description, metadata, retry settings, bindingId,
 *   externalAccountId, ownerRef, purpose, requiredScopes, baseMetadata,
 *   defaultChunking, chunkingRegistry.
 * @returns The sync source definition.
 * @throws Error (from `sync`) when no credential can be resolved.
 */
var createRAGLinkedConnectorSyncSource = (options) => ({
  description: options.description,
  id: options.id,
  kind: "custom",
  label: options.label,
  metadata: options.metadata,
  retryAttempts: options.retryAttempts,
  retryDelayMs: options.retryDelayMs,
  target: options.externalAccountId ?? options.bindingId ?? options.label,
  sync: async ({ collection, deleteDocument, listDocuments, sourceRecord }) => {
    const requiredScopes = options.requiredScopes ?? options.runtime.requiredScopes({ mode: "read" });
    const credential = await options.resolver.resolveCredential({
      bindingId: options.bindingId,
      connectorProvider: options.runtime.provider,
      externalAccountId: options.externalAccountId,
      ownerRef: options.ownerRef,
      purpose: options.purpose ?? "background_sync",
      requiredScopes
    });
    if (!credential) {
      throw new Error(`No linked ${options.runtime.provider} credential could be resolved`);
    }
    const checkpoint = getSyncMetadataRecord(sourceRecord?.metadata, "connectorCheckpoint");
    const result = await options.runtime.sync({
      checkpoint,
      credential,
      resolver: options.resolver
    });
    const managedDocuments = result.items.map((item, index) => {
      const plainText = sanitizeOptionalSyncString(item.text);
      const htmlBody = sanitizeOptionalSyncString(item.html);
      // The body prefers plain text and only falls back to HTML when no text
      // exists, so the format must follow the same choice; labelling a
      // plain-text body as "html" would misdescribe the ingested content.
      const bodyIsHtml = (plainText === undefined || plainText === null) && Boolean(item.html);
      return toManagedSyncDocument(options.id, {
        chunking: options.defaultChunking,
        format: bodyIsHtml ? "html" : "text",
        id: `${options.runtime.provider}-${item.id}`,
        metadata: {
          ...options.baseMetadata ?? {},
          ...sanitizeSyncMetadataValue(item.metadata ?? {}) ?? {},
          connectorBindingId: credential.bindingId,
          connectorExternalAccountId: credential.externalAccountId,
          connectorKind: item.kind,
          connectorProvider: options.runtime.provider,
          createdAt: toTimestamp(item.createdAt),
          itemId: item.id,
          threadId: sanitizeOptionalSyncString(item.threadId),
          updatedAt: toTimestamp(item.updatedAt),
          url: sanitizeOptionalSyncString(item.url)
        },
        source: sanitizeOptionalSyncString(item.url) ?? `connector/${options.runtime.provider}/${sanitizeOptionalSyncString(item.threadId) ?? item.id}`,
        text: plainText ?? htmlBody ?? "",
        title: sanitizeOptionalSyncString(item.title) ?? item.id
      }, `${item.kind}:${item.id}:${index + 1}`);
    });
    const reconciled = await reconcileManagedDocuments({
      chunkingRegistry: options.chunkingRegistry,
      collection,
      defaultChunking: options.defaultChunking,
      deleteDocument,
      documents: managedDocuments,
      listDocuments,
      sourceId: options.id,
      // While more pages are pending, this run has not seen every item yet.
      allowDeletions: result.nextCheckpoint === undefined
    });
    return {
      chunkCount: reconciled.chunkCount,
      documentCount: reconciled.documentCount,
      metadata: {
        connectorCheckpoint: result.nextCheckpoint,
        connectorDiagnostics: result.diagnostics,
        connectorItemCount: result.items.length,
        connectorProvider: options.runtime.provider,
        deletedCount: reconciled.deletedCount,
        resumePending: result.nextCheckpoint !== undefined,
        updatedCount: reconciled.updatedCount
      },
      reconciliation: reconciled.reconciliation
    };
  }
});
33990
34256
  var createRAGLinkedGmailEmailSyncSource = (options) => createRAGEmailSyncSource({
33991
34257
  ...options,
33992
34258
  client: createRAGLinkedGmailEmailSyncClient(options)
@@ -36922,6 +37188,8 @@ export {
36922
37188
  createRAGMediaFileExtractor,
36923
37189
  createRAGLinkedGmailEmailSyncSource,
36924
37190
  createRAGLinkedGmailEmailSyncClient,
37191
+ createRAGLinkedConnectorSyncSource,
37192
+ createRAGInstagramBusinessConnector,
36925
37193
  createRAGImageOCRExtractor,
36926
37194
  createRAGIMAPEmailSyncClient,
36927
37195
  createRAGHTMXWorkflowRenderConfig,
@@ -36953,6 +37221,7 @@ export {
36953
37221
  createRAGFileAnswerGroundingEvaluationHistoryStore,
36954
37222
  createRAGFileAnswerGroundingCaseDifficultyHistoryStore,
36955
37223
  createRAGFeedSyncSource,
37224
+ createRAGFacebookPageConnector,
36956
37225
  createRAGEvaluationSuiteSnapshot,
36957
37226
  createRAGEvaluationSuite,
36958
37227
  createRAGEmbeddingProvider,
@@ -37059,5 +37328,5 @@ export {
37059
37328
  addRAGEvaluationSuiteCase
37060
37329
  };
37061
37330
 
37062
- //# debugId=46D96811C8BB335F64756E2164756E21
37331
+ //# debugId=B8D6F85D648EA8ED64756E2164756E21
37063
37332
  //# sourceMappingURL=index.js.map