octalens-mentions 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/index.js +168 -6
- package/dist/server/index.mjs +168 -6
- package/package.json +1 -1
package/dist/server/index.js
CHANGED
|
@@ -85,15 +85,64 @@ async function sanitizeInput(strapi, data, auth) {
|
|
|
85
85
|
}
|
|
86
86
|
}
|
|
87
87
|
const MENTION_UID$3 = "plugin::octalens-mentions.mention";
|
|
88
|
+
/**
 * Split free text into lowercase word tokens for BM25 scoring.
 *
 * Every character that is not a letter, combining mark, digit, underscore
 * or whitespace is replaced by a space; the result is split on whitespace
 * and tokens shorter than two characters are discarded.
 *
 * The character class is Unicode-aware (`\p{L}`, `\p{M}`, `\p{N}`) so that
 * accented and non-Latin words stay intact. An ASCII-only `\w` would cut
 * "café" down to "caf". For pure ASCII input the result is the same as
 * with `\w`.
 *
 * @param {string} text - Raw text to tokenize.
 * @returns {string[]} Lowercased tokens of length >= 2; an empty array for
 *   empty or non-string input.
 */
function tokenize(text) {
  // Non-strings (null, undefined, numbers, objects) have nothing to tokenize;
  // returning [] avoids a TypeError from calling .toLowerCase() on them.
  if (typeof text !== "string" || text === "") return [];
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length > 1);
}
|
|
92
|
+
/**
 * Compute the BM25 inverse document frequency for each vocabulary term.
 *
 * A document "contains" a term when the term appears among the tokens of
 * its title or its body. Each document is tokenized exactly once and its
 * tokens are stored in a Set, so the cost is one tokenization pass over the
 * corpus plus one Set lookup per (term, document) pair, instead of
 * re-tokenizing every document for every term.
 *
 * IDF formula: ln((N - n + 0.5) / (n + 0.5) + 1), where N is the number of
 * documents and n is the number of documents containing the term.
 *
 * @param {Array<{title?: string, body?: string}>} mentions - Corpus documents.
 * @param {Iterable<string>} vocabulary - Terms to compute IDF for.
 * @param {number} [titleWeight=2] - Unused; kept so existing call sites that
 *   pass a third argument continue to work.
 * @returns {Map<string, number>} Map from term to its IDF value.
 */
function calculateIDF(mentions, vocabulary, titleWeight = 2) {
  // Tokenize every document once up front; membership checks become O(1).
  const docTokenSets = mentions.map(
    (mention2) => new Set([...tokenize(mention2.title || ""), ...tokenize(mention2.body || "")])
  );
  const N = docTokenSets.length;
  const idf = new Map();
  for (const term of vocabulary) {
    let docsWithTerm = 0;
    for (const tokens of docTokenSets) {
      if (tokens.has(term)) docsWithTerm += 1;
    }
    idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
  }
  return idf;
}
|
|
105
|
+
/**
 * Score one mention against a tokenized query using BM25 with a weighted
 * title field.
 *
 * Each title token counts `titleWeight` times towards both the term
 * frequency and the document length; each body token counts once. For every
 * query token present in the document, the contribution is
 *   idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * docLength / avgDocLength)).
 * Query tokens are not deduplicated, so a repeated query token contributes
 * once per occurrence.
 *
 * @param {{title?: string, body?: string}} mention2 - Document to score.
 * @param {string[]} queryTokens - Tokenized query.
 * @param {Map<string, number>} idf - IDF per term; missing terms count as 0.
 * @param {number} avgDocLength - Average weighted document length of the
 *   corpus. If it is not a positive finite number, the document's own length
 *   is used, which makes the length normalization neutral.
 * @param {number} [titleWeight=2] - Multiplier applied to title tokens.
 * @param {number} [k1=1.5] - Term-frequency saturation parameter.
 * @param {number} [b=0.75] - Length-normalization strength.
 * @returns {number} BM25 score; 0 when no query token occurs in the document.
 */
function bm25Score(mention2, queryTokens, idf, avgDocLength, titleWeight = 2, k1 = 1.5, b = 0.75) {
  const titleTokens = tokenize(mention2.title || "");
  const bodyTokens = tokenize(mention2.body || "");
  const docLength = titleTokens.length * titleWeight + bodyTokens.length;

  // A zero, NaN or missing average would turn the length ratio into Infinity
  // or NaN, zeroing or poisoning the score of documents that really match.
  // Fall back to a ratio of 1 (no length penalty or bonus) instead.
  const hasUsableAverage = Number.isFinite(avgDocLength) && avgDocLength > 0;
  const lengthRatio = hasUsableAverage ? docLength / avgDocLength : 1;
  const lengthNorm = k1 * (1 - b + b * lengthRatio);

  // Weighted term frequencies: title hits count titleWeight, body hits count 1.
  const tf = new Map();
  for (const token of titleTokens) {
    tf.set(token, (tf.get(token) || 0) + titleWeight);
  }
  for (const token of bodyTokens) {
    tf.set(token, (tf.get(token) || 0) + 1);
  }

  let score = 0;
  for (const term of queryTokens) {
    const termFreq = tf.get(term) || 0;
    if (termFreq > 0) {
      const termIdf = idf.get(term) || 0;
      score += termIdf * ((termFreq * (k1 + 1)) / (termFreq + lengthNorm));
    }
  }
  return score;
}
|
|
128
|
+
/**
 * Compute the average weighted document length of a corpus of mentions.
 *
 * A document's length is (number of title tokens * titleWeight) + number of
 * body tokens.
 *
 * The result is used as a divisor in BM25 length normalization, so it is
 * never 0: an empty corpus returns 1, and so does a corpus in which every
 * document tokenizes to nothing.
 *
 * @param {Array<{title?: string, body?: string}>} mentions - Corpus documents.
 * @param {number} [titleWeight=2] - Multiplier applied to title token counts.
 * @returns {number} Average weighted length, or 1 when there is nothing to
 *   average.
 */
function calculateAvgDocLength(mentions, titleWeight = 2) {
  if (mentions.length === 0) return 1;
  let totalLength = 0;
  for (const mention2 of mentions) {
    const titleLen = tokenize(mention2.title || "").length;
    const bodyLen = tokenize(mention2.body || "").length;
    totalLength += titleLen * titleWeight + bodyLen;
  }
  // Same safe-divisor fallback as the empty-corpus case above.
  return totalLength > 0 ? totalLength / mentions.length : 1;
}
|
|
88
137
|
const searchMentionsTool = {
|
|
89
138
|
name: "search_mentions",
|
|
90
|
-
description: "Search through social mentions
|
|
139
|
+
description: "Search through social mentions using BM25 relevance scoring. Returns results ranked by relevance when a query is provided. Supports filtering by source, author, sentiment, and more. Title matches are weighted higher than body matches for better relevance.",
|
|
91
140
|
inputSchema: {
|
|
92
141
|
type: "object",
|
|
93
142
|
properties: {
|
|
94
143
|
query: {
|
|
95
144
|
type: "string",
|
|
96
|
-
description: "Search query to
|
|
145
|
+
description: "Search query - uses BM25 algorithm to find and rank relevant mentions by title and body content. More specific queries yield better results."
|
|
97
146
|
},
|
|
98
147
|
source: {
|
|
99
148
|
type: "string",
|
|
@@ -142,10 +191,8 @@ const searchMentionsTool = {
|
|
|
142
191
|
async function handleSearchMentions(strapi, args) {
|
|
143
192
|
const validatedArgs = validateToolInput("search_mentions", args);
|
|
144
193
|
const { query, source, author, keyword, sentimentLabel, bookmarked, viewName, subreddit, page, pageSize, sort } = validatedArgs;
|
|
194
|
+
const TITLE_WEIGHT = 2;
|
|
145
195
|
const filters = {};
|
|
146
|
-
if (query) {
|
|
147
|
-
filters.$or = [{ title: { $containsi: query } }, { body: { $containsi: query } }];
|
|
148
|
-
}
|
|
149
196
|
if (source) {
|
|
150
197
|
filters.source = { $eqi: source };
|
|
151
198
|
}
|
|
@@ -168,6 +215,122 @@ async function handleSearchMentions(strapi, args) {
|
|
|
168
215
|
filters.subreddit = { $containsi: subreddit };
|
|
169
216
|
}
|
|
170
217
|
try {
|
|
218
|
+
if (query) {
|
|
219
|
+
const queryTokens = tokenize(query);
|
|
220
|
+
if (queryTokens.length === 0) {
|
|
221
|
+
return {
|
|
222
|
+
content: [
|
|
223
|
+
{
|
|
224
|
+
type: "text",
|
|
225
|
+
text: JSON.stringify(
|
|
226
|
+
{
|
|
227
|
+
error: true,
|
|
228
|
+
message: "Query is empty or contains only single-character words.",
|
|
229
|
+
query
|
|
230
|
+
},
|
|
231
|
+
null,
|
|
232
|
+
2
|
|
233
|
+
)
|
|
234
|
+
}
|
|
235
|
+
]
|
|
236
|
+
};
|
|
237
|
+
}
|
|
238
|
+
const allMentions = await strapi.documents(MENTION_UID$3).findMany({
|
|
239
|
+
filters,
|
|
240
|
+
limit: 1e3
|
|
241
|
+
// Get up to 1000 mentions for BM25 corpus
|
|
242
|
+
});
|
|
243
|
+
if (allMentions.length === 0) {
|
|
244
|
+
return {
|
|
245
|
+
content: [
|
|
246
|
+
{
|
|
247
|
+
type: "text",
|
|
248
|
+
text: JSON.stringify(
|
|
249
|
+
{
|
|
250
|
+
data: [],
|
|
251
|
+
pagination: {
|
|
252
|
+
page,
|
|
253
|
+
pageSize,
|
|
254
|
+
total: 0,
|
|
255
|
+
pageCount: 0
|
|
256
|
+
},
|
|
257
|
+
searchInfo: {
|
|
258
|
+
query,
|
|
259
|
+
algorithm: "BM25",
|
|
260
|
+
matchingResults: 0
|
|
261
|
+
},
|
|
262
|
+
filters: {
|
|
263
|
+
source,
|
|
264
|
+
author,
|
|
265
|
+
keyword,
|
|
266
|
+
sentimentLabel,
|
|
267
|
+
bookmarked,
|
|
268
|
+
viewName,
|
|
269
|
+
subreddit
|
|
270
|
+
}
|
|
271
|
+
},
|
|
272
|
+
null,
|
|
273
|
+
2
|
|
274
|
+
)
|
|
275
|
+
}
|
|
276
|
+
]
|
|
277
|
+
};
|
|
278
|
+
}
|
|
279
|
+
const vocabulary = new Set(queryTokens);
|
|
280
|
+
const mentionDocs = allMentions;
|
|
281
|
+
const idf = calculateIDF(mentionDocs, vocabulary, TITLE_WEIGHT);
|
|
282
|
+
const avgDocLength = calculateAvgDocLength(mentionDocs, TITLE_WEIGHT);
|
|
283
|
+
const scoredMentions = allMentions.map((mention2) => ({
|
|
284
|
+
...mention2,
|
|
285
|
+
bm25Score: bm25Score(mention2, queryTokens, idf, avgDocLength, TITLE_WEIGHT)
|
|
286
|
+
}));
|
|
287
|
+
const rankedMentions = scoredMentions.filter((m) => m.bm25Score > 0).sort((a, b) => b.bm25Score - a.bm25Score);
|
|
288
|
+
const total2 = rankedMentions.length;
|
|
289
|
+
const startIndex = (page - 1) * pageSize;
|
|
290
|
+
const paginatedResults = rankedMentions.slice(startIndex, startIndex + pageSize);
|
|
291
|
+
const sanitizedResults2 = await sanitizeOutput(strapi, paginatedResults);
|
|
292
|
+
const resultsWithScores = sanitizedResults2.map((mention2, index2) => ({
|
|
293
|
+
...mention2,
|
|
294
|
+
bm25Score: Math.round(paginatedResults[index2].bm25Score * 100) / 100
|
|
295
|
+
}));
|
|
296
|
+
return {
|
|
297
|
+
content: [
|
|
298
|
+
{
|
|
299
|
+
type: "text",
|
|
300
|
+
text: JSON.stringify(
|
|
301
|
+
{
|
|
302
|
+
data: resultsWithScores,
|
|
303
|
+
pagination: {
|
|
304
|
+
page,
|
|
305
|
+
pageSize,
|
|
306
|
+
total: total2,
|
|
307
|
+
pageCount: Math.ceil(total2 / pageSize)
|
|
308
|
+
},
|
|
309
|
+
searchInfo: {
|
|
310
|
+
query,
|
|
311
|
+
algorithm: "BM25",
|
|
312
|
+
titleWeight: TITLE_WEIGHT,
|
|
313
|
+
matchingResults: total2,
|
|
314
|
+
corpusSize: allMentions.length,
|
|
315
|
+
hint: total2 > 0 ? "Results are ranked by relevance. Higher bm25Score indicates better match. Title matches are weighted higher than body matches." : "No matches found. Try different or fewer keywords."
|
|
316
|
+
},
|
|
317
|
+
filters: {
|
|
318
|
+
source,
|
|
319
|
+
author,
|
|
320
|
+
keyword,
|
|
321
|
+
sentimentLabel,
|
|
322
|
+
bookmarked,
|
|
323
|
+
viewName,
|
|
324
|
+
subreddit
|
|
325
|
+
}
|
|
326
|
+
},
|
|
327
|
+
null,
|
|
328
|
+
2
|
|
329
|
+
)
|
|
330
|
+
}
|
|
331
|
+
]
|
|
332
|
+
};
|
|
333
|
+
}
|
|
171
334
|
const results = await strapi.documents(MENTION_UID$3).findMany({
|
|
172
335
|
filters,
|
|
173
336
|
sort: sort ? [sort] : ["createdAt:desc"],
|
|
@@ -190,7 +353,6 @@ async function handleSearchMentions(strapi, args) {
|
|
|
190
353
|
pageCount: Math.ceil(total / pageSize)
|
|
191
354
|
},
|
|
192
355
|
filters: {
|
|
193
|
-
query,
|
|
194
356
|
source,
|
|
195
357
|
author,
|
|
196
358
|
keyword,
|
package/dist/server/index.mjs
CHANGED
|
@@ -84,15 +84,64 @@ async function sanitizeInput(strapi, data, auth) {
|
|
|
84
84
|
}
|
|
85
85
|
}
|
|
86
86
|
const MENTION_UID$3 = "plugin::octalens-mentions.mention";
|
|
87
|
+
/**
 * Split free text into lowercase word tokens for BM25 scoring.
 *
 * Every character that is not a letter, combining mark, digit, underscore
 * or whitespace is replaced by a space; the result is split on whitespace
 * and tokens shorter than two characters are discarded.
 *
 * The character class is Unicode-aware (`\p{L}`, `\p{M}`, `\p{N}`) so that
 * accented and non-Latin words stay intact. An ASCII-only `\w` would cut
 * "café" down to "caf". For pure ASCII input the result is the same as
 * with `\w`.
 *
 * @param {string} text - Raw text to tokenize.
 * @returns {string[]} Lowercased tokens of length >= 2; an empty array for
 *   empty or non-string input.
 */
function tokenize(text) {
  // Non-strings (null, undefined, numbers, objects) have nothing to tokenize;
  // returning [] avoids a TypeError from calling .toLowerCase() on them.
  if (typeof text !== "string" || text === "") return [];
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length > 1);
}
|
|
91
|
+
/**
 * Compute the BM25 inverse document frequency for each vocabulary term.
 *
 * A document "contains" a term when the term appears among the tokens of
 * its title or its body. Each document is tokenized exactly once and its
 * tokens are stored in a Set, so the cost is one tokenization pass over the
 * corpus plus one Set lookup per (term, document) pair, instead of
 * re-tokenizing every document for every term.
 *
 * IDF formula: ln((N - n + 0.5) / (n + 0.5) + 1), where N is the number of
 * documents and n is the number of documents containing the term.
 *
 * @param {Array<{title?: string, body?: string}>} mentions - Corpus documents.
 * @param {Iterable<string>} vocabulary - Terms to compute IDF for.
 * @param {number} [titleWeight=2] - Unused; kept so existing call sites that
 *   pass a third argument continue to work.
 * @returns {Map<string, number>} Map from term to its IDF value.
 */
function calculateIDF(mentions, vocabulary, titleWeight = 2) {
  // Tokenize every document once up front; membership checks become O(1).
  const docTokenSets = mentions.map(
    (mention2) => new Set([...tokenize(mention2.title || ""), ...tokenize(mention2.body || "")])
  );
  const N = docTokenSets.length;
  const idf = new Map();
  for (const term of vocabulary) {
    let docsWithTerm = 0;
    for (const tokens of docTokenSets) {
      if (tokens.has(term)) docsWithTerm += 1;
    }
    idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
  }
  return idf;
}
|
|
104
|
+
/**
 * Score one mention against a tokenized query using BM25 with a weighted
 * title field.
 *
 * Each title token counts `titleWeight` times towards both the term
 * frequency and the document length; each body token counts once. For every
 * query token present in the document, the contribution is
 *   idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * docLength / avgDocLength)).
 * Query tokens are not deduplicated, so a repeated query token contributes
 * once per occurrence.
 *
 * @param {{title?: string, body?: string}} mention2 - Document to score.
 * @param {string[]} queryTokens - Tokenized query.
 * @param {Map<string, number>} idf - IDF per term; missing terms count as 0.
 * @param {number} avgDocLength - Average weighted document length of the
 *   corpus. If it is not a positive finite number, the document's own length
 *   is used, which makes the length normalization neutral.
 * @param {number} [titleWeight=2] - Multiplier applied to title tokens.
 * @param {number} [k1=1.5] - Term-frequency saturation parameter.
 * @param {number} [b=0.75] - Length-normalization strength.
 * @returns {number} BM25 score; 0 when no query token occurs in the document.
 */
function bm25Score(mention2, queryTokens, idf, avgDocLength, titleWeight = 2, k1 = 1.5, b = 0.75) {
  const titleTokens = tokenize(mention2.title || "");
  const bodyTokens = tokenize(mention2.body || "");
  const docLength = titleTokens.length * titleWeight + bodyTokens.length;

  // A zero, NaN or missing average would turn the length ratio into Infinity
  // or NaN, zeroing or poisoning the score of documents that really match.
  // Fall back to a ratio of 1 (no length penalty or bonus) instead.
  const hasUsableAverage = Number.isFinite(avgDocLength) && avgDocLength > 0;
  const lengthRatio = hasUsableAverage ? docLength / avgDocLength : 1;
  const lengthNorm = k1 * (1 - b + b * lengthRatio);

  // Weighted term frequencies: title hits count titleWeight, body hits count 1.
  const tf = new Map();
  for (const token of titleTokens) {
    tf.set(token, (tf.get(token) || 0) + titleWeight);
  }
  for (const token of bodyTokens) {
    tf.set(token, (tf.get(token) || 0) + 1);
  }

  let score = 0;
  for (const term of queryTokens) {
    const termFreq = tf.get(term) || 0;
    if (termFreq > 0) {
      const termIdf = idf.get(term) || 0;
      score += termIdf * ((termFreq * (k1 + 1)) / (termFreq + lengthNorm));
    }
  }
  return score;
}
|
|
127
|
+
/**
 * Compute the average weighted document length of a corpus of mentions.
 *
 * A document's length is (number of title tokens * titleWeight) + number of
 * body tokens.
 *
 * The result is used as a divisor in BM25 length normalization, so it is
 * never 0: an empty corpus returns 1, and so does a corpus in which every
 * document tokenizes to nothing.
 *
 * @param {Array<{title?: string, body?: string}>} mentions - Corpus documents.
 * @param {number} [titleWeight=2] - Multiplier applied to title token counts.
 * @returns {number} Average weighted length, or 1 when there is nothing to
 *   average.
 */
function calculateAvgDocLength(mentions, titleWeight = 2) {
  if (mentions.length === 0) return 1;
  let totalLength = 0;
  for (const mention2 of mentions) {
    const titleLen = tokenize(mention2.title || "").length;
    const bodyLen = tokenize(mention2.body || "").length;
    totalLength += titleLen * titleWeight + bodyLen;
  }
  // Same safe-divisor fallback as the empty-corpus case above.
  return totalLength > 0 ? totalLength / mentions.length : 1;
}
|
|
87
136
|
const searchMentionsTool = {
|
|
88
137
|
name: "search_mentions",
|
|
89
|
-
description: "Search through social mentions
|
|
138
|
+
description: "Search through social mentions using BM25 relevance scoring. Returns results ranked by relevance when a query is provided. Supports filtering by source, author, sentiment, and more. Title matches are weighted higher than body matches for better relevance.",
|
|
90
139
|
inputSchema: {
|
|
91
140
|
type: "object",
|
|
92
141
|
properties: {
|
|
93
142
|
query: {
|
|
94
143
|
type: "string",
|
|
95
|
-
description: "Search query to
|
|
144
|
+
description: "Search query - uses BM25 algorithm to find and rank relevant mentions by title and body content. More specific queries yield better results."
|
|
96
145
|
},
|
|
97
146
|
source: {
|
|
98
147
|
type: "string",
|
|
@@ -141,10 +190,8 @@ const searchMentionsTool = {
|
|
|
141
190
|
async function handleSearchMentions(strapi, args) {
|
|
142
191
|
const validatedArgs = validateToolInput("search_mentions", args);
|
|
143
192
|
const { query, source, author, keyword, sentimentLabel, bookmarked, viewName, subreddit, page, pageSize, sort } = validatedArgs;
|
|
193
|
+
const TITLE_WEIGHT = 2;
|
|
144
194
|
const filters = {};
|
|
145
|
-
if (query) {
|
|
146
|
-
filters.$or = [{ title: { $containsi: query } }, { body: { $containsi: query } }];
|
|
147
|
-
}
|
|
148
195
|
if (source) {
|
|
149
196
|
filters.source = { $eqi: source };
|
|
150
197
|
}
|
|
@@ -167,6 +214,122 @@ async function handleSearchMentions(strapi, args) {
|
|
|
167
214
|
filters.subreddit = { $containsi: subreddit };
|
|
168
215
|
}
|
|
169
216
|
try {
|
|
217
|
+
if (query) {
|
|
218
|
+
const queryTokens = tokenize(query);
|
|
219
|
+
if (queryTokens.length === 0) {
|
|
220
|
+
return {
|
|
221
|
+
content: [
|
|
222
|
+
{
|
|
223
|
+
type: "text",
|
|
224
|
+
text: JSON.stringify(
|
|
225
|
+
{
|
|
226
|
+
error: true,
|
|
227
|
+
message: "Query is empty or contains only single-character words.",
|
|
228
|
+
query
|
|
229
|
+
},
|
|
230
|
+
null,
|
|
231
|
+
2
|
|
232
|
+
)
|
|
233
|
+
}
|
|
234
|
+
]
|
|
235
|
+
};
|
|
236
|
+
}
|
|
237
|
+
const allMentions = await strapi.documents(MENTION_UID$3).findMany({
|
|
238
|
+
filters,
|
|
239
|
+
limit: 1e3
|
|
240
|
+
// Get up to 1000 mentions for BM25 corpus
|
|
241
|
+
});
|
|
242
|
+
if (allMentions.length === 0) {
|
|
243
|
+
return {
|
|
244
|
+
content: [
|
|
245
|
+
{
|
|
246
|
+
type: "text",
|
|
247
|
+
text: JSON.stringify(
|
|
248
|
+
{
|
|
249
|
+
data: [],
|
|
250
|
+
pagination: {
|
|
251
|
+
page,
|
|
252
|
+
pageSize,
|
|
253
|
+
total: 0,
|
|
254
|
+
pageCount: 0
|
|
255
|
+
},
|
|
256
|
+
searchInfo: {
|
|
257
|
+
query,
|
|
258
|
+
algorithm: "BM25",
|
|
259
|
+
matchingResults: 0
|
|
260
|
+
},
|
|
261
|
+
filters: {
|
|
262
|
+
source,
|
|
263
|
+
author,
|
|
264
|
+
keyword,
|
|
265
|
+
sentimentLabel,
|
|
266
|
+
bookmarked,
|
|
267
|
+
viewName,
|
|
268
|
+
subreddit
|
|
269
|
+
}
|
|
270
|
+
},
|
|
271
|
+
null,
|
|
272
|
+
2
|
|
273
|
+
)
|
|
274
|
+
}
|
|
275
|
+
]
|
|
276
|
+
};
|
|
277
|
+
}
|
|
278
|
+
const vocabulary = new Set(queryTokens);
|
|
279
|
+
const mentionDocs = allMentions;
|
|
280
|
+
const idf = calculateIDF(mentionDocs, vocabulary, TITLE_WEIGHT);
|
|
281
|
+
const avgDocLength = calculateAvgDocLength(mentionDocs, TITLE_WEIGHT);
|
|
282
|
+
const scoredMentions = allMentions.map((mention2) => ({
|
|
283
|
+
...mention2,
|
|
284
|
+
bm25Score: bm25Score(mention2, queryTokens, idf, avgDocLength, TITLE_WEIGHT)
|
|
285
|
+
}));
|
|
286
|
+
const rankedMentions = scoredMentions.filter((m) => m.bm25Score > 0).sort((a, b) => b.bm25Score - a.bm25Score);
|
|
287
|
+
const total2 = rankedMentions.length;
|
|
288
|
+
const startIndex = (page - 1) * pageSize;
|
|
289
|
+
const paginatedResults = rankedMentions.slice(startIndex, startIndex + pageSize);
|
|
290
|
+
const sanitizedResults2 = await sanitizeOutput(strapi, paginatedResults);
|
|
291
|
+
const resultsWithScores = sanitizedResults2.map((mention2, index2) => ({
|
|
292
|
+
...mention2,
|
|
293
|
+
bm25Score: Math.round(paginatedResults[index2].bm25Score * 100) / 100
|
|
294
|
+
}));
|
|
295
|
+
return {
|
|
296
|
+
content: [
|
|
297
|
+
{
|
|
298
|
+
type: "text",
|
|
299
|
+
text: JSON.stringify(
|
|
300
|
+
{
|
|
301
|
+
data: resultsWithScores,
|
|
302
|
+
pagination: {
|
|
303
|
+
page,
|
|
304
|
+
pageSize,
|
|
305
|
+
total: total2,
|
|
306
|
+
pageCount: Math.ceil(total2 / pageSize)
|
|
307
|
+
},
|
|
308
|
+
searchInfo: {
|
|
309
|
+
query,
|
|
310
|
+
algorithm: "BM25",
|
|
311
|
+
titleWeight: TITLE_WEIGHT,
|
|
312
|
+
matchingResults: total2,
|
|
313
|
+
corpusSize: allMentions.length,
|
|
314
|
+
hint: total2 > 0 ? "Results are ranked by relevance. Higher bm25Score indicates better match. Title matches are weighted higher than body matches." : "No matches found. Try different or fewer keywords."
|
|
315
|
+
},
|
|
316
|
+
filters: {
|
|
317
|
+
source,
|
|
318
|
+
author,
|
|
319
|
+
keyword,
|
|
320
|
+
sentimentLabel,
|
|
321
|
+
bookmarked,
|
|
322
|
+
viewName,
|
|
323
|
+
subreddit
|
|
324
|
+
}
|
|
325
|
+
},
|
|
326
|
+
null,
|
|
327
|
+
2
|
|
328
|
+
)
|
|
329
|
+
}
|
|
330
|
+
]
|
|
331
|
+
};
|
|
332
|
+
}
|
|
170
333
|
const results = await strapi.documents(MENTION_UID$3).findMany({
|
|
171
334
|
filters,
|
|
172
335
|
sort: sort ? [sort] : ["createdAt:desc"],
|
|
@@ -189,7 +352,6 @@ async function handleSearchMentions(strapi, args) {
|
|
|
189
352
|
pageCount: Math.ceil(total / pageSize)
|
|
190
353
|
},
|
|
191
354
|
filters: {
|
|
192
|
-
query,
|
|
193
355
|
source,
|
|
194
356
|
author,
|
|
195
357
|
keyword,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "octalens-mentions",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "A Strapi v5 plugin that fetches social mentions from Octolens and exposes them via MCP (Model Context Protocol), enabling AI assistants like Claude to search, analyze, and help write responses to social media mentions.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"strapi",
|