octalens-mentions 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,15 +85,64 @@ async function sanitizeInput(strapi, data, auth) {
85
85
  }
86
86
  }
87
87
  const MENTION_UID$3 = "plugin::octalens-mentions.mention";
88
/**
 * Split free text into lowercase word tokens for relevance scoring.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator. The character classes are Unicode-aware,
 * so accented and non-Latin words (for example "café" or "日本語") survive as
 * tokens instead of being cut apart by an ASCII-only word class.
 *
 * @param {string} text - Raw text to tokenize.
 * @returns {string[]} Tokens in order of appearance. Tokens whose `length` is 1
 *   are dropped. Empty or non-string input yields an empty array.
 */
function tokenize(text) {
  // Non-strings have no toLowerCase(); treat them like missing text rather than throwing.
  if (typeof text !== "string" || text === "") return [];
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length > 1);
}
92
/**
 * Compute the BM25 inverse document frequency of each vocabulary term.
 *
 * A mention counts as containing a term when the term is among the tokens of its
 * `title` or `body` (missing values are treated as empty text). The value stored
 * per term is `ln((N - n + 0.5) / (n + 0.5) + 1)`, where `N` is the number of
 * mentions and `n` the number of mentions containing the term.
 *
 * @param {Array<object>} mentions - Corpus; each entry's `title` and `body` are read.
 * @param {Iterable<string>} vocabulary - Terms to compute an IDF for.
 * @param {number} [titleWeight=2] - Not used by the computation; kept so existing
 *   call sites that pass it keep working.
 * @returns {Map<string, number>} IDF per term, in vocabulary iteration order.
 */
function calculateIDF(mentions, vocabulary, titleWeight = 2) {
  // Tokenize every mention once up front; doing it inside the term loop repeats
  // the same work for each vocabulary term.
  const docTermSets = mentions.map(
    (mention2) => new Set([...tokenize(mention2.title || ""), ...tokenize(mention2.body || "")])
  );
  const N = mentions.length;
  const idf = new Map();
  for (const term of vocabulary) {
    const docsWithTerm = docTermSets.filter((terms) => terms.has(term)).length;
    idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
  }
  return idf;
}
105
/**
 * Score one mention against the query tokens with BM25.
 *
 * Title tokens contribute `titleWeight` to both the term frequency and the
 * document length; body tokens contribute 1. Query terms that do not occur in
 * the mention add nothing, and terms missing from `idf` are treated as IDF 0.
 *
 * @param {object} mention2 - Mention whose `title` and `body` are scored
 *   (missing values are treated as empty text).
 * @param {Iterable<string>} queryTokens - Query terms; a repeated term is counted each time.
 * @param {Map<string, number>} idf - IDF per term.
 * @param {number} avgDocLength - Average weighted document length of the corpus.
 * @param {number} [titleWeight=2] - Weight applied to each title token.
 * @param {number} [k1=1.5] - BM25 term-frequency saturation parameter.
 * @param {number} [b=0.75] - BM25 length-normalisation parameter.
 * @returns {number} The summed BM25 score; 0 when no query term occurs in the mention.
 */
function bm25Score(mention2, queryTokens, idf, avgDocLength, titleWeight = 2, k1 = 1.5, b = 0.75) {
  const titleTokens = tokenize(mention2.title || "");
  const bodyTokens = tokenize(mention2.body || "");
  const docLength = titleTokens.length * titleWeight + bodyTokens.length;
  // A zero, negative or non-finite average would make the length ratio Infinity/NaN
  // and zero out (or poison) the score of a mention that does match. Fall back to 1.
  const safeAvgDocLength = Number.isFinite(avgDocLength) && avgDocLength > 0 ? avgDocLength : 1;
  const lengthNorm = k1 * (1 - b + b * (docLength / safeAvgDocLength));
  const tf = new Map();
  for (const token of titleTokens) {
    tf.set(token, (tf.get(token) || 0) + titleWeight);
  }
  for (const token of bodyTokens) {
    tf.set(token, (tf.get(token) || 0) + 1);
  }
  let score = 0;
  for (const term of queryTokens) {
    const termFreq = tf.get(term) || 0;
    if (termFreq > 0) {
      const termIdf = idf.get(term) || 0;
      score += termIdf * (termFreq * (k1 + 1) / (termFreq + lengthNorm));
    }
  }
  return score;
}
128
/**
 * Average weighted token length of the mentions in a corpus.
 *
 * A mention's length is its title token count times `titleWeight` plus its body
 * token count; missing titles or bodies are treated as empty text.
 *
 * @param {Array<object>} mentions - Corpus; each entry's `title` and `body` are read.
 * @param {number} [titleWeight=2] - Multiplier applied to the title token count.
 * @returns {number} The mean weighted length, or 1 when `mentions` is empty.
 */
function calculateAvgDocLength(mentions, titleWeight = 2) {
  const mentionCount = mentions.length;
  if (mentionCount === 0) return 1;
  let weightedTotal = 0;
  mentions.forEach((mention2) => {
    const titleTokenCount = tokenize(mention2.title || "").length;
    const bodyTokenCount = tokenize(mention2.body || "").length;
    weightedTotal = weightedTotal + titleTokenCount * titleWeight + bodyTokenCount;
  });
  return weightedTotal / mentionCount;
}
88
137
  const searchMentionsTool = {
89
138
  name: "search_mentions",
90
- description: "Search through social mentions with various filters. Use this to find mentions by content, source, author, sentiment, and more. Returns paginated results with full mention details.",
139
+ description: "Search through social mentions using BM25 relevance scoring. Returns results ranked by relevance when a query is provided. Supports filtering by source, author, sentiment, and more. Title matches are weighted higher than body matches for better relevance.",
91
140
  inputSchema: {
92
141
  type: "object",
93
142
  properties: {
94
143
  query: {
95
144
  type: "string",
96
- description: "Search query to filter mentions by title or body content"
145
+ description: "Search query - uses BM25 algorithm to find and rank relevant mentions by title and body content. More specific queries yield better results."
97
146
  },
98
147
  source: {
99
148
  type: "string",
@@ -142,10 +191,8 @@ const searchMentionsTool = {
142
191
  async function handleSearchMentions(strapi, args) {
143
192
  const validatedArgs = validateToolInput("search_mentions", args);
144
193
  const { query, source, author, keyword, sentimentLabel, bookmarked, viewName, subreddit, page, pageSize, sort } = validatedArgs;
194
+ const TITLE_WEIGHT = 2;
145
195
  const filters = {};
146
- if (query) {
147
- filters.$or = [{ title: { $containsi: query } }, { body: { $containsi: query } }];
148
- }
149
196
  if (source) {
150
197
  filters.source = { $eqi: source };
151
198
  }
@@ -168,6 +215,122 @@ async function handleSearchMentions(strapi, args) {
168
215
  filters.subreddit = { $containsi: subreddit };
169
216
  }
170
217
  try {
218
+ if (query) {
219
+ const queryTokens = tokenize(query);
220
+ if (queryTokens.length === 0) {
221
+ return {
222
+ content: [
223
+ {
224
+ type: "text",
225
+ text: JSON.stringify(
226
+ {
227
+ error: true,
228
+ message: "Query is empty or contains only single-character words.",
229
+ query
230
+ },
231
+ null,
232
+ 2
233
+ )
234
+ }
235
+ ]
236
+ };
237
+ }
238
+ const allMentions = await strapi.documents(MENTION_UID$3).findMany({
239
+ filters,
240
+ limit: 1e3
241
+ // Get up to 1000 mentions for BM25 corpus
242
+ });
243
+ if (allMentions.length === 0) {
244
+ return {
245
+ content: [
246
+ {
247
+ type: "text",
248
+ text: JSON.stringify(
249
+ {
250
+ data: [],
251
+ pagination: {
252
+ page,
253
+ pageSize,
254
+ total: 0,
255
+ pageCount: 0
256
+ },
257
+ searchInfo: {
258
+ query,
259
+ algorithm: "BM25",
260
+ matchingResults: 0
261
+ },
262
+ filters: {
263
+ source,
264
+ author,
265
+ keyword,
266
+ sentimentLabel,
267
+ bookmarked,
268
+ viewName,
269
+ subreddit
270
+ }
271
+ },
272
+ null,
273
+ 2
274
+ )
275
+ }
276
+ ]
277
+ };
278
+ }
279
+ const vocabulary = new Set(queryTokens);
280
+ const mentionDocs = allMentions;
281
+ const idf = calculateIDF(mentionDocs, vocabulary, TITLE_WEIGHT);
282
+ const avgDocLength = calculateAvgDocLength(mentionDocs, TITLE_WEIGHT);
283
+ const scoredMentions = allMentions.map((mention2) => ({
284
+ ...mention2,
285
+ bm25Score: bm25Score(mention2, queryTokens, idf, avgDocLength, TITLE_WEIGHT)
286
+ }));
287
+ const rankedMentions = scoredMentions.filter((m) => m.bm25Score > 0).sort((a, b) => b.bm25Score - a.bm25Score);
288
+ const total2 = rankedMentions.length;
289
+ const startIndex = (page - 1) * pageSize;
290
+ const paginatedResults = rankedMentions.slice(startIndex, startIndex + pageSize);
291
+ const sanitizedResults2 = await sanitizeOutput(strapi, paginatedResults);
292
+ const resultsWithScores = sanitizedResults2.map((mention2, index2) => ({
293
+ ...mention2,
294
+ bm25Score: Math.round(paginatedResults[index2].bm25Score * 100) / 100
295
+ }));
296
+ return {
297
+ content: [
298
+ {
299
+ type: "text",
300
+ text: JSON.stringify(
301
+ {
302
+ data: resultsWithScores,
303
+ pagination: {
304
+ page,
305
+ pageSize,
306
+ total: total2,
307
+ pageCount: Math.ceil(total2 / pageSize)
308
+ },
309
+ searchInfo: {
310
+ query,
311
+ algorithm: "BM25",
312
+ titleWeight: TITLE_WEIGHT,
313
+ matchingResults: total2,
314
+ corpusSize: allMentions.length,
315
+ hint: total2 > 0 ? "Results are ranked by relevance. Higher bm25Score indicates better match. Title matches are weighted higher than body matches." : "No matches found. Try different or fewer keywords."
316
+ },
317
+ filters: {
318
+ source,
319
+ author,
320
+ keyword,
321
+ sentimentLabel,
322
+ bookmarked,
323
+ viewName,
324
+ subreddit
325
+ }
326
+ },
327
+ null,
328
+ 2
329
+ )
330
+ }
331
+ ]
332
+ };
333
+ }
171
334
  const results = await strapi.documents(MENTION_UID$3).findMany({
172
335
  filters,
173
336
  sort: sort ? [sort] : ["createdAt:desc"],
@@ -190,7 +353,6 @@ async function handleSearchMentions(strapi, args) {
190
353
  pageCount: Math.ceil(total / pageSize)
191
354
  },
192
355
  filters: {
193
- query,
194
356
  source,
195
357
  author,
196
358
  keyword,
@@ -84,15 +84,64 @@ async function sanitizeInput(strapi, data, auth) {
84
84
  }
85
85
  }
86
86
  const MENTION_UID$3 = "plugin::octalens-mentions.mention";
87
/**
 * Split free text into lowercase word tokens for relevance scoring.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator. The character classes are Unicode-aware,
 * so accented and non-Latin words (for example "café" or "日本語") survive as
 * tokens instead of being cut apart by an ASCII-only word class.
 *
 * @param {string} text - Raw text to tokenize.
 * @returns {string[]} Tokens in order of appearance. Tokens whose `length` is 1
 *   are dropped. Empty or non-string input yields an empty array.
 */
function tokenize(text) {
  // Non-strings have no toLowerCase(); treat them like missing text rather than throwing.
  if (typeof text !== "string" || text === "") return [];
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length > 1);
}
91
/**
 * Compute the BM25 inverse document frequency of each vocabulary term.
 *
 * A mention counts as containing a term when the term is among the tokens of its
 * `title` or `body` (missing values are treated as empty text). The value stored
 * per term is `ln((N - n + 0.5) / (n + 0.5) + 1)`, where `N` is the number of
 * mentions and `n` the number of mentions containing the term.
 *
 * @param {Array<object>} mentions - Corpus; each entry's `title` and `body` are read.
 * @param {Iterable<string>} vocabulary - Terms to compute an IDF for.
 * @param {number} [titleWeight=2] - Not used by the computation; kept so existing
 *   call sites that pass it keep working.
 * @returns {Map<string, number>} IDF per term, in vocabulary iteration order.
 */
function calculateIDF(mentions, vocabulary, titleWeight = 2) {
  // Tokenize every mention once up front; doing it inside the term loop repeats
  // the same work for each vocabulary term.
  const docTermSets = mentions.map(
    (mention2) => new Set([...tokenize(mention2.title || ""), ...tokenize(mention2.body || "")])
  );
  const N = mentions.length;
  const idf = new Map();
  for (const term of vocabulary) {
    const docsWithTerm = docTermSets.filter((terms) => terms.has(term)).length;
    idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
  }
  return idf;
}
104
/**
 * Score one mention against the query tokens with BM25.
 *
 * Title tokens contribute `titleWeight` to both the term frequency and the
 * document length; body tokens contribute 1. Query terms that do not occur in
 * the mention add nothing, and terms missing from `idf` are treated as IDF 0.
 *
 * @param {object} mention2 - Mention whose `title` and `body` are scored
 *   (missing values are treated as empty text).
 * @param {Iterable<string>} queryTokens - Query terms; a repeated term is counted each time.
 * @param {Map<string, number>} idf - IDF per term.
 * @param {number} avgDocLength - Average weighted document length of the corpus.
 * @param {number} [titleWeight=2] - Weight applied to each title token.
 * @param {number} [k1=1.5] - BM25 term-frequency saturation parameter.
 * @param {number} [b=0.75] - BM25 length-normalisation parameter.
 * @returns {number} The summed BM25 score; 0 when no query term occurs in the mention.
 */
function bm25Score(mention2, queryTokens, idf, avgDocLength, titleWeight = 2, k1 = 1.5, b = 0.75) {
  const titleTokens = tokenize(mention2.title || "");
  const bodyTokens = tokenize(mention2.body || "");
  const docLength = titleTokens.length * titleWeight + bodyTokens.length;
  // A zero, negative or non-finite average would make the length ratio Infinity/NaN
  // and zero out (or poison) the score of a mention that does match. Fall back to 1.
  const safeAvgDocLength = Number.isFinite(avgDocLength) && avgDocLength > 0 ? avgDocLength : 1;
  const lengthNorm = k1 * (1 - b + b * (docLength / safeAvgDocLength));
  const tf = new Map();
  for (const token of titleTokens) {
    tf.set(token, (tf.get(token) || 0) + titleWeight);
  }
  for (const token of bodyTokens) {
    tf.set(token, (tf.get(token) || 0) + 1);
  }
  let score = 0;
  for (const term of queryTokens) {
    const termFreq = tf.get(term) || 0;
    if (termFreq > 0) {
      const termIdf = idf.get(term) || 0;
      score += termIdf * (termFreq * (k1 + 1) / (termFreq + lengthNorm));
    }
  }
  return score;
}
127
/**
 * Average weighted token length of the mentions in a corpus.
 *
 * A mention's length is its title token count times `titleWeight` plus its body
 * token count; missing titles or bodies are treated as empty text.
 *
 * @param {Array<object>} mentions - Corpus; each entry's `title` and `body` are read.
 * @param {number} [titleWeight=2] - Multiplier applied to the title token count.
 * @returns {number} The mean weighted length, or 1 when `mentions` is empty.
 */
function calculateAvgDocLength(mentions, titleWeight = 2) {
  const mentionCount = mentions.length;
  if (mentionCount === 0) return 1;
  let weightedTotal = 0;
  mentions.forEach((mention2) => {
    const titleTokenCount = tokenize(mention2.title || "").length;
    const bodyTokenCount = tokenize(mention2.body || "").length;
    weightedTotal = weightedTotal + titleTokenCount * titleWeight + bodyTokenCount;
  });
  return weightedTotal / mentionCount;
}
87
136
  const searchMentionsTool = {
88
137
  name: "search_mentions",
89
- description: "Search through social mentions with various filters. Use this to find mentions by content, source, author, sentiment, and more. Returns paginated results with full mention details.",
138
+ description: "Search through social mentions using BM25 relevance scoring. Returns results ranked by relevance when a query is provided. Supports filtering by source, author, sentiment, and more. Title matches are weighted higher than body matches for better relevance.",
90
139
  inputSchema: {
91
140
  type: "object",
92
141
  properties: {
93
142
  query: {
94
143
  type: "string",
95
- description: "Search query to filter mentions by title or body content"
144
+ description: "Search query - uses BM25 algorithm to find and rank relevant mentions by title and body content. More specific queries yield better results."
96
145
  },
97
146
  source: {
98
147
  type: "string",
@@ -141,10 +190,8 @@ const searchMentionsTool = {
141
190
  async function handleSearchMentions(strapi, args) {
142
191
  const validatedArgs = validateToolInput("search_mentions", args);
143
192
  const { query, source, author, keyword, sentimentLabel, bookmarked, viewName, subreddit, page, pageSize, sort } = validatedArgs;
193
+ const TITLE_WEIGHT = 2;
144
194
  const filters = {};
145
- if (query) {
146
- filters.$or = [{ title: { $containsi: query } }, { body: { $containsi: query } }];
147
- }
148
195
  if (source) {
149
196
  filters.source = { $eqi: source };
150
197
  }
@@ -167,6 +214,122 @@ async function handleSearchMentions(strapi, args) {
167
214
  filters.subreddit = { $containsi: subreddit };
168
215
  }
169
216
  try {
217
+ if (query) {
218
+ const queryTokens = tokenize(query);
219
+ if (queryTokens.length === 0) {
220
+ return {
221
+ content: [
222
+ {
223
+ type: "text",
224
+ text: JSON.stringify(
225
+ {
226
+ error: true,
227
+ message: "Query is empty or contains only single-character words.",
228
+ query
229
+ },
230
+ null,
231
+ 2
232
+ )
233
+ }
234
+ ]
235
+ };
236
+ }
237
+ const allMentions = await strapi.documents(MENTION_UID$3).findMany({
238
+ filters,
239
+ limit: 1e3
240
+ // Get up to 1000 mentions for BM25 corpus
241
+ });
242
+ if (allMentions.length === 0) {
243
+ return {
244
+ content: [
245
+ {
246
+ type: "text",
247
+ text: JSON.stringify(
248
+ {
249
+ data: [],
250
+ pagination: {
251
+ page,
252
+ pageSize,
253
+ total: 0,
254
+ pageCount: 0
255
+ },
256
+ searchInfo: {
257
+ query,
258
+ algorithm: "BM25",
259
+ matchingResults: 0
260
+ },
261
+ filters: {
262
+ source,
263
+ author,
264
+ keyword,
265
+ sentimentLabel,
266
+ bookmarked,
267
+ viewName,
268
+ subreddit
269
+ }
270
+ },
271
+ null,
272
+ 2
273
+ )
274
+ }
275
+ ]
276
+ };
277
+ }
278
+ const vocabulary = new Set(queryTokens);
279
+ const mentionDocs = allMentions;
280
+ const idf = calculateIDF(mentionDocs, vocabulary, TITLE_WEIGHT);
281
+ const avgDocLength = calculateAvgDocLength(mentionDocs, TITLE_WEIGHT);
282
+ const scoredMentions = allMentions.map((mention2) => ({
283
+ ...mention2,
284
+ bm25Score: bm25Score(mention2, queryTokens, idf, avgDocLength, TITLE_WEIGHT)
285
+ }));
286
+ const rankedMentions = scoredMentions.filter((m) => m.bm25Score > 0).sort((a, b) => b.bm25Score - a.bm25Score);
287
+ const total2 = rankedMentions.length;
288
+ const startIndex = (page - 1) * pageSize;
289
+ const paginatedResults = rankedMentions.slice(startIndex, startIndex + pageSize);
290
+ const sanitizedResults2 = await sanitizeOutput(strapi, paginatedResults);
291
+ const resultsWithScores = sanitizedResults2.map((mention2, index2) => ({
292
+ ...mention2,
293
+ bm25Score: Math.round(paginatedResults[index2].bm25Score * 100) / 100
294
+ }));
295
+ return {
296
+ content: [
297
+ {
298
+ type: "text",
299
+ text: JSON.stringify(
300
+ {
301
+ data: resultsWithScores,
302
+ pagination: {
303
+ page,
304
+ pageSize,
305
+ total: total2,
306
+ pageCount: Math.ceil(total2 / pageSize)
307
+ },
308
+ searchInfo: {
309
+ query,
310
+ algorithm: "BM25",
311
+ titleWeight: TITLE_WEIGHT,
312
+ matchingResults: total2,
313
+ corpusSize: allMentions.length,
314
+ hint: total2 > 0 ? "Results are ranked by relevance. Higher bm25Score indicates better match. Title matches are weighted higher than body matches." : "No matches found. Try different or fewer keywords."
315
+ },
316
+ filters: {
317
+ source,
318
+ author,
319
+ keyword,
320
+ sentimentLabel,
321
+ bookmarked,
322
+ viewName,
323
+ subreddit
324
+ }
325
+ },
326
+ null,
327
+ 2
328
+ )
329
+ }
330
+ ]
331
+ };
332
+ }
170
333
  const results = await strapi.documents(MENTION_UID$3).findMany({
171
334
  filters,
172
335
  sort: sort ? [sort] : ["createdAt:desc"],
@@ -189,7 +352,6 @@ async function handleSearchMentions(strapi, args) {
189
352
  pageCount: Math.ceil(total / pageSize)
190
353
  },
191
354
  filters: {
192
- query,
193
355
  source,
194
356
  author,
195
357
  keyword,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "octalens-mentions",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "A Strapi v5 plugin that fetches social mentions from Octolens and exposes them via MCP (Model Context Protocol), enabling AI assistants like Claude to search, analyze, and help write responses to social media mentions.",
5
5
  "keywords": [
6
6
  "strapi",