@sisu-ai/tool-summarize-text 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,18 @@
1
+ # Summarize Text Tool
2
+
3
+ A utility tool that asks the active model to summarize large text bodies into concise outputs. Designed for use alongside `webFetch` or other content tools.
4
+
5
+ ## Tool
6
+ - `summarizeText` — Summarize a given `text` with optional controls for length and style.
7
+
8
+ ## Args
9
+ - `text` (string, required): The input to summarize.
10
+ - `target` ("short"|"medium"|"long", optional): Coarse target length. Default `medium`.
11
+ - `maxChars` (number, optional): Hard cap for output characters. Default `2000`; values below `200` are raised to `200`, and the maximum is `50000`.
12
+ - `bullets` (boolean, optional): Prefer bullet-point output.
13
+ - `includeCitations` (boolean, optional): Ask the model to preserve URLs from the input, and return the URLs found in the final summary as `urls`.
14
+ - `focus` (string, optional): Guidance on what to emphasize.
15
+
16
+ ## Notes
17
+ - Uses `ctx.model.generate` internally with `toolChoice:'none'` to avoid nested tool calls.
18
+ - Inputs longer than 10,000 characters are split into chunks that are summarized individually and then combined into the final summary (a simple map-reduce: chunk→summary→combine).
@@ -0,0 +1,16 @@
1
+ import type { Tool } from '@sisu-ai/core';
2
+ export type TargetLength = 'short' | 'medium' | 'long'; // Coarse length target; the prompt builder maps it to high ('short'), balanced ('medium') or low ('long') compression.
3
+ export interface SummarizeArgs { // Arguments accepted by the summarizeText tool handler.
4
+ text: string; // Input to summarize; the runtime schema requires a non-empty string.
5
+ target?: TargetLength; // Coarse length target; the handler defaults it to 'medium'.
6
+ maxChars?: number; // Output character cap; the schema accepts positive integers up to 50,000 and the handler clamps to 200..50,000 (default 2000).
7
+ bullets?: boolean; // When true, the prompt asks for concise bullet points.
8
+ includeCitations?: boolean; // When true, the prompt asks to preserve URLs and URLs found in the final summary are returned.
9
+ focus?: string; // Optional emphasis hint added to the prompt guidelines.
10
+ }
11
+ export interface SummarizeResult { // Value the summarizeText handler resolves to.
12
+ summary: string; // Final summary text, truncated to the effective character cap.
13
+ urls?: string[]; // Distinct URLs found in the summary; only present when includeCitations is set and at least one URL was found.
14
+ }
15
+ export declare const summarizeText: Tool<SummarizeArgs>; // Tool definition whose arguments are typed as SummarizeArgs; also the module's default export.
16
+ export default summarizeText;
package/dist/index.js ADDED
@@ -0,0 +1,84 @@
1
+ import { z } from 'zod';
2
/**
 * Tool definition: summarizes `text` with the active model via
 * `ctx.model.generate`.
 *
 * Inputs that fit in a single 10,000-character chunk are summarized with one
 * model call. Longer inputs go through a map-reduce: each chunk is summarized
 * on its own, then the partial summaries are synthesized into the final one.
 * Every model call passes `toolChoice: 'none'` and forwards `ctx.signal`.
 *
 * The handler resolves to `{ summary }`, plus `urls` when `includeCitations`
 * is set and the final summary contains at least one URL.
 */
export const summarizeText = {
    name: 'summarizeText',
    description: 'Summarize a block of text using the current model. Useful for condensing large webFetch outputs while keeping key facts and URLs.',
    schema: z.object({
        text: z.string().min(1),
        target: z.enum(['short', 'medium', 'long']).optional(),
        maxChars: z.number().int().positive().max(50_000).optional(),
        bullets: z.boolean().optional(),
        includeCitations: z.boolean().optional(),
        focus: z.string().optional(),
    }),
    handler: async ({ text, target = 'medium', maxChars, bullets, includeCitations, focus }, ctx) => {
        // Effective output cap: defaults to 2000 and is clamped to 200..50,000.
        const cap = Math.min(Math.max(Number(maxChars ?? 2000), 200), 50_000);
        const chunks = chunkText(text, 10_000);
        const chunkSummaries = [];
        // Map step only when the input was actually split. Summarizing a single
        // chunk and then summarizing that summary again would spend a second
        // model call and lose detail for no benefit.
        if (chunks.length > 1) {
            // Per-chunk budget: an even share of the cap plus a 200-character
            // buffer, never above the cap. Loop-invariant, so computed once.
            const charsPerChunk = Math.min(Math.floor(cap / chunks.length) + 200, cap);
            // Chunks are summarized one after another, in input order.
            for (const [i, ch] of chunks.entries()) {
                const prompt = buildPrompt(ch, target, charsPerChunk, bullets, includeCitations, focus, `Part ${i + 1}/${chunks.length}`);
                const res = await ctx.model.generate(prompt, { toolChoice: 'none', signal: ctx.signal });
                const s = String(res?.message?.content ?? '').trim();
                if (s)
                    chunkSummaries.push(s.slice(0, cap));
            }
        }
        const combinedInput = chunkSummaries.join('\n\n');
        // Reduce step. Falls back to the raw text when there are no partial
        // summaries (single-chunk input, or every chunk call returned nothing).
        const finalPrompt = buildPrompt(combinedInput || text, target, cap, bullets, includeCitations, focus, 'Final Synthesis');
        const res = await ctx.model.generate(finalPrompt, { toolChoice: 'none', signal: ctx.signal });
        // Trim before truncating so surrounding whitespace does not eat into the cap.
        const summary = String(res?.message?.content ?? '').trim().slice(0, cap);
        // URLs are collected from the (already truncated) summary only.
        const urls = includeCitations ? extractUrls([summary]) : undefined;
        return { summary, ...(urls && urls.length ? { urls } : {}) };
    }
};
34
+ export default summarizeText;
35
/**
 * Build the system + user message pair for one summarization request.
 *
 * @param text Text to summarize; appended verbatim to the user message.
 * @param target 'short' | 'medium' | 'long'; selects the compression wording
 *   in the system message (anything other than 'short'/'long' is "Balanced").
 * @param maxChars Character limit stated in the guidelines.
 * @param bullets When truthy, adds a bullet-point guideline.
 * @param includeCitations When truthy, adds a URL-preservation guideline.
 * @param focus Optional emphasis hint; blank or whitespace-only values are ignored.
 * @param label Optional tag (e.g. "Part 1/3") prefixed to the user message in brackets.
 * @returns Two messages: `{ role: 'system' }` followed by `{ role: 'user' }`.
 */
function buildPrompt(text, target, maxChars, bullets, includeCitations, focus, label) {
    // The length limit is always present, so the guideline list is never empty.
    const aims = [`Keep under ${maxChars} characters.`];
    if (bullets)
        aims.push('Prefer concise bullet points.');
    if (includeCitations)
        aims.push('Preserve any URLs as citations when relevant.');
    // Skip a focus that is empty after trimming; it would yield a bare "Emphasize:".
    const emphasis = String(focus ?? '').trim();
    if (emphasis)
        aims.push(`Emphasize: ${emphasis}`);
    const density = target === 'short' ? 'High compression' : target === 'long' ? 'Low compression' : 'Balanced compression';
    const sys = `You are a careful summarizer. ${density}. Do not invent facts. If URLs are present, keep them.`;
    const usr = `${label ? `[${label}] ` : ''}Summarize the following text.\n\n${text}`;
    const guide = `Guidelines: ${aims.join(' ')}\n`;
    return [
        { role: 'system', content: sys },
        { role: 'user', content: `${guide}${usr}` }
    ];
}
53
/**
 * Split a string into consecutive pieces of at most `size` UTF-16 code units.
 *
 * A string that already fits is returned as a single-element array (so the
 * empty string yields `['']`). Chunk boundaries never fall between the two
 * halves of a surrogate pair: the boundary moves back by one code unit, or
 * forward by one when moving back would produce an empty chunk (size 1).
 * Concatenating the result always reproduces the input.
 *
 * @param s Text to split.
 * @param size Maximum chunk length in code units; fractional values are floored.
 * @returns The chunks, in input order.
 * @throws RangeError when `size` is not a number of at least 1 (such values
 *   would otherwise loop forever or drop the text).
 */
function chunkText(s, size = 10_000) {
    if (typeof size !== 'number' || Number.isNaN(size) || size < 1) {
        throw new RangeError(`chunkText: size must be a number >= 1, got ${size}`);
    }
    if (s.length <= size)
        return [s];
    const step = Math.floor(size);
    const out = [];
    let start = 0;
    while (start < s.length) {
        let end = Math.min(start + step, s.length);
        if (end < s.length) {
            const before = s.charCodeAt(end - 1);
            const after = s.charCodeAt(end);
            const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
            // Keep the pair together: prefer a shorter chunk, but never an empty one.
            if (splitsPair)
                end = end - 1 > start ? end - 1 : end + 1;
        }
        out.push(s.slice(start, end));
        start = end;
    }
    return out;
}
64
/**
 * Collect the distinct http(s) URLs that appear in the given strings.
 *
 * A URL starts at "http://" or "https://" (case-insensitive) and runs until
 * whitespace or one of the delimiters ) ] " ' < >. Sentence punctuation left
 * at the end of a match (. , ; : ! ?) is stripped, so "see https://a.com/x."
 * yields "https://a.com/x". Matches reduced to a bare scheme are discarded.
 *
 * @remarks
 * This is a pragmatic pattern, not a full URL parser; unusual URLs (for
 * example ones that legitimately contain parentheses) may be cut short.
 *
 * @param contents Strings to scan; non-string entries are skipped.
 * @returns Unique URLs in order of first appearance.
 */
function extractUrls(contents) {
    const urlRe = /https?:\/\/[^\s)\]"'<>]+/gi;
    const trailingPunctRe = /[.,;:!?]+$/;
    const bareSchemeRe = /^https?:\/\/$/i;
    const out = new Set();
    for (const c of contents ?? []) {
        if (typeof c !== 'string')
            continue;
        for (const m of c.matchAll(urlRe)) {
            const url = m[0].replace(trailingPunctRe, '');
            if (!bareSchemeRe.test(url))
                out.add(url);
        }
    }
    return Array.from(out);
}
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "@sisu-ai/tool-summarize-text",
3
+ "version": "2.0.0",
4
+ "type": "module",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "files": [
8
+ "dist"
9
+ ],
10
+ "scripts": {
11
+ "build": "tsc -b"
12
+ },
13
+ "dependencies": {
14
+ "zod": "^3.23.8"
15
+ },
16
+ "peerDependencies": {
17
+ "@sisu-ai/core": "0.3.0"
18
+ },
19
+ "repository": {
20
+ "type": "git",
21
+ "url": "https://github.com/finger-gun/sisu",
22
+ "directory": "packages/tools/summarize-text"
23
+ },
24
+ "homepage": "https://github.com/finger-gun/sisu#readme",
25
+ "bugs": {
26
+ "url": "https://github.com/finger-gun/sisu/issues"
27
+ }
28
+ }