@poolzin/pool-bot 2026.2.0 → 2026.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +118 -0
- package/README-header.png +0 -0
- package/dist/agents/bash-tools.exec.js +76 -25
- package/dist/agents/cli-runner/helpers.js +9 -11
- package/dist/agents/context.js +1 -1
- package/dist/agents/identity.js +47 -7
- package/dist/agents/memory-search.js +25 -8
- package/dist/agents/model-catalog.js +1 -1
- package/dist/agents/model-selection.js +21 -0
- package/dist/agents/pi-embedded-block-chunker.js +117 -42
- package/dist/agents/pi-embedded-helpers/errors.js +183 -78
- package/dist/agents/pi-embedded-helpers.js +1 -1
- package/dist/agents/pi-embedded-runner/compact.js +8 -10
- package/dist/agents/pi-embedded-runner/model.js +62 -3
- package/dist/agents/pi-embedded-runner/run/attempt.js +21 -11
- package/dist/agents/pi-embedded-runner/run.js +199 -46
- package/dist/agents/pi-embedded-runner/system-prompt.js +10 -2
- package/dist/agents/pi-embedded-subscribe.js +118 -29
- package/dist/agents/pi-tools.js +10 -5
- package/dist/agents/poolbot-tools.js +15 -10
- package/dist/agents/sandbox-paths.js +31 -0
- package/dist/agents/session-tool-result-guard.js +94 -15
- package/dist/agents/shell-utils.js +51 -0
- package/dist/agents/skills/bundled-context.js +23 -0
- package/dist/agents/skills/bundled-dir.js +41 -7
- package/dist/agents/skills-install.js +60 -23
- package/dist/agents/subagent-announce.js +79 -34
- package/dist/agents/tool-policy.conformance.js +14 -0
- package/dist/agents/tool-policy.js +24 -0
- package/dist/agents/tools/cron-tool.js +166 -19
- package/dist/agents/tools/discord-actions-presence.js +78 -0
- package/dist/agents/tools/image-tool.js +1 -1
- package/dist/agents/tools/message-tool.js +56 -2
- package/dist/agents/tools/sessions-history-tool.js +69 -1
- package/dist/agents/tools/web-search.js +211 -42
- package/dist/agents/usage.js +23 -1
- package/dist/agents/workspace-run.js +67 -0
- package/dist/agents/workspace-templates.js +44 -0
- package/dist/auto-reply/command-auth.js +121 -6
- package/dist/auto-reply/envelope.js +74 -82
- package/dist/auto-reply/reply/commands-compact.js +1 -0
- package/dist/auto-reply/reply/commands-context-report.js +1 -0
- package/dist/auto-reply/reply/commands-context.js +1 -0
- package/dist/auto-reply/reply/commands-models.js +107 -60
- package/dist/auto-reply/reply/commands-ptt.js +171 -0
- package/dist/auto-reply/reply/get-reply-run.js +2 -1
- package/dist/auto-reply/reply/inbound-context.js +5 -1
- package/dist/auto-reply/reply/mentions.js +1 -1
- package/dist/auto-reply/reply/model-selection.js +3 -3
- package/dist/auto-reply/thinking.js +88 -43
- package/dist/browser/bridge-server.js +13 -0
- package/dist/browser/cdp.helpers.js +38 -24
- package/dist/browser/client-fetch.js +50 -7
- package/dist/browser/config.js +1 -10
- package/dist/browser/extension-relay.js +101 -40
- package/dist/browser/pw-ai.js +1 -1
- package/dist/browser/pw-session.js +143 -8
- package/dist/browser/pw-tools-core.interactions.js +125 -27
- package/dist/browser/pw-tools-core.responses.js +1 -1
- package/dist/browser/pw-tools-core.state.js +1 -1
- package/dist/browser/routes/agent.act.js +86 -41
- package/dist/browser/routes/dispatcher.js +4 -4
- package/dist/browser/screenshot.js +1 -1
- package/dist/browser/server.js +13 -0
- package/dist/build-info.json +3 -3
- package/dist/canvas-host/a2ui/index.html +28 -28
- package/dist/channels/reply-prefix.js +8 -1
- package/dist/cli/cron-cli/register.cron-add.js +61 -40
- package/dist/cli/cron-cli/register.cron-edit.js +60 -34
- package/dist/cli/cron-cli/shared.js +56 -41
- package/dist/cli/dns-cli.js +26 -14
- package/dist/cli/gateway-cli/register.js +37 -19
- package/dist/cli/memory-cli.js +5 -5
- package/dist/cli/parse-bytes.js +37 -0
- package/dist/cli/update-cli.js +173 -52
- package/dist/commands/agent.js +1 -0
- package/dist/commands/auth-choice.apply.oauth.js +1 -1
- package/dist/commands/doctor-config-flow.js +61 -5
- package/dist/commands/doctor-state-migrations.js +1 -1
- package/dist/commands/health.js +1 -1
- package/dist/commands/model-allowlist.js +29 -0
- package/dist/commands/model-picker.js +2 -1
- package/dist/commands/models/list.registry.js +1 -1
- package/dist/commands/models/list.status-command.js +43 -23
- package/dist/commands/models/shared.js +15 -0
- package/dist/commands/onboard-custom.js +384 -0
- package/dist/commands/onboard-non-interactive/local/auth-choice-inference.js +35 -0
- package/dist/commands/onboard-non-interactive/local/auth-choice.js +6 -3
- package/dist/commands/onboard-skills.js +63 -38
- package/dist/commands/openai-model-default.js +41 -0
- package/dist/compat/legacy-names.js +2 -0
- package/dist/config/defaults.js +3 -2
- package/dist/config/paths.js +136 -35
- package/dist/config/plugin-auto-enable.js +21 -5
- package/dist/config/redact-snapshot.js +153 -0
- package/dist/config/schema.field-metadata.js +590 -0
- package/dist/config/schema.js +2 -2
- package/dist/config/sessions/store.js +291 -23
- package/dist/config/zod-schema.agent-defaults.js +3 -0
- package/dist/config/zod-schema.agent-runtime.js +13 -2
- package/dist/config/zod-schema.providers-core.js +142 -0
- package/dist/config/zod-schema.session.js +3 -0
- package/dist/control-ui/assets/{index-CIRDm-Lu.css → index-CSfXd2LO.css} +1 -1
- package/dist/control-ui/assets/{index-CmNMuoem.js → index-HRr1grwl.js} +446 -413
- package/dist/control-ui/assets/index-HRr1grwl.js.map +1 -0
- package/dist/control-ui/index.html +4 -4
- package/dist/cron/delivery.js +57 -0
- package/dist/cron/isolated-agent/delivery-target.js +18 -3
- package/dist/cron/isolated-agent/helpers.js +22 -5
- package/dist/cron/isolated-agent/run.js +172 -63
- package/dist/cron/isolated-agent/session.js +2 -0
- package/dist/cron/normalize.js +356 -28
- package/dist/cron/parse.js +10 -5
- package/dist/cron/run-log.js +35 -10
- package/dist/cron/schedule.js +41 -6
- package/dist/cron/service/jobs.js +208 -35
- package/dist/cron/service/ops.js +72 -16
- package/dist/cron/service/state.js +2 -0
- package/dist/cron/service/store.js +386 -14
- package/dist/cron/service/timer.js +390 -147
- package/dist/cron/session-reaper.js +86 -0
- package/dist/cron/store.js +23 -8
- package/dist/cron/validate-timestamp.js +43 -0
- package/dist/discord/monitor/agent-components.js +438 -0
- package/dist/discord/monitor/allow-list.js +28 -5
- package/dist/discord/monitor/gateway-registry.js +29 -0
- package/dist/discord/monitor/native-command.js +44 -23
- package/dist/discord/monitor/sender-identity.js +45 -0
- package/dist/discord/pluralkit.js +27 -0
- package/dist/discord/send.outbound.js +92 -5
- package/dist/discord/send.shared.js +60 -23
- package/dist/discord/targets.js +84 -1
- package/dist/entry.js +15 -9
- package/dist/extensionAPI.js +8 -0
- package/dist/gateway/control-ui.js +8 -1
- package/dist/gateway/hooks-mapping.js +3 -0
- package/dist/gateway/hooks.js +65 -0
- package/dist/gateway/net.js +96 -31
- package/dist/gateway/node-command-policy.js +50 -15
- package/dist/gateway/origin-check.js +56 -0
- package/dist/gateway/protocol/client-info.js +9 -0
- package/dist/gateway/protocol/index.js +9 -2
- package/dist/gateway/protocol/schema/agents-models-skills.js +71 -1
- package/dist/gateway/protocol/schema/cron.js +22 -10
- package/dist/gateway/protocol/schema/protocol-schemas.js +16 -2
- package/dist/gateway/protocol/schema/sessions.js +12 -0
- package/dist/gateway/server/hooks.js +1 -1
- package/dist/gateway/server-broadcast.js +26 -9
- package/dist/gateway/server-chat.js +112 -23
- package/dist/gateway/server-discovery-runtime.js +10 -2
- package/dist/gateway/server-http.js +109 -11
- package/dist/gateway/server-methods/agent-timestamp.js +60 -0
- package/dist/gateway/server-methods/agents.js +321 -2
- package/dist/gateway/server-methods/usage.js +559 -16
- package/dist/gateway/server-runtime-state.js +22 -8
- package/dist/gateway/server-startup-memory.js +16 -0
- package/dist/gateway/server.impl.js +5 -1
- package/dist/gateway/session-utils.fs.js +23 -25
- package/dist/gateway/session-utils.js +20 -10
- package/dist/gateway/sessions-patch.js +7 -22
- package/dist/gateway/test-helpers.mocks.js +11 -7
- package/dist/gateway/test-helpers.server.js +35 -2
- package/dist/imessage/constants.js +2 -0
- package/dist/imessage/monitor/deliver.js +4 -1
- package/dist/imessage/monitor/monitor-provider.js +51 -1
- package/dist/infra/bonjour-discovery.js +131 -70
- package/dist/infra/control-ui-assets.js +134 -12
- package/dist/infra/errors.js +12 -0
- package/dist/infra/exec-approvals.js +266 -57
- package/dist/infra/format-time/format-datetime.js +79 -0
- package/dist/infra/format-time/format-duration.js +81 -0
- package/dist/infra/format-time/format-relative.js +80 -0
- package/dist/infra/heartbeat-runner.js +140 -49
- package/dist/infra/home-dir.js +54 -0
- package/dist/infra/net/fetch-guard.js +122 -0
- package/dist/infra/net/ssrf.js +65 -29
- package/dist/infra/outbound/abort.js +14 -0
- package/dist/infra/outbound/message-action-runner.js +77 -13
- package/dist/infra/outbound/outbound-session.js +143 -37
- package/dist/infra/poolbot-root.js +43 -1
- package/dist/infra/session-cost-usage.js +631 -41
- package/dist/infra/state-migrations.js +317 -47
- package/dist/infra/update-global.js +35 -0
- package/dist/infra/update-runner.js +149 -43
- package/dist/infra/warning-filter.js +65 -0
- package/dist/infra/widearea-dns.js +30 -9
- package/dist/logging/redact-identifier.js +12 -0
- package/dist/media/fetch.js +81 -58
- package/dist/media/store.js +2 -0
- package/dist/media-understanding/apply.js +403 -3
- package/dist/media-understanding/attachments.js +38 -27
- package/dist/media-understanding/defaults.js +16 -0
- package/dist/media-understanding/providers/deepgram/audio.js +22 -14
- package/dist/media-understanding/providers/google/audio.js +24 -17
- package/dist/media-understanding/providers/google/video.js +24 -17
- package/dist/media-understanding/providers/image.js +3 -3
- package/dist/media-understanding/providers/index.js +4 -1
- package/dist/media-understanding/providers/openai/audio.js +22 -14
- package/dist/media-understanding/providers/shared.js +16 -11
- package/dist/media-understanding/providers/zai/index.js +6 -0
- package/dist/media-understanding/runner.js +158 -90
- package/dist/memory/batch-voyage.js +277 -0
- package/dist/memory/embeddings-voyage.js +75 -0
- package/dist/memory/embeddings.js +28 -16
- package/dist/memory/internal.js +101 -18
- package/dist/memory/manager.js +154 -48
- package/dist/memory/search-manager.js +173 -0
- package/dist/memory/session-files.js +9 -3
- package/dist/node-host/runner.js +34 -24
- package/dist/node-host/with-timeout.js +27 -0
- package/dist/plugins/commands.js +5 -1
- package/dist/plugins/config-state.js +86 -7
- package/dist/plugins/source-display.js +51 -0
- package/dist/process/exec.js +20 -2
- package/dist/routing/resolve-route.js +12 -0
- package/dist/routing/session-key.js +15 -0
- package/dist/runtime.js +2 -0
- package/dist/security/audit-extra.async.js +601 -0
- package/dist/security/audit-extra.js +2 -830
- package/dist/security/audit-extra.sync.js +505 -0
- package/dist/security/channel-metadata.js +34 -0
- package/dist/security/external-content.js +88 -6
- package/dist/security/skill-scanner.js +330 -0
- package/dist/sessions/session-key-utils.js +7 -0
- package/dist/signal/monitor/event-handler.js +80 -1
- package/dist/slack/monitor/media.js +85 -15
- package/dist/tailscale/detect.js +1 -2
- package/dist/telegram/bot/helpers.js +109 -28
- package/dist/telegram/bot-handlers.js +144 -3
- package/dist/telegram/bot-message-context.js +37 -10
- package/dist/telegram/bot-message-dispatch.js +54 -17
- package/dist/telegram/bot-native-commands.js +86 -29
- package/dist/telegram/bot.js +30 -29
- package/dist/telegram/model-buttons.js +163 -0
- package/dist/telegram/monitor.js +110 -85
- package/dist/telegram/send.js +129 -47
- package/dist/terminal/restore.js +45 -0
- package/dist/test-helpers/state-dir-env.js +16 -0
- package/dist/tts/tts.js +12 -6
- package/dist/tui/tui-session-actions.js +166 -54
- package/dist/utils/fetch-timeout.js +20 -0
- package/dist/utils/normalize-secret-input.js +19 -0
- package/dist/utils/transcript-tools.js +58 -0
- package/dist/utils.js +45 -14
- package/dist/version.js +42 -5
- package/dist/wizard/clack-prompter.js +9 -6
- package/extensions/googlechat/node_modules/.bin/poolbot +21 -0
- package/extensions/googlechat/package.json +2 -2
- package/extensions/line/node_modules/.bin/poolbot +21 -0
- package/extensions/line/package.json +1 -1
- package/extensions/matrix/node_modules/.bin/poolbot +21 -0
- package/extensions/matrix/package.json +1 -1
- package/extensions/memory-core/node_modules/.bin/poolbot +21 -0
- package/extensions/memory-core/package.json +4 -1
- package/extensions/twitch/node_modules/.bin/poolbot +21 -0
- package/extensions/twitch/package.json +1 -1
- package/package.json +183 -24
- package/dist/control-ui/assets/index-CmNMuoem.js.map +0 -1
|
@@ -6,8 +6,9 @@ export class EmbeddedBlockChunker {
|
|
|
6
6
|
this.#chunking = chunking;
|
|
7
7
|
}
|
|
8
8
|
append(text) {
|
|
9
|
-
if (!text)
|
|
9
|
+
if (!text) {
|
|
10
10
|
return;
|
|
11
|
+
}
|
|
11
12
|
this.#buffer += text;
|
|
12
13
|
}
|
|
13
14
|
reset() {
|
|
@@ -25,8 +26,15 @@ export class EmbeddedBlockChunker {
|
|
|
25
26
|
const { force, emit } = params;
|
|
26
27
|
const minChars = Math.max(1, Math.floor(this.#chunking.minChars));
|
|
27
28
|
const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars));
|
|
28
|
-
|
|
29
|
+
// When flushOnParagraph is set (chunkMode="newline"), eagerly split on \n\n
|
|
30
|
+
// boundaries regardless of minChars so each paragraph is sent immediately.
|
|
31
|
+
if (this.#chunking.flushOnParagraph && !force) {
|
|
32
|
+
this.#drainParagraphs(emit, maxChars);
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
if (this.#buffer.length < minChars && !force) {
|
|
29
36
|
return;
|
|
37
|
+
}
|
|
30
38
|
if (force && this.#buffer.length <= maxChars) {
|
|
31
39
|
if (this.#buffer.trim().length > 0) {
|
|
32
40
|
emit(this.#buffer);
|
|
@@ -45,44 +53,81 @@ export class EmbeddedBlockChunker {
|
|
|
45
53
|
}
|
|
46
54
|
return;
|
|
47
55
|
}
|
|
48
|
-
|
|
49
|
-
let rawChunk = this.#buffer.slice(0, breakIdx);
|
|
50
|
-
if (rawChunk.trim().length === 0) {
|
|
51
|
-
this.#buffer = stripLeadingNewlines(this.#buffer.slice(breakIdx)).trimStart();
|
|
56
|
+
if (!this.#emitBreakResult(breakResult, emit)) {
|
|
52
57
|
continue;
|
|
53
58
|
}
|
|
54
|
-
|
|
55
|
-
const fenceSplit = breakResult.fenceSplit;
|
|
56
|
-
if (fenceSplit) {
|
|
57
|
-
const closeFence = rawChunk.endsWith("\n")
|
|
58
|
-
? `${fenceSplit.closeFenceLine}\n`
|
|
59
|
-
: `\n${fenceSplit.closeFenceLine}\n`;
|
|
60
|
-
rawChunk = `${rawChunk}${closeFence}`;
|
|
61
|
-
const reopenFence = fenceSplit.reopenFenceLine.endsWith("\n")
|
|
62
|
-
? fenceSplit.reopenFenceLine
|
|
63
|
-
: `${fenceSplit.reopenFenceLine}\n`;
|
|
64
|
-
nextBuffer = `${reopenFence}${nextBuffer}`;
|
|
65
|
-
}
|
|
66
|
-
emit(rawChunk);
|
|
67
|
-
if (fenceSplit) {
|
|
68
|
-
this.#buffer = nextBuffer;
|
|
69
|
-
}
|
|
70
|
-
else {
|
|
71
|
-
const nextStart = breakIdx < this.#buffer.length && /\s/.test(this.#buffer[breakIdx])
|
|
72
|
-
? breakIdx + 1
|
|
73
|
-
: breakIdx;
|
|
74
|
-
this.#buffer = stripLeadingNewlines(this.#buffer.slice(nextStart));
|
|
75
|
-
}
|
|
76
|
-
if (this.#buffer.length < minChars && !force)
|
|
59
|
+
if (this.#buffer.length < minChars && !force) {
|
|
77
60
|
return;
|
|
78
|
-
|
|
61
|
+
}
|
|
62
|
+
if (this.#buffer.length < maxChars && !force) {
|
|
79
63
|
return;
|
|
64
|
+
}
|
|
80
65
|
}
|
|
81
66
|
}
|
|
67
|
+
/** Eagerly emit complete paragraphs (text before \n\n) regardless of minChars. */
|
|
68
|
+
#drainParagraphs(emit, maxChars) {
|
|
69
|
+
while (this.#buffer.length > 0) {
|
|
70
|
+
const fenceSpans = parseFenceSpans(this.#buffer);
|
|
71
|
+
const paragraphBreak = findNextParagraphBreak(this.#buffer, fenceSpans);
|
|
72
|
+
if (!paragraphBreak || paragraphBreak.index > maxChars) {
|
|
73
|
+
// No paragraph boundary yet (or the next boundary is too far). If the
|
|
74
|
+
// buffer exceeds maxChars, fall back to normal break logic to avoid
|
|
75
|
+
// oversized chunks or unbounded accumulation.
|
|
76
|
+
if (this.#buffer.length >= maxChars) {
|
|
77
|
+
const breakResult = this.#pickBreakIndex(this.#buffer, 1);
|
|
78
|
+
if (breakResult.index > 0) {
|
|
79
|
+
this.#emitBreakResult(breakResult, emit);
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
const chunk = this.#buffer.slice(0, paragraphBreak.index);
|
|
86
|
+
if (chunk.trim().length > 0) {
|
|
87
|
+
emit(chunk);
|
|
88
|
+
}
|
|
89
|
+
this.#buffer = stripLeadingNewlines(this.#buffer.slice(paragraphBreak.index + paragraphBreak.length));
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
#emitBreakResult(breakResult, emit) {
|
|
93
|
+
const breakIdx = breakResult.index;
|
|
94
|
+
if (breakIdx <= 0) {
|
|
95
|
+
return false;
|
|
96
|
+
}
|
|
97
|
+
let rawChunk = this.#buffer.slice(0, breakIdx);
|
|
98
|
+
if (rawChunk.trim().length === 0) {
|
|
99
|
+
this.#buffer = stripLeadingNewlines(this.#buffer.slice(breakIdx)).trimStart();
|
|
100
|
+
return false;
|
|
101
|
+
}
|
|
102
|
+
let nextBuffer = this.#buffer.slice(breakIdx);
|
|
103
|
+
const fenceSplit = breakResult.fenceSplit;
|
|
104
|
+
if (fenceSplit) {
|
|
105
|
+
const closeFence = rawChunk.endsWith("\n")
|
|
106
|
+
? `${fenceSplit.closeFenceLine}\n`
|
|
107
|
+
: `\n${fenceSplit.closeFenceLine}\n`;
|
|
108
|
+
rawChunk = `${rawChunk}${closeFence}`;
|
|
109
|
+
const reopenFence = fenceSplit.reopenFenceLine.endsWith("\n")
|
|
110
|
+
? fenceSplit.reopenFenceLine
|
|
111
|
+
: `${fenceSplit.reopenFenceLine}\n`;
|
|
112
|
+
nextBuffer = `${reopenFence}${nextBuffer}`;
|
|
113
|
+
}
|
|
114
|
+
emit(rawChunk);
|
|
115
|
+
if (fenceSplit) {
|
|
116
|
+
this.#buffer = nextBuffer;
|
|
117
|
+
}
|
|
118
|
+
else {
|
|
119
|
+
const nextStart = breakIdx < this.#buffer.length && /\s/.test(this.#buffer[breakIdx])
|
|
120
|
+
? breakIdx + 1
|
|
121
|
+
: breakIdx;
|
|
122
|
+
this.#buffer = stripLeadingNewlines(this.#buffer.slice(nextStart));
|
|
123
|
+
}
|
|
124
|
+
return true;
|
|
125
|
+
}
|
|
82
126
|
#pickSoftBreakIndex(buffer, minCharsOverride) {
|
|
83
127
|
const minChars = Math.max(1, Math.floor(minCharsOverride ?? this.#chunking.minChars));
|
|
84
|
-
if (buffer.length < minChars)
|
|
128
|
+
if (buffer.length < minChars) {
|
|
85
129
|
return { index: -1 };
|
|
130
|
+
}
|
|
86
131
|
const fenceSpans = parseFenceSpans(buffer);
|
|
87
132
|
const preference = this.#chunking.breakPreference ?? "paragraph";
|
|
88
133
|
if (preference === "paragraph") {
|
|
@@ -90,10 +135,12 @@ export class EmbeddedBlockChunker {
|
|
|
90
135
|
while (paragraphIdx !== -1) {
|
|
91
136
|
const candidates = [paragraphIdx, paragraphIdx + 1];
|
|
92
137
|
for (const candidate of candidates) {
|
|
93
|
-
if (candidate < minChars)
|
|
138
|
+
if (candidate < minChars) {
|
|
94
139
|
continue;
|
|
95
|
-
|
|
140
|
+
}
|
|
141
|
+
if (candidate < 0 || candidate >= buffer.length) {
|
|
96
142
|
continue;
|
|
143
|
+
}
|
|
97
144
|
if (isSafeFenceBreak(fenceSpans, candidate)) {
|
|
98
145
|
return { index: candidate };
|
|
99
146
|
}
|
|
@@ -115,23 +162,26 @@ export class EmbeddedBlockChunker {
|
|
|
115
162
|
let sentenceIdx = -1;
|
|
116
163
|
for (const match of matches) {
|
|
117
164
|
const at = match.index ?? -1;
|
|
118
|
-
if (at < minChars)
|
|
165
|
+
if (at < minChars) {
|
|
119
166
|
continue;
|
|
167
|
+
}
|
|
120
168
|
const candidate = at + 1;
|
|
121
169
|
if (isSafeFenceBreak(fenceSpans, candidate)) {
|
|
122
170
|
sentenceIdx = candidate;
|
|
123
171
|
}
|
|
124
172
|
}
|
|
125
|
-
if (sentenceIdx >= minChars)
|
|
173
|
+
if (sentenceIdx >= minChars) {
|
|
126
174
|
return { index: sentenceIdx };
|
|
175
|
+
}
|
|
127
176
|
}
|
|
128
177
|
return { index: -1 };
|
|
129
178
|
}
|
|
130
179
|
#pickBreakIndex(buffer, minCharsOverride) {
|
|
131
180
|
const minChars = Math.max(1, Math.floor(minCharsOverride ?? this.#chunking.minChars));
|
|
132
181
|
const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars));
|
|
133
|
-
if (buffer.length < minChars)
|
|
182
|
+
if (buffer.length < minChars) {
|
|
134
183
|
return { index: -1 };
|
|
184
|
+
}
|
|
135
185
|
const window = buffer.slice(0, Math.min(maxChars, buffer.length));
|
|
136
186
|
const fenceSpans = parseFenceSpans(buffer);
|
|
137
187
|
const preference = this.#chunking.breakPreference ?? "paragraph";
|
|
@@ -140,10 +190,12 @@ export class EmbeddedBlockChunker {
|
|
|
140
190
|
while (paragraphIdx >= minChars) {
|
|
141
191
|
const candidates = [paragraphIdx, paragraphIdx + 1];
|
|
142
192
|
for (const candidate of candidates) {
|
|
143
|
-
if (candidate < minChars)
|
|
193
|
+
if (candidate < minChars) {
|
|
144
194
|
continue;
|
|
145
|
-
|
|
195
|
+
}
|
|
196
|
+
if (candidate < 0 || candidate >= buffer.length) {
|
|
146
197
|
continue;
|
|
198
|
+
}
|
|
147
199
|
if (isSafeFenceBreak(fenceSpans, candidate)) {
|
|
148
200
|
return { index: candidate };
|
|
149
201
|
}
|
|
@@ -165,15 +217,17 @@ export class EmbeddedBlockChunker {
|
|
|
165
217
|
let sentenceIdx = -1;
|
|
166
218
|
for (const match of matches) {
|
|
167
219
|
const at = match.index ?? -1;
|
|
168
|
-
if (at < minChars)
|
|
220
|
+
if (at < minChars) {
|
|
169
221
|
continue;
|
|
222
|
+
}
|
|
170
223
|
const candidate = at + 1;
|
|
171
224
|
if (isSafeFenceBreak(fenceSpans, candidate)) {
|
|
172
225
|
sentenceIdx = candidate;
|
|
173
226
|
}
|
|
174
227
|
}
|
|
175
|
-
if (sentenceIdx >= minChars)
|
|
228
|
+
if (sentenceIdx >= minChars) {
|
|
176
229
|
return { index: sentenceIdx };
|
|
230
|
+
}
|
|
177
231
|
}
|
|
178
232
|
if (preference === "newline" && buffer.length < maxChars) {
|
|
179
233
|
return { index: -1 };
|
|
@@ -184,8 +238,9 @@ export class EmbeddedBlockChunker {
|
|
|
184
238
|
}
|
|
185
239
|
}
|
|
186
240
|
if (buffer.length >= maxChars) {
|
|
187
|
-
if (isSafeFenceBreak(fenceSpans, maxChars))
|
|
241
|
+
if (isSafeFenceBreak(fenceSpans, maxChars)) {
|
|
188
242
|
return { index: maxChars };
|
|
243
|
+
}
|
|
189
244
|
const fence = findFenceSpanAt(fenceSpans, maxChars);
|
|
190
245
|
if (fence) {
|
|
191
246
|
return {
|
|
@@ -203,7 +258,27 @@ export class EmbeddedBlockChunker {
|
|
|
203
258
|
}
|
|
204
259
|
function stripLeadingNewlines(value) {
|
|
205
260
|
let i = 0;
|
|
206
|
-
while (i < value.length && value[i] === "\n")
|
|
261
|
+
while (i < value.length && value[i] === "\n") {
|
|
207
262
|
i++;
|
|
263
|
+
}
|
|
208
264
|
return i > 0 ? value.slice(i) : value;
|
|
209
265
|
}
|
|
266
|
+
function findNextParagraphBreak(buffer, fenceSpans, startIndex = 0) {
|
|
267
|
+
if (startIndex < 0) {
|
|
268
|
+
return null;
|
|
269
|
+
}
|
|
270
|
+
const re = /\n[\t ]*\n+/g;
|
|
271
|
+
re.lastIndex = startIndex;
|
|
272
|
+
let match;
|
|
273
|
+
while ((match = re.exec(buffer)) !== null) {
|
|
274
|
+
const index = match.index ?? -1;
|
|
275
|
+
if (index < 0) {
|
|
276
|
+
continue;
|
|
277
|
+
}
|
|
278
|
+
if (!isSafeFenceBreak(fenceSpans, index)) {
|
|
279
|
+
continue;
|
|
280
|
+
}
|
|
281
|
+
return { index, length: match[0].length };
|
|
282
|
+
}
|
|
283
|
+
return null;
|
|
284
|
+
}
|