npm - @eeacms/volto-eea-chatbot - Versions diffs - 1.0.11 → 1.0.13 - Mend

@eeacms/volto-eea-chatbot 1.0.11 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/CHANGELOG.md +26 -2
package/README.md +8 -8
package/jest-addon.config.js +1 -1
package/package.json +2 -1
package/src/ChatBlock/chat/AIMessage.tsx +32 -25
package/src/ChatBlock/chat/ChatWindow.tsx +1 -0
package/src/ChatBlock/components/HalloumiFeedback.jsx +8 -4
package/src/ChatBlock/components/markdown/ClaimModal.jsx +1 -1
package/src/ChatBlock/components/markdown/ClaimSegments.jsx +2 -3
package/src/ChatBlock/components/markdown/RenderClaimView.jsx +1 -1
package/src/ChatBlock/components/markdown/index.js +41 -15
package/src/ChatBlock/hooks/useChatController.ts +0 -15
package/src/ChatBlock/hooks/useQualityMarkers.js +0 -11
package/src/ChatBlock/style.less +47 -0
package/src/ChatBlock/types/interfaces.ts +1 -0
package/src/halloumi/filtering.js +149 -0
package/src/halloumi/filtering.test.js +44 -0
package/src/halloumi/generative.js +157 -53
package/src/halloumi/generative.test.js +28 -8
package/src/halloumi/markdown-splitter.js +172 -0
package/src/halloumi/markdown-splitter.test.js +133 -0
package/src/halloumi/middleware.js +6 -6
package/src/halloumi/postprocessing.js +0 -26
package/src/halloumi/preprocessing.js +78 -76
package/src/halloumi/preprocessing.test.js +87 -148
package/src/middleware.js +3 -0
package/src/middleware.test.js +2 -0

package/CHANGELOG.md CHANGED

@@ -4,7 +4,32 @@ All notable changes to this project will be documented in this file. Dates are d
 Generated by [`auto-changelog`](https://github.com/CookPete/auto-changelog).
-### [1.0.11](https://github.com/eea/volto-eea-chatbot/compare/1.0.10...1.0.11) - 6 February 2026
+### [1.0.13](https://github.com/eea/volto-eea-chatbot/compare/1.0.12...1.0.13) - 4 March 2026
+#### :house: Internal changes
+- style: Automated code fix [eea-jenkins - [`8a8c3c4`](https://github.com/eea/volto-eea-chatbot/commit/8a8c3c4172a4f669661378cf1b5a3569d85609e6)]
+- chore: [JENKINSFILE] add package version in sonarqube [valentinab25 - [`535d986`](https://github.com/eea/volto-eea-chatbot/commit/535d986b7adc77743a668bc4ac63f835eef58df3)]
+#### :hammer_and_wrench: Others
+- update [Miu Razvan - [`04e6c3f`](https://github.com/eea/volto-eea-chatbot/commit/04e6c3f776bb9920b89a014cc97f8e5dbb284a48)]
+- update [Miu Razvan - [`f9e9beb`](https://github.com/eea/volto-eea-chatbot/commit/f9e9beb0676b215a50226a0db8c5be7c540f26ff)]
+- Forward client ip to onyx/llmgw requests, ref #298095 [Miu Razvan - [`e90a672`](https://github.com/eea/volto-eea-chatbot/commit/e90a672273f2220d2cced4ad53c4b3ed3f295691)]
+### [1.0.12](https://github.com/eea/volto-eea-chatbot/compare/1.0.11...1.0.12) - 23 February 2026
+#### :house: Internal changes
+- style: Automated code fix [eea-jenkins - [`fdcd884`](https://github.com/eea/volto-eea-chatbot/commit/fdcd8848fd4c3f990ca5ea021f407487aebd6010)]
+- chore: [JENKINSFILE] use sonarqube branches [EEA Jenkins - [`3d428d7`](https://github.com/eea/volto-eea-chatbot/commit/3d428d72f32c3d05452b0961c76f5db1c416e05c)]
+#### :hammer_and_wrench: Others
+- fix tests [Miu Razvan - [`1b08a47`](https://github.com/eea/volto-eea-chatbot/commit/1b08a4760f0f16c0a3c2a5be295196f510b8e4f0)]
+- fix tests [Miu Razvan - [`0e1c2a2`](https://github.com/eea/volto-eea-chatbot/commit/0e1c2a23cf17d0ead25132c126892778a74954e0)]
+- update [Miu Razvan - [`b9d3066`](https://github.com/eea/volto-eea-chatbot/commit/b9d306623b9034eb08fc77781a48e4ca59146e54)]
+- Filter non-verifiable sentences [Miu Razvan - [`988a4c7`](https://github.com/eea/volto-eea-chatbot/commit/988a4c71293fda99ffaf02d9750e137332d2a182)]
+### [1.0.11](https://github.com/eea/volto-eea-chatbot/compare/1.0.10...1.0.11) - 7 February 2026
 #### :house: Internal changes
@@ -19,7 +44,6 @@ Generated by [`auto-changelog`](https://github.com/CookPete/auto-changelog).
 - fix eslint [Miu Razvan - [`25b2eba`](https://github.com/eea/volto-eea-chatbot/commit/25b2ebaf1ba7eb186285e8fa1e51e625167a7fd7)]
 - improve coverage to 80% [Miu Razvan - [`7bed26f`](https://github.com/eea/volto-eea-chatbot/commit/7bed26fd8ee9622755f43c5bc625e7a60c5cc09c)]
 - update jest snapshots [Miu Razvan - [`e9e0732`](https://github.com/eea/volto-eea-chatbot/commit/e9e073286fd41fc8f22a5eba248a6cc34eda57d0)]
-- Based on tests improve ui + add playwright utility [Miu Razvan - [`62fda9a`](https://github.com/eea/volto-eea-chatbot/commit/62fda9a572e6255db95ff1a40db3a565c7c9476d)]
 ### [1.0.10](https://github.com/eea/volto-eea-chatbot/compare/1.0.9...1.0.10) - 27 January 2026
 #### :hammer_and_wrench: Others

package/README.md CHANGED

@@ -3,16 +3,16 @@
 [![Releases](https://img.shields.io/github/v/release/eea/volto-eea-chatbot)](https://github.com/eea/volto-eea-chatbot/releases)
 [![Pipeline](https://ci.eionet.europa.eu/buildStatus/icon?job=volto-addons%2Fvolto-eea-chatbot%2Fmaster&subject=master)](https://ci.eionet.europa.eu/view/Github/job/volto-addons/job/volto-eea-chatbot/job/master/display/redirect)
-[![Lines of Code](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-master&metric=ncloc)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-master)
-[![Coverage](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-master&metric=coverage)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-master)
-[![Bugs](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-master&metric=bugs)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-master)
-[![Duplicated Lines (%)](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-master&metric=duplicated_lines_density)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-master)
+[![Lines of Code](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&metric=ncloc)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot)
+[![Coverage](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&metric=coverage)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot)
+[![Bugs](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&metric=bugs)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot)
+[![Duplicated Lines (%)](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&metric=duplicated_lines_density)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot)
 [![Pipeline](https://ci.eionet.europa.eu/buildStatus/icon?job=volto-addons%2Fvolto-eea-chatbot%2Fdevelop&subject=develop)](https://ci.eionet.europa.eu/view/Github/job/volto-addons/job/volto-eea-chatbot/job/develop/display/redirect)
-[![Lines of Code](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-develop&metric=ncloc)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-develop)
-[![Coverage](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-develop&metric=coverage)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-develop)
-[![Bugs](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-develop&metric=bugs)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-develop)
-[![Duplicated Lines (%)](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-develop&metric=duplicated_lines_density)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-develop)
+[![Lines of Code](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&branch=develop&metric=ncloc)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot&branch=develop)
+[![Coverage](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&branch=develop&metric=coverage)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot&branch=develop)
+[![Bugs](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&branch=develop&metric=bugs)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot&branch=develop)
+[![Duplicated Lines (%)](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&branch=develop&metric=duplicated_lines_density)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot&branch=develop)
 [Volto](https://github.com/plone/volto) add-on that integrates an AI-powered chatbot with a customizable interface and advanced settings to tailor its behavior and enhance user interactions.

package/jest-addon.config.js CHANGED

@@ -430,7 +430,7 @@ module.exports = {
       '<rootDir>/node_modules/@plone/volto/jest-addons-loader.js',
   },
   transformIgnorePatterns: [
-    '/node_modules/(?!(@plone|@root|@package|@eeacms)/).*/',
+    '/node_modules/(?!(@plone|@root|@package|@eeacms|compromise|efrt|grad-school|suffix-thumb)/).*/',
   ],
   transform: {
     '^.+\\.js(x)?$': 'babel-jest',

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@eeacms/volto-eea-chatbot",
-  "version": "1.0.11",
+  "version": "1.0.13",
   "description": "@eeacms/volto-eea-chatbot: Volto add-on",
   "main": "src/index.js",
   "author": "European Environment Agency: IDM2 A-Team",
@@ -43,6 +43,7 @@
     "@eeacms/volto-matomo": "*",
     "@microsoft/fetch-event-source": "2.0.1",
     "@plone-collective/volto-sentry": "*",
+    "compromise": "14.14.5",
     "fast-json-patch": "3.1.1",
     "highlight.js": "11.10.0",
     "luxon": "3.5.0",

package/src/ChatBlock/chat/AIMessage.tsx CHANGED

@@ -67,28 +67,30 @@ function addQualityMarkersPlugin() {
 }
 export function addHalloumiContext(doc: any, text: string) {
-  const updatedDate = doc.updated_at
-    ? new Date(doc.updated_at).toLocaleString('en-GB', {
-        year: 'numeric',
-        month: 'long',
-        day: '2-digit',
-        hour: '2-digit',
-        minute: '2-digit',
-      })
-    : '';
-  const docIndex = doc.index ? `DOCUMENT ${doc.index}: ` : '';
-  const sources: any = { web: 'Website', file: 'File' };
-  const sourceType = doc.source_type
-    ? sources[doc.source_type] || capitalize(doc.source_type)
-    : '';
-  const header = `${docIndex}${doc.semantic_identifier}${
-    sourceType ? `\nSource: ${sourceType}` : ''
-  }${updatedDate ? `\nUpdated: ${updatedDate}` : ''}`;
-  return `${header}\n${text}`;
+  // TODO: CLEAN UP
+  // const updatedDate = doc.updated_at
+  //   ? new Date(doc.updated_at).toLocaleString('en-GB', {
+  //       year: 'numeric',
+  //       month: 'long',
+  //       day: '2-digit',
+  //       hour: '2-digit',
+  //       minute: '2-digit',
+  //     })
+  //   : '';
+  // const docIndex = doc.index ? `DOCUMENT ${doc.index}: ` : '';
+  // const sources: any = { web: 'Website', file: 'File' };
+  // const sourceType = doc.source_type
+  //   ? sources[doc.source_type] || capitalize(doc.source_type)
+  //   : '';
+  // const header = `${docIndex}${doc.semantic_identifier}${
+  //   sourceType ? `\nSource: ${sourceType}` : ''
+  // }${updatedDate ? `\nUpdated: ${updatedDate}` : ''}`;
+  // return `${header}\n${text}`;
+  return text.replace(/\u00A0/g, ' ');
 }
 function mapToolDocumentsToText(message: any) {
@@ -142,11 +144,13 @@ function getContextSources(
       );
 }
-function getScoreDetails(claims: any, qualityCheckStages: any) {
+function getScoreDetails(rawClaims: any, qualityCheckStages: any) {
+  const claims = rawClaims.filter((claim: any) => !claim.skipped);
   const score = (
     (claims.length > 0
-      ? claims.reduce((acc: any, { score }: any) => acc + score, 0) /
-        claims.length
+      ? claims
+          .filter((claim: any) => !claim.skipped)
+          .reduce((acc: any, { score }: any) => acc + score, 0) / claims.length
       : 1) * 100
   ).toFixed(0);
@@ -165,6 +169,7 @@ function getScoreDetails(claims: any, qualityCheckStages: any) {
 export function AIMessage({
   message,
+  prevMessage,
   isLoading,
   libs,
   onChoice,
@@ -283,6 +288,7 @@ export function AIMessage({
   );
   const claims = markers?.claims || [];
+  const emptyClaims = markers?.empty || false;
   const { score, scoreStage, scoreColor, isFirstScoreStage } = getScoreDetails(
     claims,
     qualityCheckStages,
@@ -434,6 +440,7 @@ export function AIMessage({
           }}
           showVerifyClaimsButton={showVerifyClaimsButton}
           retryHalloumi={retryHalloumi}
+          emptyClaims={emptyClaims}
         />
       )}

package/src/ChatBlock/chat/ChatWindow.tsx CHANGED

@@ -185,6 +185,7 @@ function ChatWindow({
                 <React.Fragment>
                   <ChatMessage
                     key={message.messageId}
+                    prevMessage={messages[index - 1]}
                     message={message}
                     isLoading={isStreaming}
                     isDeepResearchEnabled={isDeepResearchEnabled}

package/src/ChatBlock/components/HalloumiFeedback.jsx CHANGED

@@ -72,8 +72,10 @@ const HalloumiFeedback = ({
   showVerifyClaimsButton,
   sources,
   retryHalloumi,
+  emptyClaims,
 }) => {
-  const noClaimsScore = markers?.claims[0]?.score === null;
+  const claims = (markers?.claims || []).filter((claim) => !claim.skipped);
+  const noClaimsScore = claims[0]?.score === null;
   const messageBySource =
     'Please allow a few minutes for claim verification when many references are involved.';
@@ -98,7 +100,7 @@ const HalloumiFeedback = ({
       {noClaimsScore && (
         <>
-          <Message color="red">{markers?.claims?.[0].rationale}</Message>
+          <Message color="red">{claims[0].rationale}</Message>
           <Button onClick={retryHalloumi} className="icon">
             <SVGIcon name={RotateIcon} /> Retry Fact-check AI answer
           </Button>
@@ -110,12 +112,14 @@ const HalloumiFeedback = ({
           color={scoreColor}
           className={cx(
             'claim-message',
-            getSupportedBgColor(score / 100, 'claim'),
+            emptyClaims
+              ? 'claim-empty claim-gray-500'
+              : getSupportedBgColor(score / 100, 'claim'),
           )}
           icon
         >
           <MessageContent>
-            {printSlate(halloumiMessage, `${score}%`)}
+            {emptyClaims || printSlate(halloumiMessage, `${score}%`)}
           </MessageContent>
         </Message>
       )}

package/src/ChatBlock/components/markdown/ClaimModal.jsx CHANGED

@@ -30,7 +30,7 @@ const trimNonAlphanumeric = (str) =>
   stripMarkdown(str).replace(/(?:^[^a-zA-Z0-9]+)|(?:[^a-zA-Z0-9]+$)/g, '');
 export function ClaimModal({ claim, markers, text, citedSources }) {
-  const highlightText = trimNonAlphanumeric(text?.[0] || '');
+  const highlightText = trimNonAlphanumeric(text || '');
   return (
     <Modal

package/src/ChatBlock/components/markdown/ClaimSegments.jsx CHANGED

@@ -10,9 +10,8 @@ const VISIBLE_SEGMENTS = 50; // Number of citations to show by default
 export function ClaimSegments({ segmentIds, segments, citedSources }) {
   const joinedSources = citedSources.reduce((acc, source) => {
-    source.startIndex = acc.length ? acc.length + 1 : 0;
-    const sep = acc ? '\n' : '';
-    return acc + sep + source.halloumiContext; // + '\n---\n';
+    source.startIndex = acc.length;
+    return acc + source.halloumiContext;
   }, '');
   const snippets = (segmentIds || [])

package/src/ChatBlock/components/markdown/RenderClaimView.jsx CHANGED

@@ -19,7 +19,7 @@ export const RenderClaimView = (props) => {
   sortedSegments.forEach((segment) => {
     const segmentStart = segment.startOffset - sourceStartIndex;
-    const segmentEnd = segment.endOffset - sourceStartIndex;
+    const segmentEnd = segment.endOffset - sourceStartIndex + 1;
     // Add the text part before the current segment
     if (segmentStart > lastIndex) {

package/src/ChatBlock/components/markdown/index.js CHANGED

@@ -33,31 +33,57 @@ export function components(message, markers, citedSources) {
       return <td {...rest}>{processedChildren}</td>;
     },
     span: (props) => {
-      const { node, ...rest } = props;
+      const { node, children } = props;
       const child = node.children[0];
-      let claim;
       // identifies if the current text belongs to a claim
       if (child.type === 'text' && child.position && markers) {
+        const text = child.value || '';
         const start = child.position.start.offset;
         const end = child.position.end.offset;
-        claim = markers.claims?.find(
+        const claims = markers.claims?.filter(
           (claim) =>
-            (start >= claim.startOffset && end <= claim.endOffset) ||
-            (claim.startOffset >= start && end <= claim.endOffset),
+            claim.score !== null &&
+            ((start >= claim.startOffset && end <= claim.endOffset) ||
+              (start <= claim.endOffset && end >= claim.endOffset) ||
+              (start <= claim.startOffset && end >= claim.startOffset)),
         );
+        if (claims && claims.length > 0) {
+          let relStart = 0;
+          const claimsSegments = claims.map((claim) => ({
+            claim,
+            start: Math.max(0, claim.startOffset - start),
+            end: Math.min(text.length, claim.endOffset - start),
+          }));
+          const segments = claimsSegments.reduce((acc, segment) => {
+            if (relStart < segment.start) {
+              acc.push(child.value.substring(relStart, segment.start));
+            }
+            const claimText = child.value.substring(segment.start, segment.end);
+            acc.push(
+              <ClaimModal
+                claim={segment.claim}
+                markers={markers}
+                text={claimText}
+                citedSources={citedSources}
+              />,
+            );
+            relStart = segment.end;
+            return acc;
+          }, []);
+          if (relStart < text.length) {
+            segments.push(text.substring(relStart));
+          }
+          return segments;
+        }
+        return text;
       }
-      return !claim || claim?.score === null ? (
-        rest.children || []
-      ) : (
-        <ClaimModal
-          claim={claim}
-          markers={markers}
-          text={rest.children}
-          citedSources={citedSources}
-        />
-      );
+      return children || [];
     },
     a: (props) => {
       const { node, children, href, ...rest } = props;

package/src/ChatBlock/hooks/useChatController.ts CHANGED

@@ -22,21 +22,6 @@ function extractRelatedQuestions(str: string): RelatedQuestion[] {
     throw new Error('Related questions were not generated properly');
   }
-  const regex = /\[[\s\S]*?\]/;
-  const match = str.match(regex);
-  if (match) {
-    try {
-      return JSON.parse(match[0]);
-    } catch {
-      // Fallback to line-by-line parsing
-      return str
-        .split('\n')
-        .filter((line) => line.trim())
-        .map((question) => ({ question }));
-    }
-  }
   return str
     .split('\n')
     .filter((line) => line.trim())

package/src/ChatBlock/hooks/useQualityMarkers.js CHANGED

@@ -54,17 +54,6 @@ export function useQualityMarkers(
         return;
       }
-      // // console.log('Halloumi sources:', sources.length, sources);
-      // if (sources.length > 40) {
-      //   // eslint-disable-next-line no-console
-      //   console.warn(
-      //     `Warning: Too many sources (${sources.length}). Skipping quality control.`,
-      //   );
-      //
-      //   setHalloumiResponse(empty(message, TOOLARGE_RATIONALE));
-      //   return;
-      // }
       setIsLoading(true);
       try {

package/src/ChatBlock/style.less CHANGED

@@ -580,6 +580,53 @@ mark {
       background-color: @grey;
     }
   }
+  // Inline code
+  code {
+    padding: 0.15em 0.4em;
+    border-radius: 3px;
+    background-color: #eef0f2;
+    color: @grey;
+    font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
+    font-size: 0.875em;
+    word-break: break-word;
+  }
+  // Code blocks
+  pre {
+    padding: 1em 1.25em;
+    border-radius: @borderRadius;
+    margin: 0 0 @marginBottom;
+    background-color: #1e2a35;
+    color: #e2e8f0;
+    font-size: 0.875rem;
+    line-height: 1.65;
+    overflow-x: auto;
+    scrollbar-color: rgba(255, 255, 255, 0.2) transparent;
+    scrollbar-width: thin;
+    &::-webkit-scrollbar {
+      height: 6px;
+    }
+    &::-webkit-scrollbar-track {
+      background: transparent;
+    }
+    &::-webkit-scrollbar-thumb {
+      border-radius: 3px;
+      background-color: rgba(255, 255, 255, 0.2);
+    }
+    // Reset inline code inside blocks
+    code {
+      padding: 0;
+      background-color: transparent;
+      color: inherit;
+      font-size: inherit;
+      word-break: normal;
+    }
+  }
 }
 .circle {

package/src/ChatBlock/types/interfaces.ts CHANGED

@@ -121,6 +121,7 @@ export interface ToolCallMetadata {
 export interface ChatMessageProps {
   message: Message;
+  prevMessage?: Message;
   isLoading: boolean;
   isDeepResearchEnabled?: boolean;
   libs?: any;

package/src/halloumi/filtering.js ADDED

@@ -0,0 +1,149 @@
+import debug from 'debug';
+import fetch from 'node-fetch';
+const log = debug('halloumi');
+const LLMGW_URL = process.env.LLMGW_URL;
+const LLMGW_API_KEY = process.env.LLMGW_TOKEN;
+const MIN_CONTEXT_SENTENCES_FOR_FILTERING = 75;
+const filterModel = {
+  name: 'Inhouse-LLM/gpt-oss-120b',
+  apiUrl: `${LLMGW_URL}/chat/completions`,
+  apiKey: LLMGW_API_KEY,
+};
+export async function callLLM(apiUrl, apiKey, requestBody, { ip } = {}) {
+  const headers = {
+    'Content-Type': 'application/json',
+    accept: 'application/json',
+  };
+  if (apiKey) {
+    headers['Authorization'] = `Bearer ${apiKey}`;
+  }
+  if (ip) {
+    headers['X-Forwarded-For'] = ip;
+  }
+  const response = await fetch(apiUrl, {
+    method: 'POST',
+    headers,
+    body: JSON.stringify(requestBody),
+  });
+  return response.json();
+}
+function buildClaimFilterPrompt(sentences) {
+  const numberedSentences = sentences
+    .map((s, i) => `${i + 1}. "${s.trim()}"`)
+    .join('\n');
+  return `Given the following numbered sentences, identify which ones are NOT verifiable factual claims.
+Exclude sentences that are:
+- Greetings, preambles, or transitional phrases
+- Opinions, vague statements, or subjective assessments
+- Introductory sentences that set up a list or table
+- Sentences that merely restate or paraphrase the question
+- Conversational closers (e.g., "let me know if you need more")
+- Broad summaries that don't assert a specific fact
+- Disclaimers or meta-commentary about the response itself
+- Sentences that do not provide enough information to verify
+- Sentences that only mention a concept without providing specific information
+Respond with ONLY a comma-separated list of the sentence numbers to exclude. If none should be excluded, respond with "NONE".
+Sentences:
+${numberedSentences}`;
+}
+function buildContextFilterPrompt(contextSentences, claimSentences) {
+  const numberedContext = contextSentences
+    .map((s, i) => `${i + 1}. "${s.trim()}"`)
+    .join('\n');
+  const numberedClaims = claimSentences
+    .map((s, i) => `${i + 1}. "${s.trim()}"`)
+    .join('\n');
+  return `Given the following context sentences and claims, identify which context sentences are NOT relevant to verifying ANY of the claims.
+Context sentences:
+${numberedContext}
+Claims to verify:
+${numberedClaims}
+Respond with ONLY a comma-separated list of the context sentence numbers that are NOT relevant. If all are relevant, respond with "NONE".`;
+}
+/**
+ * Parses a comma-separated list of indices from an LLM response.
+ */
+export function parseExcludeIndices(content, maxIndex) {
+  const excludeIndices = new Set();
+  if (content.trim().toUpperCase() === 'NONE') {
+    return excludeIndices;
+  }
+  const matches = content.match(/\d+/g) || [];
+  for (const match of matches) {
+    const idx = parseInt(match, 10);
+    if (idx >= 1 && idx <= maxIndex) {
+      excludeIndices.add(idx);
+    }
+  }
+  return excludeIndices;
+}
+async function callFilterModel(prompt, { ip } = {}) {
+  const data = {
+    messages: [{ role: 'user', content: prompt }],
+    temperature: 0.0,
+    model: filterModel.name,
+  };
+  const jsonData = await callLLM(filterModel.apiUrl, filterModel.apiKey, data, {
+    ip,
+  });
+  return jsonData.choices?.[0]?.message?.content || '';
+}
+export async function excludeClaimSentences(sentences, { ip } = {}) {
+  if (sentences.length === 0) {
+    return new Set();
+  }
+  try {
+    const prompt = buildClaimFilterPrompt(sentences);
+    const content = await callFilterModel(prompt, { ip });
+    const excludedIndices = parseExcludeIndices(content, sentences.length);
+    log('Claim filter response', excludedIndices.size);
+    return excludedIndices;
+  } catch (error) {
+    log('Claim filter failed, skipping', error);
+    return new Set();
+  }
+}
+export async function excludeContextSentences(
+  contextSentences,
+  claimSentences,
+  { ip } = {},
+) {
+  if (contextSentences.length <= MIN_CONTEXT_SENTENCES_FOR_FILTERING) {
+    return new Set();
+  }
+  try {
+    const prompt = buildContextFilterPrompt(contextSentences, claimSentences);
+    const content = await callFilterModel(prompt, { ip });
+    const excludedIndices = parseExcludeIndices(
+      content,
+      contextSentences.length,
+    );
+    log('Context filter response', excludedIndices.size);
+    return excludedIndices;
+  } catch (error) {
+    log('Context filter failed, skipping', error);
+    return new Set();
+  }
+}

package/src/halloumi/filtering.test.js ADDED

@@ -0,0 +1,44 @@
+import { parseExcludeIndices } from './filtering';
+describe('parseExcludeIndices', () => {
+  it('parses single indices', () => {
+    const result = parseExcludeIndices('1,3,5', 6);
+    expect(result).toEqual(new Set([1, 3, 5]));
+  });
+  it('returns empty set for NONE', () => {
+    const result = parseExcludeIndices('NONE', 10);
+    expect(result).toEqual(new Set());
+  });
+  it('returns empty set for none (lowercase)', () => {
+    const result = parseExcludeIndices('  none  ', 10);
+    expect(result).toEqual(new Set());
+  });
+  it('ignores indices below 1', () => {
+    const result = parseExcludeIndices('0, 1, 3', 5);
+    expect(result).toEqual(new Set([1, 3]));
+  });
+  it('ignores indices above maxIndex', () => {
+    const result = parseExcludeIndices('1, 3, 99', 5);
+    expect(result).toEqual(new Set([1, 3]));
+  });
+  it('handles whitespace variations', () => {
+    const result = parseExcludeIndices('  1 ,  5 , 7  ', 10);
+    expect(result).toEqual(new Set([1, 5, 7]));
+  });
+  it('extracts numbers even from unexpected formats', () => {
+    // Parser uses match(/\d+/g) so it extracts all numbers
+    const result = parseExcludeIndices('1-3, 5', 10);
+    expect(result).toEqual(new Set([1, 3, 5]));
+  });
+  it('returns empty set for empty string with no numbers', () => {
+    const result = parseExcludeIndices('no numbers here', 10);
+    expect(result).toEqual(new Set());
+  });
+});