@eeacms/volto-eea-chatbot 1.0.11 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,7 +4,32 @@ All notable changes to this project will be documented in this file. Dates are d
4
4
 
5
5
  Generated by [`auto-changelog`](https://github.com/CookPete/auto-changelog).
6
6
 
7
- ### [1.0.11](https://github.com/eea/volto-eea-chatbot/compare/1.0.10...1.0.11) - 6 February 2026
7
+ ### [1.0.13](https://github.com/eea/volto-eea-chatbot/compare/1.0.12...1.0.13) - 4 March 2026
8
+
9
+ #### :house: Internal changes
10
+
11
+ - style: Automated code fix [eea-jenkins - [`8a8c3c4`](https://github.com/eea/volto-eea-chatbot/commit/8a8c3c4172a4f669661378cf1b5a3569d85609e6)]
12
+ - chore: [JENKINSFILE] add package version in sonarqube [valentinab25 - [`535d986`](https://github.com/eea/volto-eea-chatbot/commit/535d986b7adc77743a668bc4ac63f835eef58df3)]
13
+
14
+ #### :hammer_and_wrench: Others
15
+
16
+ - update [Miu Razvan - [`04e6c3f`](https://github.com/eea/volto-eea-chatbot/commit/04e6c3f776bb9920b89a014cc97f8e5dbb284a48)]
17
+ - update [Miu Razvan - [`f9e9beb`](https://github.com/eea/volto-eea-chatbot/commit/f9e9beb0676b215a50226a0db8c5be7c540f26ff)]
18
+ - Forward client ip to onyx/llmgw requests, ref #298095 [Miu Razvan - [`e90a672`](https://github.com/eea/volto-eea-chatbot/commit/e90a672273f2220d2cced4ad53c4b3ed3f295691)]
19
+ ### [1.0.12](https://github.com/eea/volto-eea-chatbot/compare/1.0.11...1.0.12) - 23 February 2026
20
+
21
+ #### :house: Internal changes
22
+
23
+ - style: Automated code fix [eea-jenkins - [`fdcd884`](https://github.com/eea/volto-eea-chatbot/commit/fdcd8848fd4c3f990ca5ea021f407487aebd6010)]
24
+ - chore: [JENKINSFILE] use sonarqube branches [EEA Jenkins - [`3d428d7`](https://github.com/eea/volto-eea-chatbot/commit/3d428d72f32c3d05452b0961c76f5db1c416e05c)]
25
+
26
+ #### :hammer_and_wrench: Others
27
+
28
+ - fix tests [Miu Razvan - [`1b08a47`](https://github.com/eea/volto-eea-chatbot/commit/1b08a4760f0f16c0a3c2a5be295196f510b8e4f0)]
29
+ - fix tests [Miu Razvan - [`0e1c2a2`](https://github.com/eea/volto-eea-chatbot/commit/0e1c2a23cf17d0ead25132c126892778a74954e0)]
30
+ - update [Miu Razvan - [`b9d3066`](https://github.com/eea/volto-eea-chatbot/commit/b9d306623b9034eb08fc77781a48e4ca59146e54)]
31
+ - Filter non-verifiable sentences [Miu Razvan - [`988a4c7`](https://github.com/eea/volto-eea-chatbot/commit/988a4c71293fda99ffaf02d9750e137332d2a182)]
32
+ ### [1.0.11](https://github.com/eea/volto-eea-chatbot/compare/1.0.10...1.0.11) - 7 February 2026
8
33
 
9
34
  #### :house: Internal changes
10
35
 
@@ -19,7 +44,6 @@ Generated by [`auto-changelog`](https://github.com/CookPete/auto-changelog).
19
44
  - fix eslint [Miu Razvan - [`25b2eba`](https://github.com/eea/volto-eea-chatbot/commit/25b2ebaf1ba7eb186285e8fa1e51e625167a7fd7)]
20
45
  - improve coverage to 80% [Miu Razvan - [`7bed26f`](https://github.com/eea/volto-eea-chatbot/commit/7bed26fd8ee9622755f43c5bc625e7a60c5cc09c)]
21
46
  - update jest snapshots [Miu Razvan - [`e9e0732`](https://github.com/eea/volto-eea-chatbot/commit/e9e073286fd41fc8f22a5eba248a6cc34eda57d0)]
22
- - Based on tests improve ui + add playwright utility [Miu Razvan - [`62fda9a`](https://github.com/eea/volto-eea-chatbot/commit/62fda9a572e6255db95ff1a40db3a565c7c9476d)]
23
47
  ### [1.0.10](https://github.com/eea/volto-eea-chatbot/compare/1.0.9...1.0.10) - 27 January 2026
24
48
 
25
49
  #### :hammer_and_wrench: Others
package/README.md CHANGED
@@ -3,16 +3,16 @@
3
3
  [![Releases](https://img.shields.io/github/v/release/eea/volto-eea-chatbot)](https://github.com/eea/volto-eea-chatbot/releases)
4
4
 
5
5
  [![Pipeline](https://ci.eionet.europa.eu/buildStatus/icon?job=volto-addons%2Fvolto-eea-chatbot%2Fmaster&subject=master)](https://ci.eionet.europa.eu/view/Github/job/volto-addons/job/volto-eea-chatbot/job/master/display/redirect)
6
- [![Lines of Code](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-master&metric=ncloc)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-master)
7
- [![Coverage](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-master&metric=coverage)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-master)
8
- [![Bugs](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-master&metric=bugs)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-master)
9
- [![Duplicated Lines (%)](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-master&metric=duplicated_lines_density)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-master)
6
+ [![Lines of Code](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&metric=ncloc)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot)
7
+ [![Coverage](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&metric=coverage)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot)
8
+ [![Bugs](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&metric=bugs)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot)
9
+ [![Duplicated Lines (%)](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&metric=duplicated_lines_density)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot)
10
10
 
11
11
  [![Pipeline](https://ci.eionet.europa.eu/buildStatus/icon?job=volto-addons%2Fvolto-eea-chatbot%2Fdevelop&subject=develop)](https://ci.eionet.europa.eu/view/Github/job/volto-addons/job/volto-eea-chatbot/job/develop/display/redirect)
12
- [![Lines of Code](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-develop&metric=ncloc)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-develop)
13
- [![Coverage](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-develop&metric=coverage)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-develop)
14
- [![Bugs](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-develop&metric=bugs)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-develop)
15
- [![Duplicated Lines (%)](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot-develop&metric=duplicated_lines_density)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot-develop)
12
+ [![Lines of Code](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&branch=develop&metric=ncloc)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot&branch=develop)
13
+ [![Coverage](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&branch=develop&metric=coverage)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot&branch=develop)
14
+ [![Bugs](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&branch=develop&metric=bugs)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot&branch=develop)
15
+ [![Duplicated Lines (%)](https://sonarqube.eea.europa.eu/api/project_badges/measure?project=volto-eea-chatbot&branch=develop&metric=duplicated_lines_density)](https://sonarqube.eea.europa.eu/dashboard?id=volto-eea-chatbot&branch=develop)
16
16
 
17
17
  [Volto](https://github.com/plone/volto) add-on that integrates an AI-powered chatbot with a customizable interface and advanced settings to tailor its behavior and enhance user interactions.
18
18
 
@@ -430,7 +430,7 @@ module.exports = {
430
430
  '<rootDir>/node_modules/@plone/volto/jest-addons-loader.js',
431
431
  },
432
432
  transformIgnorePatterns: [
433
- '/node_modules/(?!(@plone|@root|@package|@eeacms)/).*/',
433
+ '/node_modules/(?!(@plone|@root|@package|@eeacms|compromise|efrt|grad-school|suffix-thumb)/).*/',
434
434
  ],
435
435
  transform: {
436
436
  '^.+\\.js(x)?$': 'babel-jest',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@eeacms/volto-eea-chatbot",
3
- "version": "1.0.11",
3
+ "version": "1.0.13",
4
4
  "description": "@eeacms/volto-eea-chatbot: Volto add-on",
5
5
  "main": "src/index.js",
6
6
  "author": "European Environment Agency: IDM2 A-Team",
@@ -43,6 +43,7 @@
43
43
  "@eeacms/volto-matomo": "*",
44
44
  "@microsoft/fetch-event-source": "2.0.1",
45
45
  "@plone-collective/volto-sentry": "*",
46
+ "compromise": "14.14.5",
46
47
  "fast-json-patch": "3.1.1",
47
48
  "highlight.js": "11.10.0",
48
49
  "luxon": "3.5.0",
@@ -67,28 +67,30 @@ function addQualityMarkersPlugin() {
67
67
  }
68
68
 
69
69
  export function addHalloumiContext(doc: any, text: string) {
70
- const updatedDate = doc.updated_at
71
- ? new Date(doc.updated_at).toLocaleString('en-GB', {
72
- year: 'numeric',
73
- month: 'long',
74
- day: '2-digit',
75
- hour: '2-digit',
76
- minute: '2-digit',
77
- })
78
- : '';
79
-
80
- const docIndex = doc.index ? `DOCUMENT ${doc.index}: ` : '';
81
- const sources: any = { web: 'Website', file: 'File' };
82
-
83
- const sourceType = doc.source_type
84
- ? sources[doc.source_type] || capitalize(doc.source_type)
85
- : '';
86
-
87
- const header = `${docIndex}${doc.semantic_identifier}${
88
- sourceType ? `\nSource: ${sourceType}` : ''
89
- }${updatedDate ? `\nUpdated: ${updatedDate}` : ''}`;
90
-
91
- return `${header}\n${text}`;
70
+ // TODO: CLEAN UP
71
+ // const updatedDate = doc.updated_at
72
+ // ? new Date(doc.updated_at).toLocaleString('en-GB', {
73
+ // year: 'numeric',
74
+ // month: 'long',
75
+ // day: '2-digit',
76
+ // hour: '2-digit',
77
+ // minute: '2-digit',
78
+ // })
79
+ // : '';
80
+
81
+ // const docIndex = doc.index ? `DOCUMENT ${doc.index}: ` : '';
82
+ // const sources: any = { web: 'Website', file: 'File' };
83
+
84
+ // const sourceType = doc.source_type
85
+ // ? sources[doc.source_type] || capitalize(doc.source_type)
86
+ // : '';
87
+
88
+ // const header = `${docIndex}${doc.semantic_identifier}${
89
+ // sourceType ? `\nSource: ${sourceType}` : ''
90
+ // }${updatedDate ? `\nUpdated: ${updatedDate}` : ''}`;
91
+
92
+ // return `${header}\n${text}`;
93
+ return text.replace(/\u00A0/g, ' ');
92
94
  }
93
95
 
94
96
  function mapToolDocumentsToText(message: any) {
@@ -142,11 +144,13 @@ function getContextSources(
142
144
  );
143
145
  }
144
146
 
145
- function getScoreDetails(claims: any, qualityCheckStages: any) {
147
+ function getScoreDetails(rawClaims: any, qualityCheckStages: any) {
148
+ const claims = rawClaims.filter((claim: any) => !claim.skipped);
146
149
  const score = (
147
150
  (claims.length > 0
148
- ? claims.reduce((acc: any, { score }: any) => acc + score, 0) /
149
- claims.length
151
+ ? claims
152
+ .filter((claim: any) => !claim.skipped)
153
+ .reduce((acc: any, { score }: any) => acc + score, 0) / claims.length
150
154
  : 1) * 100
151
155
  ).toFixed(0);
152
156
 
@@ -165,6 +169,7 @@ function getScoreDetails(claims: any, qualityCheckStages: any) {
165
169
 
166
170
  export function AIMessage({
167
171
  message,
172
+ prevMessage,
168
173
  isLoading,
169
174
  libs,
170
175
  onChoice,
@@ -283,6 +288,7 @@ export function AIMessage({
283
288
  );
284
289
 
285
290
  const claims = markers?.claims || [];
291
+ const emptyClaims = markers?.empty || false;
286
292
  const { score, scoreStage, scoreColor, isFirstScoreStage } = getScoreDetails(
287
293
  claims,
288
294
  qualityCheckStages,
@@ -434,6 +440,7 @@ export function AIMessage({
434
440
  }}
435
441
  showVerifyClaimsButton={showVerifyClaimsButton}
436
442
  retryHalloumi={retryHalloumi}
443
+ emptyClaims={emptyClaims}
437
444
  />
438
445
  )}
439
446
 
@@ -185,6 +185,7 @@ function ChatWindow({
185
185
  <React.Fragment>
186
186
  <ChatMessage
187
187
  key={message.messageId}
188
+ prevMessage={messages[index - 1]}
188
189
  message={message}
189
190
  isLoading={isStreaming}
190
191
  isDeepResearchEnabled={isDeepResearchEnabled}
@@ -72,8 +72,10 @@ const HalloumiFeedback = ({
72
72
  showVerifyClaimsButton,
73
73
  sources,
74
74
  retryHalloumi,
75
+ emptyClaims,
75
76
  }) => {
76
- const noClaimsScore = markers?.claims[0]?.score === null;
77
+ const claims = (markers?.claims || []).filter((claim) => !claim.skipped);
78
+ const noClaimsScore = claims[0]?.score === null;
77
79
  const messageBySource =
78
80
  'Please allow a few minutes for claim verification when many references are involved.';
79
81
 
@@ -98,7 +100,7 @@ const HalloumiFeedback = ({
98
100
 
99
101
  {noClaimsScore && (
100
102
  <>
101
- <Message color="red">{markers?.claims?.[0].rationale}</Message>
103
+ <Message color="red">{claims[0].rationale}</Message>
102
104
  <Button onClick={retryHalloumi} className="icon">
103
105
  <SVGIcon name={RotateIcon} /> Retry Fact-check AI answer
104
106
  </Button>
@@ -110,12 +112,14 @@ const HalloumiFeedback = ({
110
112
  color={scoreColor}
111
113
  className={cx(
112
114
  'claim-message',
113
- getSupportedBgColor(score / 100, 'claim'),
115
+ emptyClaims
116
+ ? 'claim-empty claim-gray-500'
117
+ : getSupportedBgColor(score / 100, 'claim'),
114
118
  )}
115
119
  icon
116
120
  >
117
121
  <MessageContent>
118
- {printSlate(halloumiMessage, `${score}%`)}
122
+ {emptyClaims || printSlate(halloumiMessage, `${score}%`)}
119
123
  </MessageContent>
120
124
  </Message>
121
125
  )}
@@ -30,7 +30,7 @@ const trimNonAlphanumeric = (str) =>
30
30
  stripMarkdown(str).replace(/(?:^[^a-zA-Z0-9]+)|(?:[^a-zA-Z0-9]+$)/g, '');
31
31
 
32
32
  export function ClaimModal({ claim, markers, text, citedSources }) {
33
- const highlightText = trimNonAlphanumeric(text?.[0] || '');
33
+ const highlightText = trimNonAlphanumeric(text || '');
34
34
 
35
35
  return (
36
36
  <Modal
@@ -10,9 +10,8 @@ const VISIBLE_SEGMENTS = 50; // Number of citations to show by default
10
10
 
11
11
  export function ClaimSegments({ segmentIds, segments, citedSources }) {
12
12
  const joinedSources = citedSources.reduce((acc, source) => {
13
- source.startIndex = acc.length ? acc.length + 1 : 0;
14
- const sep = acc ? '\n' : '';
15
- return acc + sep + source.halloumiContext; // + '\n---\n';
13
+ source.startIndex = acc.length;
14
+ return acc + source.halloumiContext;
16
15
  }, '');
17
16
 
18
17
  const snippets = (segmentIds || [])
@@ -19,7 +19,7 @@ export const RenderClaimView = (props) => {
19
19
 
20
20
  sortedSegments.forEach((segment) => {
21
21
  const segmentStart = segment.startOffset - sourceStartIndex;
22
- const segmentEnd = segment.endOffset - sourceStartIndex;
22
+ const segmentEnd = segment.endOffset - sourceStartIndex + 1;
23
23
 
24
24
  // Add the text part before the current segment
25
25
  if (segmentStart > lastIndex) {
@@ -33,31 +33,57 @@ export function components(message, markers, citedSources) {
33
33
  return <td {...rest}>{processedChildren}</td>;
34
34
  },
35
35
  span: (props) => {
36
- const { node, ...rest } = props;
36
+ const { node, children } = props;
37
37
  const child = node.children[0];
38
- let claim;
39
38
 
40
39
  // identifies if the current text belongs to a claim
41
40
  if (child.type === 'text' && child.position && markers) {
41
+ const text = child.value || '';
42
42
  const start = child.position.start.offset;
43
43
  const end = child.position.end.offset;
44
- claim = markers.claims?.find(
44
+ const claims = markers.claims?.filter(
45
45
  (claim) =>
46
- (start >= claim.startOffset && end <= claim.endOffset) ||
47
- (claim.startOffset >= start && end <= claim.endOffset),
46
+ claim.score !== null &&
47
+ ((start >= claim.startOffset && end <= claim.endOffset) ||
48
+ (start <= claim.endOffset && end >= claim.endOffset) ||
49
+ (start <= claim.startOffset && end >= claim.startOffset)),
48
50
  );
51
+
52
+ if (claims && claims.length > 0) {
53
+ let relStart = 0;
54
+ const claimsSegments = claims.map((claim) => ({
55
+ claim,
56
+ start: Math.max(0, claim.startOffset - start),
57
+ end: Math.min(text.length, claim.endOffset - start),
58
+ }));
59
+ const segments = claimsSegments.reduce((acc, segment) => {
60
+ if (relStart < segment.start) {
61
+ acc.push(child.value.substring(relStart, segment.start));
62
+ }
63
+ const claimText = child.value.substring(segment.start, segment.end);
64
+ acc.push(
65
+ <ClaimModal
66
+ claim={segment.claim}
67
+ markers={markers}
68
+ text={claimText}
69
+ citedSources={citedSources}
70
+ />,
71
+ );
72
+ relStart = segment.end;
73
+ return acc;
74
+ }, []);
75
+
76
+ if (relStart < text.length) {
77
+ segments.push(text.substring(relStart));
78
+ }
79
+
80
+ return segments;
81
+ }
82
+
83
+ return text;
49
84
  }
50
85
 
51
- return !claim || claim?.score === null ? (
52
- rest.children || []
53
- ) : (
54
- <ClaimModal
55
- claim={claim}
56
- markers={markers}
57
- text={rest.children}
58
- citedSources={citedSources}
59
- />
60
- );
86
+ return children || [];
61
87
  },
62
88
  a: (props) => {
63
89
  const { node, children, href, ...rest } = props;
@@ -22,21 +22,6 @@ function extractRelatedQuestions(str: string): RelatedQuestion[] {
22
22
  throw new Error('Related questions were not generated properly');
23
23
  }
24
24
 
25
- const regex = /\[[\s\S]*?\]/;
26
- const match = str.match(regex);
27
-
28
- if (match) {
29
- try {
30
- return JSON.parse(match[0]);
31
- } catch {
32
- // Fallback to line-by-line parsing
33
- return str
34
- .split('\n')
35
- .filter((line) => line.trim())
36
- .map((question) => ({ question }));
37
- }
38
- }
39
-
40
25
  return str
41
26
  .split('\n')
42
27
  .filter((line) => line.trim())
@@ -54,17 +54,6 @@ export function useQualityMarkers(
54
54
  return;
55
55
  }
56
56
 
57
- // // console.log('Halloumi sources:', sources.length, sources);
58
- // if (sources.length > 40) {
59
- // // eslint-disable-next-line no-console
60
- // console.warn(
61
- // `Warning: Too many sources (${sources.length}). Skipping quality control.`,
62
- // );
63
- //
64
- // setHalloumiResponse(empty(message, TOOLARGE_RATIONALE));
65
- // return;
66
- // }
67
-
68
57
  setIsLoading(true);
69
58
 
70
59
  try {
@@ -580,6 +580,53 @@ mark {
580
580
  background-color: @grey;
581
581
  }
582
582
  }
583
+
584
+ // Inline code
585
+ code {
586
+ padding: 0.15em 0.4em;
587
+ border-radius: 3px;
588
+ background-color: #eef0f2;
589
+ color: @grey;
590
+ font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
591
+ font-size: 0.875em;
592
+ word-break: break-word;
593
+ }
594
+
595
+ // Code blocks
596
+ pre {
597
+ padding: 1em 1.25em;
598
+ border-radius: @borderRadius;
599
+ margin: 0 0 @marginBottom;
600
+ background-color: #1e2a35;
601
+ color: #e2e8f0;
602
+ font-size: 0.875rem;
603
+ line-height: 1.65;
604
+ overflow-x: auto;
605
+ scrollbar-color: rgba(255, 255, 255, 0.2) transparent;
606
+ scrollbar-width: thin;
607
+
608
+ &::-webkit-scrollbar {
609
+ height: 6px;
610
+ }
611
+
612
+ &::-webkit-scrollbar-track {
613
+ background: transparent;
614
+ }
615
+
616
+ &::-webkit-scrollbar-thumb {
617
+ border-radius: 3px;
618
+ background-color: rgba(255, 255, 255, 0.2);
619
+ }
620
+
621
+ // Reset inline code inside blocks
622
+ code {
623
+ padding: 0;
624
+ background-color: transparent;
625
+ color: inherit;
626
+ font-size: inherit;
627
+ word-break: normal;
628
+ }
629
+ }
583
630
  }
584
631
 
585
632
  .circle {
@@ -121,6 +121,7 @@ export interface ToolCallMetadata {
121
121
 
122
122
  export interface ChatMessageProps {
123
123
  message: Message;
124
+ prevMessage?: Message;
124
125
  isLoading: boolean;
125
126
  isDeepResearchEnabled?: boolean;
126
127
  libs?: any;
@@ -0,0 +1,149 @@
1
+ import debug from 'debug';
2
+ import fetch from 'node-fetch';
3
+
4
+ const log = debug('halloumi');
5
+
6
+ const LLMGW_URL = process.env.LLMGW_URL;
7
+ const LLMGW_API_KEY = process.env.LLMGW_TOKEN;
8
+ const MIN_CONTEXT_SENTENCES_FOR_FILTERING = 75;
9
+
10
+ const filterModel = {
11
+ name: 'Inhouse-LLM/gpt-oss-120b',
12
+ apiUrl: `${LLMGW_URL}/chat/completions`,
13
+ apiKey: LLMGW_API_KEY,
14
+ };
15
+
16
/**
 * POSTs a JSON payload to an LLM endpoint and returns the parsed JSON reply.
 *
 * @param {string} apiUrl - URL passed to `fetch`.
 * @param {string} [apiKey] - When truthy, sent as an `Authorization: Bearer` header.
 * @param {*} requestBody - Value serialized with `JSON.stringify` as the request body.
 * @param {{ ip?: string }} [options] - When `ip` is truthy it is sent as `X-Forwarded-For`.
 * @returns {Promise<*>} The response body parsed as JSON.
 * @throws {Error} When the endpoint reports a non-2xx status (the error carries
 *   a numeric `status` property), or when `fetch` / JSON parsing itself fails.
 */
export async function callLLM(apiUrl, apiKey, requestBody, { ip } = {}) {
  const headers = {
    'Content-Type': 'application/json',
    accept: 'application/json',
  };
  if (apiKey) {
    headers['Authorization'] = `Bearer ${apiKey}`;
  }
  if (ip) {
    headers['X-Forwarded-For'] = ip;
  }

  const response = await fetch(apiUrl, {
    method: 'POST',
    headers,
    body: JSON.stringify(requestBody),
  });

  // Surface HTTP failures explicitly instead of returning an error payload as
  // if it were a completion (or failing later with an opaque JSON parse error
  // on a non-JSON error page). Compared strictly against `false` so minimal
  // response stubs that only implement `json()` keep working.
  if (response.ok === false) {
    const error = new Error(
      `LLM request to ${apiUrl} failed with status ${response.status} ${response.statusText}`,
    );
    error.status = response.status;
    throw error;
  }

  return response.json();
}
36
+
37
/**
 * Builds the prompt that asks the model which of the given sentences are NOT
 * verifiable factual claims.
 *
 * Each sentence is trimmed, wrapped in double quotes and numbered from 1,
 * one per line, then appended after the fixed instructions.
 *
 * @param {string[]} sentences - Sentences to number and list in the prompt.
 * @returns {string} The complete prompt text.
 */
function buildClaimFilterPrompt(sentences) {
  const toNumberedLine = (sentence, position) =>
    `${position + 1}. "${sentence.trim()}"`;
  const numberedSentences = sentences.map(toNumberedLine).join('\n');

  return `Given the following numbered sentences, identify which ones are NOT verifiable factual claims.

Exclude sentences that are:
- Greetings, preambles, or transitional phrases
- Opinions, vague statements, or subjective assessments
- Introductory sentences that set up a list or table
- Sentences that merely restate or paraphrase the question
- Conversational closers (e.g., "let me know if you need more")
- Broad summaries that don't assert a specific fact
- Disclaimers or meta-commentary about the response itself
- Sentences that do not provide enough information to verify
- Sentences that only mention a concept without providing specific information

Respond with ONLY a comma-separated list of the sentence numbers to exclude. If none should be excluded, respond with "NONE".

Sentences:
${numberedSentences}`;
}
60
+
61
/**
 * Builds the prompt that asks the model which context sentences are NOT
 * relevant to verifying any of the claims.
 *
 * Both lists are rendered the same way: each entry trimmed, double-quoted and
 * numbered from 1, one per line.
 *
 * @param {string[]} contextSentences - Context sentences to list.
 * @param {string[]} claimSentences - Claims to list.
 * @returns {string} The complete prompt text.
 */
function buildContextFilterPrompt(contextSentences, claimSentences) {
  const numberLines = (items) =>
    items.map((item, position) => `${position + 1}. "${item.trim()}"`).join('\n');

  const numberedContext = numberLines(contextSentences);
  const numberedClaims = numberLines(claimSentences);

  return `Given the following context sentences and claims, identify which context sentences are NOT relevant to verifying ANY of the claims.

Context sentences:
${numberedContext}

Claims to verify:
${numberedClaims}

Respond with ONLY a comma-separated list of the context sentence numbers that are NOT relevant. If all are relevant, respond with "NONE".`;
}
79
+
80
/**
 * Parses the sentence numbers listed in an LLM response.
 *
 * A reply that is exactly "NONE" (ignoring case and surrounding whitespace)
 * yields an empty set. Otherwise every run of digits in the text is read as a
 * base-10 integer, and those within `1..maxIndex` (inclusive) are kept. A
 * range-like reply such as "1-3" is therefore read as the two numbers 1 and 3.
 *
 * @param {string} content - Raw text returned by the model. A missing or
 *   non-string value is treated as "nothing to exclude".
 * @param {number} maxIndex - Highest acceptable 1-based index.
 * @returns {Set<number>} The accepted 1-based indices.
 */
export function parseExcludeIndices(content, maxIndex) {
  const excludeIndices = new Set();

  // Guard against null/undefined/non-string replies: calling `.trim()` on
  // them would throw, while "no usable reply" simply means no exclusions.
  if (typeof content !== 'string') {
    return excludeIndices;
  }

  if (content.trim().toUpperCase() === 'NONE') {
    return excludeIndices;
  }

  // `match` with a global regex returns null when nothing matches.
  for (const digits of content.match(/\d+/g) ?? []) {
    const idx = Number.parseInt(digits, 10);
    if (idx >= 1 && idx <= maxIndex) {
      excludeIndices.add(idx);
    }
  }
  return excludeIndices;
}
97
+
98
/**
 * Sends a single user prompt to the configured filter model and returns the
 * text of the first choice.
 *
 * @param {string} prompt - Prompt sent as the only (user) message.
 * @param {{ ip?: string }} [options] - `ip` is passed through to `callLLM`.
 * @returns {Promise<string>} The first choice's message content, or `''` when
 *   the reply has no such (truthy) content.
 */
async function callFilterModel(prompt, { ip } = {}) {
  const { name: model, apiUrl, apiKey } = filterModel;
  const payload = {
    messages: [{ role: 'user', content: prompt }],
    temperature: 0.0,
    model,
  };

  const reply = await callLLM(apiUrl, apiKey, payload, { ip });
  const firstChoiceText = reply.choices?.[0]?.message?.content;
  return firstChoiceText || '';
}
109
+
110
/**
 * Asks the filter model which sentences should be excluded as non-verifiable
 * claims.
 *
 * Best-effort: any failure while prompting or parsing is logged and results in
 * an empty set, so nothing is excluded.
 *
 * @param {string[]} sentences - Candidate sentences.
 * @param {{ ip?: string }} [options] - `ip` is passed through to the model call.
 * @returns {Promise<Set<number>>} 1-based numbers of the sentences to exclude.
 */
export async function excludeClaimSentences(sentences, { ip } = {}) {
  // Nothing to classify: skip the model call entirely.
  if (sentences.length === 0) {
    return new Set();
  }

  let excluded;
  try {
    const reply = await callFilterModel(buildClaimFilterPrompt(sentences), {
      ip,
    });
    excluded = parseExcludeIndices(reply, sentences.length);
    log('Claim filter response', excluded.size);
  } catch (error) {
    log('Claim filter failed, skipping', error);
    excluded = new Set();
  }
  return excluded;
}
126
+
127
/**
 * Asks the filter model which context sentences are irrelevant to every claim.
 *
 * Filtering only happens when there are more than
 * `MIN_CONTEXT_SENTENCES_FOR_FILTERING` context sentences; otherwise an empty
 * set is returned without calling the model. Best-effort: any failure is
 * logged and results in an empty set.
 *
 * @param {string[]} contextSentences - Context sentences to consider.
 * @param {string[]} claimSentences - Claims the context is checked against.
 * @param {{ ip?: string }} [options] - `ip` is passed through to the model call.
 * @returns {Promise<Set<number>>} 1-based numbers of the context sentences to exclude.
 */
export async function excludeContextSentences(
  contextSentences,
  claimSentences,
  { ip } = {},
) {
  const total = contextSentences.length;
  if (total <= MIN_CONTEXT_SENTENCES_FOR_FILTERING) {
    return new Set();
  }

  let excluded;
  try {
    const reply = await callFilterModel(
      buildContextFilterPrompt(contextSentences, claimSentences),
      { ip },
    );
    excluded = parseExcludeIndices(reply, contextSentences.length);
    log('Context filter response', excluded.size);
  } catch (error) {
    log('Context filter failed, skipping', error);
    excluded = new Set();
  }
  return excluded;
}
@@ -0,0 +1,44 @@
1
+ import { parseExcludeIndices } from './filtering';
2
+
3
describe('parseExcludeIndices', () => {
  // Each row: [test title, raw model reply, highest valid index, expected indices].
  const cases = [
    ['parses single indices', '1,3,5', 6, [1, 3, 5]],
    ['returns empty set for NONE', 'NONE', 10, []],
    ['returns empty set for none (lowercase)', ' none ', 10, []],
    ['ignores indices below 1', '0, 1, 3', 5, [1, 3]],
    ['ignores indices above maxIndex', '1, 3, 99', 5, [1, 3]],
    ['handles whitespace variations', ' 1 , 5 , 7 ', 10, [1, 5, 7]],
    // Parser uses match(/\d+/g) so it extracts all numbers
    ['extracts numbers even from unexpected formats', '1-3, 5', 10, [1, 3, 5]],
    [
      'returns empty set for empty string with no numbers',
      'no numbers here',
      10,
      [],
    ],
  ];

  cases.forEach(([title, content, maxIndex, expected]) => {
    it(title, () => {
      const result = parseExcludeIndices(content, maxIndex);
      expect(result).toEqual(new Set(expected));
    });
  });
});