@eeacms/volto-eea-chatbot 1.0.11 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -2
- package/README.md +8 -8
- package/jest-addon.config.js +1 -1
- package/package.json +2 -1
- package/src/ChatBlock/chat/AIMessage.tsx +32 -25
- package/src/ChatBlock/chat/ChatWindow.tsx +1 -0
- package/src/ChatBlock/components/HalloumiFeedback.jsx +8 -4
- package/src/ChatBlock/components/markdown/ClaimModal.jsx +1 -1
- package/src/ChatBlock/components/markdown/ClaimSegments.jsx +2 -3
- package/src/ChatBlock/components/markdown/RenderClaimView.jsx +1 -1
- package/src/ChatBlock/components/markdown/index.js +41 -15
- package/src/ChatBlock/hooks/useChatController.ts +0 -15
- package/src/ChatBlock/hooks/useQualityMarkers.js +0 -11
- package/src/ChatBlock/style.less +47 -0
- package/src/ChatBlock/types/interfaces.ts +1 -0
- package/src/halloumi/filtering.js +149 -0
- package/src/halloumi/filtering.test.js +44 -0
- package/src/halloumi/generative.js +157 -53
- package/src/halloumi/generative.test.js +28 -8
- package/src/halloumi/markdown-splitter.js +172 -0
- package/src/halloumi/markdown-splitter.test.js +133 -0
- package/src/halloumi/middleware.js +6 -6
- package/src/halloumi/postprocessing.js +0 -26
- package/src/halloumi/preprocessing.js +78 -76
- package/src/halloumi/preprocessing.test.js +87 -148
- package/src/middleware.js +3 -0
- package/src/middleware.test.js +2 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import debug from 'debug';
|
|
1
|
+
// import debug from 'debug';
|
|
2
2
|
import { getVerifyClaimResponse } from './generative';
|
|
3
3
|
|
|
4
|
-
const log = debug('halloumi');
|
|
4
|
+
// const log = debug('halloumi');
|
|
5
5
|
|
|
6
6
|
const MSG_INVALID_CONFIGURATION =
|
|
7
7
|
'Invalid configuration: missing LLMGW_TOKEN or LLMGW_URL';
|
|
@@ -45,8 +45,8 @@ export default async function middleware(req, res, next) {
|
|
|
45
45
|
};
|
|
46
46
|
const body = req.body;
|
|
47
47
|
|
|
48
|
-
log('Halloumi body', body);
|
|
49
|
-
const { sources, answer
|
|
48
|
+
// log('Halloumi body', body);
|
|
49
|
+
const { sources, answer } = body;
|
|
50
50
|
|
|
51
51
|
res.set('Content-Type', 'application/json');
|
|
52
52
|
|
|
@@ -56,9 +56,9 @@ export default async function middleware(req, res, next) {
|
|
|
56
56
|
// TODO: map with citation id
|
|
57
57
|
sources,
|
|
58
58
|
answer,
|
|
59
|
-
|
|
59
|
+
{ ip: req.headers['x-forwarded-for'] || req.ip },
|
|
60
60
|
);
|
|
61
|
-
log('Halloumi response', resp);
|
|
61
|
+
// log('Halloumi response', resp);
|
|
62
62
|
res.send(resp);
|
|
63
63
|
} catch (error) {
|
|
64
64
|
res.status(500).send({
|
|
@@ -1,29 +1,3 @@
|
|
|
1
|
-
// /**
|
|
2
|
-
// * Represents a claim object with all relevant information.
|
|
3
|
-
// */
|
|
4
|
-
// export interface GenerativeClaim {
|
|
5
|
-
// claimId: number;
|
|
6
|
-
// claimString: string;
|
|
7
|
-
// subclaims: string[];
|
|
8
|
-
// segments: number[];
|
|
9
|
-
// explanation: string;
|
|
10
|
-
// supported: boolean;
|
|
11
|
-
// probabilities: Map<string, number>;
|
|
12
|
-
// }
|
|
13
|
-
//
|
|
14
|
-
// export interface OpenAITokenLogProb {
|
|
15
|
-
// token: string;
|
|
16
|
-
// bytes: number[];
|
|
17
|
-
// logprob: number;
|
|
18
|
-
// }
|
|
19
|
-
//
|
|
20
|
-
// export interface OpenAILogProb {
|
|
21
|
-
// token: string;
|
|
22
|
-
// bytes: number[];
|
|
23
|
-
// logprob: number;
|
|
24
|
-
// top_logprobs: OpenAITokenLogProb[];
|
|
25
|
-
// }
|
|
26
|
-
//
|
|
27
1
|
/**
|
|
28
2
|
* Gets the claim id from a subsegment.
|
|
29
3
|
* @param subsegment A subsegment string of the form "<|r1|".
|
|
@@ -1,46 +1,4 @@
|
|
|
1
|
-
const
|
|
2
|
-
'Make one or more claims about information in the documents.';
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Splits a given text into sentences using sentence-splitter.
|
|
6
|
-
* @param text The input string to split.
|
|
7
|
-
* @returns An array of sentence strings.
|
|
8
|
-
*/
|
|
9
|
-
export function splitIntoSentences(text, maxSegments = 0) {
|
|
10
|
-
const segmenter = new Intl.Segmenter('en', { granularity: 'sentence' });
|
|
11
|
-
const segments = Array.from(segmenter.segment(text)).map((s) => s.segment);
|
|
12
|
-
|
|
13
|
-
const initialSentences = [];
|
|
14
|
-
let currentSentence = '';
|
|
15
|
-
|
|
16
|
-
for (const segment of segments) {
|
|
17
|
-
currentSentence += segment;
|
|
18
|
-
if (currentSentence.trim().length > 8) {
|
|
19
|
-
initialSentences.push(currentSentence);
|
|
20
|
-
currentSentence = '';
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
// Push any remaining part that didn't make it to 8 characters
|
|
24
|
-
if (currentSentence) {
|
|
25
|
-
initialSentences.push(currentSentence);
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
if (maxSegments <= 0) {
|
|
29
|
-
return initialSentences;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
if (initialSentences.length > maxSegments) {
|
|
33
|
-
const groupSize = Math.ceil(initialSentences.length / maxSegments);
|
|
34
|
-
const mergedSentences = [];
|
|
35
|
-
for (let i = 0; i < initialSentences.length; i += groupSize) {
|
|
36
|
-
const group = initialSentences.slice(i, i + groupSize);
|
|
37
|
-
mergedSentences.push(group.join(''));
|
|
38
|
-
}
|
|
39
|
-
return mergedSentences;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
return initialSentences;
|
|
43
|
-
}
|
|
1
|
+
// Upper bound on the number of context sentences packed together into one
// prompt chunk by createChunkedHalloumiPrompts. A single source that has more
// sentences than this is not split; it ends up in a chunk of its own.
const MAX_CONTEXT_SEGMENTS_PER_CHUNK = 100;
|
|
44
2
|
|
|
45
3
|
/**
|
|
46
4
|
* Annotate a set of sentences with a given annotation character.
|
|
@@ -48,11 +6,26 @@ export function splitIntoSentences(text, maxSegments = 0) {
|
|
|
48
6
|
* @param annotationChar The character to use for annotation.
|
|
49
7
|
* @returns The annotated string with annotation characters + sentence number.
|
|
50
8
|
*/
|
|
51
|
-
export function annotate(sentences, annotationChar) {
|
|
9
|
+
export function annotate(sentences, annotationChar, excludeIndices) {
  // A missing sentence list annotates to nothing instead of throwing, which
  // matches what an empty array already produces.
  if (sentences == null) {
    return '';
  }

  // `excludeIndices` is optional and holds the 1-based ids of sentences to
  // leave out. Anything exposing `.has()` (a Set, a Map) is used directly;
  // any other iterable of ids, such as a plain array, is copied into a Set.
  let excluded = null;
  if (excludeIndices) {
    excluded =
      typeof excludeIndices.has === 'function'
        ? excludeIndices
        : new Set(excludeIndices);
  }

  return sentences
    .map((sentence, index) => {
      const id = index + 1;
      // Ids are positional: an excluded sentence contributes an empty string
      // but the sentences after it keep their original numbers.
      if (excluded !== null && excluded.has(id)) {
        return '';
      }
      return `<|${annotationChar}${id}|><${sentence}><end||${annotationChar}>`;
    })
    .join('');
}
|
|
20
|
+
|
|
21
|
+
/**
 * Annotates a chunk of indexed sentences with their global IDs.
 *
 * Unlike positional annotation, the number written into each tag is the
 * entry's own `globalId`, not its position inside `chunk`.
 *
 * @param {Array<{sentence: string, globalId: number}>} chunk Entries to wrap;
 *   only the `sentence` and `globalId` properties are read.
 * @param {string} annotationChar Tag prefix placed before each id.
 * @returns {string} The concatenated
 *   `<|{char}{globalId}|><{sentence}><end||{char}>` segments, or an empty
 *   string for an empty chunk.
 */
function annotateChunk(chunk, annotationChar) {
  return chunk
    .map(
      ({ sentence, globalId }) =>
        `<|${annotationChar}${globalId}|><${sentence}><end||${annotationChar}>`,
    )
    .join('');
}
|
|
@@ -74,42 +47,71 @@ export function getOffsets(originalString, sentences) {
|
|
|
74
47
|
}
|
|
75
48
|
|
|
76
49
|
/**
 * Creates multiple HallOumi prompts by chunking context segments.
 * Each chunk uses global segment IDs (s5, s42, ...) so no local-to-global
 * mapping is needed when merging results.
 *
 * Context sentences are grouped into runs of consecutive entries that share a
 * `sourceId`. Whole runs are then packed into chunks, so a run is never split
 * across prompts; a run longer than the chunk limit gets a chunk to itself.
 * Every prompt carries the same request and the same annotated response.
 *
 * @param {Object} options
 * @param {Array<{sentence: string, sourceId: *, globalId: number}>} [options.indexedContextSentences]
 *   Context sentences, in source order. A missing value is treated as empty.
 * @param {string[]} [options.responseSentences] Response sentences; a missing
 *   value is treated as empty.
 * @param {*} options.responseOffsets Returned unchanged on every prompt.
 * @param {string} [options.request] Text placed in the request section.
 * @param {Set<number>} [options.excludeResponseIndices] Forwarded to
 *   `annotate` as the ids of response sentences to omit.
 * @param {number} [options.maxSegmentsPerChunk] Maximum number of context
 *   sentences combined into one chunk; defaults to
 *   MAX_CONTEXT_SEGMENTS_PER_CHUNK.
 * @returns {{ prompts: Array<{prompt, responseOffsets}> }} Always at least one
 *   prompt, even when there is no context.
 */
export function createChunkedHalloumiPrompts({
  indexedContextSentences,
  responseSentences,
  responseOffsets,
  request = 'Make one or more claims about information in the documents.',
  excludeResponseIndices,
  maxSegmentsPerChunk = MAX_CONTEXT_SEGMENTS_PER_CHUNK,
}) {
  // Build response annotation (same for all chunks)
  const annotatedResponseSentences = annotate(
    responseSentences || [],
    'r',
    excludeResponseIndices,
  );
  const annotatedRequest = `<|request|><${request}><end||request>`;
  const annotatedResponse = `<|response|>${annotatedResponseSentences}<end||response>`;

  // Group sentences by source (runs of consecutive equal sourceId values).
  // The "no group opened yet" state is tracked separately from the source id,
  // so a first entry whose sourceId is null still opens a group.
  const sourceGroups = [];
  let currentGroup = null;
  let currentSourceId;
  for (const entry of indexedContextSentences || []) {
    if (currentGroup === null || entry.sourceId !== currentSourceId) {
      currentGroup = [];
      sourceGroups.push(currentGroup);
      currentSourceId = entry.sourceId;
    }
    currentGroup.push(entry);
  }

  // Pack whole sources into chunks (first-fit decreasing bin packing).
  // Sorting a copy by size, largest first, places big sources before the
  // small ones that can fill the remaining room.
  const largestFirst = [...sourceGroups].sort((a, b) => b.length - a.length);
  const chunks = [];
  const chunkSizes = [];
  for (const group of largestFirst) {
    const target = chunkSizes.findIndex(
      (size) => size + group.length <= maxSegmentsPerChunk,
    );
    if (target === -1) {
      // Nothing has room (or the group alone exceeds the limit): new chunk.
      chunks.push([...group]);
      chunkSizes.push(group.length);
    } else {
      chunks[target].push(...group);
      chunkSizes[target] += group.length;
    }
  }
  // With no context at all, still emit one prompt with an empty context.
  if (chunks.length === 0) chunks.push([]);

  // Build one prompt per chunk with global segment IDs
  const prompts = chunks.map((chunk) => {
    const annotatedContext = `<|context|>${annotateChunk(
      chunk,
      's',
    )}<end||context>`;
    const prompt = `${annotatedContext}${annotatedRequest}${annotatedResponse}`;

    return { prompt, responseOffsets };
  });

  return { prompts };
}
|
|
@@ -1,69 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
3
|
-
splitIntoSentences,
|
|
2
|
+
createChunkedHalloumiPrompts,
|
|
4
3
|
annotate,
|
|
5
4
|
getOffsets,
|
|
6
5
|
} from './preprocessing';
|
|
7
6
|
|
|
8
|
-
describe('splitIntoSentences', () => {
|
|
9
|
-
it('should split a basic text into sentences', () => {
|
|
10
|
-
const text =
|
|
11
|
-
'This is sentence one. This is sentence two. This is sentence three.';
|
|
12
|
-
const expected = [
|
|
13
|
-
'This is sentence one. ',
|
|
14
|
-
'This is sentence two. ',
|
|
15
|
-
'This is sentence three.',
|
|
16
|
-
];
|
|
17
|
-
expect(splitIntoSentences(text)).toEqual(expected);
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
it('should handle short sentences by merging them', () => {
|
|
21
|
-
const text = 'Short. This is a longer sentence. Also short.';
|
|
22
|
-
const expected = ['Short. This is a longer sentence. ', 'Also short.'];
|
|
23
|
-
expect(splitIntoSentences(text)).toEqual(expected);
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
it('should return an empty array for an empty string', () => {
|
|
27
|
-
expect(splitIntoSentences('')).toEqual([]);
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
it('should handle a single sentence', () => {
|
|
31
|
-
const text = 'This is a single sentence.';
|
|
32
|
-
expect(splitIntoSentences(text)).toEqual(['This is a single sentence.']);
|
|
33
|
-
});
|
|
34
|
-
|
|
35
|
-
it('should handle text without punctuation', () => {
|
|
36
|
-
const text = 'This is a sentence without punctuation';
|
|
37
|
-
expect(splitIntoSentences(text)).toEqual([
|
|
38
|
-
'This is a sentence without punctuation',
|
|
39
|
-
]);
|
|
40
|
-
});
|
|
41
|
-
|
|
42
|
-
it('should not merge sentences when maxSegments is 0', () => {
|
|
43
|
-
const text = 'One. Two. Three. Four. Five.';
|
|
44
|
-
const expected = ['One. Two. ', 'Three. Four. ', 'Five.'];
|
|
45
|
-
expect(splitIntoSentences(text, 0)).toEqual(expected);
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
it('should not merge sentences when finalSentences.length <= maxSegments', () => {
|
|
49
|
-
const text = 'One. Two. Three.';
|
|
50
|
-
const expected = ['One. Two. ', 'Three.'];
|
|
51
|
-
expect(splitIntoSentences(text, 3)).toEqual(expected);
|
|
52
|
-
});
|
|
53
|
-
|
|
54
|
-
it('should merge sentences when finalSentences.length > maxSegments', () => {
|
|
55
|
-
const text = 'One. Two. Three. Four. Five.';
|
|
56
|
-
const expected = ['One. Two. Three. Four. ', 'Five.'];
|
|
57
|
-
expect(splitIntoSentences(text, 2)).toEqual(expected);
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
it('should merge sentences into a single segment if maxSegments is 1', () => {
|
|
61
|
-
const text = 'One. Two. Three. Four. Five.';
|
|
62
|
-
const expected = ['One. Two. Three. Four. Five.'];
|
|
63
|
-
expect(splitIntoSentences(text, 1)).toEqual(expected);
|
|
64
|
-
});
|
|
65
|
-
});
|
|
66
|
-
|
|
67
7
|
describe('annotate', () => {
|
|
68
8
|
it('should annotate multiple sentences correctly', () => {
|
|
69
9
|
const sentences = ['Sentence one.', 'Sentence two.'];
|
|
@@ -141,105 +81,104 @@ describe('getOffsets', () => {
|
|
|
141
81
|
});
|
|
142
82
|
});
|
|
143
83
|
|
|
144
|
-
describe('
|
|
145
|
-
it('should create a
|
|
146
|
-
const
|
|
147
|
-
|
|
148
|
-
|
|
84
|
+
// Tests for createChunkedHalloumiPrompts: prompt layout, empty context,
// response exclusion, and source-preserving chunk packing.
describe('createChunkedHalloumiPrompts', () => {
  it('should create a single chunk for small input', () => {
    const indexedContextSentences = [
      { sentence: 'Context one.', sourceId: 1, globalId: 1 },
      { sentence: 'Context two.', sourceId: 1, globalId: 2 },
    ];
    const responseSentences = ['Response one.', 'Response two.'];
    const responseOffsets = new Map([
      [1, { startOffset: 0, endOffset: 13 }],
      [2, { startOffset: 14, endOffset: 28 }],
    ]);

    const { prompts } = createChunkedHalloumiPrompts({
      indexedContextSentences,
      responseSentences,
      responseOffsets,
    });

    expect(prompts).toHaveLength(1);
    expect(prompts[0].prompt).toContain('<|s1|><Context one.><end||s>');
    expect(prompts[0].prompt).toContain('<|s2|><Context two.><end||s>');
    expect(prompts[0].prompt).toContain('<|r1|><Response one.><end||r>');
    expect(prompts[0].prompt).toContain('<|r2|><Response two.><end||r>');
    // The offsets object is handed back as-is, not copied.
    expect(prompts[0].responseOffsets).toBe(responseOffsets);
  });

  it('should handle empty context', () => {
    const responseSentences = ['Response.'];
    const responseOffsets = new Map([[1, { startOffset: 0, endOffset: 9 }]]);

    const { prompts } = createChunkedHalloumiPrompts({
      indexedContextSentences: [],
      responseSentences,
      responseOffsets,
    });

    expect(prompts).toHaveLength(1);
    expect(prompts[0].prompt).toContain('<|context|><end||context>');
    // The response is still present even though there is nothing to check it against.
    expect(prompts[0].prompt).toContain('<|r1|><Response.><end||r>');
  });

  it('should exclude response sentences based on excludeResponseIndices', () => {
    const indexedContextSentences = [
      { sentence: 'Context.', sourceId: 1, globalId: 1 },
    ];
    const responseSentences = ['Keep this.', 'Skip this.', 'Keep too.'];
    const responseOffsets = new Map([
      [1, { startOffset: 0, endOffset: 10 }],
      [2, { startOffset: 11, endOffset: 21 }],
      [3, { startOffset: 22, endOffset: 31 }],
    ]);
    const excludeResponseIndices = new Set([2]);

    const { prompts } = createChunkedHalloumiPrompts({
      indexedContextSentences,
      responseSentences,
      responseOffsets,
      excludeResponseIndices,
    });

    expect(prompts[0].prompt).toContain('<|r1|><Keep this.><end||r>');
    expect(prompts[0].prompt).not.toContain('Skip this.');
    // Numbering is positional, so the third sentence is still r3.
    expect(prompts[0].prompt).toContain('<|r3|><Keep too.><end||r>');
  });

  it('should keep sources together with bin packing', () => {
    // 3 sources: 60 + 60 + 30 = 150 sentences
    // Should pack into 2 chunks: (60+30) and (60) or similar
    const sources = [
      { label: 'S1', sourceId: 1, size: 60 },
      { label: 'S2', sourceId: 2, size: 60 },
      { label: 'S3', sourceId: 3, size: 30 },
    ];
    const indexed = [];
    let gid = 1;
    for (const { label, sourceId, size } of sources) {
      for (let i = 0; i < size; i++) {
        indexed.push({ sentence: `${label}-${i}`, sourceId, globalId: gid++ });
      }
    }

    const { prompts } = createChunkedHalloumiPrompts({
      indexedContextSentences: indexed,
      responseSentences: ['Claim.'],
      responseOffsets: new Map([[1, { startOffset: 0, endOffset: 6 }]]),
    });

    expect(prompts).toHaveLength(2);

    const countIn = (text, pattern) => (text.match(pattern) || []).length;

    // Each source must show up in exactly one prompt, and completely. Looking
    // at the counts across all prompts (rather than only at prompts where the
    // source happens to appear) also fails when a source is dropped entirely.
    for (const { label, size } of sources) {
      const perPrompt = prompts.map(({ prompt }) =>
        countIn(prompt, new RegExp(`${label}-`, 'g')),
      );
      expect(perPrompt.filter((count) => count > 0)).toEqual([size]);
    }

    // No chunk combines more than 100 context sentences.
    for (const { prompt } of prompts) {
      expect(countIn(prompt, /<end\|\|s>/g)).toBeLessThanOrEqual(100);
    }
  });
});
|
package/src/middleware.js
CHANGED
|
@@ -151,6 +151,7 @@ async function send_onyx_request(
|
|
|
151
151
|
res,
|
|
152
152
|
{ username, password, api_key, url, is_related_question },
|
|
153
153
|
) {
|
|
154
|
+
const forwardedFor = req.headers['x-forwarded-for'] || req.ip;
|
|
154
155
|
let headers = {};
|
|
155
156
|
if (!api_key) {
|
|
156
157
|
await login(username, password);
|
|
@@ -166,11 +167,13 @@ async function send_onyx_request(
|
|
|
166
167
|
headers = {
|
|
167
168
|
Cookie: cached_auth_cookie,
|
|
168
169
|
'Content-Type': 'application/json',
|
|
170
|
+
'X-Forwarded-For': forwardedFor,
|
|
169
171
|
};
|
|
170
172
|
} else {
|
|
171
173
|
headers = {
|
|
172
174
|
Authorization: 'Bearer ' + api_key,
|
|
173
175
|
'Content-Type': 'application/json',
|
|
176
|
+
'X-Forwarded-For': forwardedFor,
|
|
174
177
|
};
|
|
175
178
|
}
|
|
176
179
|
|