clanka 0.2.18 → 0.2.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Agent.d.ts.map +1 -1
- package/dist/Agent.js +2 -1
- package/dist/Agent.js.map +1 -1
- package/dist/CodeChunker.d.ts +4 -0
- package/dist/CodeChunker.d.ts.map +1 -1
- package/dist/CodeChunker.js +64 -14
- package/dist/CodeChunker.js.map +1 -1
- package/dist/CodeChunker.test.js +71 -0
- package/dist/CodeChunker.test.js.map +1 -1
- package/dist/ScriptPreprocessing.d.ts.map +1 -1
- package/dist/ScriptPreprocessing.js +128 -9
- package/dist/ScriptPreprocessing.js.map +1 -1
- package/dist/ScriptPreprocessing.test.js +4 -0
- package/dist/ScriptPreprocessing.test.js.map +1 -1
- package/dist/SemanticSearch.d.ts +1 -0
- package/dist/SemanticSearch.d.ts.map +1 -1
- package/dist/SemanticSearch.js +4 -2
- package/dist/SemanticSearch.js.map +1 -1
- package/package.json +10 -10
- package/src/Agent.ts +2 -1
- package/src/CodeChunker.test.ts +77 -0
- package/src/CodeChunker.ts +105 -19
- package/src/ScriptPreprocessing.test.ts +4 -0
- package/src/ScriptPreprocessing.ts +161 -9
- package/src/SemanticSearch.ts +7 -2
- package/src/fixtures/patch10-broken.txt +95 -0
- package/src/fixtures/patch10-fixed.txt +95 -0
- package/src/fixtures/patch11-broken.txt +219 -0
- package/src/fixtures/patch11-fixed.txt +219 -0
- package/src/fixtures/patch8-broken.txt +30 -0
- package/src/fixtures/patch8-fixed.txt +30 -0
- package/src/fixtures/patch9-broken.txt +10 -0
- package/src/fixtures/patch9-fixed.txt +10 -0
package/src/CodeChunker.ts
CHANGED
|
@@ -59,18 +59,21 @@ export class CodeChunker extends ServiceMap.Service<
|
|
|
59
59
|
readonly path: string
|
|
60
60
|
readonly chunkSize: number
|
|
61
61
|
readonly chunkOverlap: number
|
|
62
|
+
readonly chunkMaxCharacters?: number | undefined
|
|
62
63
|
}): Effect.Effect<ReadonlyArray<CodeChunk>>
|
|
63
64
|
chunkFiles(options: {
|
|
64
65
|
readonly root: string
|
|
65
66
|
readonly paths: ReadonlyArray<string>
|
|
66
67
|
readonly chunkSize: number
|
|
67
68
|
readonly chunkOverlap: number
|
|
69
|
+
readonly chunkMaxCharacters?: number | undefined
|
|
68
70
|
}): Stream.Stream<CodeChunk>
|
|
69
71
|
chunkCodebase(options: {
|
|
70
72
|
readonly root: string
|
|
71
73
|
readonly maxFileSize?: string | undefined
|
|
72
74
|
readonly chunkSize: number
|
|
73
75
|
readonly chunkOverlap: number
|
|
76
|
+
readonly chunkMaxCharacters?: number | undefined
|
|
74
77
|
}): Stream.Stream<CodeChunk>
|
|
75
78
|
}
|
|
76
79
|
>()("clanka/CodeChunker") {}
|
|
@@ -157,6 +160,7 @@ interface LineRange {
|
|
|
157
160
|
/**
 * Normalized chunking limits consumed by the range-splitting helpers.
 */
interface ChunkSettings {
  /** Upper bound on the number of lines placed in a single chunk. */
  readonly chunkSize: number
  /** Number of trailing lines a chunk shares with the chunk that follows it. */
  readonly chunkOverlap: number
  /**
   * Upper bound on the characters in a single chunk.
   * `Number.POSITIVE_INFINITY` means the character limit is disabled.
   */
  readonly chunkMaxCharacters: number
}
|
|
161
165
|
|
|
162
166
|
interface ChunkRange extends LineRange {
|
|
@@ -234,16 +238,23 @@ export const isMeaningfulFile = (path: string): boolean => {
|
|
|
234
238
|
/**
 * Normalizes caller-supplied chunk options into safe, integral settings.
 *
 * - `chunkSize` is floored and clamped to at least 1 (NaN falls back to 1).
 * - `chunkOverlap` is floored and clamped to `[0, chunkSize - 1]`
 *   (NaN falls back to 0).
 * - `chunkMaxCharacters` is floored and clamped to at least 1; when it is
 *   omitted or not finite it becomes `Number.POSITIVE_INFINITY` (no limit).
 *
 * @param options raw chunking options as provided by the caller
 * @returns settings whose line counts are whole numbers
 */
const resolveChunkSettings = (options: {
  readonly chunkSize: number
  readonly chunkOverlap: number
  readonly chunkMaxCharacters?: number | undefined
}): ChunkSettings => {
  // Line counts feed directly into line-number arithmetic, so a fractional or
  // NaN value would surface as fractional/NaN start and end lines.
  // `Math.floor` leaves +/-Infinity untouched, so "unbounded" sizes still work.
  const toWholeNumber = (value: number, fallback: number): number =>
    Number.isNaN(value) ? fallback : Math.floor(value)

  const chunkSize = Math.max(1, toWholeNumber(options.chunkSize, 1))
  const chunkOverlap = Math.max(
    0,
    // The overlap must stay below the chunk size so every step advances.
    Math.min(chunkSize - 1, toWholeNumber(options.chunkOverlap, 0)),
  )
  const chunkMaxCharacters =
    options.chunkMaxCharacters !== undefined &&
    Number.isFinite(options.chunkMaxCharacters)
      ? Math.max(1, Math.floor(options.chunkMaxCharacters))
      : Number.POSITIVE_INFINITY

  return {
    chunkSize,
    chunkOverlap,
    chunkMaxCharacters,
  }
}
|
|
249
260
|
|
|
@@ -345,24 +356,76 @@ const normalizeLineRange = (
|
|
|
345
356
|
}
|
|
346
357
|
}
|
|
347
358
|
|
|
359
|
+
/**
 * Builds cumulative line lengths for `lines`.
 *
 * Entry `i` of the result is the combined `length` of the first `i` lines
 * (line separators are not counted), so the result always starts with 0 and
 * has `lines.length + 1` entries.
 */
const lineLengthPrefixSums = (
  lines: ReadonlyArray<string>,
): ReadonlyArray<number> => {
  const totals: Array<number> = [0]
  let running = 0

  for (const line of lines) {
    running += line.length
    totals.push(running)
  }

  return totals
}
|
|
370
|
+
|
|
371
|
+
/**
 * Character length of the lines `range.startLine..range.endLine` (1-based,
 * inclusive) when joined with a single-character separator between lines.
 *
 * @param prefixSums cumulative line lengths where entry `i` covers the first `i` lines
 * @param range the line range to measure
 */
const lineRangeCharacterLength = (
  prefixSums: ReadonlyArray<number>,
  range: LineRange,
): number => {
  const { startLine, endLine } = range
  const contentLength = prefixSums[endLine]! - prefixSums[startLine - 1]!
  // One separator sits between each pair of adjacent lines.
  const separatorCount = endLine - startLine

  return contentLength + separatorCount
}
|
|
378
|
+
|
|
379
|
+
/**
 * Picks the last line of a segment that starts at `startLine`.
 *
 * Starting from `maxEndLine`, the end line is pulled back one line at a time
 * until the segment's character length is no longer above
 * `settings.chunkMaxCharacters`, or until only `startLine` itself is left.
 * When the character limit is `Number.POSITIVE_INFINITY`, `maxEndLine` is
 * returned unchanged.
 */
const resolveSegmentEndLine = (options: {
  readonly startLine: number
  readonly maxEndLine: number
  readonly settings: ChunkSettings
  readonly prefixSums: ReadonlyArray<number>
}): number => {
  const { startLine, maxEndLine, settings, prefixSums } = options
  const characterLimit = settings.chunkMaxCharacters

  if (characterLimit === Number.POSITIVE_INFINITY) {
    return maxEndLine
  }

  let endLine = maxEndLine
  while (endLine > startLine) {
    const segmentLength = lineRangeCharacterLength(prefixSums, {
      startLine,
      endLine,
    })
    // Stop as soon as the segment is not over the limit.
    if (!(segmentLength > characterLimit)) {
      break
    }
    endLine -= 1
  }

  return endLine
}
|
|
402
|
+
|
|
348
403
|
const splitRange = (
|
|
349
404
|
range: LineRange,
|
|
350
405
|
settings: ChunkSettings,
|
|
406
|
+
prefixSums: ReadonlyArray<number>,
|
|
351
407
|
): ReadonlyArray<LineRange> => {
|
|
352
408
|
const lineCount = range.endLine - range.startLine + 1
|
|
353
|
-
if (
|
|
409
|
+
if (
|
|
410
|
+
lineCount <= settings.chunkSize &&
|
|
411
|
+
lineRangeCharacterLength(prefixSums, range) <= settings.chunkMaxCharacters
|
|
412
|
+
) {
|
|
354
413
|
return [range]
|
|
355
414
|
}
|
|
356
|
-
|
|
357
|
-
const step = settings.chunkSize - settings.chunkOverlap
|
|
358
415
|
const out = [] as Array<LineRange>
|
|
359
416
|
|
|
360
|
-
for (
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
const endLine =
|
|
417
|
+
for (let startLine = range.startLine; startLine <= range.endLine; ) {
|
|
418
|
+
const maxEndLine = Math.min(
|
|
419
|
+
range.endLine,
|
|
420
|
+
startLine + settings.chunkSize - 1,
|
|
421
|
+
)
|
|
422
|
+
const endLine = resolveSegmentEndLine({
|
|
423
|
+
startLine,
|
|
424
|
+
maxEndLine,
|
|
425
|
+
settings,
|
|
426
|
+
prefixSums,
|
|
427
|
+
})
|
|
428
|
+
|
|
366
429
|
out.push({
|
|
367
430
|
startLine,
|
|
368
431
|
endLine,
|
|
@@ -371,6 +434,8 @@ const splitRange = (
|
|
|
371
434
|
if (endLine >= range.endLine) {
|
|
372
435
|
break
|
|
373
436
|
}
|
|
437
|
+
|
|
438
|
+
startLine = Math.max(startLine + 1, endLine - settings.chunkOverlap + 1)
|
|
374
439
|
}
|
|
375
440
|
|
|
376
441
|
return out
|
|
@@ -648,6 +713,7 @@ const chunksFromRanges = (
|
|
|
648
713
|
|
|
649
714
|
const out = [] as Array<CodeChunk>
|
|
650
715
|
const seen = new Set<string>()
|
|
716
|
+
const prefixSums = lineLengthPrefixSums(lines)
|
|
651
717
|
|
|
652
718
|
for (const range of ranges) {
|
|
653
719
|
const normalizedRange = normalizeLineRange(range, lines.length)
|
|
@@ -655,7 +721,7 @@ const chunksFromRanges = (
|
|
|
655
721
|
continue
|
|
656
722
|
}
|
|
657
723
|
|
|
658
|
-
const allSegments = splitRange(normalizedRange, settings)
|
|
724
|
+
const allSegments = splitRange(normalizedRange, settings, prefixSums)
|
|
659
725
|
const segments =
|
|
660
726
|
range.type === "class" &&
|
|
661
727
|
allSegments.length > 1 &&
|
|
@@ -709,8 +775,8 @@ const chunkWithLineWindows = (
|
|
|
709
775
|
lines: ReadonlyArray<string>,
|
|
710
776
|
settings: ChunkSettings,
|
|
711
777
|
): ReadonlyArray<CodeChunk> => {
|
|
712
|
-
const step = settings.chunkSize - settings.chunkOverlap
|
|
713
778
|
const out = [] as Array<CodeChunk>
|
|
779
|
+
const prefixSums = lineLengthPrefixSums(lines)
|
|
714
780
|
|
|
715
781
|
for (let index = 0; index < lines.length; ) {
|
|
716
782
|
if (!isMeaningfulLine(lines[index]!)) {
|
|
@@ -718,25 +784,38 @@ const chunkWithLineWindows = (
|
|
|
718
784
|
continue
|
|
719
785
|
}
|
|
720
786
|
|
|
721
|
-
const
|
|
722
|
-
const
|
|
723
|
-
|
|
787
|
+
const startLine = index + 1
|
|
788
|
+
const maxEndLine = Math.min(
|
|
789
|
+
lines.length,
|
|
790
|
+
startLine + settings.chunkSize - 1,
|
|
791
|
+
)
|
|
792
|
+
const endLine = resolveSegmentEndLine({
|
|
793
|
+
startLine,
|
|
794
|
+
maxEndLine,
|
|
795
|
+
settings,
|
|
796
|
+
prefixSums,
|
|
797
|
+
})
|
|
798
|
+
const chunkLines = lines.slice(startLine - 1, endLine)
|
|
724
799
|
|
|
725
800
|
out.push({
|
|
726
801
|
path,
|
|
727
|
-
startLine
|
|
728
|
-
endLine
|
|
802
|
+
startLine,
|
|
803
|
+
endLine,
|
|
729
804
|
name: undefined,
|
|
730
805
|
type: undefined,
|
|
731
806
|
parent: undefined,
|
|
732
807
|
content: chunkLines.join("\n"),
|
|
733
808
|
})
|
|
734
809
|
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
if (end >= lines.length) {
|
|
810
|
+
if (endLine >= lines.length) {
|
|
738
811
|
break
|
|
739
812
|
}
|
|
813
|
+
|
|
814
|
+
const nextStartLine = Math.max(
|
|
815
|
+
startLine + 1,
|
|
816
|
+
endLine - settings.chunkOverlap + 1,
|
|
817
|
+
)
|
|
818
|
+
index = nextStartLine - 1
|
|
740
819
|
}
|
|
741
820
|
|
|
742
821
|
return out
|
|
@@ -752,6 +831,7 @@ export const chunkFileContent = (
|
|
|
752
831
|
options: {
|
|
753
832
|
readonly chunkSize: number
|
|
754
833
|
readonly chunkOverlap: number
|
|
834
|
+
readonly chunkMaxCharacters?: number | undefined
|
|
755
835
|
},
|
|
756
836
|
): ReadonlyArray<CodeChunk> => {
|
|
757
837
|
if (content.trim().length === 0 || isProbablyMinified(content)) {
|
|
@@ -869,6 +949,9 @@ export const layer: Layer.Layer<
|
|
|
869
949
|
path,
|
|
870
950
|
chunkSize: options.chunkSize,
|
|
871
951
|
chunkOverlap: options.chunkOverlap,
|
|
952
|
+
...(options.chunkMaxCharacters === undefined
|
|
953
|
+
? {}
|
|
954
|
+
: { chunkMaxCharacters: options.chunkMaxCharacters }),
|
|
872
955
|
}),
|
|
873
956
|
Stream.fromArrayEffect,
|
|
874
957
|
),
|
|
@@ -891,6 +974,9 @@ export const layer: Layer.Layer<
|
|
|
891
974
|
paths: files,
|
|
892
975
|
chunkSize: options.chunkSize,
|
|
893
976
|
chunkOverlap: options.chunkOverlap,
|
|
977
|
+
...(options.chunkMaxCharacters === undefined
|
|
978
|
+
? {}
|
|
979
|
+
: { chunkMaxCharacters: options.chunkMaxCharacters }),
|
|
894
980
|
})
|
|
895
981
|
}, Stream.unwrap)
|
|
896
982
|
|
|
@@ -165,6 +165,10 @@ describe("preprocessScript", () => {
|
|
|
165
165
|
"patch5",
|
|
166
166
|
"patch6",
|
|
167
167
|
"patch7",
|
|
168
|
+
"patch8",
|
|
169
|
+
"patch9",
|
|
170
|
+
"patch10",
|
|
171
|
+
"patch11",
|
|
168
172
|
])("fixes broken %s", (fixture) => {
|
|
169
173
|
const content = readFileSync(
|
|
170
174
|
join(__dirname, "fixtures", `${fixture}-broken.txt`),
|
|
@@ -51,6 +51,121 @@ const parseIdentifier = (
|
|
|
51
51
|
}
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
/**
 * Walks backwards from `from` and returns the index of the first character
 * that is not whitespace. Returns -1 when everything down to index 0 is
 * whitespace; a negative `from` is returned unchanged.
 */
const findPreviousNonWhitespace = (text: string, from: number): number => {
  const whitespace = /\s/
  let position = from

  for (; position >= 0; position--) {
    if (!whitespace.test(text[position]!)) {
      break
    }
  }

  return position
}
|
|
61
|
+
|
|
62
|
+
/**
 * Walks forwards from `from` and returns the index of the first character
 * that is not whitespace. Returns `text.length` when only whitespace remains;
 * a `from` at or beyond the end of `text` is returned unchanged.
 */
const findNextNonWhitespace = (text: string, from: number): number => {
  const whitespace = /\s/
  let position = from

  for (; position < text.length; position++) {
    if (!whitespace.test(text[position]!)) {
      break
    }
  }

  return position
}
|
|
69
|
+
|
|
70
|
+
/**
 * Scans `text` from `start` for the character that ends an object property
 * value: the first `,` or `}` seen while no parenthesis, bracket or brace
 * opened during the scan is still open.
 *
 * Quoted content (`"`, `'` or backtick delimited) is skipped until the
 * matching unescaped delimiter; a backtick string is skipped as a whole,
 * without parsing `${...}` interpolations separately. Closing characters with
 * no matching opener never push a depth below zero.
 *
 * @returns the index of the terminating character, or -1 when none is found
 */
const findObjectValueTerminator = (text: string, start: number): number => {
  let openParens = 0
  let openBrackets = 0
  let openBraces = 0
  let activeQuote: '"' | "'" | "`" | undefined

  for (let position = start; position < text.length; position++) {
    const current = text[position]!

    // Inside a string: only look for the closing delimiter.
    if (activeQuote !== undefined) {
      if (current === activeQuote && !isEscaped(text, position)) {
        activeQuote = undefined
      }
      continue
    }

    if (current === '"' || current === "'" || current === "`") {
      activeQuote = current
      continue
    }

    const atTopLevel =
      openParens === 0 && openBrackets === 0 && openBraces === 0

    switch (current) {
      case "(":
        openParens++
        break
      case ")":
        if (openParens > 0) {
          openParens--
        }
        break
      case "[":
        openBrackets++
        break
      case "]":
        if (openBrackets > 0) {
          openBrackets--
        }
        break
      case "{":
        openBraces++
        break
      case "}":
        // A closing brace at the top level closes the enclosing object.
        if (atTopLevel) {
          return position
        }
        if (openBraces > 0) {
          openBraces--
        }
        break
      case ",":
        if (atTopLevel) {
          return position
        }
        break
      default:
        break
    }
  }

  return -1
}
|
|
137
|
+
|
|
138
|
+
/**
 * Collects the names of identifiers that start inside `text[start, end)`.
 *
 * An identifier is left out when the nearest non-whitespace character before
 * it is `.`, or when the nearest non-whitespace character after it is `.` or
 * `(`. Positions where `parseIdentifier` finds nothing are skipped one
 * character at a time.
 */
const collectExpressionIdentifiers = (
  text: string,
  start: number,
  end: number,
): ReadonlySet<string> => {
  const names = new Set<string>()
  let position = start

  while (position < end) {
    const parsed = parseIdentifier(text, position)
    if (parsed === undefined) {
      position += 1
      continue
    }

    const before = text[findPreviousNonWhitespace(text, position - 1)]
    const after = text[findNextNonWhitespace(text, parsed.end)]
    const touchesDot = before === "." || after === "."
    const followedByParen = after === "("

    if (!touchesDot && !followedByParen) {
      names.add(parsed.name)
    }

    // Resume right after the identifier that was just parsed.
    position = parsed.end
  }

  return names
}
|
|
168
|
+
|
|
54
169
|
const isEscaped = (text: string, index: number): boolean => {
|
|
55
170
|
let slashCount = 0
|
|
56
171
|
let i = index - 1
|
|
@@ -76,13 +191,41 @@ const needsTemplateEscaping = (text: string): boolean => {
|
|
|
76
191
|
|
|
77
192
|
/**
 * Unescapes `\"token\"` sequences in patch content.
 *
 * Text without the `*** Begin Patch` marker is returned untouched. Inside
 * patch content, a `\"token\"` match (token made of letters, digits, `_`, `$`,
 * `.` or `-`) becomes `"token"`, unless the nearest non-whitespace character
 * before it is `{`, `[`, `:` or `,`, or the nearest non-whitespace character
 * after it is `:`, `}`, `]` or `,`. In those cases the escaped form is kept.
 */
const normalizePatchEscapedQuotes = (text: string): string => {
  if (!text.includes("*** Begin Patch")) {
    return text
  }

  const keepWhenPrecededBy = ["{", "[", ":", ","]
  const keepWhenFollowedBy = [":", "}", "]", ","]

  return text.replace(
    /\\"([A-Za-z0-9_$.-]+)\\"/g,
    (match, content, index) => {
      // `index` refers to the original `text`, so neighbours are looked up there.
      const before = text[findPreviousNonWhitespace(text, index - 1)]
      const after = text[findNextNonWhitespace(text, index + match.length)]

      const keepEscaped =
        (before !== undefined && keepWhenPrecededBy.includes(before)) ||
        (after !== undefined && keepWhenFollowedBy.includes(after))

      return keepEscaped ? match : `"${content}"`
    },
  )
}
|
|
81
213
|
|
|
214
|
+
/**
 * Cleans up over-escaped markers in non-patch text in two passes:
 *
 * 1. A run of two or more backslashes directly before a backtick or `${` is
 *    collapsed to a single backslash.
 * 2. Backslashes at the start of the text or after whitespace are removed when
 *    they directly precede a `.name/` path segment.
 */
const normalizeNonPatchEscapedTemplateMarkers = (text: string): string => {
  const collapsedTemplateEscapes = text.replace(/\\{2,}(?=`|\$\{)/g, "\\")

  return collapsedTemplateEscapes.replace(
    /(^|\s)\\+(?=\.[A-Za-z0-9_-]+\/)/g,
    "$1",
  )
}
|
|
218
|
+
|
|
82
219
|
const escapeTemplateLiteralContent = (text: string): string => {
|
|
83
|
-
const
|
|
84
|
-
const isPatchContent =
|
|
85
|
-
|
|
220
|
+
const normalizedPatchQuotes = normalizePatchEscapedQuotes(text)
|
|
221
|
+
const isPatchContent = normalizedPatchQuotes.includes("*** Begin Patch")
|
|
222
|
+
const normalized = isPatchContent
|
|
223
|
+
? normalizedPatchQuotes
|
|
224
|
+
: normalizeNonPatchEscapedTemplateMarkers(normalizedPatchQuotes)
|
|
225
|
+
if (
|
|
226
|
+
!needsTemplateEscaping(normalized) &&
|
|
227
|
+
!(isPatchContent && normalized.includes("\\"))
|
|
228
|
+
) {
|
|
86
229
|
return normalized
|
|
87
230
|
}
|
|
88
231
|
|
|
@@ -128,6 +271,10 @@ const findTemplateEnd = (
|
|
|
128
271
|
if (text[i] !== "`" || isEscaped(text, i)) {
|
|
129
272
|
continue
|
|
130
273
|
}
|
|
274
|
+
if (isTerminator(text[i + 1])) {
|
|
275
|
+
end = i
|
|
276
|
+
continue
|
|
277
|
+
}
|
|
131
278
|
const next = skipWhitespace(text, i + 1)
|
|
132
279
|
if (isTerminator(text[next])) {
|
|
133
280
|
end = i
|
|
@@ -364,11 +511,14 @@ const collectCallObjectPropertyIdentifiers = (
|
|
|
364
511
|
)
|
|
365
512
|
if (script[afterProperty] === ":") {
|
|
366
513
|
const valueStart = skipWhitespace(script, afterProperty + 1)
|
|
367
|
-
const
|
|
368
|
-
if (
|
|
369
|
-
const
|
|
370
|
-
|
|
371
|
-
|
|
514
|
+
const valueEnd = findObjectValueTerminator(script, valueStart)
|
|
515
|
+
if (valueEnd !== -1) {
|
|
516
|
+
for (const identifier of collectExpressionIdentifiers(
|
|
517
|
+
script,
|
|
518
|
+
valueStart,
|
|
519
|
+
valueEnd,
|
|
520
|
+
)) {
|
|
521
|
+
out.add(identifier)
|
|
372
522
|
}
|
|
373
523
|
}
|
|
374
524
|
cursor = valueStart + 1
|
|
@@ -441,6 +591,8 @@ const fixAssignedTemplate = (script: string, variableName: string): string => {
|
|
|
441
591
|
templateStart,
|
|
442
592
|
(char) =>
|
|
443
593
|
char === undefined ||
|
|
594
|
+
char === "\n" ||
|
|
595
|
+
char === "\r" ||
|
|
444
596
|
char === ";" ||
|
|
445
597
|
char === "," ||
|
|
446
598
|
char === ")" ||
|
package/src/SemanticSearch.ts
CHANGED
|
@@ -42,10 +42,13 @@ export class SemanticSearch extends ServiceMap.Service<
|
|
|
42
42
|
|
|
43
43
|
/** Converts every backslash in `path` to a forward slash. */
const normalizePath = (path: string) => path.split("\\").join("/")
|
|
44
44
|
|
|
45
|
-
const
|
|
45
|
+
/**
 * Builds the chunking configuration: 30-line chunks with no overlap, and a
 * per-chunk character limit taken from `options.chunkMaxCharacters`, falling
 * back to 10,000 when it is not provided.
 */
const resolveChunkConfig = (options: {
  readonly chunkMaxCharacters?: number | undefined
}) => {
  const chunkMaxCharacters = options.chunkMaxCharacters ?? 10_000

  return {
    chunkSize: 30,
    chunkOverlap: 0,
    chunkMaxCharacters,
  }
}
|
|
49
52
|
|
|
50
53
|
export const makeEmbeddingResolver = (
|
|
51
54
|
resolver: EmbeddingModel.Service["resolver"],
|
|
@@ -100,6 +103,7 @@ export const layer = (options: {
|
|
|
100
103
|
readonly embeddingBatchSize?: number | undefined
|
|
101
104
|
readonly embeddingRequestDelay?: Duration.Input | undefined
|
|
102
105
|
readonly concurrency?: number | undefined
|
|
106
|
+
readonly chunkMaxCharacters?: number | undefined
|
|
103
107
|
}): Layer.Layer<
|
|
104
108
|
SemanticSearch,
|
|
105
109
|
| SqlError.SqlError
|
|
@@ -121,6 +125,7 @@ export const layer = (options: {
|
|
|
121
125
|
const root = pathService.resolve(options.directory)
|
|
122
126
|
const resolver = makeEmbeddingResolver(embeddings.resolver, options)
|
|
123
127
|
const concurrency = options.concurrency ?? 2000
|
|
128
|
+
const chunkConfig = resolveChunkConfig(options)
|
|
124
129
|
const indexHandle = yield* FiberHandle.make()
|
|
125
130
|
const console = yield* Console.Console
|
|
126
131
|
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
const patch = `*** Begin Patch
|
|
2
|
+
*** Update File: packages/ai/openai/test/OpenAiLanguageModel.test.ts
|
|
3
|
+
@@
|
|
4
|
+
describe("streamText", () => {
|
|
5
|
+
it.effect("emits valid apply_patch tool params JSON for update_file diffs", () =>
|
|
6
|
+
Effect.gen(function*() {
|
|
7
|
+
@@
|
|
8
|
+
deepStrictEqual(params, {
|
|
9
|
+
call_id: outputItem.call_id,
|
|
10
|
+
operation: {
|
|
11
|
+
type: "update_file",
|
|
12
|
+
path: "src/example.ts",
|
|
13
|
+
diff
|
|
14
|
+
}
|
|
15
|
+
})
|
|
16
|
+
}))
|
|
17
|
+
+
|
|
18
|
+
+ it.effect("emits tool call from function_call_arguments.done when output_item.done is missing", () =>
|
|
19
|
+
+ Effect.gen(function*() {
|
|
20
|
+
+ const streamEvents = [
|
|
21
|
+
+ {
|
|
22
|
+
+ type: "response.created",
|
|
23
|
+
+ sequence_number: 1,
|
|
24
|
+
+ response: makeDefaultResponse({
|
|
25
|
+
+ id: "resp_function_call_done",
|
|
26
|
+
+ status: "in_progress",
|
|
27
|
+
+ output: []
|
|
28
|
+
+ })
|
|
29
|
+
+ },
|
|
30
|
+
+ {
|
|
31
|
+
+ type: "response.output_item.added",
|
|
32
|
+
+ sequence_number: 2,
|
|
33
|
+
+ output_index: 0,
|
|
34
|
+
+ item: {
|
|
35
|
+
+ type: "function_call",
|
|
36
|
+
+ id: "fc_1",
|
|
37
|
+
+ call_id: "call_1",
|
|
38
|
+
+ name: "TestTool",
|
|
39
|
+
+ arguments: "",
|
|
40
|
+
+ status: "in_progress"
|
|
41
|
+
+ }
|
|
42
|
+
+ },
|
|
43
|
+
+ {
|
|
44
|
+
+ type: "response.function_call_arguments.delta",
|
|
45
|
+
+ sequence_number: 3,
|
|
46
|
+
+ output_index: 0,
|
|
47
|
+
+ item_id: "fc_1",
|
|
48
|
+
+ delta: "{\"input\":\"hel"
|
|
49
|
+
+ },
|
|
50
|
+
+ {
|
|
51
|
+
+ type: "response.function_call_arguments.done",
|
|
52
|
+
+ sequence_number: 4,
|
|
53
|
+
+ output_index: 0,
|
|
54
|
+
+ item_id: "fc_1",
|
|
55
|
+
+ name: "TestTool",
|
|
56
|
+
+ arguments: "{\"input\":\"hello\"}"
|
|
57
|
+
+ },
|
|
58
|
+
+ {
|
|
59
|
+
+ type: "response.completed",
|
|
60
|
+
+ sequence_number: 5,
|
|
61
|
+
+ response: makeDefaultResponse({
|
|
62
|
+
+ id: "resp_function_call_done",
|
|
63
|
+
+ status: "completed",
|
|
64
|
+
+ output: []
|
|
65
|
+
+ })
|
|
66
|
+
+ }
|
|
67
|
+
+ ] as unknown as ReadonlyArray<typeof Generated.ResponseStreamEvent.Type>
|
|
68
|
+
+
|
|
69
|
+
+ const partsChunk = yield* LanguageModel.streamText({
|
|
70
|
+
+ prompt: "Use the test tool",
|
|
71
|
+
+ toolkit: TestToolkit,
|
|
72
|
+
+ disableToolCallResolution: true
|
|
73
|
+
+ }).pipe(
|
|
74
|
+
+ Stream.runCollect,
|
|
75
|
+
+ Effect.provide(OpenAiLanguageModel.model("gpt-4o-mini")),
|
|
76
|
+
+ Effect.provide(makeStreamTestLayer(streamEvents)),
|
|
77
|
+
+ Effect.provide(TestToolkitLayer)
|
|
78
|
+
+ )
|
|
79
|
+
+
|
|
80
|
+
+ const parts = globalThis.Array.from(partsChunk)
|
|
81
|
+
+ const toolCalls = parts.filter((part) => part.type === "tool-call" && part.id === "call_1")
|
|
82
|
+
+ strictEqual(toolCalls.length, 1)
|
|
83
|
+
+ const toolCall = toolCalls[0]
|
|
84
|
+
+ assert.isDefined(toolCall)
|
|
85
|
+
+ if (toolCall?.type === "tool-call") {
|
|
86
|
+
+ strictEqual(toolCall.name, "TestTool")
|
|
87
|
+
+ deepStrictEqual(toolCall.params, { input: "hello" })
|
|
88
|
+
+ }
|
|
89
|
+
+
|
|
90
|
+
+ const toolParamsEnd = parts.find((part) => part.type === "tool-params-end" && part.id === "call_1")
|
|
91
|
+
+ assert.isDefined(toolParamsEnd)
|
|
92
|
+
+ }))
|
|
93
|
+
})
|
|
94
|
+
*** End Patch`;
|
|
95
|
+
console.log(await applyPatch(patch));
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
const patch = `*** Begin Patch
|
|
2
|
+
*** Update File: packages/ai/openai/test/OpenAiLanguageModel.test.ts
|
|
3
|
+
@@
|
|
4
|
+
describe("streamText", () => {
|
|
5
|
+
it.effect("emits valid apply_patch tool params JSON for update_file diffs", () =>
|
|
6
|
+
Effect.gen(function*() {
|
|
7
|
+
@@
|
|
8
|
+
deepStrictEqual(params, {
|
|
9
|
+
call_id: outputItem.call_id,
|
|
10
|
+
operation: {
|
|
11
|
+
type: "update_file",
|
|
12
|
+
path: "src/example.ts",
|
|
13
|
+
diff
|
|
14
|
+
}
|
|
15
|
+
})
|
|
16
|
+
}))
|
|
17
|
+
+
|
|
18
|
+
+ it.effect("emits tool call from function_call_arguments.done when output_item.done is missing", () =>
|
|
19
|
+
+ Effect.gen(function*() {
|
|
20
|
+
+ const streamEvents = [
|
|
21
|
+
+ {
|
|
22
|
+
+ type: "response.created",
|
|
23
|
+
+ sequence_number: 1,
|
|
24
|
+
+ response: makeDefaultResponse({
|
|
25
|
+
+ id: "resp_function_call_done",
|
|
26
|
+
+ status: "in_progress",
|
|
27
|
+
+ output: []
|
|
28
|
+
+ })
|
|
29
|
+
+ },
|
|
30
|
+
+ {
|
|
31
|
+
+ type: "response.output_item.added",
|
|
32
|
+
+ sequence_number: 2,
|
|
33
|
+
+ output_index: 0,
|
|
34
|
+
+ item: {
|
|
35
|
+
+ type: "function_call",
|
|
36
|
+
+ id: "fc_1",
|
|
37
|
+
+ call_id: "call_1",
|
|
38
|
+
+ name: "TestTool",
|
|
39
|
+
+ arguments: "",
|
|
40
|
+
+ status: "in_progress"
|
|
41
|
+
+ }
|
|
42
|
+
+ },
|
|
43
|
+
+ {
|
|
44
|
+
+ type: "response.function_call_arguments.delta",
|
|
45
|
+
+ sequence_number: 3,
|
|
46
|
+
+ output_index: 0,
|
|
47
|
+
+ item_id: "fc_1",
|
|
48
|
+
+ delta: "{\\"input\\":\\"hel"
|
|
49
|
+
+ },
|
|
50
|
+
+ {
|
|
51
|
+
+ type: "response.function_call_arguments.done",
|
|
52
|
+
+ sequence_number: 4,
|
|
53
|
+
+ output_index: 0,
|
|
54
|
+
+ item_id: "fc_1",
|
|
55
|
+
+ name: "TestTool",
|
|
56
|
+
+ arguments: "{\\"input\\":\\"hello\\"}"
|
|
57
|
+
+ },
|
|
58
|
+
+ {
|
|
59
|
+
+ type: "response.completed",
|
|
60
|
+
+ sequence_number: 5,
|
|
61
|
+
+ response: makeDefaultResponse({
|
|
62
|
+
+ id: "resp_function_call_done",
|
|
63
|
+
+ status: "completed",
|
|
64
|
+
+ output: []
|
|
65
|
+
+ })
|
|
66
|
+
+ }
|
|
67
|
+
+ ] as unknown as ReadonlyArray<typeof Generated.ResponseStreamEvent.Type>
|
|
68
|
+
+
|
|
69
|
+
+ const partsChunk = yield* LanguageModel.streamText({
|
|
70
|
+
+ prompt: "Use the test tool",
|
|
71
|
+
+ toolkit: TestToolkit,
|
|
72
|
+
+ disableToolCallResolution: true
|
|
73
|
+
+ }).pipe(
|
|
74
|
+
+ Stream.runCollect,
|
|
75
|
+
+ Effect.provide(OpenAiLanguageModel.model("gpt-4o-mini")),
|
|
76
|
+
+ Effect.provide(makeStreamTestLayer(streamEvents)),
|
|
77
|
+
+ Effect.provide(TestToolkitLayer)
|
|
78
|
+
+ )
|
|
79
|
+
+
|
|
80
|
+
+ const parts = globalThis.Array.from(partsChunk)
|
|
81
|
+
+ const toolCalls = parts.filter((part) => part.type === "tool-call" && part.id === "call_1")
|
|
82
|
+
+ strictEqual(toolCalls.length, 1)
|
|
83
|
+
+ const toolCall = toolCalls[0]
|
|
84
|
+
+ assert.isDefined(toolCall)
|
|
85
|
+
+ if (toolCall?.type === "tool-call") {
|
|
86
|
+
+ strictEqual(toolCall.name, "TestTool")
|
|
87
|
+
+ deepStrictEqual(toolCall.params, { input: "hello" })
|
|
88
|
+
+ }
|
|
89
|
+
+
|
|
90
|
+
+ const toolParamsEnd = parts.find((part) => part.type === "tool-params-end" && part.id === "call_1")
|
|
91
|
+
+ assert.isDefined(toolParamsEnd)
|
|
92
|
+
+ }))
|
|
93
|
+
})
|
|
94
|
+
*** End Patch`;
|
|
95
|
+
console.log(await applyPatch(patch));
|