npm - clanka - Versions diffs - 0.2.18 → 0.2.20 - Mend

clanka 0.2.18 → 0.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/dist/Agent.d.ts.map +1 -1
package/dist/Agent.js +2 -1
package/dist/Agent.js.map +1 -1
package/dist/CodeChunker.d.ts +4 -0
package/dist/CodeChunker.d.ts.map +1 -1
package/dist/CodeChunker.js +64 -14
package/dist/CodeChunker.js.map +1 -1
package/dist/CodeChunker.test.js +71 -0
package/dist/CodeChunker.test.js.map +1 -1
package/dist/ScriptPreprocessing.d.ts.map +1 -1
package/dist/ScriptPreprocessing.js +128 -9
package/dist/ScriptPreprocessing.js.map +1 -1
package/dist/ScriptPreprocessing.test.js +4 -0
package/dist/ScriptPreprocessing.test.js.map +1 -1
package/dist/SemanticSearch.d.ts +1 -0
package/dist/SemanticSearch.d.ts.map +1 -1
package/dist/SemanticSearch.js +4 -2
package/dist/SemanticSearch.js.map +1 -1
package/package.json +10 -10
package/src/Agent.ts +2 -1
package/src/CodeChunker.test.ts +77 -0
package/src/CodeChunker.ts +105 -19
package/src/ScriptPreprocessing.test.ts +4 -0
package/src/ScriptPreprocessing.ts +161 -9
package/src/SemanticSearch.ts +7 -2
package/src/fixtures/patch10-broken.txt +95 -0
package/src/fixtures/patch10-fixed.txt +95 -0
package/src/fixtures/patch11-broken.txt +219 -0
package/src/fixtures/patch11-fixed.txt +219 -0
package/src/fixtures/patch8-broken.txt +30 -0
package/src/fixtures/patch8-fixed.txt +30 -0
package/src/fixtures/patch9-broken.txt +10 -0
package/src/fixtures/patch9-fixed.txt +10 -0

package/src/CodeChunker.ts CHANGED

@@ -59,18 +59,21 @@ export class CodeChunker extends ServiceMap.Service<
       readonly path: string
       readonly chunkSize: number
       readonly chunkOverlap: number
+      readonly chunkMaxCharacters?: number | undefined
     }): Effect.Effect<ReadonlyArray<CodeChunk>>
     chunkFiles(options: {
       readonly root: string
       readonly paths: ReadonlyArray<string>
       readonly chunkSize: number
       readonly chunkOverlap: number
+      readonly chunkMaxCharacters?: number | undefined
     }): Stream.Stream<CodeChunk>
     chunkCodebase(options: {
       readonly root: string
       readonly maxFileSize?: string | undefined
       readonly chunkSize: number
       readonly chunkOverlap: number
+      readonly chunkMaxCharacters?: number | undefined
     }): Stream.Stream<CodeChunk>
   }
 >()("clanka/CodeChunker") {}
@@ -157,6 +160,7 @@ interface LineRange {
 interface ChunkSettings {
   readonly chunkSize: number
   readonly chunkOverlap: number
+  readonly chunkMaxCharacters: number
 }
 interface ChunkRange extends LineRange {
@@ -234,16 +238,23 @@ export const isMeaningfulFile = (path: string): boolean => {
 const resolveChunkSettings = (options: {
   readonly chunkSize: number
   readonly chunkOverlap: number
+  readonly chunkMaxCharacters?: number | undefined
 }): ChunkSettings => {
   const chunkSize = Math.max(1, options.chunkSize)
   const chunkOverlap = Math.max(
     0,
     Math.min(chunkSize - 1, options.chunkOverlap),
   )
+  const chunkMaxCharacters =
+    options.chunkMaxCharacters !== undefined &&
+    Number.isFinite(options.chunkMaxCharacters)
+      ? Math.max(1, Math.floor(options.chunkMaxCharacters))
+      : Number.POSITIVE_INFINITY
   return {
     chunkSize,
     chunkOverlap,
+    chunkMaxCharacters,
   }
 }
@@ -345,24 +356,76 @@ const normalizeLineRange = (
   }
 }
+const lineLengthPrefixSums = (
+  lines: ReadonlyArray<string>,
+): ReadonlyArray<number> => {
+  const sums = [0] as Array<number>
+  for (let index = 0; index < lines.length; index++) {
+    sums.push(sums[index]! + lines[index]!.length)
+  }
+  return sums
+}
+const lineRangeCharacterLength = (
+  prefixSums: ReadonlyArray<number>,
+  range: LineRange,
+): number =>
+  prefixSums[range.endLine]! -
+  prefixSums[range.startLine - 1]! +
+  (range.endLine - range.startLine)
+const resolveSegmentEndLine = (options: {
+  readonly startLine: number
+  readonly maxEndLine: number
+  readonly settings: ChunkSettings
+  readonly prefixSums: ReadonlyArray<number>
+}): number => {
+  if (options.settings.chunkMaxCharacters === Number.POSITIVE_INFINITY) {
+    return options.maxEndLine
+  }
+  let endLine = options.maxEndLine
+  while (
+    endLine > options.startLine &&
+    lineRangeCharacterLength(options.prefixSums, {
+      startLine: options.startLine,
+      endLine,
+    }) > options.settings.chunkMaxCharacters
+  ) {
+    endLine--
+  }
+  return endLine
+}
 const splitRange = (
   range: LineRange,
   settings: ChunkSettings,
+  prefixSums: ReadonlyArray<number>,
 ): ReadonlyArray<LineRange> => {
   const lineCount = range.endLine - range.startLine + 1
-  if (lineCount <= settings.chunkSize) {
+  if (
+    lineCount <= settings.chunkSize &&
+    lineRangeCharacterLength(prefixSums, range) <= settings.chunkMaxCharacters
+  ) {
     return [range]
   }
-  const step = settings.chunkSize - settings.chunkOverlap
   const out = [] as Array<LineRange>
-  for (
-    let startLine = range.startLine;
-    startLine <= range.endLine;
-    startLine += step
-  ) {
-    const endLine = Math.min(range.endLine, startLine + settings.chunkSize - 1)
+  for (let startLine = range.startLine; startLine <= range.endLine; ) {
+    const maxEndLine = Math.min(
+      range.endLine,
+      startLine + settings.chunkSize - 1,
+    )
+    const endLine = resolveSegmentEndLine({
+      startLine,
+      maxEndLine,
+      settings,
+      prefixSums,
+    })
     out.push({
       startLine,
       endLine,
@@ -371,6 +434,8 @@ const splitRange = (
     if (endLine >= range.endLine) {
       break
     }
+    startLine = Math.max(startLine + 1, endLine - settings.chunkOverlap + 1)
   }
   return out
@@ -648,6 +713,7 @@ const chunksFromRanges = (
   const out = [] as Array<CodeChunk>
   const seen = new Set<string>()
+  const prefixSums = lineLengthPrefixSums(lines)
   for (const range of ranges) {
     const normalizedRange = normalizeLineRange(range, lines.length)
@@ -655,7 +721,7 @@ const chunksFromRanges = (
       continue
     }
-    const allSegments = splitRange(normalizedRange, settings)
+    const allSegments = splitRange(normalizedRange, settings, prefixSums)
     const segments =
       range.type === "class" &&
       allSegments.length > 1 &&
@@ -709,8 +775,8 @@ const chunkWithLineWindows = (
   lines: ReadonlyArray<string>,
   settings: ChunkSettings,
 ): ReadonlyArray<CodeChunk> => {
-  const step = settings.chunkSize - settings.chunkOverlap
   const out = [] as Array<CodeChunk>
+  const prefixSums = lineLengthPrefixSums(lines)
   for (let index = 0; index < lines.length; ) {
     if (!isMeaningfulLine(lines[index]!)) {
@@ -718,25 +784,38 @@ const chunkWithLineWindows = (
       continue
     }
-    const start = index
-    const end = Math.min(lines.length, start + settings.chunkSize)
-    const chunkLines = lines.slice(start, end)
+    const startLine = index + 1
+    const maxEndLine = Math.min(
+      lines.length,
+      startLine + settings.chunkSize - 1,
+    )
+    const endLine = resolveSegmentEndLine({
+      startLine,
+      maxEndLine,
+      settings,
+      prefixSums,
+    })
+    const chunkLines = lines.slice(startLine - 1, endLine)
     out.push({
       path,
-      startLine: start + 1,
-      endLine: end,
+      startLine,
+      endLine,
       name: undefined,
       type: undefined,
       parent: undefined,
       content: chunkLines.join("\n"),
     })
-    index += step
-    if (end >= lines.length) {
+    if (endLine >= lines.length) {
       break
     }
+    const nextStartLine = Math.max(
+      startLine + 1,
+      endLine - settings.chunkOverlap + 1,
+    )
+    index = nextStartLine - 1
   }
   return out
@@ -752,6 +831,7 @@ export const chunkFileContent = (
   options: {
     readonly chunkSize: number
     readonly chunkOverlap: number
+    readonly chunkMaxCharacters?: number | undefined
   },
 ): ReadonlyArray<CodeChunk> => {
   if (content.trim().length === 0 || isProbablyMinified(content)) {
@@ -869,6 +949,9 @@ export const layer: Layer.Layer<
                 path,
                 chunkSize: options.chunkSize,
                 chunkOverlap: options.chunkOverlap,
+                ...(options.chunkMaxCharacters === undefined
+                  ? {}
+                  : { chunkMaxCharacters: options.chunkMaxCharacters }),
               }),
               Stream.fromArrayEffect,
             ),
@@ -891,6 +974,9 @@ export const layer: Layer.Layer<
           paths: files,
           chunkSize: options.chunkSize,
           chunkOverlap: options.chunkOverlap,
+          ...(options.chunkMaxCharacters === undefined
+            ? {}
+            : { chunkMaxCharacters: options.chunkMaxCharacters }),
         })
       }, Stream.unwrap)

package/src/ScriptPreprocessing.test.ts CHANGED

@@ -165,6 +165,10 @@ describe("preprocessScript", () => {
     "patch5",
     "patch6",
     "patch7",
+    "patch8",
+    "patch9",
+    "patch10",
+    "patch11",
   ])("fixes broken %s", (fixture) => {
     const content = readFileSync(
       join(__dirname, "fixtures", `${fixture}-broken.txt`),

package/src/ScriptPreprocessing.ts CHANGED

@@ -51,6 +51,121 @@ const parseIdentifier = (
   }
 }
+const findPreviousNonWhitespace = (text: string, from: number): number => {
+  let i = from
+  while (i >= 0 && /\s/.test(text[i]!)) {
+    i--
+  }
+  return i
+}
+const findNextNonWhitespace = (text: string, from: number): number => {
+  let i = from
+  while (i < text.length && /\s/.test(text[i]!)) {
+    i++
+  }
+  return i
+}
+const findObjectValueTerminator = (text: string, start: number): number => {
+  let parenDepth = 0
+  let bracketDepth = 0
+  let braceDepth = 0
+  let stringDelimiter: '"' | "'" | "`" | undefined
+  for (let i = start; i < text.length; i++) {
+    const char = text[i]!
+    if (stringDelimiter !== undefined) {
+      if (char === stringDelimiter && !isEscaped(text, i)) {
+        stringDelimiter = undefined
+      }
+      continue
+    }
+    if (char === '"' || char === "'" || char === "`") {
+      stringDelimiter = char
+      continue
+    }
+    if (char === "(") {
+      parenDepth++
+      continue
+    }
+    if (char === ")") {
+      if (parenDepth > 0) {
+        parenDepth--
+      }
+      continue
+    }
+    if (char === "[") {
+      bracketDepth++
+      continue
+    }
+    if (char === "]") {
+      if (bracketDepth > 0) {
+        bracketDepth--
+      }
+      continue
+    }
+    if (char === "{") {
+      braceDepth++
+      continue
+    }
+    if (char === "}") {
+      if (parenDepth === 0 && bracketDepth === 0 && braceDepth === 0) {
+        return i
+      }
+      if (braceDepth > 0) {
+        braceDepth--
+      }
+      continue
+    }
+    if (
+      char === "," &&
+      parenDepth === 0 &&
+      bracketDepth === 0 &&
+      braceDepth === 0
+    ) {
+      return i
+    }
+  }
+  return -1
+}
+const collectExpressionIdentifiers = (
+  text: string,
+  start: number,
+  end: number,
+): ReadonlySet<string> => {
+  const out = new Set<string>()
+  let cursor = start
+  while (cursor < end) {
+    const identifier = parseIdentifier(text, cursor)
+    if (identifier === undefined) {
+      cursor++
+      continue
+    }
+    const previousNonWhitespace = findPreviousNonWhitespace(text, cursor - 1)
+    const nextNonWhitespace = findNextNonWhitespace(text, identifier.end)
+    if (
+      text[previousNonWhitespace] !== "." &&
+      text[nextNonWhitespace] !== "." &&
+      text[nextNonWhitespace] !== "("
+    ) {
+      out.add(identifier.name)
+    }
+    cursor = identifier.end
+  }
+  return out
+}
 const isEscaped = (text: string, index: number): boolean => {
   let slashCount = 0
   let i = index - 1
@@ -76,13 +191,41 @@ const needsTemplateEscaping = (text: string): boolean => {
 const normalizePatchEscapedQuotes = (text: string): string =>
   text.includes("*** Begin Patch")
-    ? text.replace(/\\"([A-Za-z0-9_$.-]+)\\"/g, '"$1"')
+    ? text.replace(/\\"([A-Za-z0-9_$.-]+)\\"/g, (match, content, index) => {
+        const previous = text[findPreviousNonWhitespace(text, index - 1)]
+        const next = text[findNextNonWhitespace(text, index + match.length)]
+        if (
+          previous === "{" ||
+          previous === "[" ||
+          previous === ":" ||
+          previous === "," ||
+          next === ":" ||
+          next === "}" ||
+          next === "]" ||
+          next === ","
+        ) {
+          return match
+        }
+        return `"${content}"`
+      })
     : text
+const normalizeNonPatchEscapedTemplateMarkers = (text: string): string =>
+  text
+    .replace(/\\{2,}(?=`|\$\{)/g, "\\")
+    .replace(/(^|\s)\\+(?=\.[A-Za-z0-9_-]+\/)/g, "$1")
 const escapeTemplateLiteralContent = (text: string): string => {
-  const normalized = normalizePatchEscapedQuotes(text)
-  const isPatchContent = normalized.includes("*** Begin Patch")
-  if (!needsTemplateEscaping(normalized)) {
+  const normalizedPatchQuotes = normalizePatchEscapedQuotes(text)
+  const isPatchContent = normalizedPatchQuotes.includes("*** Begin Patch")
+  const normalized = isPatchContent
+    ? normalizedPatchQuotes
+    : normalizeNonPatchEscapedTemplateMarkers(normalizedPatchQuotes)
+  if (
+    !needsTemplateEscaping(normalized) &&
+    !(isPatchContent && normalized.includes("\\"))
+  ) {
     return normalized
   }
@@ -128,6 +271,10 @@ const findTemplateEnd = (
     if (text[i] !== "`" || isEscaped(text, i)) {
       continue
     }
+    if (isTerminator(text[i + 1])) {
+      end = i
+      continue
+    }
     const next = skipWhitespace(text, i + 1)
     if (isTerminator(text[next])) {
       end = i
@@ -364,11 +511,14 @@ const collectCallObjectPropertyIdentifiers = (
     )
     if (script[afterProperty] === ":") {
       const valueStart = skipWhitespace(script, afterProperty + 1)
-      const identifier = parseIdentifier(script, valueStart)
-      if (identifier !== undefined) {
-        const valueEnd = skipWhitespace(script, identifier.end)
-        if (script[valueEnd] === "}" || script[valueEnd] === ",") {
-          out.add(identifier.name)
+      const valueEnd = findObjectValueTerminator(script, valueStart)
+      if (valueEnd !== -1) {
+        for (const identifier of collectExpressionIdentifiers(
+          script,
+          valueStart,
+          valueEnd,
+        )) {
+          out.add(identifier)
         }
       }
       cursor = valueStart + 1
@@ -441,6 +591,8 @@ const fixAssignedTemplate = (script: string, variableName: string): string => {
       templateStart,
       (char) =>
         char === undefined ||
+        char === "\n" ||
+        char === "\r" ||
         char === ";" ||
         char === "," ||
         char === ")" ||

package/src/SemanticSearch.ts CHANGED

@@ -42,10 +42,13 @@ export class SemanticSearch extends ServiceMap.Service<
 const normalizePath = (path: string) => path.replace(/\\/g, "/")
-const chunkConfig = {
+const resolveChunkConfig = (options: {
+  readonly chunkMaxCharacters?: number | undefined
+}) => ({
   chunkSize: 30,
   chunkOverlap: 0,
-} as const
+  chunkMaxCharacters: options.chunkMaxCharacters ?? 10_000,
+})
 export const makeEmbeddingResolver = (
   resolver: EmbeddingModel.Service["resolver"],
@@ -100,6 +103,7 @@ export const layer = (options: {
   readonly embeddingBatchSize?: number | undefined
   readonly embeddingRequestDelay?: Duration.Input | undefined
   readonly concurrency?: number | undefined
+  readonly chunkMaxCharacters?: number | undefined
 }): Layer.Layer<
   SemanticSearch,
   | SqlError.SqlError
@@ -121,6 +125,7 @@ export const layer = (options: {
       const root = pathService.resolve(options.directory)
       const resolver = makeEmbeddingResolver(embeddings.resolver, options)
       const concurrency = options.concurrency ?? 2000
+      const chunkConfig = resolveChunkConfig(options)
       const indexHandle = yield* FiberHandle.make()
       const console = yield* Console.Console

package/src/fixtures/patch10-broken.txt ADDED

@@ -0,0 +1,95 @@
+const patch = `*** Begin Patch
+*** Update File: packages/ai/openai/test/OpenAiLanguageModel.test.ts
+@@
+   describe("streamText", () => {
+     it.effect("emits valid apply_patch tool params JSON for update_file diffs", () =>
+       Effect.gen(function*() {
+@@
+         deepStrictEqual(params, {
+           call_id: outputItem.call_id,
+           operation: {
+             type: "update_file",
+             path: "src/example.ts",
+             diff
+           }
+         })
+       }))
++
++    it.effect("emits tool call from function_call_arguments.done when output_item.done is missing", () =>
++      Effect.gen(function*() {
++        const streamEvents = [
++          {
++            type: "response.created",
++            sequence_number: 1,
++            response: makeDefaultResponse({
++              id: "resp_function_call_done",
++              status: "in_progress",
++              output: []
++            })
++          },
++          {
++            type: "response.output_item.added",
++            sequence_number: 2,
++            output_index: 0,
++            item: {
++              type: "function_call",
++              id: "fc_1",
++              call_id: "call_1",
++              name: "TestTool",
++              arguments: "",
++              status: "in_progress"
++            }
++          },
++          {
++            type: "response.function_call_arguments.delta",
++            sequence_number: 3,
++            output_index: 0,
++            item_id: "fc_1",
++            delta: "{\"input\":\"hel"
++          },
++          {
++            type: "response.function_call_arguments.done",
++            sequence_number: 4,
++            output_index: 0,
++            item_id: "fc_1",
++            name: "TestTool",
++            arguments: "{\"input\":\"hello\"}"
++          },
++          {
++            type: "response.completed",
++            sequence_number: 5,
++            response: makeDefaultResponse({
++              id: "resp_function_call_done",
++              status: "completed",
++              output: []
++            })
++          }
++        ] as unknown as ReadonlyArray<typeof Generated.ResponseStreamEvent.Type>
++
++        const partsChunk = yield* LanguageModel.streamText({
++          prompt: "Use the test tool",
++          toolkit: TestToolkit,
++          disableToolCallResolution: true
++        }).pipe(
++          Stream.runCollect,
++          Effect.provide(OpenAiLanguageModel.model("gpt-4o-mini")),
++          Effect.provide(makeStreamTestLayer(streamEvents)),
++          Effect.provide(TestToolkitLayer)
++        )
++
++        const parts = globalThis.Array.from(partsChunk)
++        const toolCalls = parts.filter((part) => part.type === "tool-call" && part.id === "call_1")
++        strictEqual(toolCalls.length, 1)
++        const toolCall = toolCalls[0]
++        assert.isDefined(toolCall)
++        if (toolCall?.type === "tool-call") {
++          strictEqual(toolCall.name, "TestTool")
++          deepStrictEqual(toolCall.params, { input: "hello" })
++        }
++
++        const toolParamsEnd = parts.find((part) => part.type === "tool-params-end" && part.id === "call_1")
++        assert.isDefined(toolParamsEnd)
++      }))
+   })
+*** End Patch`;
+console.log(await applyPatch(patch));

package/src/fixtures/patch10-fixed.txt ADDED

@@ -0,0 +1,95 @@
+const patch = `*** Begin Patch
+*** Update File: packages/ai/openai/test/OpenAiLanguageModel.test.ts
+@@
+   describe("streamText", () => {
+     it.effect("emits valid apply_patch tool params JSON for update_file diffs", () =>
+       Effect.gen(function*() {
+@@
+         deepStrictEqual(params, {
+           call_id: outputItem.call_id,
+           operation: {
+             type: "update_file",
+             path: "src/example.ts",
+             diff
+           }
+         })
+       }))
++
++    it.effect("emits tool call from function_call_arguments.done when output_item.done is missing", () =>
++      Effect.gen(function*() {
++        const streamEvents = [
++          {
++            type: "response.created",
++            sequence_number: 1,
++            response: makeDefaultResponse({
++              id: "resp_function_call_done",
++              status: "in_progress",
++              output: []
++            })
++          },
++          {
++            type: "response.output_item.added",
++            sequence_number: 2,
++            output_index: 0,
++            item: {
++              type: "function_call",
++              id: "fc_1",
++              call_id: "call_1",
++              name: "TestTool",
++              arguments: "",
++              status: "in_progress"
++            }
++          },
++          {
++            type: "response.function_call_arguments.delta",
++            sequence_number: 3,
++            output_index: 0,
++            item_id: "fc_1",
++            delta: "{\\"input\\":\\"hel"
++          },
++          {
++            type: "response.function_call_arguments.done",
++            sequence_number: 4,
++            output_index: 0,
++            item_id: "fc_1",
++            name: "TestTool",
++            arguments: "{\\"input\\":\\"hello\\"}"
++          },
++          {
++            type: "response.completed",
++            sequence_number: 5,
++            response: makeDefaultResponse({
++              id: "resp_function_call_done",
++              status: "completed",
++              output: []
++            })
++          }
++        ] as unknown as ReadonlyArray<typeof Generated.ResponseStreamEvent.Type>
++
++        const partsChunk = yield* LanguageModel.streamText({
++          prompt: "Use the test tool",
++          toolkit: TestToolkit,
++          disableToolCallResolution: true
++        }).pipe(
++          Stream.runCollect,
++          Effect.provide(OpenAiLanguageModel.model("gpt-4o-mini")),
++          Effect.provide(makeStreamTestLayer(streamEvents)),
++          Effect.provide(TestToolkitLayer)
++        )
++
++        const parts = globalThis.Array.from(partsChunk)
++        const toolCalls = parts.filter((part) => part.type === "tool-call" && part.id === "call_1")
++        strictEqual(toolCalls.length, 1)
++        const toolCall = toolCalls[0]
++        assert.isDefined(toolCall)
++        if (toolCall?.type === "tool-call") {
++          strictEqual(toolCall.name, "TestTool")
++          deepStrictEqual(toolCall.params, { input: "hello" })
++        }
++
++        const toolParamsEnd = parts.find((part) => part.type === "tool-params-end" && part.id === "call_1")
++        assert.isDefined(toolParamsEnd)
++      }))
+   })
+*** End Patch`;
+console.log(await applyPatch(patch));