clanka 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ChunkRepo.d.ts +0 -1
- package/dist/ChunkRepo.d.ts.map +1 -1
- package/dist/ChunkRepo.js +0 -10
- package/dist/ChunkRepo.js.map +1 -1
- package/dist/CodeChunker.d.ts +8 -1
- package/dist/CodeChunker.d.ts.map +1 -1
- package/dist/CodeChunker.js +331 -55
- package/dist/CodeChunker.js.map +1 -1
- package/dist/CodeChunker.test.js +231 -28
- package/dist/CodeChunker.test.js.map +1 -1
- package/dist/SemanticSearch.d.ts +2 -3
- package/dist/SemanticSearch.d.ts.map +1 -1
- package/dist/SemanticSearch.js +36 -15
- package/dist/SemanticSearch.js.map +1 -1
- package/package.json +13 -10
- package/src/ChunkRepo.ts +1 -12
- package/src/CodeChunker.test.ts +253 -32
- package/src/CodeChunker.ts +499 -70
- package/src/SemanticSearch.ts +45 -17
- package/src/fixtures/fiber.txt +255 -0
package/src/SemanticSearch.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @since 1.0.0
|
|
3
3
|
*/
|
|
4
|
+
import { createHash } from "node:crypto"
|
|
4
5
|
import * as Effect from "effect/Effect"
|
|
5
6
|
import * as ChunkRepo from "./ChunkRepo.ts"
|
|
6
7
|
import * as CodeChunker from "./CodeChunker.ts"
|
|
@@ -23,13 +24,6 @@ import type * as ChildProcessSpawner from "effect/unstable/process/ChildProcessS
|
|
|
23
24
|
import type * as FileSystem from "effect/FileSystem"
|
|
24
25
|
import * as Console from "effect/Console"
|
|
25
26
|
|
|
26
|
-
const normalizePath = (path: string) => path.replace(/\\/g, "/")
|
|
27
|
-
|
|
28
|
-
const chunkConfig = {
|
|
29
|
-
chunkSize: 20,
|
|
30
|
-
chunkOverlap: 5,
|
|
31
|
-
} as const
|
|
32
|
-
|
|
33
27
|
/**
|
|
34
28
|
* @since 1.0.0
|
|
35
29
|
* @category Services
|
|
@@ -46,6 +40,13 @@ export class SemanticSearch extends ServiceMap.Service<
|
|
|
46
40
|
}
|
|
47
41
|
>()("clanka/SemanticSearch/SemanticSearch") {}
|
|
48
42
|
|
|
43
|
+
const normalizePath = (path: string) => path.replace(/\\/g, "/")
|
|
44
|
+
|
|
45
|
+
const chunkConfig = {
|
|
46
|
+
chunkSize: 30,
|
|
47
|
+
chunkOverlap: 0,
|
|
48
|
+
} as const
|
|
49
|
+
|
|
49
50
|
export const makeEmbeddingResolver = (
|
|
50
51
|
resolver: EmbeddingModel.Service["resolver"],
|
|
51
52
|
options: {
|
|
@@ -60,6 +61,35 @@ export const makeEmbeddingResolver = (
|
|
|
60
61
|
RequestResolver.batchN(options.embeddingBatchSize ?? 500),
|
|
61
62
|
)
|
|
62
63
|
|
|
64
|
+
export const chunkEmbeddingInput = (chunk: CodeChunker.CodeChunk): string => {
|
|
65
|
+
const headerLines = ["---", "file: " + chunk.path]
|
|
66
|
+
|
|
67
|
+
if (chunk.name !== undefined) {
|
|
68
|
+
headerLines.push("name: " + chunk.name)
|
|
69
|
+
}
|
|
70
|
+
if (chunk.type !== undefined) {
|
|
71
|
+
headerLines.push("type: " + chunk.type)
|
|
72
|
+
}
|
|
73
|
+
if (chunk.parent !== undefined) {
|
|
74
|
+
headerLines.push("parent: " + chunk.parent)
|
|
75
|
+
}
|
|
76
|
+
headerLines.push("---")
|
|
77
|
+
|
|
78
|
+
const contentLines = chunk.content.split("\n")
|
|
79
|
+
let contentWithLines = ""
|
|
80
|
+
for (let i = 0; i < contentLines.length; i++) {
|
|
81
|
+
if (i > 0) {
|
|
82
|
+
contentWithLines += "\n"
|
|
83
|
+
}
|
|
84
|
+
contentWithLines += `${chunk.startLine + i}: ${contentLines[i]}`
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
return headerLines.join("\n") + "\n\n" + contentWithLines
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
const hashChunkInput = (input: string): string =>
|
|
91
|
+
createHash("sha256").update(input).digest("hex")
|
|
92
|
+
|
|
63
93
|
/**
|
|
64
94
|
* @since 1.0.0
|
|
65
95
|
* @category Layers
|
|
@@ -115,11 +145,14 @@ export const layer = (options: {
|
|
|
115
145
|
readonly syncId: ChunkRepo.SyncId
|
|
116
146
|
readonly checkExisting: boolean
|
|
117
147
|
}) {
|
|
148
|
+
const input = chunkEmbeddingInput(options.chunk)
|
|
149
|
+
const hash = hashChunkInput(input)
|
|
150
|
+
|
|
118
151
|
if (options.checkExisting) {
|
|
119
152
|
const id = yield* repo.exists({
|
|
120
153
|
path: options.chunk.path,
|
|
121
154
|
startLine: options.chunk.startLine,
|
|
122
|
-
hash
|
|
155
|
+
hash,
|
|
123
156
|
})
|
|
124
157
|
if (Option.isSome(id)) {
|
|
125
158
|
yield* repo.setSyncId(id.value, options.syncId)
|
|
@@ -128,12 +161,7 @@ export const layer = (options: {
|
|
|
128
161
|
}
|
|
129
162
|
|
|
130
163
|
const result = yield* Effect.request(
|
|
131
|
-
new EmbeddingModel.EmbeddingRequest({
|
|
132
|
-
input: `File: ${options.chunk.path}
|
|
133
|
-
Lines: ${options.chunk.startLine}-${options.chunk.endLine}
|
|
134
|
-
|
|
135
|
-
${options.chunk.content}`,
|
|
136
|
-
}),
|
|
164
|
+
new EmbeddingModel.EmbeddingRequest({ input }),
|
|
137
165
|
resolver,
|
|
138
166
|
)
|
|
139
167
|
const vector = new Float32Array(result.vector)
|
|
@@ -142,8 +170,8 @@ ${options.chunk.content}`,
|
|
|
142
170
|
path: options.chunk.path,
|
|
143
171
|
startLine: options.chunk.startLine,
|
|
144
172
|
endLine: options.chunk.endLine,
|
|
145
|
-
hash
|
|
146
|
-
content:
|
|
173
|
+
hash,
|
|
174
|
+
content: input,
|
|
147
175
|
vector,
|
|
148
176
|
syncId: options.syncId,
|
|
149
177
|
}),
|
|
@@ -209,7 +237,7 @@ ${options.chunk.content}`,
|
|
|
209
237
|
vector: new Float32Array(vector),
|
|
210
238
|
limit: options.limit,
|
|
211
239
|
})
|
|
212
|
-
return results.map((r) => r.
|
|
240
|
+
return results.map((r) => r.content).join("\n\n")
|
|
213
241
|
}, Effect.orDie),
|
|
214
242
|
updateFile: Effect.fn("SemanticSearch.updateFile")(function* (path) {
|
|
215
243
|
yield* Fiber.join(initialIndex)
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
// ----------------------------------------------------------------------------
|
|
2
|
+
// Fiber
|
|
3
|
+
// ----------------------------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
/** @internal */
|
|
6
|
+
export const FiberTypeId = `~effect/Fiber/${version}` as const
|
|
7
|
+
|
|
8
|
+
const fiberVariance = {
|
|
9
|
+
_A: identity,
|
|
10
|
+
_E: identity,
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
const fiberIdStore = { id: 0 }
|
|
14
|
+
|
|
15
|
+
/** @internal */
|
|
16
|
+
export const getCurrentFiber = (): Fiber.Fiber<any, any> | undefined =>
|
|
17
|
+
(globalThis as any)[currentFiberTypeId]
|
|
18
|
+
|
|
19
|
+
/** @internal */
|
|
20
|
+
export class FiberImpl<A = any, E = any> implements Fiber.Fiber<A, E> {
|
|
21
|
+
constructor(
|
|
22
|
+
services: ServiceMap.ServiceMap<never>,
|
|
23
|
+
interruptible: boolean = true,
|
|
24
|
+
) {
|
|
25
|
+
this[FiberTypeId] = fiberVariance as any
|
|
26
|
+
this.setServices(services)
|
|
27
|
+
this.id = ++fiberIdStore.id
|
|
28
|
+
this.currentOpCount = 0
|
|
29
|
+
this.currentLoopCount = 0
|
|
30
|
+
this.interruptible = interruptible
|
|
31
|
+
this._stack = []
|
|
32
|
+
this._observers = []
|
|
33
|
+
this._exit = undefined
|
|
34
|
+
this._children = undefined
|
|
35
|
+
this._interruptedCause = undefined
|
|
36
|
+
this._yielded = undefined
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
readonly [FiberTypeId]: Fiber.Fiber.Variance<A, E>
|
|
40
|
+
|
|
41
|
+
readonly id: number
|
|
42
|
+
interruptible: boolean
|
|
43
|
+
currentOpCount: number
|
|
44
|
+
currentLoopCount: number
|
|
45
|
+
readonly _stack: Array<Primitive>
|
|
46
|
+
readonly _observers: Array<(exit: Exit.Exit<A, E>) => void>
|
|
47
|
+
_exit: Exit.Exit<A, E> | undefined
|
|
48
|
+
_currentExit: Exit.Exit<A, E> | undefined
|
|
49
|
+
_children: Set<FiberImpl<any, any>> | undefined
|
|
50
|
+
_interruptedCause: Cause.Cause<never> | undefined
|
|
51
|
+
_yielded: Exit.Exit<any, any> | (() => void) | undefined
|
|
52
|
+
|
|
53
|
+
// set in setServices
|
|
54
|
+
services!: ServiceMap.ServiceMap<never>
|
|
55
|
+
currentScheduler!: Scheduler.Scheduler
|
|
56
|
+
currentTracerContext: Tracer.Tracer["context"]
|
|
57
|
+
currentSpan: Tracer.AnySpan | undefined
|
|
58
|
+
currentLogLevel!: LogLevel.LogLevel
|
|
59
|
+
minimumLogLevel!: LogLevel.LogLevel
|
|
60
|
+
currentStackFrame: StackFrame | undefined
|
|
61
|
+
runtimeMetrics: Metric.FiberRuntimeMetricsService | undefined
|
|
62
|
+
maxOpsBeforeYield!: number
|
|
63
|
+
currentPreventYield!: boolean
|
|
64
|
+
|
|
65
|
+
_dispatcher: Scheduler.SchedulerDispatcher | undefined = undefined
|
|
66
|
+
get currentDispatcher(): Scheduler.SchedulerDispatcher {
|
|
67
|
+
return (this._dispatcher ??= this.currentScheduler.makeDispatcher())
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
getRef<X>(ref: ServiceMap.Reference<X>): X {
|
|
71
|
+
return ServiceMap.getReferenceUnsafe(this.services, ref)
|
|
72
|
+
}
|
|
73
|
+
addObserver(cb: (exit: Exit.Exit<A, E>) => void): () => void {
|
|
74
|
+
if (this._exit) {
|
|
75
|
+
cb(this._exit)
|
|
76
|
+
return constVoid
|
|
77
|
+
}
|
|
78
|
+
this._observers.push(cb)
|
|
79
|
+
return () => {
|
|
80
|
+
const index = this._observers.indexOf(cb)
|
|
81
|
+
if (index >= 0) {
|
|
82
|
+
this._observers.splice(index, 1)
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
interruptUnsafe(
|
|
87
|
+
fiberId?: number | undefined,
|
|
88
|
+
annotations?: ServiceMap.ServiceMap<never> | undefined,
|
|
89
|
+
): void {
|
|
90
|
+
if (this._exit) {
|
|
91
|
+
return
|
|
92
|
+
}
|
|
93
|
+
let cause = causeInterrupt(fiberId)
|
|
94
|
+
if (this.currentStackFrame) {
|
|
95
|
+
cause = causeAnnotate(
|
|
96
|
+
cause,
|
|
97
|
+
ServiceMap.make(CauseStackTrace, this.currentStackFrame),
|
|
98
|
+
)
|
|
99
|
+
}
|
|
100
|
+
if (annotations) {
|
|
101
|
+
cause = causeAnnotate(cause, annotations)
|
|
102
|
+
}
|
|
103
|
+
this._interruptedCause = this._interruptedCause
|
|
104
|
+
? causeCombine(this._interruptedCause, cause)
|
|
105
|
+
: cause
|
|
106
|
+
if (this.interruptible) {
|
|
107
|
+
this.evaluate(failCause(this._interruptedCause) as any)
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
pollUnsafe(): Exit.Exit<A, E> | undefined {
|
|
111
|
+
return this._exit
|
|
112
|
+
}
|
|
113
|
+
evaluate(effect: Primitive): void {
|
|
114
|
+
this.runtimeMetrics?.recordFiberStart(this.services)
|
|
115
|
+
if (this._exit) {
|
|
116
|
+
return
|
|
117
|
+
} else if (this._yielded !== undefined) {
|
|
118
|
+
const yielded = this._yielded as () => void
|
|
119
|
+
this._yielded = undefined
|
|
120
|
+
yielded()
|
|
121
|
+
}
|
|
122
|
+
const exit = this.runLoop(effect)
|
|
123
|
+
if (exit === Yield) {
|
|
124
|
+
return
|
|
125
|
+
}
|
|
126
|
+
// the interruptChildren middleware is added in Effect.forkChild, so it can be
|
|
127
|
+
// tree-shaken if not used
|
|
128
|
+
const interruptChildren =
|
|
129
|
+
fiberMiddleware.interruptChildren &&
|
|
130
|
+
fiberMiddleware.interruptChildren(this)
|
|
131
|
+
if (interruptChildren !== undefined) {
|
|
132
|
+
return this.evaluate(flatMap(interruptChildren, () => exit) as any)
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
this._exit = exit
|
|
136
|
+
this.runtimeMetrics?.recordFiberEnd(this.services, this._exit)
|
|
137
|
+
for (let i = 0; i < this._observers.length; i++) {
|
|
138
|
+
this._observers[i](exit)
|
|
139
|
+
}
|
|
140
|
+
this._observers.length = 0
|
|
141
|
+
}
|
|
142
|
+
runLoop(effect: Primitive): Exit.Exit<A, E> | Yield {
|
|
143
|
+
const prevFiber = (globalThis as any)[currentFiberTypeId]
|
|
144
|
+
;(globalThis as any)[currentFiberTypeId] = this
|
|
145
|
+
let yielding = false
|
|
146
|
+
let current: Primitive | Yield = effect
|
|
147
|
+
this.currentOpCount = 0
|
|
148
|
+
const currentLoop = ++this.currentLoopCount
|
|
149
|
+
try {
|
|
150
|
+
while (true) {
|
|
151
|
+
this.currentOpCount++
|
|
152
|
+
if (
|
|
153
|
+
!yielding &&
|
|
154
|
+
!this.currentPreventYield &&
|
|
155
|
+
this.currentScheduler.shouldYield(this as any)
|
|
156
|
+
) {
|
|
157
|
+
yielding = true
|
|
158
|
+
const prev = current
|
|
159
|
+
current = flatMap(yieldNow, () => prev as any) as any
|
|
160
|
+
}
|
|
161
|
+
current = this.currentTracerContext
|
|
162
|
+
? this.currentTracerContext(current as any, this)
|
|
163
|
+
: (current as any)[evaluate](this)
|
|
164
|
+
if (currentLoop !== this.currentLoopCount) {
|
|
165
|
+
// another effect has taken over the loop,
|
|
166
|
+
return Yield
|
|
167
|
+
} else if (current === Yield) {
|
|
168
|
+
const yielded = this._yielded!
|
|
169
|
+
if (ExitTypeId in yielded) {
|
|
170
|
+
this._yielded = undefined
|
|
171
|
+
return yielded
|
|
172
|
+
}
|
|
173
|
+
return Yield
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
} catch (error) {
|
|
177
|
+
if (!hasProperty(current, evaluate)) {
|
|
178
|
+
return exitDie(`Fiber.runLoop: Not a valid effect: ${String(current)}`)
|
|
179
|
+
}
|
|
180
|
+
return this.runLoop(exitDie(error) as any)
|
|
181
|
+
} finally {
|
|
182
|
+
;(globalThis as any)[currentFiberTypeId] = prevFiber
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
getCont<S extends contA | contE>(
|
|
186
|
+
symbol: S,
|
|
187
|
+
):
|
|
188
|
+
| (Primitive & Record<S, (value: any, fiber: FiberImpl) => Primitive>)
|
|
189
|
+
| undefined {
|
|
190
|
+
while (true) {
|
|
191
|
+
const op = this._stack.pop()
|
|
192
|
+
if (!op) return undefined
|
|
193
|
+
const cont = op[contAll] && op[contAll](this)
|
|
194
|
+
if (cont) {
|
|
195
|
+
;(cont as any)[symbol] = cont
|
|
196
|
+
return cont as any
|
|
197
|
+
}
|
|
198
|
+
if (op[symbol]) return op as any
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
yieldWith(value: Exit.Exit<any, any> | (() => void)): Yield {
|
|
202
|
+
this._yielded = value
|
|
203
|
+
return Yield
|
|
204
|
+
}
|
|
205
|
+
children(): Set<Fiber.Fiber<any, any>> {
|
|
206
|
+
return (this._children ??= new Set())
|
|
207
|
+
}
|
|
208
|
+
pipe() {
|
|
209
|
+
return pipeArguments(this, arguments)
|
|
210
|
+
}
|
|
211
|
+
setServices(services: ServiceMap.ServiceMap<never>): void {
|
|
212
|
+
this.services = services
|
|
213
|
+
const scheduler = this.getRef(Scheduler.Scheduler)
|
|
214
|
+
if (scheduler !== this.currentScheduler) {
|
|
215
|
+
this.currentScheduler = scheduler
|
|
216
|
+
this._dispatcher = undefined
|
|
217
|
+
}
|
|
218
|
+
this.currentSpan = services.mapUnsafe.get(Tracer.ParentSpanKey)
|
|
219
|
+
this.currentLogLevel = this.getRef(CurrentLogLevel)
|
|
220
|
+
this.minimumLogLevel = this.getRef(MinimumLogLevel)
|
|
221
|
+
this.currentStackFrame = services.mapUnsafe.get(CurrentStackFrame.key)
|
|
222
|
+
this.maxOpsBeforeYield = this.getRef(Scheduler.MaxOpsBeforeYield)
|
|
223
|
+
this.currentPreventYield = this.getRef(Scheduler.PreventSchedulerYield)
|
|
224
|
+
this.runtimeMetrics = services.mapUnsafe.get(
|
|
225
|
+
InternalMetric.FiberRuntimeMetricsKey,
|
|
226
|
+
)
|
|
227
|
+
const currentTracer = services.mapUnsafe.get(Tracer.TracerKey)
|
|
228
|
+
this.currentTracerContext = currentTracer
|
|
229
|
+
? currentTracer["context"]
|
|
230
|
+
: undefined
|
|
231
|
+
}
|
|
232
|
+
get currentSpanLocal(): Tracer.Span | undefined {
|
|
233
|
+
return this.currentSpan?._tag === "Span" ? this.currentSpan : undefined
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
const fiberMiddleware = {
|
|
238
|
+
interruptChildren: undefined as
|
|
239
|
+
| ((fiber: FiberImpl) => Effect.Effect<void> | undefined)
|
|
240
|
+
| undefined,
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
const fiberStackAnnotations = (fiber: Fiber.Fiber<any, any>) => {
|
|
244
|
+
if (!fiber.currentStackFrame) return undefined
|
|
245
|
+
const annotations = new Map<string, unknown>()
|
|
246
|
+
annotations.set(CauseStackTrace.key, fiber.currentStackFrame)
|
|
247
|
+
return ServiceMap.makeUnsafe(annotations)
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
const fiberInterruptChildren = (fiber: FiberImpl) => {
|
|
251
|
+
if (fiber._children === undefined || fiber._children.size === 0) {
|
|
252
|
+
return undefined
|
|
253
|
+
}
|
|
254
|
+
return fiberInterruptAll(fiber._children)
|
|
255
|
+
}
|