clanka 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * @since 1.0.0
3
3
  */
4
+ import { createHash } from "node:crypto"
4
5
  import * as Effect from "effect/Effect"
5
6
  import * as ChunkRepo from "./ChunkRepo.ts"
6
7
  import * as CodeChunker from "./CodeChunker.ts"
@@ -23,13 +24,6 @@ import type * as ChildProcessSpawner from "effect/unstable/process/ChildProcessS
23
24
  import type * as FileSystem from "effect/FileSystem"
24
25
  import * as Console from "effect/Console"
25
26
 
26
- const normalizePath = (path: string) => path.replace(/\\/g, "/")
27
-
28
- const chunkConfig = {
29
- chunkSize: 20,
30
- chunkOverlap: 5,
31
- } as const
32
-
33
27
  /**
34
28
  * @since 1.0.0
35
29
  * @category Services
@@ -46,6 +40,13 @@ export class SemanticSearch extends ServiceMap.Service<
46
40
  }
47
41
  >()("clanka/SemanticSearch/SemanticSearch") {}
48
42
 
43
+ const normalizePath = (path: string) => path.replace(/\\/g, "/") // Replaces every backslash in `path` with a forward slash.
44
+
45
+ const chunkConfig = {
46
  // Chunking parameters, frozen with `as const` so both values keep their literal types.
  // NOTE(review): presumably passed to the code chunker and measured in lines — confirm at the use site.
+ chunkSize: 30,
47
  // Zero overlap between consecutive chunks.
+ chunkOverlap: 0,
48
+ } as const
49
+
49
50
  export const makeEmbeddingResolver = (
50
51
  resolver: EmbeddingModel.Service["resolver"],
51
52
  options: {
@@ -60,6 +61,35 @@ export const makeEmbeddingResolver = (
60
61
  RequestResolver.batchN(options.embeddingBatchSize ?? 500),
61
62
  )
62
63
 
64
/**
 * Builds the text that represents a single code chunk.
 *
 * The result starts with a front-matter style header delimited by `---`
 * lines. The header always contains `file:` and adds `name:`, `type:` and
 * `parent:` only when the chunk defines them. After one blank line comes the
 * chunk content, with every line prefixed by its line number (`<n>: <text>`)
 * counting up from `chunk.startLine`.
 *
 * @param chunk - The chunk to render.
 * @returns The header followed by the line-numbered content.
 */
export const chunkEmbeddingInput = (chunk: CodeChunker.CodeChunk): string => {
  const headerLines = ["---", "file: " + chunk.path]

  if (chunk.name !== undefined) {
    headerLines.push("name: " + chunk.name)
  }
  if (chunk.type !== undefined) {
    headerLines.push("type: " + chunk.type)
  }
  if (chunk.parent !== undefined) {
    headerLines.push("parent: " + chunk.parent)
  }
  headerLines.push("---")

  // Accept CRLF as well as LF: splitting on "\n" alone would leave a stray
  // "\r" at the end of every numbered line, so the same source checked out
  // with different line endings would render (and hash) differently.
  const numberedContent = chunk.content
    .split(/\r?\n/)
    .map((line: string, offset: number) => `${chunk.startLine + offset}: ${line}`)
    .join("\n")

  return headerLines.join("\n") + "\n\n" + numberedContent
}
89
+
90
+ const hashChunkInput = (input: string): string =>
91
  // Hex-encoded SHA-256 digest of `input`.
+ createHash("sha256").update(input).digest("hex")
92
+
63
93
  /**
64
94
  * @since 1.0.0
65
95
  * @category Layers
@@ -115,11 +145,14 @@ export const layer = (options: {
115
145
  readonly syncId: ChunkRepo.SyncId
116
146
  readonly checkExisting: boolean
117
147
  }) {
148
+ const input = chunkEmbeddingInput(options.chunk)
149
+ const hash = hashChunkInput(input)
150
+
118
151
  if (options.checkExisting) {
119
152
  const id = yield* repo.exists({
120
153
  path: options.chunk.path,
121
154
  startLine: options.chunk.startLine,
122
- hash: options.chunk.contentHash,
155
+ hash,
123
156
  })
124
157
  if (Option.isSome(id)) {
125
158
  yield* repo.setSyncId(id.value, options.syncId)
@@ -128,12 +161,7 @@ export const layer = (options: {
128
161
  }
129
162
 
130
163
  const result = yield* Effect.request(
131
- new EmbeddingModel.EmbeddingRequest({
132
- input: `File: ${options.chunk.path}
133
- Lines: ${options.chunk.startLine}-${options.chunk.endLine}
134
-
135
- ${options.chunk.content}`,
136
- }),
164
+ new EmbeddingModel.EmbeddingRequest({ input }),
137
165
  resolver,
138
166
  )
139
167
  const vector = new Float32Array(result.vector)
@@ -142,8 +170,8 @@ ${options.chunk.content}`,
142
170
  path: options.chunk.path,
143
171
  startLine: options.chunk.startLine,
144
172
  endLine: options.chunk.endLine,
145
- hash: options.chunk.contentHash,
146
- content: options.chunk.content,
173
+ hash,
174
+ content: input,
147
175
  vector,
148
176
  syncId: options.syncId,
149
177
  }),
@@ -209,7 +237,7 @@ ${options.chunk.content}`,
209
237
  vector: new Float32Array(vector),
210
238
  limit: options.limit,
211
239
  })
212
- return results.map((r) => r.format()).join("\n\n")
240
+ return results.map((r) => r.content).join("\n\n")
213
241
  }, Effect.orDie),
214
242
  updateFile: Effect.fn("SemanticSearch.updateFile")(function* (path) {
215
243
  yield* Fiber.join(initialIndex)
@@ -0,0 +1,255 @@
1
+ // ----------------------------------------------------------------------------
2
+ // Fiber
3
+ // ----------------------------------------------------------------------------
4
+
5
+ /** Property key under which `FiberImpl` stores its variance marker; the key text embeds `version`. @internal */
6
+ export const FiberTypeId = `~effect/Fiber/${version}` as const
7
+
8
  // Value assigned to `this[FiberTypeId]` in the `FiberImpl` constructor; both entries are `identity`.
+ const fiberVariance = {
9
+ _A: identity,
10
+ _E: identity,
11
+ }
12
+
13
  // Mutable counter; the `FiberImpl` constructor pre-increments `id` to number each new fiber.
+ const fiberIdStore = { id: 0 }
14
+
15
+ /** Returns whatever is stored on `globalThis` under `currentFiberTypeId`; `FiberImpl.runLoop` writes that slot while it runs and restores the previous value afterwards. @internal */
16
+ export const getCurrentFiber = (): Fiber.Fiber<any, any> | undefined =>
17
+ (globalThis as any)[currentFiberTypeId]
18
+
19
+ /** `Fiber.Fiber` implementation: holds the continuation stack, exit observers, exit/interrupt state and values cached from the service map, and steps effects through `runLoop`. @internal */
20
+ export class FiberImpl<A = any, E = any> implements Fiber.Fiber<A, E> {
21
  // Brands the instance, caches service-derived fields, assigns the next id
  // from `fiberIdStore` and resets all counters and state slots.
+ constructor(
22
+ services: ServiceMap.ServiceMap<never>,
23
+ interruptible: boolean = true,
24
+ ) {
25
+ this[FiberTypeId] = fiberVariance as any
26
  // Fills `services` and the cached `current*` fields declared further down.
+ this.setServices(services)
27
+ this.id = ++fiberIdStore.id
28
+ this.currentOpCount = 0
29
+ this.currentLoopCount = 0
30
+ this.interruptible = interruptible
31
+ this._stack = []
32
+ this._observers = []
33
+ this._exit = undefined
34
+ this._children = undefined
35
+ this._interruptedCause = undefined
36
+ this._yielded = undefined
37
+ }
38
+
39
+ readonly [FiberTypeId]: Fiber.Fiber.Variance<A, E>
40
+
41
+ readonly id: number
42
+ interruptible: boolean
43
  // Incremented once per iteration of `runLoop` and reset to 0 when a loop starts.
+ currentOpCount: number
44
  // Incremented on every `runLoop` call; a running loop compares against it to notice that a newer loop started.
+ currentLoopCount: number
45
  // Frames that `getCont` pops while searching for a continuation.
+ readonly _stack: Array<Primitive>
46
  // Callbacks invoked with the exit value by `evaluate`, then cleared.
+ readonly _observers: Array<(exit: Exit.Exit<A, E>) => void>
47
  // Exit value; stays `undefined` until `evaluate` stores the result.
+ _exit: Exit.Exit<A, E> | undefined
48
  // NOTE(review): not assigned anywhere in this class — presumably written by code outside this view; confirm.
+ _currentExit: Exit.Exit<A, E> | undefined
49
  // Created lazily by `children()`.
+ _children: Set<FiberImpl<any, any>> | undefined
50
  // Accumulated by `interruptUnsafe`; repeated interrupts are merged with `causeCombine`.
+ _interruptedCause: Cause.Cause<never> | undefined
51
  // Value handed to `yieldWith`: an exit that `runLoop` returns, or a callback that `evaluate` invokes before running again.
+ _yielded: Exit.Exit<any, any> | (() => void) | undefined
52
+
53
+ // set in setServices
54
+ services!: ServiceMap.ServiceMap<never>
55
+ currentScheduler!: Scheduler.Scheduler
56
+ currentTracerContext: Tracer.Tracer["context"]
57
+ currentSpan: Tracer.AnySpan | undefined
58
+ currentLogLevel!: LogLevel.LogLevel
59
+ minimumLogLevel!: LogLevel.LogLevel
60
+ currentStackFrame: StackFrame | undefined
61
+ runtimeMetrics: Metric.FiberRuntimeMetricsService | undefined
62
+ maxOpsBeforeYield!: number
63
+ currentPreventYield!: boolean
64
+
65
  // Dispatcher cache; `setServices` clears it whenever the scheduler changes.
+ _dispatcher: Scheduler.SchedulerDispatcher | undefined = undefined
66
  // Returns the cached dispatcher, creating it from the current scheduler on first access.
+ get currentDispatcher(): Scheduler.SchedulerDispatcher {
67
+ return (this._dispatcher ??= this.currentScheduler.makeDispatcher())
68
+ }
69
+
70
  // Looks up `ref` in this fiber's service map via `ServiceMap.getReferenceUnsafe`.
+ getRef<X>(ref: ServiceMap.Reference<X>): X {
71
+ return ServiceMap.getReferenceUnsafe(this.services, ref)
72
+ }
73
  // Registers `cb` to receive the fiber's exit and returns a function that
  // unregisters it. If the fiber has already exited, `cb` is called right away
  // and the returned function is `constVoid`.
+ addObserver(cb: (exit: Exit.Exit<A, E>) => void): () => void {
74
+ if (this._exit) {
75
+ cb(this._exit)
76
+ return constVoid
77
+ }
78
+ this._observers.push(cb)
79
+ return () => {
80
+ const index = this._observers.indexOf(cb)
81
+ if (index >= 0) {
82
+ this._observers.splice(index, 1)
83
+ }
84
+ }
85
+ }
86
  // Records an interrupt for this fiber; does nothing once the fiber has exited.
  // The interrupt cause is annotated with the current stack frame (when there
  // is one) and with `annotations` (when given), then merged into any earlier
  // interrupt cause.
+ interruptUnsafe(
87
+ fiberId?: number | undefined,
88
+ annotations?: ServiceMap.ServiceMap<never> | undefined,
89
+ ): void {
90
+ if (this._exit) {
91
+ return
92
+ }
93
+ let cause = causeInterrupt(fiberId)
94
+ if (this.currentStackFrame) {
95
+ cause = causeAnnotate(
96
+ cause,
97
+ ServiceMap.make(CauseStackTrace, this.currentStackFrame),
98
+ )
99
+ }
100
+ if (annotations) {
101
+ cause = causeAnnotate(cause, annotations)
102
+ }
103
+ this._interruptedCause = this._interruptedCause
104
+ ? causeCombine(this._interruptedCause, cause)
105
+ : cause
106
  // Only an interruptible fiber is driven into the failure here; otherwise the cause is just kept in `_interruptedCause`.
+ if (this.interruptible) {
107
+ this.evaluate(failCause(this._interruptedCause) as any)
108
+ }
109
+ }
110
  // Returns the exit value, or `undefined` while none has been stored.
+ pollUnsafe(): Exit.Exit<A, E> | undefined {
111
+ return this._exit
112
+ }
113
  // Runs `effect` through `runLoop` and, unless the loop yielded, publishes the
  // resulting exit to the observers.
+ evaluate(effect: Primitive): void {
114
+ this.runtimeMetrics?.recordFiberStart(this.services)
115
+ if (this._exit) {
116
+ return
117
+ } else if (this._yielded !== undefined) {
118
  // NOTE(review): the cast assumes the pending value is a callback rather than an exit — confirm against callers of `yieldWith`.
+ const yielded = this._yielded as () => void
119
+ this._yielded = undefined
120
+ yielded()
121
+ }
122
+ const exit = this.runLoop(effect)
123
+ if (exit === Yield) {
124
+ return
125
+ }
126
+ // the interruptChildren middleware is added in Effect.forkChild, so it can be
127
+ // tree-shaken if not used
128
+ const interruptChildren =
129
+ fiberMiddleware.interruptChildren &&
130
+ fiberMiddleware.interruptChildren(this)
131
+ if (interruptChildren !== undefined) {
132
  // Run the middleware's effect first, then continue with the exit computed above.
+ return this.evaluate(flatMap(interruptChildren, () => exit) as any)
133
+ }
134
+
135
+ this._exit = exit
136
+ this.runtimeMetrics?.recordFiberEnd(this.services, this._exit)
137
+ for (let i = 0; i < this._observers.length; i++) {
138
+ this._observers[i](exit)
139
+ }
140
+ this._observers.length = 0
141
+ }
142
  // Steps `effect` until it produces an exit or yields. While running, this
  // fiber is stored on `globalThis` under `currentFiberTypeId`; the previous
  // value is restored in `finally`.
+ runLoop(effect: Primitive): Exit.Exit<A, E> | Yield {
143
+ const prevFiber = (globalThis as any)[currentFiberTypeId]
144
+ ;(globalThis as any)[currentFiberTypeId] = this
145
+ let yielding = false
146
+ let current: Primitive | Yield = effect
147
+ this.currentOpCount = 0
148
+ const currentLoop = ++this.currentLoopCount
149
+ try {
150
+ while (true) {
151
+ this.currentOpCount++
152
  // `yielding` ensures a scheduler yield is inserted at most once per `runLoop` call.
+ if (
153
+ !yielding &&
154
+ !this.currentPreventYield &&
155
+ this.currentScheduler.shouldYield(this as any)
156
+ ) {
157
+ yielding = true
158
+ const prev = current
159
+ current = flatMap(yieldNow, () => prev as any) as any
160
+ }
161
  // Step through the tracer's `context` hook when one is set, otherwise call the effect's own `evaluate` method.
+ current = this.currentTracerContext
162
+ ? this.currentTracerContext(current as any, this)
163
+ : (current as any)[evaluate](this)
164
+ if (currentLoop !== this.currentLoopCount) {
165
+ // a newer runLoop call bumped currentLoopCount and took over, so this loop stops here
166
+ return Yield
167
+ } else if (current === Yield) {
168
+ const yielded = this._yielded!
169
  // An exit passed to `yieldWith` is this loop's result; anything else stays pending in `_yielded`.
+ if (ExitTypeId in yielded) {
170
+ this._yielded = undefined
171
+ return yielded
172
+ }
173
+ return Yield
174
+ }
175
+ }
176
+ } catch (error) {
177
  // A value lacking the `evaluate` property is reported as an invalid effect; any other thrown error is fed back in as a die exit.
+ if (!hasProperty(current, evaluate)) {
178
+ return exitDie(`Fiber.runLoop: Not a valid effect: ${String(current)}`)
179
+ }
180
+ return this.runLoop(exitDie(error) as any)
181
+ } finally {
182
+ ;(globalThis as any)[currentFiberTypeId] = prevFiber
183
+ }
184
+ }
185
  // Pops frames off `_stack` until one supplies a continuation for `symbol`;
  // returns `undefined` once the stack is empty.
+ getCont<S extends contA | contE>(
186
+ symbol: S,
187
+ ):
188
+ | (Primitive & Record<S, (value: any, fiber: FiberImpl) => Primitive>)
189
+ | undefined {
190
+ while (true) {
191
+ const op = this._stack.pop()
192
+ if (!op) return undefined
193
  // A frame with a `contAll` handler is asked first; a truthy result is stored on itself under `symbol` and returned.
+ const cont = op[contAll] && op[contAll](this)
194
+ if (cont) {
195
+ ;(cont as any)[symbol] = cont
196
+ return cont as any
197
+ }
198
+ if (op[symbol]) return op as any
199
+ }
200
+ }
201
  // Stores `value` in `_yielded` and returns the `Yield` sentinel.
+ yieldWith(value: Exit.Exit<any, any> | (() => void)): Yield {
202
+ this._yielded = value
203
+ return Yield
204
+ }
205
  // Returns the child set, creating an empty one on first use.
+ children(): Set<Fiber.Fiber<any, any>> {
206
+ return (this._children ??= new Set())
207
+ }
208
  // Forwards this fiber and the call's `arguments` to `pipeArguments`.
+ pipe() {
209
+ return pipeArguments(this, arguments)
210
+ }
211
  // Replaces the service map and refreshes every field cached from it.
+ setServices(services: ServiceMap.ServiceMap<never>): void {
212
+ this.services = services
213
+ const scheduler = this.getRef(Scheduler.Scheduler)
214
  // The cached dispatcher is dropped only when the scheduler actually changes.
+ if (scheduler !== this.currentScheduler) {
215
+ this.currentScheduler = scheduler
216
+ this._dispatcher = undefined
217
+ }
218
+ this.currentSpan = services.mapUnsafe.get(Tracer.ParentSpanKey)
219
+ this.currentLogLevel = this.getRef(CurrentLogLevel)
220
+ this.minimumLogLevel = this.getRef(MinimumLogLevel)
221
+ this.currentStackFrame = services.mapUnsafe.get(CurrentStackFrame.key)
222
+ this.maxOpsBeforeYield = this.getRef(Scheduler.MaxOpsBeforeYield)
223
+ this.currentPreventYield = this.getRef(Scheduler.PreventSchedulerYield)
224
+ this.runtimeMetrics = services.mapUnsafe.get(
225
+ InternalMetric.FiberRuntimeMetricsKey,
226
+ )
227
+ const currentTracer = services.mapUnsafe.get(Tracer.TracerKey)
228
  // Left `undefined` when no tracer is registered, which makes `runLoop` call the effect's `evaluate` directly.
+ this.currentTracerContext = currentTracer
229
+ ? currentTracer["context"]
230
+ : undefined
231
+ }
232
  // Returns `currentSpan` only when its `_tag` is "Span"; otherwise `undefined`.
+ get currentSpanLocal(): Tracer.Span | undefined {
233
+ return this.currentSpan?._tag === "Span" ? this.currentSpan : undefined
234
+ }
235
+ }
236
+
237
+ const fiberMiddleware = {
238
  // Optional hook that `FiberImpl.evaluate` consults once the run loop has produced an exit.
  // It starts out unset; when set it may return an effect that is run before the exit is published.
+ interruptChildren: undefined as
239
+ | ((fiber: FiberImpl) => Effect.Effect<void> | undefined)
240
+ | undefined,
241
+ }
242
+
243
+ const fiberStackAnnotations = (fiber: Fiber.Fiber<any, any>) => {
244
  // Builds a service map holding the fiber's current stack frame under `CauseStackTrace.key`.
  // Returns `undefined` when the fiber has no current stack frame.
+ if (!fiber.currentStackFrame) return undefined
245
+ const annotations = new Map<string, unknown>()
246
+ annotations.set(CauseStackTrace.key, fiber.currentStackFrame)
247
+ return ServiceMap.makeUnsafe(annotations)
248
+ }
249
+
250
+ const fiberInterruptChildren = (fiber: FiberImpl) => {
250
  // Returns `undefined` when the fiber has no children (set never created, or empty);
  // otherwise returns the result of `fiberInterruptAll` applied to the child set.
+ if (fiber._children === undefined || fiber._children.size === 0) {
251
+ return undefined
252
+ }
253
+ return fiberInterruptAll(fiber._children)
254
+ }