spindb 0.35.3 → 0.36.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1139 @@
1
+ import { spawn, type SpawnOptions } from 'child_process'
2
+ import { existsSync, openSync, closeSync } from 'fs'
3
+ import { chmod, mkdir, writeFile, readFile, unlink } from 'fs/promises'
4
+ import { join } from 'path'
5
+ import { BaseEngine } from '../base-engine'
6
+ import { paths } from '../../config/paths'
7
+ import { getEngineDefaults } from '../../config/defaults'
8
+ import { platformService, isWindows } from '../../core/platform-service'
9
+ import { configManager } from '../../core/config-manager'
10
+ import {
11
+ logDebug,
12
+ logWarning,
13
+ assertValidUsername,
14
+ } from '../../core/error-handler'
15
+ import { processManager } from '../../core/process-manager'
16
+ import { portManager } from '../../core/port-manager'
17
+ import { weaviateBinaryManager } from './binary-manager'
18
+ import { getBinaryUrl } from './binary-urls'
19
+ import {
20
+ normalizeVersion,
21
+ SUPPORTED_MAJOR_VERSIONS,
22
+ WEAVIATE_VERSION_MAP,
23
+ } from './version-maps'
24
+ import { fetchAvailableVersions as fetchHostdbVersions } from './hostdb-releases'
25
+ import {
26
+ detectBackupFormat as detectBackupFormatImpl,
27
+ restoreBackup,
28
+ } from './restore'
29
+ import { createBackup } from './backup'
30
+ import { weaviateApiRequest } from './api-client'
31
+ import {
32
+ type Platform,
33
+ type Arch,
34
+ type ContainerConfig,
35
+ type ProgressCallback,
36
+ type BackupFormat,
37
+ type BackupOptions,
38
+ type BackupResult,
39
+ type RestoreResult,
40
+ type DumpResult,
41
+ type StatusResult,
42
+ type QueryResult,
43
+ type QueryOptions,
44
+ type CreateUserOptions,
45
+ type UserCredentials,
46
+ } from '../../types'
47
+ import { parseRESTAPIResult } from '../../core/query-parser'
48
+
49
+ const ENGINE = 'weaviate'
50
+ const engineDef = getEngineDefaults(ENGINE)
51
+
/**
 * Parse a Weaviate connection string into a base URL and request headers.
 *
 * Supported formats:
 * - http://host:port
 * - https://host:port
 * - http://host:port?api_key=KEY (for API key auth)
 * - host:port (no scheme; http is assumed)
 *
 * The scheme is matched case-insensitively. When no port is given the port
 * defaults to 8080. A port that was written explicitly is always kept, even
 * when it equals the scheme default (80 for http, 443 for https).
 *
 * @param connectionString - Raw connection string supplied by the caller
 * @returns The base URL (scheme://host:port, no path or query) and the headers
 *   to send, including an Authorization bearer header when api_key is present
 * @throws Error when the string cannot be parsed as a URL; the query string is
 *   stripped from the message so an api_key is not echoed back
 */
function parseWeaviateConnectionString(connectionString: string): {
  baseUrl: string
  headers: Record<string, string>
} {
  // Ensure a scheme is present. The test is case-insensitive so that an
  // upper-case scheme such as "HTTPS://" is not prefixed a second time.
  let normalized = connectionString.trim()
  if (!/^https?:\/\//i.test(normalized)) {
    normalized = `http://${normalized}`
  }

  let url: URL
  try {
    url = new URL(normalized)
  } catch {
    // Redact query params (may contain api_key) before including in error
    const safeString = normalized.split('?')[0]
    throw new Error(
      `Invalid Weaviate connection string: ${safeString}\n` +
        'Expected format: http://host:port',
    )
  }

  // Extract API key if provided
  const apiKey = url.searchParams.get('api_key')
  const scheme = url.protocol.replace(':', '')

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  }
  if (apiKey) {
    headers['Authorization'] = `Bearer ${apiKey}`
  }

  // URL reports an empty port when the written port equals the scheme default
  // (http:80, https:443). Recover an explicitly written port from the
  // authority so it is not silently replaced by the 8080 fallback.
  const explicitPortMatch =
    /^https?:\/\/(?:[^/?#@]*@)?(?:\[[^\]]*\]|[^/?#:]*):(\d+)/i.exec(normalized)
  const explicitPort = explicitPortMatch
    ? String(Number(explicitPortMatch[1]))
    : ''

  // Construct base URL without path or query params
  const port = url.port || explicitPort || '8080'
  const baseUrl = `${scheme}://${url.hostname}:${port}`

  return { baseUrl, headers }
}
100
+
/**
 * Issue a single HTTP request against a remote Weaviate server.
 *
 * The response body is decoded as JSON when the content-type header contains
 * "application/json"; otherwise it is returned as text. The request is aborted
 * once timeoutMs has elapsed.
 *
 * @param baseUrl - Base URL that the path is appended to
 * @param method - HTTP method
 * @param path - Request path, appended to baseUrl verbatim
 * @param headers - Headers sent with the request
 * @param body - Optional payload, serialized with JSON.stringify
 * @param timeoutMs - Abort timeout in milliseconds (default 30000)
 * @returns The HTTP status code and the decoded body
 * @throws Error with a timeout message when the request is aborted; any other
 *   failure is rethrown unchanged
 */
async function remoteWeaviateRequest(
  baseUrl: string,
  method: string,
  path: string,
  headers: Record<string, string>,
  body?: Record<string, unknown>,
  timeoutMs = 30000,
): Promise<{ status: number; data: unknown }> {
  const aborter = new AbortController()
  const timer = setTimeout(() => aborter.abort(), timeoutMs)

  const init: RequestInit = { method, headers, signal: aborter.signal }
  if (body) {
    init.body = JSON.stringify(body)
  }

  try {
    const res = await fetch(`${baseUrl}${path}`, init)

    // JSON when the server says so, plain text otherwise
    const isJson = (res.headers.get('content-type') || '').includes(
      'application/json',
    )
    const data: unknown = isJson ? await res.json() : await res.text()

    return { status: res.status, data }
  } catch (err) {
    const timedOut = err instanceof Error && err.name === 'AbortError'
    if (!timedOut) {
      throw err
    }
    throw new Error(
      `Remote Weaviate request timed out after ${timeoutMs / 1000}s: ${method} ${path}`,
    )
  } finally {
    clearTimeout(timer)
  }
}
151
+
152
+ export class WeaviateEngine extends BaseEngine {
153
+ name = ENGINE
154
+ displayName = 'Weaviate'
155
+ defaultPort = engineDef.defaultPort
156
+ supportedVersions = SUPPORTED_MAJOR_VERSIONS
157
+
/**
 * Report the current platform and architecture, as provided by
 * platformService. Used when locating or downloading binaries.
 */
getPlatformInfo(): { platform: Platform; arch: Arch } {
  const info = platformService.getPlatformInfo()
  return info
}
162
+
/**
 * List the Weaviate versions available for download.
 * Delegates to the hostdb release lookup imported as fetchHostdbVersions.
 */
async fetchAvailableVersions(): Promise<Record<string, string[]>> {
  const versions = await fetchHostdbVersions()
  return versions
}
167
+
/**
 * Build the download URL for a Weaviate binary.
 * Thin wrapper around the module-level getBinaryUrl helper.
 */
getBinaryUrl(version: string, platform: Platform, arch: Arch): string {
  const downloadUrl = getBinaryUrl(version, platform, arch)
  return downloadUrl
}
172
+
/**
 * Resolve a version string to a full version (e.g., '1' -> '1.35.7').
 *
 * - A string already in major.minor.patch form is returned unchanged.
 * - Otherwise the string is looked up in WEAVIATE_VERSION_MAP.
 * - If the map has no entry, the string is padded with '.0' components until
 *   it has three parts ('1' -> '1.0.0', '1.35' -> '1.35.0'). Strings that
 *   already have three or more dot-separated parts are returned as-is rather
 *   than having extra components appended.
 *
 * @param version - Major, major.minor, or full version string
 * @returns The resolved version string
 */
resolveFullVersion(version: string): string {
  // Already a full x.y.z version
  if (/^\d+\.\d+\.\d+$/.test(version)) {
    return version
  }

  const mapped = WEAVIATE_VERSION_MAP[version]
  if (mapped) {
    return mapped
  }

  // Pad only the missing components; appending a fixed '.0.0' would turn
  // '1.35' into the four-part '1.35.0.0'.
  const parts = version.split('.')
  while (parts.length < 3) {
    parts.push('0')
  }
  return parts.join('.')
}
182
+
/**
 * Compute the directory where binaries for the given version would be
 * installed on the current platform and architecture.
 * The version is first expanded with resolveFullVersion.
 */
getBinaryPath(version: string): string {
  const info = this.getPlatformInfo()
  const resolved = this.resolveFullVersion(version)
  return paths.getBinaryPath({
    engine: 'weaviate',
    version: resolved,
    platform: info.platform,
    arch: info.arch,
  })
}
194
+
/**
 * Check whether the weaviate executable exists under `<binPath>/bin`.
 * Only file existence is tested.
 */
async verifyBinary(binPath: string): Promise<boolean> {
  const executableName = `weaviate${platformService.getExecutableExtension()}`
  return existsSync(join(binPath, 'bin', executableName))
}
201
+
/**
 * Report whether the given Weaviate version is installed (downloaded) for the
 * current platform and architecture, as determined by weaviateBinaryManager.
 */
async isBinaryInstalled(version: string): Promise<boolean> {
  const info = this.getPlatformInfo()
  return weaviateBinaryManager.isInstalled(version, info.platform, info.arch)
}
207
+
/**
 * Make sure the Weaviate binaries for a version are installed, then record
 * the executable location in the config.
 *
 * Installation is delegated to weaviateBinaryManager.ensureInstalled. Each
 * tool that exists under `<binPath>/bin` is registered through configManager
 * with source 'bundled'.
 *
 * @param version - Version to install
 * @param onProgress - Optional progress callback forwarded to the installer
 * @returns The path returned by the binary manager
 */
async ensureBinaries(
  version: string,
  onProgress?: ProgressCallback,
): Promise<string> {
  const info = this.getPlatformInfo()
  const installDir = await weaviateBinaryManager.ensureInstalled(
    version,
    info.platform,
    info.arch,
    onProgress,
  )

  // Register every bundled tool that is actually present on disk
  const suffix = platformService.getExecutableExtension()
  const bundledTools = ['weaviate'] as const
  for (const toolName of bundledTools) {
    const executable = join(installDir, 'bin', `${toolName}${suffix}`)
    if (!existsSync(executable)) {
      continue
    }
    await configManager.setBinaryPath(toolName, executable, 'bundled')
  }

  return installDir
}
239
+
/**
 * Prepare the on-disk layout for a new Weaviate container.
 *
 * Creates the data directory and its `backups` subdirectory when missing, and
 * writes a `weaviate.env` file into the container directory.
 *
 * @param containerName - Container whose directories are initialised
 * @param _version - Unused
 * @param options - Optional settings; `port` is only used in the debug log
 * @returns The container's data directory path
 */
async initDataDir(
  containerName: string,
  _version: string,
  options: Record<string, unknown> = {},
): Promise<string> {
  const dataDir = paths.getContainerDataPath(containerName, { engine: ENGINE })
  const containerDir = paths.getContainerPath(containerName, {
    engine: ENGINE,
  })
  const port = (options.port as number) || engineDef.defaultPort
  const backupsDir = join(dataDir, 'backups')

  // Create a directory only when absent, logging the given message on creation
  const createIfMissing = async (dir: string, message: string) => {
    if (existsSync(dir)) {
      return
    }
    await mkdir(dir, { recursive: true })
    logDebug(message)
  }

  await createIfMissing(dataDir, `Created Weaviate data directory: ${dataDir}`)
  await createIfMissing(
    backupsDir,
    `Created Weaviate backups directory: ${backupsDir}`,
  )

  // Write a config file with port info for reference
  const envLines = [
    '# SpinDB generated Weaviate configuration',
    `PERSISTENCE_DATA_PATH=${dataDir}`,
    `BACKUP_FILESYSTEM_PATH=${backupsDir}`,
    `QUERY_DEFAULTS_LIMIT=25`,
    `AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true`,
    `DEFAULT_VECTORIZER_MODULE=none`,
    `CLUSTER_HOSTNAME=node1`,
    '',
  ]
  const configPath = join(containerDir, 'weaviate.env')
  await writeFile(configPath, envLines.join('\n'))
  logDebug(`Generated Weaviate config: ${configPath} (port: ${port})`)

  return dataDir
}
287
+
/**
 * Locate the weaviate server executable for a version.
 *
 * The version is passed through normalizeVersion and the executable is
 * expected under `<binary path>/bin`.
 *
 * @throws Error when the executable does not exist
 */
async getWeaviateServerPath(version: string): Promise<string> {
  const info = this.getPlatformInfo()
  const installDir = paths.getBinaryPath({
    engine: 'weaviate',
    version: normalizeVersion(version),
    platform: info.platform,
    arch: info.arch,
  })
  const suffix = platformService.getExecutableExtension()
  const candidate = join(installDir, 'bin', `weaviate${suffix}`)

  if (!existsSync(candidate)) {
    throw new Error(
      `Weaviate ${version} is not installed. Run: spindb engines download weaviate ${version}`,
    )
  }
  return candidate
}
307
+
/**
 * Locate a weaviate executable.
 *
 * Lookup order:
 * 1. The path registered in the config, if that file exists.
 * 2. When a version is given, the downloaded binary for that version.
 *
 * Note that the config entry wins even when a version is supplied.
 *
 * @param version - Optional version to fall back to
 * @throws Error when neither location yields an existing file
 */
async getWeaviatePath(version?: string): Promise<string> {
  const registered = await configManager.getBinaryPath('weaviate')
  if (registered && existsSync(registered)) {
    return registered
  }

  if (version) {
    const info = this.getPlatformInfo()
    const installDir = paths.getBinaryPath({
      engine: 'weaviate',
      version: normalizeVersion(version),
      platform: info.platform,
      arch: info.arch,
    })
    const suffix = platformService.getExecutableExtension()
    const candidate = join(installDir, 'bin', `weaviate${suffix}`)
    if (existsSync(candidate)) {
      return candidate
    }
  }

  throw new Error(
    'weaviate not found. Run: spindb engines download weaviate <version>',
  )
}
337
+
/**
 * Start the Weaviate server for a container.
 *
 * Steps:
 * 1. Return immediately if the process manager reports the container running.
 * 2. Resolve the server executable (stored binary path first, then the
 *    version-based lookup).
 * 3. Verify the gRPC port (HTTP port + 1) is free; on Windows also verify the
 *    HTTP port, waiting up to 60s for each.
 * 4. Spawn the server detached, with stdout/stderr appended to the container
 *    log file and configuration passed as CLI args plus environment variables
 *    (values from weaviate.env are overridden by the explicit spawn values).
 * 5. Write the PID file and poll the readiness endpoint.
 *
 * On a failed start the spawned process is terminated (best effort), the PID
 * file is removed, and the log is scanned for a port-conflict message.
 *
 * @param container - Container configuration (name, port, version, binaryPath)
 * @param onProgress - Optional progress callback
 * @returns The HTTP port and connection string
 * @throws Error when the binary is missing, a required port is in use, the
 *   process could not be spawned, or readiness is not reached in time
 */
async start(
  container: ContainerConfig,
  onProgress?: ProgressCallback,
): Promise<{ port: number; connectionString: string }> {
  const { name, port, version, binaryPath } = container

  // Check if already running (idempotent behavior)
  const alreadyRunning = await processManager.isRunning(name, {
    engine: ENGINE,
  })
  if (alreadyRunning) {
    return {
      port,
      connectionString: this.getConnectionString(container),
    }
  }

  // Use stored binary path if available (from container creation)
  let weaviateServer: string | null = null

  if (binaryPath && existsSync(binaryPath)) {
    const ext = platformService.getExecutableExtension()
    const serverPath = join(binaryPath, 'bin', `weaviate${ext}`)
    if (existsSync(serverPath)) {
      weaviateServer = serverPath
      logDebug(`Using stored binary path: ${weaviateServer}`)
    }
  }

  // If we didn't find the binary above, fall back to normal path
  if (!weaviateServer) {
    try {
      weaviateServer = await this.getWeaviateServerPath(version)
    } catch (error) {
      const originalMessage =
        error instanceof Error ? error.message : String(error)
      throw new Error(
        `Weaviate ${version} is not installed. Run: spindb engines download weaviate ${version}\n` +
          ` Original error: ${originalMessage}`,
      )
    }
  }

  logDebug(`Using weaviate for version ${version}: ${weaviateServer}`)

  const containerDir = paths.getContainerPath(name, { engine: ENGINE })
  const dataDir = paths.getContainerDataPath(name, { engine: ENGINE })
  const backupsDir = join(dataDir, 'backups')
  const logFile = paths.getContainerLogPath(name, { engine: ENGINE })
  const pidFile = join(containerDir, 'weaviate.pid')
  const grpcPort = port + 1

  // Outside Windows the timeout is 0, so an occupied port fails immediately
  const portWaitTimeout = isWindows() ? 60000 : 0
  const portCheckInterval = 1000

  // Poll until the port is free, throwing once portWaitTimeout has elapsed
  const waitForFreePort = async (
    candidate: number,
    label: string,
  ): Promise<void> => {
    const startedAt = Date.now()
    while (!(await portManager.isPortAvailable(candidate))) {
      if (Date.now() - startedAt >= portWaitTimeout) {
        throw new Error(
          `${label} port ${candidate} is already in use. ` +
            `Weaviate requires both HTTP port ${port} and gRPC port ${grpcPort} to be available.`,
        )
      }
      logDebug(`Waiting for ${label} port ${candidate} to become available...`)
      await new Promise((resolve) => setTimeout(resolve, portCheckInterval))
    }
  }

  // Check if gRPC port is available (Weaviate uses HTTP port + 1 for gRPC)
  await waitForFreePort(grpcPort, 'gRPC')

  // Also check HTTP port on Windows
  if (isWindows()) {
    await waitForFreePort(port, 'HTTP')
  }

  // Ensure backups directory exists
  if (!existsSync(backupsDir)) {
    await mkdir(backupsDir, { recursive: true })
  }

  onProgress?.({ stage: 'starting', message: 'Starting Weaviate...' })

  logDebug(`Starting weaviate on port ${port}`)

  // Scan the tail of the log file for a port-conflict message.
  // The match is case-insensitive so that both "Address already in use" and
  // the lowercase "address already in use" spelling are recognised.
  const checkLogForError = async (): Promise<string | null> => {
    try {
      const logContent = await readFile(logFile, 'utf-8')
      const recentLog = logContent.slice(-2000).toLowerCase() // Last 2KB

      if (
        recentLog.includes('address already in use') ||
        recentLog.includes('failed to bind')
      ) {
        return `Port ${port} is already in use`
      }
    } catch {
      // Log file might not exist yet
    }
    return null
  }

  const args = ['--host', '127.0.0.1', '--port', String(port), '--scheme', 'http']

  // Derive unique internal cluster ports from the HTTP port to avoid conflicts
  // when running multiple Weaviate containers simultaneously.
  // Default internal ports (7946, 7947, 8300, 8301) are fixed and will conflict.
  const gossipPort = port + 100 // e.g., 8080 → 8180
  const dataPort = port + 101 // e.g., 8080 → 8181
  const raftPort = port + 200 // e.g., 8080 → 8280
  const raftInternalRpcPort = raftPort + 1 // e.g., 8080 → 8281

  // Read weaviate.env file (written by initDataDir and updated by createUser)
  // so that API key / auth settings persist across restarts
  const envFilePath = join(containerDir, 'weaviate.env')
  const fileEnv: Record<string, string> = {}
  if (existsSync(envFilePath)) {
    try {
      const envContent = await readFile(envFilePath, 'utf-8')
      for (const line of envContent.split('\n')) {
        const trimmed = line.trim()
        if (!trimmed || trimmed.startsWith('#')) continue
        const eqIdx = trimmed.indexOf('=')
        if (eqIdx > 0) {
          fileEnv[trimmed.substring(0, eqIdx)] = trimmed.substring(eqIdx + 1)
        }
      }
    } catch {
      logDebug(`Could not read ${envFilePath}, using defaults`)
    }
  }

  const env = {
    ...process.env,
    // Defaults from weaviate.env file (includes auth settings from createUser)
    ...fileEnv,
    // Explicit spawn values always override file values
    PERSISTENCE_DATA_PATH: dataDir,
    BACKUP_FILESYSTEM_PATH: backupsDir,
    ENABLE_MODULES: 'backup-filesystem',
    CLUSTER_HOSTNAME: `node-${port}`,
    GRPC_PORT: String(grpcPort),
    CLUSTER_GOSSIP_BIND_PORT: String(gossipPort),
    CLUSTER_DATA_BIND_PORT: String(dataPort),
    RAFT_PORT: String(raftPort),
    RAFT_INTERNAL_RPC_PORT: String(raftInternalRpcPort),
  }

  // Redirect stdout/stderr to log file via file descriptor so
  // checkLogForError can find startup errors. File descriptors are
  // inherited by the child and don't keep Node.js event loop alive
  // (unlike 'pipe'), so proc.unref() works correctly.
  const logFd = openSync(logFile, 'a')

  let proc: ReturnType<typeof spawn>
  try {
    const spawnOpts: SpawnOptions = {
      cwd: containerDir,
      stdio: ['ignore', logFd, logFd],
      detached: true,
      env,
    }

    if (isWindows()) {
      spawnOpts.windowsHide = true
    }

    proc = spawn(weaviateServer, args, spawnOpts)
  } finally {
    // Close fd in parent — child inherited its own copy. Done in finally so
    // the descriptor is not leaked if spawn throws.
    closeSync(logFd)
  }

  // A failed spawn is reported through an 'error' event; without a listener
  // that event would surface as an uncaught exception.
  proc.on('error', (spawnError: Error) => {
    logDebug(`Weaviate process error: ${spawnError.message}`)
  })
  proc.unref()

  if (!proc.pid) {
    throw new Error('Weaviate server process failed to start (no PID)')
  }

  try {
    await writeFile(pidFile, String(proc.pid))
  } catch {
    // Non-fatal
  }

  // Wait for Weaviate to be ready
  const ready = await this.waitForReady(port)

  if (ready) {
    return {
      port,
      connectionString: this.getConnectionString(container),
    }
  }

  // Clean up the orphaned detached process before throwing
  if (platformService.isProcessRunning(proc.pid)) {
    try {
      await platformService.terminateProcess(proc.pid, true)
    } catch {
      // Ignore cleanup errors - best effort
    }
  }

  // Remove the PID file so it does not point at a process that never came up
  try {
    await unlink(pidFile)
  } catch {
    // PID file may not exist - best effort
  }

  const portError = await checkLogForError()

  const errorDetails = [
    portError || 'Weaviate failed to start within timeout.',
    `Binary: ${weaviateServer}`,
    `Log file: ${logFile}`,
  ].join('\n')

  throw new Error(errorDetails)
}
575
+
/**
 * Poll Weaviate's readiness endpoint until it answers 200 or the timeout
 * elapses. Request failures are treated as "not ready yet".
 *
 * @param port - HTTP port to probe
 * @param timeoutMs - Maximum time to keep polling (default 30000)
 * @returns true once ready, false on timeout
 */
private async waitForReady(
  port: number,
  timeoutMs = 30000,
): Promise<boolean> {
  const pollEveryMs = 500
  const deadline = Date.now() + timeoutMs

  while (Date.now() < deadline) {
    try {
      const probe = await weaviateApiRequest(
        port,
        'GET',
        '/v1/.well-known/ready',
      )
      if (probe.status === 200) {
        logDebug(`Weaviate ready on port ${port}`)
        return true
      }
    } catch {
      // Connection failed, wait and retry
    }
    await new Promise((resolve) => setTimeout(resolve, pollEveryMs))
  }

  logDebug(`Weaviate did not become ready within ${timeoutMs}ms`)
  return false
}
605
+
/**
 * Stop the Weaviate server for a container.
 *
 * Steps:
 * 1. Read the PID file; the value is used only when it is a positive integer.
 * 2. Terminate that process (forced on Windows; graceful then forced after 2s
 *    elsewhere).
 * 3. Force-terminate any process still found on the HTTP or gRPC port.
 * 4. Remove the PID file.
 * 5. On Windows, wait up to 30s for both ports to be released.
 *
 * Termination and cleanup failures are logged or ignored (best effort).
 *
 * @param container - Container configuration (name and port are used)
 */
async stop(container: ContainerConfig): Promise<void> {
  const { name, port } = container
  const containerDir = paths.getContainerPath(name, { engine: ENGINE })
  const pidFile = join(containerDir, 'weaviate.pid')
  const grpcPort = port + 1

  logDebug(`Stopping Weaviate container "${name}" on port ${port}`)

  // Get PID and terminate
  let pid: number | null = null

  if (existsSync(pidFile)) {
    try {
      const content = await readFile(pidFile, 'utf8')
      const parsed = parseInt(content.trim(), 10)
      // Same validation as status(): never hand a zero, negative or
      // non-numeric value to the process check / termination calls.
      if (Number.isInteger(parsed) && parsed > 0) {
        pid = parsed
      } else {
        logDebug(`Ignoring invalid PID in ${pidFile}`)
      }
    } catch {
      // Ignore
    }
  }

  // Kill process if running
  if (pid !== null && platformService.isProcessRunning(pid)) {
    logDebug(`Killing Weaviate process ${pid}`)
    try {
      if (isWindows()) {
        await platformService.terminateProcess(pid, true)
      } else {
        await platformService.terminateProcess(pid, false)
        await new Promise((resolve) => setTimeout(resolve, 2000))

        if (platformService.isProcessRunning(pid)) {
          logWarning(`Graceful termination failed, force killing ${pid}`)
          await platformService.terminateProcess(pid, true)
        }
      }
    } catch (error) {
      logDebug(`Process termination error: ${error}`)
    }
  }

  // Wait for process to fully terminate
  const terminationWait = isWindows() ? 3000 : 1000
  await new Promise((resolve) => setTimeout(resolve, terminationWait))

  // Kill any processes still listening on the ports
  const portPids = await platformService.findProcessByPort(port)
  const grpcPids = await platformService.findProcessByPort(grpcPort)
  const allPids = [...new Set([...portPids, ...grpcPids])]
  for (const portPid of allPids) {
    if (platformService.isProcessRunning(portPid)) {
      logDebug(`Killing process ${portPid} still on port ${port}/${grpcPort}`)
      try {
        await platformService.terminateProcess(portPid, true)
      } catch {
        // Ignore
      }
    }
  }

  // On Windows, wait again after killing port processes
  if (isWindows() && allPids.length > 0) {
    await new Promise((resolve) => setTimeout(resolve, 2000))
  }

  // Cleanup PID file
  if (existsSync(pidFile)) {
    try {
      await unlink(pidFile)
    } catch {
      // Ignore
    }
  }

  // On Windows, wait for ports to be released
  if (isWindows()) {
    logDebug(`Waiting for ports ${port} and ${grpcPort} to be released...`)
    const portWaitStart = Date.now()
    const portWaitTimeout = 30000
    const checkInterval = 500

    while (Date.now() - portWaitStart < portWaitTimeout) {
      const httpAvailable = await portManager.isPortAvailable(port)
      const grpcAvailable = await portManager.isPortAvailable(grpcPort)

      if (httpAvailable && grpcAvailable) {
        logDebug('Ports released successfully')
        break
      }
      await new Promise((resolve) => setTimeout(resolve, checkInterval))
    }
  }

  logDebug('Weaviate stopped')
}
704
+
/**
 * Report whether the Weaviate server for a container is running.
 *
 * The readiness endpoint is tried first. If it does not answer 200, the PID
 * file is consulted and the container counts as running when it holds a
 * positive PID of a live process.
 */
async status(container: ContainerConfig): Promise<StatusResult> {
  const { name, port } = container
  const pidFile = join(
    paths.getContainerPath(name, { engine: ENGINE }),
    'weaviate.pid',
  )

  // Health check via REST API
  try {
    const probe = await weaviateApiRequest(port, 'GET', '/v1/.well-known/ready')
    if (probe.status === 200) {
      return { running: true, message: 'Weaviate is running' }
    }
  } catch {
    // Not responding, check PID
  }

  // Fall back to the PID file
  if (existsSync(pidFile)) {
    try {
      const raw = await readFile(pidFile, 'utf8')
      const pid = parseInt(raw.trim(), 10)
      const usable = !isNaN(pid) && pid > 0
      if (usable && platformService.isProcessRunning(pid)) {
        return {
          running: true,
          message: `Weaviate is running (PID: ${pid})`,
        }
      }
    } catch {
      // Ignore
    }
  }

  return { running: false, message: 'Weaviate is not running' }
}
743
+
/**
 * Detect the format of a backup file.
 * Delegates to the restore module's detectBackupFormat implementation.
 */
async detectBackupFormat(filePath: string): Promise<BackupFormat> {
  const format = await detectBackupFormatImpl(filePath)
  return format
}
748
+
/**
 * Restore a backup into a container's data directory.
 *
 * IMPORTANT: the container must be stopped; a running container is rejected.
 *
 * @param container - Target container
 * @param backupPath - Path of the backup to restore
 * @param _options - Unused
 * @throws Error when the container is currently running
 */
async restore(
  container: ContainerConfig,
  backupPath: string,
  _options: { database?: string; flush?: boolean } = {},
): Promise<RestoreResult> {
  const { name } = container

  // Weaviate must be stopped for snapshot restore
  const { running } = await this.status(container)
  if (running) {
    throw new Error(
      `Weaviate container "${name}" must be stopped before restore. ` +
        `Run: spindb stop ${name}`,
    )
  }

  return restoreBackup(backupPath, {
    containerName: name,
    dataDir: paths.getContainerDataPath(name, { engine: ENGINE }),
  })
}
776
+
/**
 * Build the local connection string for a container.
 * Format: http://127.0.0.1:PORT (the database argument is not used).
 */
getConnectionString(container: ContainerConfig, _database?: string): string {
  const connectionString = `http://127.0.0.1:${container.port}`
  return connectionString
}
785
+
/**
 * Print the local REST, GraphQL and gRPC endpoints plus example curl
 * commands. Weaviate has no interactive shell, so nothing is launched.
 */
async connect(container: ContainerConfig, _database?: string): Promise<void> {
  const { port } = container
  const url = `http://127.0.0.1:${port}`

  const lines = [
    `Weaviate REST API available at: ${url}/v1`,
    `Weaviate GraphQL endpoint: ${url}/v1/graphql`,
    `gRPC endpoint: 127.0.0.1:${port + 1}`,
    '',
    'Example commands:',
    ` curl ${url}/v1/schema`,
    ` curl ${url}/v1/.well-known/ready`,
  ]
  for (const line of lines) {
    console.log(line)
  }
}
799
+
/**
 * Create a Weaviate class (collection) named after the given database.
 * The class is created with vectorizer 'none' via POST /v1/schema.
 *
 * @throws Error when the API responds with a status other than 200
 */
async createDatabase(
  container: ContainerConfig,
  database: string,
): Promise<void> {
  const classDefinition = { class: database, vectorizer: 'none' }
  const { status, data } = await weaviateApiRequest(
    container.port,
    'POST',
    '/v1/schema',
    classDefinition,
  )

  if (status !== 200) {
    throw new Error(`Failed to create class: ${JSON.stringify(data)}`)
  }

  logDebug(`Created Weaviate class: ${database}`)
}
824
+
/**
 * Delete the Weaviate class named after the given database via
 * DELETE /v1/schema/<class>.
 *
 * The class name is percent-encoded before being placed in the URL path so
 * that characters such as '/', '?' or '#' cannot change which endpoint the
 * request reaches.
 *
 * @param container - Container whose port is used for the request
 * @param database - Name of the class to delete
 * @throws Error when the API responds with a status other than 200
 */
async dropDatabase(
  container: ContainerConfig,
  database: string,
): Promise<void> {
  const { port } = container

  const response = await weaviateApiRequest(
    port,
    'DELETE',
    `/v1/schema/${encodeURIComponent(database)}`,
  )

  if (response.status !== 200) {
    throw new Error(
      `Failed to delete class: ${JSON.stringify(response.data)}`,
    )
  }

  logDebug(`Deleted Weaviate class: ${database}`)
}
848
+
/**
 * Get the storage size of the Weaviate instance.
 *
 * Always resolves to null: GET /v1/meta is requested, but its response is not
 * used because Weaviate doesn't expose direct storage size in meta. Request
 * failures are swallowed.
 */
async getDatabaseSize(container: ContainerConfig): Promise<number | null> {
  try {
    await weaviateApiRequest(container.port, 'GET', '/v1/meta')
  } catch {
    // Size is unavailable either way
  }
  return null
}
863
+
/**
 * Attempt to dump from a remote Weaviate connection.
 *
 * The remote schema is fetched to confirm connectivity and count classes, and
 * then an explanatory error is always thrown: filesystem backups live on the
 * server's local disk and cannot be downloaded over the REST API.
 *
 * Connection string format: http://host:port
 * For API key auth: http://host:port?api_key=YOUR_KEY
 *
 * @param connectionString - Remote Weaviate connection string
 * @param _outputPath - Unused
 * @throws Error when the schema request does not return 200, and otherwise
 *   always with guidance on alternative export routes
 */
async dumpFromConnectionString(
  connectionString: string,
  _outputPath: string,
): Promise<DumpResult> {
  const remote = parseWeaviateConnectionString(connectionString)
  const { baseUrl } = remote

  logDebug(`Connecting to remote Weaviate at ${baseUrl}`)

  // Connectivity check doubles as the schema fetch
  const { status, data } = await remoteWeaviateRequest(
    baseUrl,
    'GET',
    '/v1/schema',
    remote.headers,
  )
  if (status !== 200) {
    throw new Error(
      `Failed to connect to Weaviate at ${baseUrl}: ${JSON.stringify(data)}`,
    )
  }

  const schema = data as { classes?: Array<{ class: string }> }
  const classCount = schema.classes?.length ?? 0

  logDebug(`Found ${classCount} classes on remote server`)

  // Only backup metadata is exposed via GET; the backup files themselves stay
  // on the server, so a remote dump is not possible with this backend.
  const explanation =
    `Cannot dump from a remote Weaviate instance using the filesystem backup backend.\n` +
    `Weaviate filesystem backups are written to the server's local disk ` +
    `(BACKUP_FILESYSTEM_PATH/<backup_id>/) and cannot be downloaded over HTTP.\n\n` +
    `To export data from a remote Weaviate instance, either:\n` +
    ` 1. SSH into the server and copy the backup directory directly\n` +
    ` 2. Configure an object-store backup backend (S3, GCS, Azure) on the remote server\n` +
    ` and use the appropriate backup module endpoint instead of /v1/backups/filesystem\n` +
    ` 3. Use the Weaviate client SDK to read and re-insert objects programmatically\n\n` +
    `Remote server at ${baseUrl} has ${classCount} class(es).`

  throw new Error(explanation)
}
917
+
/**
 * Create a backup of a container.
 * Delegates to createBackup from the backup module.
 */
async backup(
  container: ContainerConfig,
  outputPath: string,
  options: BackupOptions,
): Promise<BackupResult> {
  const result = await createBackup(container, outputPath, options)
  return result
}
926
+
/**
 * Run a command against Weaviate.
 *
 * Script files are not supported. The only inline commands understood are
 * "LIST CLASSES" and "SHOW CLASSES" (case-insensitive), which print the
 * schema as formatted JSON.
 *
 * @throws Error for a file, for any other inline command, or when neither
 *   file nor sql is provided
 */
async runScript(
  container: ContainerConfig,
  options: { file?: string; sql?: string; database?: string },
): Promise<void> {
  const { port } = container
  const { file, sql } = options

  if (file) {
    throw new Error(
      'Weaviate does not support command files. Use the REST API directly.\n' +
        `Example: curl -X GET http://127.0.0.1:${port}/v1/schema`,
    )
  }

  if (!sql) {
    throw new Error('Either file or sql option must be provided')
  }

  // Try to interpret as a simple command
  const command = sql.trim().toUpperCase()
  const listsClasses = ['LIST CLASSES', 'SHOW CLASSES'].includes(command)
  if (listsClasses) {
    const schema = await weaviateApiRequest(port, 'GET', '/v1/schema')
    console.log(JSON.stringify(schema.data, null, 2))
    return
  }

  throw new Error(
    'Weaviate uses REST/GraphQL API for operations. Use curl or the Weaviate client libraries.\n' +
      `API endpoint: http://127.0.0.1:${port}/v1`,
  )
}
959
+
/**
 * Execute a query via the REST API.
 *
 * Query format: METHOD /path [JSON body]
 * Examples:
 *   GET /v1/schema
 *   POST /v1/graphql {"query": "{ Get { MyClass { name } } }"}
 *
 * options.method overrides the method written in the query. A body may be
 * given inline (everything from the first '{') or through options.body, but
 * not both. A leading '/' is added to the path when missing.
 *
 * @throws Error for a query without a space, an invalid or duplicated body,
 *   or an API response with status 400 or above
 */
async executeQuery(
  container: ContainerConfig,
  query: string,
  options?: QueryOptions,
): Promise<QueryResult> {
  const { port } = container

  // Split "METHOD rest-of-query" at the first space
  const statement = query.trim()
  const firstSpace = statement.indexOf(' ')
  if (firstSpace < 0) {
    throw new Error(
      'Invalid query format. Expected: METHOD /path [body]\n' +
        'Example: GET /v1/schema',
    )
  }

  const verb = statement.slice(0, firstSpace).toUpperCase()
  const method = (options?.method || verb) as 'GET' | 'POST' | 'PUT' | 'DELETE'
  const remainder = statement.slice(firstSpace + 1).trim()

  // Path is everything before the first '{'; the rest is an inline JSON body
  let path = remainder
  let body: Record<string, unknown> | undefined = options?.body

  const braceAt = remainder.indexOf('{')
  if (braceAt !== -1) {
    path = remainder.slice(0, braceAt).trim()
    if (options?.body) {
      throw new Error(
        'Cannot specify both inline JSON body in query and options.body. Use one or the other.',
      )
    }
    try {
      body = JSON.parse(remainder.slice(braceAt)) as Record<string, unknown>
    } catch {
      throw new Error('Invalid JSON body in query')
    }
  }

  if (!path.startsWith('/')) {
    path = '/' + path
  }

  const response = await weaviateApiRequest(port, method, path, body)
  if (response.status >= 400) {
    throw new Error(
      `Weaviate API error (${response.status}): ${JSON.stringify(response.data)}`,
    )
  }

  return parseRESTAPIResult(JSON.stringify(response.data))
}
1031
+
1032
/**
 * Report the databases of a Weaviate container.
 *
 * Weaviate works with classes instead of databases, so the result is always
 * a one-element list holding the container's configured `database` value.
 *
 * @param container - Container configuration to read `database` from
 * @returns A single-element array containing `container.database`
 */
async listDatabases(container: ContainerConfig): Promise<string[]> {
  const { database } = container
  return [database]
}
1039
+
1040
/**
 * Create/update the API key for Weaviate.
 *
 * Weaviate supports API key authentication via environment variables. This
 * method rewrites the container's `weaviate.env` so that anonymous access is
 * disabled, API key authentication is enabled, `password` is the allowed key
 * and `username` is the user it maps to. Existing lines for these settings
 * are replaced in place; missing ones are appended. The file is written with
 * mode 0o600.
 *
 * If the container is currently running it is stopped, the config is
 * written, and the container is started again so the change takes effect.
 *
 * @param container - Container to configure (`name`, `port`, `engine` are used)
 * @param options - `username` is the credential label, `password` the API key
 * @returns Credentials with `apiKey` set to the given password and an empty
 *   `password` field
 * @throws Error when `password` is empty or contains a comma or line break
 * @throws Error when the container's `weaviate.env` does not exist
 */
async createUser(
  container: ContainerConfig,
  options: CreateUserOptions,
): Promise<UserCredentials> {
  const { username, password } = options
  assertValidUsername(username)

  // The key is written verbatim into an env-file line whose value Weaviate
  // reads as a comma-separated list. An empty key, a comma, or a line break
  // would produce a config whose effective key differs from the one returned.
  if (!password) {
    throw new Error('Weaviate API key must not be empty')
  }
  if (/[\r\n,]/.test(password)) {
    throw new Error(
      'Weaviate API key must not contain commas or line breaks',
    )
  }

  const { port, name } = container

  // Read current env config and add/update API key
  const containerDir = paths.getContainerPath(name, { engine: ENGINE })
  const configPath = join(containerDir, 'weaviate.env')

  if (!existsSync(configPath)) {
    throw new Error(
      `Weaviate config not found: ${configPath}\n` +
        `This file is created during container setup. ` +
        `Try recreating the container: spindb delete ${name} && spindb create ${name}`,
    )
  }
  const currentConfig = await readFile(configPath, 'utf-8')

  // Settings to enforce, in the order they are appended when missing
  const desiredSettings: ReadonlyArray<readonly [string, string]> = [
    ['AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED', 'false'],
    ['AUTHENTICATION_APIKEY_ENABLED', 'true'],
    ['AUTHENTICATION_APIKEY_ALLOWED_KEYS', password],
    ['AUTHENTICATION_APIKEY_USERS', username],
  ]

  // Set the trailing newline aside so appended settings land directly after
  // the last existing line rather than after a blank one.
  const endsWithNewline = currentConfig.endsWith('\n')
  const content = endsWithNewline ? currentConfig.slice(0, -1) : currentConfig
  const lines = content === '' ? [] : content.split('\n')

  // Replace every existing line for a managed setting
  const seenKeys = new Set<string>()
  const updatedLines = lines.map((line) => {
    const setting = desiredSettings.find(([key]) =>
      line.startsWith(`${key}=`),
    )
    if (!setting) {
      return line
    }
    const [key, value] = setting
    seenKeys.add(key)
    return `${key}=${value}`
  })

  // Append managed settings that were not present
  for (const [key, value] of desiredSettings) {
    if (!seenKeys.has(key)) {
      updatedLines.push(`${key}=${value}`)
    }
  }

  const updatedConfig =
    updatedLines.join('\n') + (endsWithNewline ? '\n' : '')

  // Write the new config and restrict it to the owner (it holds the API key)
  const writeConfig = async (): Promise<void> => {
    await writeFile(configPath, updatedConfig)
    await chmod(configPath, 0o600)
  }

  // Only restart if the container is currently running
  const statusResult = await this.status(container)
  if (statusResult.running) {
    logWarning(
      `Restarting Weaviate container "${name}" to apply API key change. ` +
        'Active client connections will be disconnected.',
    )
    await this.stop(container)
    await writeConfig()
    await this.start(container)
  } else {
    await writeConfig()
  }

  logDebug(`Configured Weaviate API key (credential label: ${username})`)

  const connectionString = `http://127.0.0.1:${port}`

  return {
    username,
    password: '',
    connectionString,
    engine: container.engine,
    container: container.name,
    apiKey: password,
  }
}
1137
+ }
1138
+
1139
+ export const weaviateEngine = new WeaviateEngine() // Module-level WeaviateEngine instance, created once at import time and exported by name