@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.env.example +57 -0
  2. package/README.md +374 -0
  3. package/dist/index.js +189 -0
  4. package/dist/mcp/index.js +1132 -0
  5. package/docker-compose.prod.yml +91 -0
  6. package/docker-compose.yml +358 -0
  7. package/drizzle/0000_dapper_the_professor.sql +159 -0
  8. package/drizzle/0001_api_keys.sql +51 -0
  9. package/drizzle/meta/0000_snapshot.json +1532 -0
  10. package/drizzle/meta/_journal.json +13 -0
  11. package/drizzle.config.ts +20 -0
  12. package/package.json +114 -0
  13. package/scripts/add-extraction-job.ts +122 -0
  14. package/scripts/benchmark-pgvector.ts +122 -0
  15. package/scripts/bootstrap.sh +209 -0
  16. package/scripts/check-runtime-pack.ts +111 -0
  17. package/scripts/claude-mcp-config.ts +336 -0
  18. package/scripts/docker-entrypoint.sh +183 -0
  19. package/scripts/doctor.ts +377 -0
  20. package/scripts/init-db.sql +33 -0
  21. package/scripts/install.sh +1110 -0
  22. package/scripts/mcp-setup.ts +271 -0
  23. package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
  24. package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
  25. package/scripts/migrations/003_create_hnsw_index.sql +94 -0
  26. package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
  27. package/scripts/migrations/005_create_chunks_table.sql +95 -0
  28. package/scripts/migrations/006_create_processing_queue.sql +45 -0
  29. package/scripts/migrations/generate_test_data.sql +42 -0
  30. package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
  31. package/scripts/migrations/run_migrations.sh +286 -0
  32. package/scripts/migrations/test_hnsw_index.sql +255 -0
  33. package/scripts/pre-commit-secrets +282 -0
  34. package/scripts/run-extraction-worker.ts +46 -0
  35. package/scripts/run-phase1-tests.sh +291 -0
  36. package/scripts/setup.ts +222 -0
  37. package/scripts/smoke-install.sh +12 -0
  38. package/scripts/test-health-endpoint.sh +328 -0
  39. package/src/api/index.ts +2 -0
  40. package/src/api/middleware/auth.ts +80 -0
  41. package/src/api/middleware/csrf.ts +308 -0
  42. package/src/api/middleware/errorHandler.ts +166 -0
  43. package/src/api/middleware/rateLimit.ts +360 -0
  44. package/src/api/middleware/validation.ts +514 -0
  45. package/src/api/routes/documents.ts +286 -0
  46. package/src/api/routes/profiles.ts +237 -0
  47. package/src/api/routes/search.ts +71 -0
  48. package/src/api/stores/index.ts +58 -0
  49. package/src/config/bootstrap-env.ts +3 -0
  50. package/src/config/env.ts +71 -0
  51. package/src/config/feature-flags.ts +25 -0
  52. package/src/config/index.ts +140 -0
  53. package/src/config/secrets.config.ts +291 -0
  54. package/src/db/client.ts +92 -0
  55. package/src/db/index.ts +73 -0
  56. package/src/db/postgres.ts +72 -0
  57. package/src/db/schema/chunks.schema.ts +31 -0
  58. package/src/db/schema/containers.schema.ts +46 -0
  59. package/src/db/schema/documents.schema.ts +49 -0
  60. package/src/db/schema/embeddings.schema.ts +32 -0
  61. package/src/db/schema/index.ts +11 -0
  62. package/src/db/schema/memories.schema.ts +72 -0
  63. package/src/db/schema/profiles.schema.ts +34 -0
  64. package/src/db/schema/queue.schema.ts +59 -0
  65. package/src/db/schema/relationships.schema.ts +42 -0
  66. package/src/db/schema.ts +223 -0
  67. package/src/db/worker-connection.ts +47 -0
  68. package/src/index.ts +235 -0
  69. package/src/mcp/CLAUDE.md +1 -0
  70. package/src/mcp/index.ts +1380 -0
  71. package/src/mcp/legacyState.ts +22 -0
  72. package/src/mcp/rateLimit.ts +358 -0
  73. package/src/mcp/resources.ts +309 -0
  74. package/src/mcp/results.ts +104 -0
  75. package/src/mcp/tools.ts +401 -0
  76. package/src/queues/config.ts +119 -0
  77. package/src/queues/index.ts +289 -0
  78. package/src/sdk/client.ts +225 -0
  79. package/src/sdk/errors.ts +266 -0
  80. package/src/sdk/http.ts +560 -0
  81. package/src/sdk/index.ts +244 -0
  82. package/src/sdk/resources/base.ts +65 -0
  83. package/src/sdk/resources/connections.ts +204 -0
  84. package/src/sdk/resources/documents.ts +163 -0
  85. package/src/sdk/resources/index.ts +10 -0
  86. package/src/sdk/resources/memories.ts +150 -0
  87. package/src/sdk/resources/search.ts +60 -0
  88. package/src/sdk/resources/settings.ts +36 -0
  89. package/src/sdk/types.ts +674 -0
  90. package/src/services/chunking/index.ts +451 -0
  91. package/src/services/chunking.service.ts +650 -0
  92. package/src/services/csrf.service.ts +252 -0
  93. package/src/services/documents.repository.ts +219 -0
  94. package/src/services/documents.service.ts +191 -0
  95. package/src/services/embedding.service.ts +404 -0
  96. package/src/services/extraction.service.ts +300 -0
  97. package/src/services/extractors/code.extractor.ts +451 -0
  98. package/src/services/extractors/index.ts +9 -0
  99. package/src/services/extractors/markdown.extractor.ts +461 -0
  100. package/src/services/extractors/pdf.extractor.ts +315 -0
  101. package/src/services/extractors/text.extractor.ts +118 -0
  102. package/src/services/extractors/url.extractor.ts +243 -0
  103. package/src/services/index.ts +235 -0
  104. package/src/services/ingestion.service.ts +177 -0
  105. package/src/services/llm/anthropic.ts +400 -0
  106. package/src/services/llm/base.ts +460 -0
  107. package/src/services/llm/contradiction-detector.service.ts +526 -0
  108. package/src/services/llm/heuristics.ts +148 -0
  109. package/src/services/llm/index.ts +309 -0
  110. package/src/services/llm/memory-classifier.service.ts +383 -0
  111. package/src/services/llm/memory-extension-detector.service.ts +523 -0
  112. package/src/services/llm/mock.ts +470 -0
  113. package/src/services/llm/openai.ts +398 -0
  114. package/src/services/llm/prompts.ts +438 -0
  115. package/src/services/llm/types.ts +373 -0
  116. package/src/services/memory.repository.ts +1769 -0
  117. package/src/services/memory.service.ts +1338 -0
  118. package/src/services/memory.types.ts +234 -0
  119. package/src/services/persistence/index.ts +295 -0
  120. package/src/services/pipeline.service.ts +509 -0
  121. package/src/services/profile.repository.ts +436 -0
  122. package/src/services/profile.service.ts +560 -0
  123. package/src/services/profile.types.ts +270 -0
  124. package/src/services/relationships/detector.ts +1128 -0
  125. package/src/services/relationships/index.ts +268 -0
  126. package/src/services/relationships/memory-integration.ts +459 -0
  127. package/src/services/relationships/strategies.ts +132 -0
  128. package/src/services/relationships/types.ts +370 -0
  129. package/src/services/search.service.ts +761 -0
  130. package/src/services/search.types.ts +220 -0
  131. package/src/services/secrets.service.ts +384 -0
  132. package/src/services/vectorstore/base.ts +327 -0
  133. package/src/services/vectorstore/index.ts +444 -0
  134. package/src/services/vectorstore/memory.ts +286 -0
  135. package/src/services/vectorstore/migration.ts +295 -0
  136. package/src/services/vectorstore/mock.ts +403 -0
  137. package/src/services/vectorstore/pgvector.ts +695 -0
  138. package/src/services/vectorstore/types.ts +247 -0
  139. package/src/startup.ts +389 -0
  140. package/src/types/api.types.ts +193 -0
  141. package/src/types/document.types.ts +103 -0
  142. package/src/types/index.ts +241 -0
  143. package/src/types/profile.base.ts +133 -0
  144. package/src/utils/errors.ts +447 -0
  145. package/src/utils/id.ts +15 -0
  146. package/src/utils/index.ts +101 -0
  147. package/src/utils/logger.ts +313 -0
  148. package/src/utils/sanitization.ts +501 -0
  149. package/src/utils/secret-validation.ts +273 -0
  150. package/src/utils/synonyms.ts +188 -0
  151. package/src/utils/validation.ts +581 -0
  152. package/src/workers/chunking.worker.ts +242 -0
  153. package/src/workers/embedding.worker.ts +358 -0
  154. package/src/workers/extraction.worker.ts +346 -0
  155. package/src/workers/indexing.worker.ts +505 -0
  156. package/tsconfig.json +38 -0
@@ -0,0 +1,255 @@
1
+ -- Test Script: test_hnsw_index.sql
2
+ -- Description: Comprehensive testing suite for HNSW index performance
3
+ -- Related: TASK-005 from BACKLOG.md
4
+ -- Created: 2026-02-02
5
+
-- ============================================================================
-- TEST 1: Verify HNSW Index Creation
-- ============================================================================
-- Looks up idx_memory_embeddings_hnsw on memory_embeddings in pg_indexes and
-- aborts this block with an exception when it is absent.
DO $$
DECLARE
    hnsw_index_present BOOLEAN;
BEGIN
    SELECT EXISTS (
        SELECT 1
        FROM pg_indexes
        WHERE tablename = 'memory_embeddings'
          AND indexname = 'idx_memory_embeddings_hnsw'
    )
    INTO hnsw_index_present;

    IF hnsw_index_present THEN
        RAISE NOTICE 'TEST 1 PASSED: HNSW index exists';
    ELSE
        RAISE EXCEPTION 'HNSW index idx_memory_embeddings_hnsw not found';
    END IF;
END $$;
22
+
-- ============================================================================
-- TEST 2: Verify Index Uses HNSW Access Method
-- ============================================================================
-- Reads the access method name of idx_memory_embeddings_hnsw from
-- pg_class/pg_am and raises an exception unless it is exactly 'hnsw'.
DO $$
DECLARE
    index_method TEXT;
BEGIN
    SELECT am.amname INTO index_method
    FROM pg_class c
    JOIN pg_am am ON c.relam = am.oid
    WHERE c.relname = 'idx_memory_embeddings_hnsw';

    -- IS DISTINCT FROM (rather than !=) so that a missing index, which leaves
    -- index_method NULL, is treated as a failure instead of silently passing.
    IF index_method IS DISTINCT FROM 'hnsw' THEN
        RAISE EXCEPTION 'Index is not using HNSW access method (found: %)',
            COALESCE(index_method, '<index not found>');
    END IF;

    RAISE NOTICE 'TEST 2 PASSED: Index uses HNSW access method';
END $$;
41
+
-- ============================================================================
-- TEST 3: Verify HNSW Parameters (m=16, ef_construction=64)
-- ============================================================================
-- Checks the storage options recorded for idx_memory_embeddings_hnsw.
-- The options are read from pg_class.reloptions (one 'name=value' entry per
-- option) instead of pattern-matching the pg_get_indexdef() text, so the
-- comparison is exact: 'm=160' no longer satisfies the m=16 check, and the
-- result does not depend on how the definition text quotes option values.
-- Mismatches are reported as warnings; PASSED is printed only when both
-- options match.
DO $$
DECLARE
    index_options    TEXT;
    index_reloptions TEXT[];
    params_ok        BOOLEAN := true;
BEGIN
    SELECT pg_get_indexdef(c.oid, 0, true), c.reloptions
    INTO index_options, index_reloptions
    FROM pg_class c
    WHERE c.relname = 'idx_memory_embeddings_hnsw';

    IF NOT FOUND THEN
        RAISE WARNING 'TEST 3 SKIPPED: HNSW index idx_memory_embeddings_hnsw not found';
        RETURN;
    END IF;

    -- reloptions is NULL when the index was created without a WITH clause.
    index_reloptions := COALESCE(index_reloptions, '{}');

    IF NOT ('m=16' = ANY (index_reloptions)) THEN
        params_ok := false;
        RAISE WARNING 'Expected m=16 in index options';
    END IF;

    IF NOT ('ef_construction=64' = ANY (index_reloptions)) THEN
        params_ok := false;
        RAISE WARNING 'Expected ef_construction=64 in index options';
    END IF;

    IF params_ok THEN
        RAISE NOTICE 'TEST 3 PASSED: HNSW parameters configured (m=16, ef_construction=64)';
    ELSE
        RAISE WARNING 'TEST 3 WARNING: HNSW parameters differ from expected (m=16, ef_construction=64)';
    END IF;

    RAISE NOTICE 'Index definition: %', index_options;
END $$;
64
+
-- ============================================================================
-- TEST 4: Verify Query Uses Index Scan
-- ============================================================================
-- Builds a constant 1536-dimension probe vector, EXPLAINs a top-10 cosine
-- distance query against memory_embeddings, and reports whether the plan text
-- mentions an index scan on idx_memory_embeddings_hnsw. A plan without the
-- index only produces a warning (plus the captured plan), never an error.
DO $$
DECLARE
    probe_vector vector(1536) := array_fill(0.1, ARRAY[1536])::vector;
    explain_sql  TEXT;
    plan_line    RECORD;
    plan_text    TEXT := '';
BEGIN
    -- The probe is inlined as a quoted literal because EXPLAIN is run through
    -- dynamic SQL.
    explain_sql := format(
        'EXPLAIN (FORMAT TEXT) SELECT memory_id, 1 - (embedding <=> %L::vector) as similarity FROM memory_embeddings ORDER BY embedding <=> %L::vector LIMIT 10',
        probe_vector::text, probe_vector::text
    );

    -- EXPLAIN returns one row per plan line; stitch them into one text blob.
    FOR plan_line IN EXECUTE explain_sql LOOP
        plan_text := plan_text || plan_line."QUERY PLAN" || E'\n';
    END LOOP;

    IF plan_text LIKE '%Index Scan using idx_memory_embeddings_hnsw%' THEN
        RAISE NOTICE 'TEST 4 PASSED: Query uses HNSW index scan';
    ELSE
        RAISE WARNING 'TEST 4 WARNING: Query may not be using HNSW index';
        RAISE NOTICE 'Explain plan: %', plan_text;
    END IF;
END $$;
92
+
-- ============================================================================
-- TEST 5: Performance Benchmark (<100ms for 10K vectors)
-- ============================================================================
-- This test requires data in the table
-- Run after inserting test data
--
-- run_hnsw_performance_test(num_queries)
--   Runs num_queries top-10 nearest-neighbour queries against
--   memory_embeddings, each with a freshly generated random 1536-dimension
--   vector, and returns one row per query:
--     query_num          1-based index of the query
--     execution_time_ms  wall-clock time of the query in milliseconds
--     results_returned   number of rows the query produced (at most 10)
--     status             'PASS' (<100 ms), 'WARNING' (<200 ms) or 'FAIL'
--   Returns no rows (and raises a notice) when the table is empty.

CREATE OR REPLACE FUNCTION run_hnsw_performance_test(
    num_queries INTEGER DEFAULT 10
)
RETURNS TABLE (
    query_num INTEGER,
    execution_time_ms NUMERIC,
    results_returned INTEGER,
    status TEXT
) AS $$
DECLARE
    i INTEGER;
    start_time TIMESTAMPTZ;
    end_time TIMESTAMPTZ;
    exec_time NUMERIC;
    result_count INTEGER;
    sample_vector vector(1536);
    row_count BIGINT;
BEGIN
    -- Check if table has data
    SELECT COUNT(*) INTO row_count FROM memory_embeddings;

    IF row_count = 0 THEN
        RAISE NOTICE 'WARNING: No data in memory_embeddings table. Skipping performance test.';
        RETURN;
    END IF;

    RAISE NOTICE 'Running % test queries on % embeddings...', num_queries, row_count;

    FOR i IN 1..num_queries LOOP
        -- Generate random test vector
        sample_vector := (
            SELECT array_agg(random()::REAL)::vector
            FROM generate_series(1, 1536)
        );

        -- Measure query execution time; clock_timestamp() advances within a
        -- transaction, unlike now().
        start_time := clock_timestamp();

        SELECT COUNT(*) INTO result_count
        FROM (
            SELECT memory_id
            FROM memory_embeddings
            ORDER BY embedding <=> sample_vector
            LIMIT 10
        ) results;

        end_time := clock_timestamp();

        -- EPOCH yields the total interval length in seconds. MILLISECONDS
        -- would only return the seconds *field* of the interval and wrap
        -- around for anything that takes a minute or longer.
        exec_time := EXTRACT(EPOCH FROM (end_time - start_time)) * 1000;

        RETURN QUERY SELECT
            i AS query_num,
            exec_time AS execution_time_ms,
            result_count AS results_returned,
            CASE
                WHEN exec_time < 100 THEN 'PASS'
                WHEN exec_time < 200 THEN 'WARNING'
                ELSE 'FAIL'
            END AS status;
    END LOOP;

    RETURN;
END;
$$ LANGUAGE plpgsql;
162
+
-- ============================================================================
-- TEST 6: Recall Accuracy Test (~99%)
-- ============================================================================
-- This test compares HNSW approximate results with exact results
--
-- test_hnsw_recall_accuracy(num_samples)
--   For each of num_samples random 1536-dimension vectors, fetches the top-10
--   neighbours twice: once with index scans disabled (exact ordering) and
--   once with the planner's normal settings (expected to use the HNSW index).
--   Returns one row per sample:
--     sample_num         1-based index of the sample
--     recall_percentage  share of exact neighbours also present in the
--                        approximate result, 0-100
--     status             'PASS' (>=99), 'WARNING' (>=95) or 'FAIL'
--   Returns no rows (and raises a notice) when the table is empty.

CREATE OR REPLACE FUNCTION test_hnsw_recall_accuracy(
    num_samples INTEGER DEFAULT 5
)
RETURNS TABLE (
    sample_num INTEGER,
    recall_percentage NUMERIC,
    status TEXT
) AS $$
DECLARE
    i INTEGER;
    sample_vector vector(1536);
    exact_ids UUID[];
    approx_ids UUID[];
    matches INTEGER;
    recall NUMERIC;
    -- Remember the caller's planner setting so it can be put back after the
    -- exact query.
    saved_indexscan TEXT := current_setting('enable_indexscan');
    -- Run through EXECUTE so each execution is planned under the planner
    -- settings in force at that moment rather than reusing a cached plan.
    top_k_sql CONSTANT TEXT :=
        'SELECT array_agg(memory_id ORDER BY distance)
         FROM (
             SELECT memory_id, embedding <=> $1 AS distance
             FROM memory_embeddings
             ORDER BY distance
             LIMIT 10
         ) nearest';
BEGIN
    FOR i IN 1..num_samples LOOP
        -- Generate random test vector
        sample_vector := (
            SELECT array_agg(random()::REAL)::vector
            FROM generate_series(1, 1536)
        );

        -- Get exact results: with index scans disabled the planner cannot
        -- use the HNSW index, so the ordering is computed over every row.
        -- The third argument (true) keeps the change local to the current
        -- transaction.
        PERFORM set_config('enable_indexscan', 'off', true);
        EXECUTE top_k_sql INTO exact_ids USING sample_vector;
        PERFORM set_config('enable_indexscan', saved_indexscan, true);

        IF exact_ids IS NULL THEN
            RAISE NOTICE 'WARNING: No data in memory_embeddings table. Skipping recall test.';
            RETURN;
        END IF;

        -- Get approximate results (HNSW index, normal planner settings)
        EXECUTE top_k_sql INTO approx_ids USING sample_vector;

        -- Calculate recall (percentage of exact results found in approximate results)
        SELECT COUNT(*) INTO matches
        FROM unnest(exact_ids) exact_id
        WHERE exact_id = ANY(approx_ids);

        recall := (matches::NUMERIC / COALESCE(array_length(exact_ids, 1), 1)) * 100;

        RETURN QUERY SELECT
            i AS sample_num,
            recall AS recall_percentage,
            CASE
                WHEN recall >= 99 THEN 'PASS'
                WHEN recall >= 95 THEN 'WARNING'
                ELSE 'FAIL'
            END AS status;
    END LOOP;

    RETURN;
END;
$$ LANGUAGE plpgsql;
229
+
-- ============================================================================
-- Run All Tests
-- ============================================================================
-- Emits the suite banner as four NOTICE messages, one per banner line.
DO $$
DECLARE
    banner_line TEXT;
BEGIN
    FOREACH banner_line IN ARRAY ARRAY[
        '========================================',
        'HNSW Index Test Suite',
        '========================================',
        'Running structural tests...'
    ]
    LOOP
        RAISE NOTICE '%', banner_line;
    END LOOP;
END $$;

-- Tests 1-4 run automatically above

-- Tests 5 and 6 are only defined as functions by this script; the psql \echo
-- lines below print the statements to run them and to inspect search tuning.
\echo ''
\echo 'To run performance tests (requires data):'
\echo 'SELECT * FROM run_hnsw_performance_test(10);'
\echo ''
\echo 'To test recall accuracy:'
\echo 'SELECT * FROM test_hnsw_recall_accuracy(5);'
\echo ''
\echo 'To check current ef_search setting:'
\echo 'SHOW hnsw.ef_search;'
\echo ''
\echo 'To adjust search quality:'
\echo "SELECT set_hnsw_search_quality('balanced');"
@@ -0,0 +1,282 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Git Pre-Commit Hook: Secrets Detection
5
+ *
6
+ * Scans staged files for potential secrets and blocks commits if found.
7
+ * This prevents accidental secret leakage into version control.
8
+ *
9
+ * Installation:
10
+ * cp scripts/pre-commit-secrets .git/hooks/pre-commit
11
+ * chmod +x .git/hooks/pre-commit
12
+ *
13
+ * Or use Husky:
14
+ * npx husky add .husky/pre-commit "node scripts/pre-commit-secrets"
15
+ */
16
+
17
+ import { execSync } from 'child_process';
18
+ import { readFileSync } from 'fs';
19
+ import { resolve } from 'path';
20
+
// ============================================================================
// Secret Detection Patterns
// ============================================================================

/**
 * Detection rules applied to the full text of every scanned file.
 *
 * Each entry has:
 *   - name:        short label shown in the report
 *   - pattern:     regular expression run over the file content
 *   - description: one-line explanation shown in the report
 *
 * Every pattern MUST carry the `g` flag: the scanner feeds them to
 * `String.prototype.matchAll`, which throws a TypeError for non-global
 * regular expressions.
 */
const SECRET_PATTERNS = [
  {
    name: 'Generic API Key',
    pattern: /(?:api[_-]?key|apikey)[=:\s]+['"]?([a-z0-9_-]{20,})/gi,
    description: 'Looks like an API key',
  },
  {
    name: 'Bearer Token',
    pattern: /bearer\s+([a-z0-9_.-]+)/gi,
    description: 'Bearer token detected',
  },
  {
    name: 'AWS Access Key',
    pattern: /AKIA[0-9A-Z]{16}/g,
    description: 'AWS access key detected',
  },
  {
    name: 'AWS Secret Key',
    pattern: /aws[_-]?secret[_-]?access[_-]?key[=:\s]+['"]?([a-z0-9/+=]{40})/gi,
    description: 'AWS secret key detected',
  },
  {
    name: 'Private Key',
    // The optional word before PRIVATE covers RSA as well as EC, DSA,
    // OPENSSH and ENCRYPTED PEM headers.
    pattern: /-----BEGIN\s+(?:[A-Z0-9]+\s+)?PRIVATE\s+KEY-----/gi,
    description: 'Private key detected',
  },
  {
    name: 'Database URL with Credentials',
    pattern: /(?:postgres|mysql|mongodb):\/\/([^:]+):([^@]+)@/gi,
    description: 'Database URL with embedded credentials',
  },
  {
    name: 'Password',
    pattern: /(?:password|passwd|pwd)[=:\s]+['"]?([^\s'"]{8,})/gi,
    description: 'Password detected',
  },
  {
    name: 'JWT Token',
    pattern: /eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+/g,
    description: 'JWT token detected',
  },
  {
    name: 'Generic Secret',
    pattern: /secret[_-]?key[=:\s]+['"]?([a-z0-9_.-]{20,})/gi,
    description: 'Secret key detected',
  },
  {
    name: 'Anthropic API Key',
    pattern: /sk-ant-[a-zA-Z0-9_-]{95,}/g,
    description: 'Anthropic API key detected',
  },
  {
    name: 'OpenAI API Key',
    pattern: /sk-[a-zA-Z0-9]{48,}/g,
    description: 'OpenAI API key detected',
  },
  {
    name: 'Stripe API Key',
    pattern: /sk_(live|test)_[a-zA-Z0-9]{24,}/g,
    description: 'Stripe API key detected',
  },
  {
    name: 'Google API Key',
    pattern: /AIza[0-9A-Za-z_-]{35}/g,
    description: 'Google API key detected',
  },
];
92
+
// ============================================================================
// Excluded Files/Patterns
// ============================================================================

/**
 * Paths matching any of these patterns are skipped by the scanner.
 *
 * Directory patterns are anchored to a path-segment boundary (start of the
 * path or a preceding `/`) so that, for example, `test/` does not also
 * exclude `latest/` or `contest/`, and `build/` does not exclude `rebuild/`.
 */
const EXCLUDED_PATTERNS = [
  // Documentation and examples
  /\.md$/i,
  /\.txt$/i,
  /example/i,
  /sample/i,
  /template/i,
  /\.example$/i,

  // Test files (may contain fake secrets)
  /\.test\.(ts|js)$/i,
  /\.spec\.(ts|js)$/i,
  /(?:^|\/)__tests__\//i,
  /(?:^|\/)tests?\//i,

  // Generated files
  /(?:^|\/)dist\//i,
  /(?:^|\/)build\//i,
  /(?:^|\/)node_modules\//i,

  // Lock files
  /package-lock\.json$/i,
  /yarn\.lock$/i,
  /pnpm-lock\.yaml$/i,

  // This file itself
  /pre-commit-secrets$/i,
];

// Files that should ALWAYS be checked (override exclusions)
const FORCE_CHECK_PATTERNS = [
  /\.env$/i,
  /\.env\.[^.]+$/i,
];
132
+
133
+ // ============================================================================
134
+ // Helper Functions
135
+ // ============================================================================
136
+
/**
 * Decide whether a staged file is skipped by the secret scan.
 *
 * A path matching any FORCE_CHECK_PATTERNS entry is never skipped; otherwise
 * it is skipped when it matches any EXCLUDED_PATTERNS entry.
 *
 * @param {string} filename - Path of the staged file.
 * @returns {boolean} `true` when the file should not be scanned.
 */
function shouldExcludeFile(filename) {
  const matchesFilename = (pattern) => pattern.test(filename);

  // Force-check wins over every exclusion rule.
  const isForceChecked = FORCE_CHECK_PATTERNS.some(matchesFilename);

  return !isForceChecked && EXCLUDED_PATTERNS.some(matchesFilename);
}
149
+
/**
 * Get staged files from git.
 *
 * Lists added, copied and modified paths in the index (`--diff-filter=ACM`).
 * The `-z` flag makes git emit raw, NUL-terminated paths; without it, paths
 * containing unusual characters are printed quoted and escaped and can no
 * longer be opened by name.
 *
 * @returns {string[]} Staged paths; an empty array when the git command
 *   fails (the error message is written to stderr).
 */
function getStagedFiles() {
  try {
    const output = execSync('git diff --cached --name-only --diff-filter=ACM -z', {
      encoding: 'utf-8',
    });
    return output.split('\0').filter(Boolean);
  } catch (error) {
    console.error('Error getting staged files:', error.message);
    return [];
  }
}
164
+
/**
 * Scan a file for secrets.
 *
 * Reads the file from disk (resolved against the current working directory)
 * and runs every SECRET_PATTERNS entry over its full text.
 *
 * @param {string} filename - Path of the file to scan.
 * @returns {Array<{file: string, line: number, type: string, description: string, content: string, matched: string}>}
 *   One finding per match. `line` is 1-based, `content` is the trimmed source
 *   line cut to 80 characters and `matched` is the matched text cut to 40
 *   characters. Returns an empty array (after a console warning) when the
 *   file cannot be read.
 */
function scanFile(filename) {
  const findings = [];

  // Read file content
  let content;
  try {
    content = readFileSync(resolve(process.cwd(), filename), 'utf-8');
  } catch (error) {
    console.warn(`Warning: Could not read ${filename}: ${error.message}`);
    return findings;
  }

  // Split once up front instead of once per match.
  const lines = content.split('\n');

  // Scan with each pattern
  for (const { name, pattern, description } of SECRET_PATTERNS) {
    // matchAll() throws a TypeError for non-global regular expressions, so a
    // pattern declared without `g` is upgraded here rather than crashing the
    // whole scan.
    const globalPattern = pattern.global
      ? pattern
      : new RegExp(pattern.source, `${pattern.flags}g`);

    for (const match of content.matchAll(globalPattern)) {
      // 1-based line number of the first character of the match
      const lineNumber = content.slice(0, match.index).split('\n').length;

      // Line content (truncated)
      const lineContent = lines[lineNumber - 1].trim().slice(0, 80);

      const matchedText = match[0];

      findings.push({
        file: filename,
        line: lineNumber,
        type: name,
        description,
        content: lineContent,
        matched: matchedText.slice(0, 40) + (matchedText.length > 40 ? '...' : ''),
      });
    }
  }

  return findings;
}
206
+
/**
 * Print findings in a readable format.
 *
 * Writes, via console.log: a "commit blocked" banner, the findings grouped
 * per file (line number, rule name, description and the offending line),
 * a list of recommendations, and the affected files as suggested .gitignore
 * entries.
 *
 * @param {Array<{file: string, line: number, type: string, description: string, content: string}>} findings
 *   Findings to report.
 * @returns {void}
 */
function printFindings(findings) {
  // Emoji are written as escapes so the messages survive re-encoding:
  // \u274C cross mark, \u{1F4C4} page, \u{1F512} lock, \u{1F4A1} light bulb.
  console.log('\n\u274C COMMIT BLOCKED: Potential secrets detected!\n');

  // A Map (not a plain object) keeps file names such as "constructor" or
  // "__proto__" from colliding with inherited object properties.
  const groupedByFile = new Map();
  for (const finding of findings) {
    if (!groupedByFile.has(finding.file)) {
      groupedByFile.set(finding.file, []);
    }
    groupedByFile.get(finding.file).push(finding);
  }

  for (const [file, fileFindings] of groupedByFile) {
    console.log(`\u{1F4C4} ${file}:`);
    for (const finding of fileFindings) {
      console.log(` Line ${finding.line}: ${finding.type}`);
      console.log(` ${finding.description}`);
      console.log(` "${finding.content}"`);
      console.log('');
    }
  }

  console.log('\u{1F512} Security Recommendations:\n');
  console.log('1. Remove the secret from your code');
  console.log('2. Add the file to .gitignore if it contains secrets');
  console.log('3. Use environment variables instead (see .env.example)');
  console.log('4. If this is a false positive, add the pattern to the exclusion list\n');

  console.log('\u{1F4A1} Suggested .gitignore entries:\n');
  for (const file of groupedByFile.keys()) {
    console.log(` ${file}`);
  }
  console.log('');
}
244
+
// ============================================================================
// Main
// ============================================================================

/**
 * Hook entry point.
 *
 * Collects the staged files, drops the excluded ones, scans the rest and
 * terminates the process: exit code 0 when nothing is staged or no secret is
 * found, exit code 1 (after printing the findings) when at least one
 * potential secret is detected.
 *
 * @returns {void} Never returns normally; always ends in process.exit().
 */
function main() {
  // Emoji are written as escapes so the messages survive re-encoding:
  // \u{1F50D} magnifying glass, \u2705 check mark, \u274C cross mark.
  console.log('\u{1F50D} Scanning staged files for secrets...\n');

  const stagedFiles = getStagedFiles();

  if (stagedFiles.length === 0) {
    console.log('\u2705 No files to scan\n');
    process.exit(0);
  }

  // Filter out excluded files
  const filesToScan = stagedFiles.filter((file) => !shouldExcludeFile(file));

  console.log(`Scanning ${filesToScan.length} of ${stagedFiles.length} staged files...\n`);

  // Scan each file and collect every finding into one flat list
  const allFindings = filesToScan.flatMap((file) => scanFile(file));

  if (allFindings.length > 0) {
    printFindings(allFindings);
    console.log('\u274C Commit rejected due to detected secrets\n');
    process.exit(1);
  }

  console.log('\u2705 No secrets detected. Proceeding with commit.\n');
  process.exit(0);
}
280
+
281
+ // Run the hook
282
+ main();
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * Extraction Worker Runner
4
+ *
5
+ * Starts the extraction worker to process documents from the queue.
6
+ * Run with: npx tsx scripts/run-extraction-worker.ts
7
+ */
8
+
9
+ import { createExtractionWorker } from '../src/workers/extraction.worker.js';
10
+ import IORedis from 'ioredis';
11
+ import dotenv from 'dotenv';
12
+
// Load environment variables
dotenv.config();

// Redis connection (host/port come from REDIS_HOST / REDIS_PORT, defaulting
// to localhost:6379)
const connection = new IORedis({
  host: process.env.REDIS_HOST || 'localhost',
  port: parseInt(process.env.REDIS_PORT || '6379', 10),
  maxRetriesPerRequest: null,
  enableReadyCheck: false,
});

console.log('[ExtractionWorker] Starting worker...');
console.log(`[ExtractionWorker] Redis: ${connection.options.host}:${connection.options.port}`);
console.log(
  `[ExtractionWorker] Concurrency: ${process.env.BULLMQ_CONCURRENCY_EXTRACTION || 5}`
);

// Create and start worker
const worker = createExtractionWorker(connection);

// Set once the first termination signal arrives, so a second SIGINT/SIGTERM
// does not start a concurrent close of the same worker and connection.
let isShuttingDown = false;

/**
 * Handle shutdown: close the worker, then the Redis connection, then exit.
 *
 * Exits with code 0 after a clean shutdown and with code 1 when closing
 * either resource fails, so a rejected close()/quit() can no longer surface
 * as an unhandled rejection that leaves the exit status undefined.
 */
const shutdown = async (): Promise<void> => {
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;

  console.log('\n[ExtractionWorker] Shutting down gracefully...');

  let exitCode = 0;
  try {
    await worker.close();
    await connection.quit();
    console.log('[ExtractionWorker] Shutdown complete');
  } catch (error) {
    exitCode = 1;
    console.error('[ExtractionWorker] Error during shutdown:', error);
  } finally {
    process.exit(exitCode);
  }
};

process.on('SIGTERM', () => {
  void shutdown();
});
process.on('SIGINT', () => {
  void shutdown();
});

console.log('[ExtractionWorker] Worker started successfully. Waiting for jobs...');
console.log('[ExtractionWorker] Press Ctrl+C to stop');