@unrdf/dark-matter 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,426 @@
1
+ /**
2
+ * @file Dark Matter 80/20 Query Optimizer
3
+ * @module dark-matter/optimizer
4
+ *
5
+ * @description
6
+ * Implements query rewrite rules to optimize SPARQL queries based on
7
+ * 80/20 principles: focus on the most impactful optimizations.
8
+ */
9
+
10
+ import { z } from 'zod';
11
+
/**
 * Shape of one entry in the per-rule report: which rule ran, whether it
 * changed the query, and its declared impact/description.
 */
const RuleReportSchema = z.object({
  name: z.string(),
  applied: z.boolean(),
  impact: z.enum(['high', 'medium', 'low']),
  description: z.string(),
});

/**
 * Estimated cost before/after optimization plus the relative gain in percent.
 */
const ImprovementEstimateSchema = z.object({
  before: z.number(),
  after: z.number(),
  percentageGain: z.number(),
});

/**
 * Schema for the object returned by the optimizer: the original and
 * rewritten query text, the per-rule report, the cost estimate and a
 * millisecond timestamp.
 */
const OptimizationResultSchema = z.object({
  original: z.string(),
  optimized: z.string(),
  rules: z.array(RuleReportSchema),
  estimatedImprovement: ImprovementEstimateSchema,
  timestamp: z.number(),
});
33
+
/**
 * Query Optimizer for Dark Matter 80/20.
 *
 * Applies a fixed list of text-level rewrite rules to a SPARQL query string
 * and reports which rules changed the query together with a heuristic cost
 * estimate. The rules work on the raw text (no SPARQL parser), so every
 * rewrite is deliberately conservative: when a rule cannot tell that a
 * rewrite is safe it leaves the query untouched.
 */
export class DarkMatterOptimizer {
  /**
   * Create a new optimizer.
   *
   * Each `enable*` flag is on unless it is explicitly `false`. Unknown keys
   * in `config` are kept on `this.config` unchanged.
   *
   * @param {Object} [config] - Configuration
   * @param {boolean} [config.enableFilterPushdown=true]
   * @param {boolean} [config.enableJoinReordering=true]
   * @param {boolean} [config.enableOptionalOptimization=true]
   * @param {boolean} [config.enableUnionOptimization=true]
   * @param {boolean} [config.aggressiveOptimization=false]
   */
  constructor(config = {}) {
    const options = config || {};

    // Spread first so the normalized flags below win; otherwise a key that
    // is present but `undefined` would overwrite its default and silently
    // disable the rule.
    this.config = {
      ...options,
      enableFilterPushdown: options.enableFilterPushdown !== false,
      enableJoinReordering: options.enableJoinReordering !== false,
      enableOptionalOptimization: options.enableOptionalOptimization !== false,
      enableUnionOptimization: options.enableUnionOptimization !== false,
      aggressiveOptimization: options.aggressiveOptimization || false,
    };

    this.rules = this._initializeRules();
    this.stats = {
      totalOptimizations: 0,
      rulesApplied: new Map(),
    };
  }

  /**
   * Initialize optimization rules.
   *
   * The `enabled` flag of each rule is captured from `this.config` at
   * construction time.
   *
   * @returns {Array} Optimization rules, in the order they are applied
   * @private
   */
  _initializeRules() {
    return [
      {
        name: 'filter-pushdown',
        enabled: this.config.enableFilterPushdown,
        impact: 'high',
        description: 'Push filters down to reduce intermediate results',
        apply: query => this._applyFilterPushdown(query),
      },
      {
        name: 'join-reordering',
        enabled: this.config.enableJoinReordering,
        impact: 'high',
        description: 'Reorder joins to minimize intermediate results',
        apply: query => this._applyJoinReordering(query),
      },
      {
        name: 'optional-to-filter',
        enabled: this.config.enableOptionalOptimization,
        impact: 'medium',
        description: 'Replace OPTIONAL with FILTER EXISTS when possible',
        apply: query => this._applyOptionalOptimization(query),
      },
      {
        name: 'union-optimization',
        enabled: this.config.enableUnionOptimization,
        impact: 'medium',
        description: 'Optimize UNION clauses',
        apply: query => this._applyUnionOptimization(query),
      },
      {
        name: 'limit-early',
        enabled: true,
        impact: 'high',
        description: 'Add LIMIT early for queries without aggregation',
        apply: query => this._applyEarlyLimit(query),
      },
      {
        name: 'index-hints',
        enabled: true,
        impact: 'medium',
        description: 'Add index hints for common patterns',
        apply: query => this._applyIndexHints(query),
      },
    ];
  }

  /**
   * Optimize a SPARQL query.
   *
   * Runs every enabled rule in order, feeding each rule the output of the
   * previous one. A rule that throws is logged with `console.warn`, leaves
   * the query unchanged and is reported with `applied: false`.
   *
   * @param {string} query - SPARQL query
   * @param {Object} [_analysis] - Optional query analysis (currently unused)
   * @returns {Object} Optimization result validated by OptimizationResultSchema
   * @throws {TypeError} If `query` is not a string
   */
  optimize(query, _analysis = null) {
    if (typeof query !== 'string') {
      throw new TypeError('optimize: query must be a string');
    }

    const costBefore = this._estimateCost(query);
    const appliedRules = [];
    let optimized = query;

    for (const rule of this.rules) {
      if (!rule.enabled) continue;

      let applied = false;

      try {
        const result = rule.apply(optimized);

        if (result.modified) {
          optimized = result.query;
          applied = true;

          const count = this.stats.rulesApplied.get(rule.name) || 0;
          this.stats.rulesApplied.set(rule.name, count + 1);
        }
      } catch (error) {
        // Best effort: one failing rule must not abort the whole run.
        console.warn(`Rule ${rule.name} failed:`, error.message);
      }

      appliedRules.push({
        name: rule.name,
        applied,
        impact: rule.impact,
        description: rule.description,
      });
    }

    const costAfter = this._estimateCost(optimized);
    const percentageGain = costBefore > 0 ? ((costBefore - costAfter) / costBefore) * 100 : 0;

    this.stats.totalOptimizations++;

    return OptimizationResultSchema.parse({
      original: query,
      optimized,
      rules: appliedRules,
      estimatedImprovement: {
        before: costBefore,
        after: costAfter,
        percentageGain,
      },
      timestamp: Date.now(),
    });
  }

  /**
   * Return the index of the closing quote for the quoted string that opens
   * at `start`, honouring backslash escapes.
   *
   * @param {string} text - Text being scanned
   * @param {number} start - Index of the opening quote character
   * @returns {number} Index of the closing quote, or `text.length` if unterminated
   * @private
   */
  _skipQuoted(text, start) {
    const quote = text[start];

    for (let i = start + 1; i < text.length; i++) {
      if (text[i] === '\\') {
        i++; // skip the escaped character
      } else if (text[i] === quote) {
        return i;
      }
    }

    return text.length;
  }

  /**
   * Find the `)` that balances the `(` at `openIndex`, ignoring parentheses
   * inside quoted strings.
   *
   * @param {string} text - Text being scanned
   * @param {number} openIndex - Index of the opening parenthesis
   * @returns {number} Index of the matching `)`, or -1 if unbalanced
   * @private
   */
  _findClosingParen(text, openIndex) {
    let depth = 0;

    for (let i = openIndex; i < text.length; i++) {
      const ch = text[i];

      if (ch === '"' || ch === "'") {
        i = this._skipQuoted(text, i);
      } else if (ch === '(') {
        depth++;
      } else if (ch === ')') {
        depth--;
        if (depth === 0) return i;
      }
    }

    return -1;
  }

  /**
   * Locate every `FILTER( ... )` expression, matching parentheses properly
   * so nested calls such as `FILTER(regex(?x, "a"))` are captured whole.
   *
   * @param {string} query - Query
   * @returns {Array<{start: number, end: number, text: string}>|null}
   *   Filter spans in source order, or `null` if a filter is unbalanced
   * @private
   */
  _findFilterExpressions(query) {
    // The lookbehind keeps `?filter(`, `$filter(` and `ex:filter(` from
    // being mistaken for the keyword.
    const keyword = /(?<![?$\w:])FILTER\s*\(/gi;
    const found = [];
    let match;

    while ((match = keyword.exec(query)) !== null) {
      const open = keyword.lastIndex - 1;
      const close = this._findClosingParen(query, open);

      if (close === -1) return null;

      found.push({
        start: match.index,
        end: close + 1,
        text: query.slice(match.index, close + 1),
      });
      keyword.lastIndex = close + 1;
    }

    return found;
  }

  /**
   * Apply filter pushdown optimization.
   *
   * Moves FILTER expressions that follow the first OPTIONAL, and sit in the
   * same group as that OPTIONAL keyword, to just before it. Filters that
   * already precede the OPTIONAL, or that live inside nested braces (for
   * example inside the OPTIONAL block itself), are left alone because
   * moving them would either be a no-op or change the query's meaning.
   *
   * Limitation: group boundaries are found by counting `{`/`}` in the raw
   * text, so braces inside string literals or comments are not recognised.
   *
   * @param {string} query - Query
   * @returns {{query: string, modified: boolean}} Result
   * @private
   */
  _applyFilterPushdown(query) {
    const unchanged = { query, modified: false };

    const filters = this._findFilterExpressions(query);
    if (filters === null || filters.length === 0) return unchanged;

    const optionalMatch = /(?<![?$\w:])OPTIONAL\b/i.exec(query);
    if (!optionalMatch) return unchanged;

    const optionalIndex = optionalMatch.index;
    const filterByStart = new Map(filters.map(f => [f.start, f]));
    const movable = [];

    // Walk forward from OPTIONAL tracking brace depth relative to the group
    // that contains it; stop as soon as that group closes.
    let depth = 0;
    for (let i = optionalIndex; i < query.length; i++) {
      const ch = query[i];

      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth < 0) break;
      } else if (depth === 0 && filterByStart.has(i)) {
        movable.push(filterByStart.get(i));
      }
    }

    if (movable.length === 0) return unchanged;

    // Cut the filters out back-to-front so earlier offsets stay valid. All
    // of them lie after `optionalIndex`, so that index is still correct.
    let rest = query;
    for (let i = movable.length - 1; i >= 0; i--) {
      rest = rest.slice(0, movable[i].start) + rest.slice(movable[i].end);
    }

    // Plain concatenation (not String.replace) so `$` sequences inside a
    // filter are never interpreted as replacement patterns.
    const hoisted = movable.map(f => f.text).join('\n ');
    const optimized = `${rest.slice(0, optionalIndex)}${hoisted}\n ${rest.slice(optionalIndex)}`;

    return { query: optimized, modified: true };
  }

  /**
   * Apply join reordering optimization.
   *
   * Sorts the simple triple patterns of a flat WHERE body so that patterns
   * with more concrete terms come first. The rule backs off when the WHERE
   * body contains nested groups or BIND, because reordering text across
   * those constructs can change the query's meaning.
   *
   * Only triples whose terms are variables, bare words, `<IRI>`s or
   * double-quoted literals and that end with `.` are recognised; everything
   * else stays where it is, after the reordered triples.
   *
   * @param {string} query - Query
   * @returns {{query: string, modified: boolean}} Result
   * @private
   */
  _applyJoinReordering(query) {
    const unchanged = { query, modified: false };

    const whereMatch = /WHERE\s*\{([^}]+)\}/i.exec(query);
    if (!whereMatch) return unchanged;

    const whereClause = whereMatch[1];

    // `[^}]+` stops at the first `}`; if the captured text contains `{` the
    // body has nested groups and the capture is only a fragment of it.
    if (whereClause.includes('{')) return unchanged;

    // BIND is position-sensitive, so triples must not move across it.
    if (/(?<![?$\w:])BIND\s*\(/i.test(whereClause)) return unchanged;

    // The capture is the text between the opening `{` and the final `}` of
    // the match, which fixes its offset inside `query`.
    const bodyStart = whereMatch.index + whereMatch[0].length - 1 - whereClause.length;

    const triplePattern = /(\??\w+|<[^>]+>)\s+(\??\w+|<[^>]+>)\s+(\??\w+|<[^>]+>|"[^"]*")\s*\./g;
    const triples = [];
    let match;

    while ((match = triplePattern.exec(whereClause)) !== null) {
      const [fullMatch, subject, predicate, object] = match;

      // Selectivity score: lower = more selective.
      let selectivity = 100;
      if (!subject.startsWith('?')) selectivity -= 40;
      if (!predicate.startsWith('?')) selectivity -= 30;
      if (!object.startsWith('?')) selectivity -= 20;

      triples.push({
        pattern: fullMatch,
        index: match.index,
        subject,
        predicate,
        object,
        selectivity,
      });
    }

    if (triples.length <= 1) return unchanged;

    // Array.prototype.sort is stable, so equally selective triples keep
    // their original relative order.
    const sorted = [...triples].sort((a, b) => a.selectivity - b.selectivity);

    const orderChanged = !sorted.every((t, i) => t.pattern === triples[i].pattern);
    if (!orderChanged) return unchanged;

    // Remove the matched triples by position, back-to-front.
    let remainder = whereClause;
    for (let i = triples.length - 1; i >= 0; i--) {
      const { index, pattern } = triples[i];
      remainder = remainder.slice(0, index) + remainder.slice(index + pattern.length);
    }

    const newTriples = sorted.map(t => ` ${t.pattern}`).join('\n');
    const optimizedWhere = `${newTriples}\n${remainder}`;

    // Splice by offset instead of String.replace so `$` sequences in
    // literals are copied verbatim.
    const optimized =
      query.slice(0, bodyStart) + optimizedWhere + query.slice(bodyStart + whereClause.length);

    return { query: optimized, modified: true };
  }

  /**
   * Apply OPTIONAL optimization.
   *
   * Placeholder: replacing OPTIONAL with FILTER EXISTS needs real query
   * analysis, so this rule currently never modifies the query.
   *
   * @param {string} query - Query
   * @returns {{query: string, modified: boolean}} Result
   * @private
   */
  _applyOptionalOptimization(query) {
    return { query, modified: false };
  }

  /**
   * Apply UNION optimization.
   *
   * Placeholder for rewriting UNION into property paths or VALUES; this
   * rule currently never modifies the query.
   *
   * @param {string} query - Query
   * @returns {{query: string, modified: boolean}} Result
   * @private
   */
  _applyUnionOptimization(query) {
    return { query, modified: false };
  }

  /**
   * Apply early LIMIT optimization.
   *
   * Appends `LIMIT 1000` when the query has no LIMIT clause and uses no
   * aggregate function or GROUP BY. Keywords are matched case-insensitively
   * so a lowercase `limit 10` does not get a second LIMIT appended.
   *
   * @param {string} query - Query
   * @returns {{query: string, modified: boolean}} Result
   * @private
   */
  _applyEarlyLimit(query) {
    const hasLimit = /(?<![?$\w:])LIMIT\s+\d+/i.test(query);
    const hasAggregate =
      /(?<![?$\w:])(?:COUNT|SUM|AVG|MIN|MAX|SAMPLE|GROUP_CONCAT)\s*\(/i.test(query) ||
      /(?<![?$\w:])GROUP\s+BY\b/i.test(query);

    if (hasLimit || hasAggregate) {
      return { query, modified: false };
    }

    // Add LIMIT 1000 as a safety measure.
    return { query: `${query.trim()}\nLIMIT 1000`, modified: true };
  }

  /**
   * Apply index hints.
   *
   * Placeholder: index hints are engine-specific, so this rule currently
   * never modifies the query.
   *
   * @param {string} query - Query
   * @returns {{query: string, modified: boolean}} Result
   * @private
   */
  _applyIndexHints(query) {
    return { query, modified: false };
  }

  /**
   * Estimate query cost.
   *
   * A purely textual heuristic: a base cost plus fixed weights per variable
   * occurrence, FILTER, OPTIONAL, UNION, aggregate name, ORDER BY and
   * DISTINCT. All keywords are matched case-insensitively.
   *
   * @param {string} query - Query
   * @returns {number} Estimated cost
   * @private
   */
  _estimateCost(query) {
    const occurrences = pattern => (query.match(pattern) || []).length;

    let cost = 10; // Base cost

    cost += occurrences(/\?[a-zA-Z0-9]+\s+/g) * 5; // variable occurrences
    cost += occurrences(/FILTER/gi) * 10;
    cost += occurrences(/OPTIONAL/gi) * 20;
    cost += occurrences(/UNION/gi) * 30;
    cost += occurrences(/(COUNT|SUM|AVG|MIN|MAX)/gi) * 15;

    if (/\bORDER\s+BY\b/i.test(query)) cost += 10;
    if (/\bDISTINCT\b/i.test(query)) cost += 8;

    return cost;
  }

  /**
   * Get optimizer statistics.
   *
   * @returns {{totalOptimizations: number, rulesApplied: Object}} Number of
   *   `optimize` calls and, per rule name, how often it modified a query
   */
  getStats() {
    return {
      totalOptimizations: this.stats.totalOptimizations,
      rulesApplied: Object.fromEntries(this.stats.rulesApplied),
    };
  }

  /**
   * Reset statistics to their initial, empty state.
   */
  resetStats() {
    this.stats = {
      totalOptimizations: 0,
      rulesApplied: new Map(),
    };
  }
}
416
+
/**
 * Factory wrapper around the DarkMatterOptimizer constructor.
 *
 * @param {Object} [config] - Configuration forwarded unchanged to the constructor
 * @returns {DarkMatterOptimizer} A new optimizer instance
 */
export function createDarkMatterOptimizer(config = {}) {
  const optimizer = new DarkMatterOptimizer(config);
  return optimizer;
}

export default DarkMatterOptimizer;
@@ -0,0 +1,242 @@
1
+ /**
2
+ * @file Performance Metrics - Query execution tracking
3
+ * @module @unrdf/dark-matter/performance-metrics
4
+ */
5
+
6
+ import { z } from 'zod';
7
+
/**
 * Field validators for one recorded query execution.
 */
const queryRecordShape = {
  query: z.string(),
  executionTime: z.number(),
  resultCount: z.number(),
  timestamp: z.number(),
};

/**
 * Query record schema: the query text, how long it ran, how many results it
 * produced and when it was recorded.
 */
const QueryRecordSchema = z.object(queryRecordShape);
17
+
/**
 * Create metrics collector.
 *
 * The collector keeps every recorded query in memory, together with a
 * frequency count keyed by the normalized query text, until `clearMetrics`
 * is called.
 *
 * @returns {Object} Metrics collector instance
 *
 * @example
 * const metrics = createMetricsCollector();
 * metrics.recordQuery(query, 150, 42);
 * const stats = metrics.analyzePerformance();
 */
export function createMetricsCollector() {
  const queries = [];
  const queryFrequency = new Map();

  // NaN and Infinity are rejected here: they are of type 'number' and are
  // not `< 0`, so a plain range check lets them through and they then
  // either fail later in schema validation or poison the averages.
  const isNonNegativeFinite = value =>
    typeof value === 'number' && Number.isFinite(value) && value >= 0;

  return {
    /**
     * Record a query execution
     * @param {string} query - SPARQL query
     * @param {number} executionTime - Time in milliseconds (finite, >= 0)
     * @param {number} resultCount - Number of results (finite, >= 0)
     * @throws {TypeError} If any argument is invalid
     */
    recordQuery(query, executionTime, resultCount) {
      if (typeof query !== 'string') {
        throw new TypeError('recordQuery: query must be a string');
      }

      if (!isNonNegativeFinite(executionTime)) {
        throw new TypeError('recordQuery: executionTime must be a non-negative number');
      }

      if (!isNonNegativeFinite(resultCount)) {
        throw new TypeError('recordQuery: resultCount must be a non-negative number');
      }

      const record = {
        query,
        executionTime,
        resultCount,
        timestamp: Date.now(),
      };

      QueryRecordSchema.parse(record);
      queries.push(record);

      // Track frequency under the normalized text so formatting variants of
      // the same query are counted together.
      const normalized = normalizeQuery(query);
      queryFrequency.set(normalized, (queryFrequency.get(normalized) || 0) + 1);
    },

    /**
     * Analyze performance statistics
     * @param {Store} [_store] - Optional store reference (currently unused)
     * @returns {Object} Total count, average time (rounded to 2 decimals),
     *   the 5 slowest executions and the 5 most frequent normalized queries;
     *   query text in both lists is truncated to 100 characters
     */
    analyzePerformance(_store = null) {
      if (queries.length === 0) {
        return {
          totalQueries: 0,
          averageExecutionTime: 0,
          slowestQueries: [],
          mostExecutedQueries: [],
        };
      }

      const totalTime = queries.reduce((sum, q) => sum + q.executionTime, 0);
      const averageExecutionTime = totalTime / queries.length;

      // Sort a copy so the recorded order is preserved.
      const slowestQueries = [...queries]
        .sort((a, b) => b.executionTime - a.executionTime)
        .slice(0, 5)
        .map(q => ({
          query: q.query.substring(0, 100),
          executionTime: q.executionTime,
          resultCount: q.resultCount,
        }));

      const mostExecutedQueries = Array.from(queryFrequency.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, 5)
        .map(([query, count]) => ({
          query: query.substring(0, 100),
          executionCount: count,
        }));

      return {
        totalQueries: queries.length,
        averageExecutionTime: Math.round(averageExecutionTime * 100) / 100,
        slowestQueries,
        mostExecutedQueries,
      };
    },

    /**
     * Get all collected metrics
     * @returns {Array<Object>} Copies of all query records, in recording
     *   order; mutating them does not affect the collector
     */
    getMetrics() {
      return queries.map(record => ({ ...record }));
    },

    /**
     * Clear all metrics
     */
    clearMetrics() {
      queries.length = 0;
      queryFrequency.clear();
    },
  };
}
128
+
/**
 * Normalize query for frequency tracking.
 *
 * Collapses runs of whitespace and upper-cases the text so that formatting
 * and keyword-case variants of the same query share one key. Quoted string
 * literals and `<IRI>` references are copied verbatim, because their case
 * and spacing are significant: `"abc"` and `"ABC"` are different queries.
 *
 * Known limitation: variable names and prefixed names are still
 * upper-cased, so queries differing only in their case share a key.
 *
 * @param {string} query - SPARQL query
 * @returns {string} Normalized query
 */
function normalizeQuery(query) {
  // Double- or single-quoted literal (with backslash escapes), or an IRI
  // reference without whitespace (so `?a < 5` is not mistaken for one).
  const verbatim = /"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|<[^<>\s]*>/g;
  const fold = text => text.replace(/\s+/g, ' ').toUpperCase();

  let normalized = '';
  let cursor = 0;
  let match;

  while ((match = verbatim.exec(query)) !== null) {
    normalized += fold(query.slice(cursor, match.index)) + match[0];
    cursor = match.index + match[0].length;
  }

  normalized += fold(query.slice(cursor));

  return normalized.trim();
}
138
+
/**
 * Record query execution with timing.
 *
 * Builds a timestamped record from the arguments and validates it against
 * QueryRecordSchema. The record is returned to the caller; it is not stored
 * anywhere by this function.
 *
 * @param {string} query - SPARQL query
 * @param {number} executionTime - Execution time in ms (finite, >= 0)
 * @param {number} resultCount - Number of results (finite, >= 0)
 * @returns {Object} Query record
 *
 * @throws {TypeError} If parameters are invalid
 *
 * @example
 * const record = recordQuery(query, 150, 42);
 * console.log('Recorded:', record.timestamp);
 */
export function recordQuery(query, executionTime, resultCount) {
  if (typeof query !== 'string') {
    throw new TypeError('recordQuery: query must be a string');
  }

  // Number.isFinite rejects NaN and +/-Infinity, which a bare `< 0` check
  // lets through (NaN compares false against everything), so invalid
  // numbers fail here with the documented TypeError.
  if (typeof executionTime !== 'number' || !Number.isFinite(executionTime) || executionTime < 0) {
    throw new TypeError('recordQuery: executionTime must be a non-negative number');
  }

  if (typeof resultCount !== 'number' || !Number.isFinite(resultCount) || resultCount < 0) {
    throw new TypeError('recordQuery: resultCount must be a non-negative number');
  }

  const record = {
    query,
    executionTime,
    resultCount,
    timestamp: Date.now(),
  };

  return QueryRecordSchema.parse(record);
}
174
+
/**
 * Analyze performance from store.
 *
 * Reads the store's `size` (treated as 0 when falsy) and derives a rough
 * memory estimate and size-based recommendations from it.
 *
 * @param {Store} store - RDF store; must expose a `getQuads` function
 * @returns {Object} `{ quadCount, estimatedMemoryUsage, recommendations }`
 *
 * @throws {TypeError} If store is invalid
 *
 * @example
 * const analysis = analyzePerformance(store);
 * console.log('Quads:', analysis.quadCount);
 */
export function analyzePerformance(store) {
  const looksLikeStore = Boolean(store) && typeof store.getQuads === 'function';

  if (!looksLikeStore) {
    throw new TypeError('analyzePerformance: store must be a valid Store instance');
  }

  const quadCount = store.size || 0;

  // Rough estimate: 200 bytes per quad.
  const estimatedMemoryUsage = quadCount * 200;
  const recommendations = generateRecommendations(quadCount);

  return { quadCount, estimatedMemoryUsage, recommendations };
}
200
+
/**
 * Generate performance recommendations.
 *
 * Each recommendation is emitted when the quad count is strictly greater
 * than its threshold; the result keeps the order of the table below.
 *
 * @param {number} quadCount - Number of quads in store
 * @returns {Array<Object>} Recommendations (`type`, `priority`, `description`)
 */
function generateRecommendations(quadCount) {
  const thresholds = [
    {
      moreThan: 100000,
      advice: {
        type: 'indexing',
        priority: 'high',
        description: 'Large store detected - consider adding indexes for common predicates',
      },
    },
    {
      moreThan: 1000000,
      advice: {
        type: 'partitioning',
        priority: 'medium',
        description: 'Very large store - consider partitioning by graph or subject',
      },
    },
  ];

  return thresholds
    .filter(({ moreThan }) => quadCount > moreThan)
    .map(({ advice }) => ({ ...advice }));
}
227
+
/**
 * Get performance metrics.
 *
 * Currently returns a fixed, zeroed snapshot: this function does not read
 * from any collector, so the values never change between calls.
 *
 * @returns {Object} Snapshot with `totalQueries`, `averageExecutionTime`
 *   and `slowestQueries`
 *
 * @example
 * const metrics = getMetrics();
 * console.log('Total queries:', metrics.totalQueries);
 */
export function getMetrics() {
  const totalQueries = 0;
  const averageExecutionTime = 0;
  const slowestQueries = [];

  return { totalQueries, averageExecutionTime, slowestQueries };
}