@unrdf/dark-matter 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/package.json +59 -0
- package/src/dark-matter/critical-path.mjs +367 -0
- package/src/dark-matter/index-advisor.mjs +242 -0
- package/src/dark-matter/index.mjs +244 -0
- package/src/dark-matter/optimizer.mjs +426 -0
- package/src/dark-matter/performance-metrics.mjs +242 -0
- package/src/dark-matter/query-analyzer.mjs +442 -0
- package/src/dark-matter/query-optimizer.mjs +283 -0
- package/src/dark-matter-core.mjs +743 -0
- package/src/index.mjs +60 -0
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Dark Matter 80/20 Query Analyzer
|
|
3
|
+
* @module dark-matter/query-analyzer
|
|
4
|
+
*
|
|
5
|
+
* @description
|
|
6
|
+
* Analyzes SPARQL queries to extract patterns, calculate complexity scores,
|
|
7
|
+
* and identify expensive operations for 80/20 optimization.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { analyzeSPARQLQuery, extractVariables } from '../../utils/sparql-utils.mjs';
|
|
11
|
+
import { z } from 'zod';
|
|
12
|
+
|
|
13
|
+
/**
 * Schema for query analysis result.
 *
 * Validates the object assembled by `QueryAnalyzer#analyze`. Keys that are not
 * declared here are removed by zod's default object parsing, which is why
 * `metadata` is declared explicitly: without it, caller-supplied metadata
 * never reaches the returned analysis.
 */
const QueryAnalysisSchema = z.object({
  queryId: z.string(),
  query: z.string(),
  type: z.enum(['SELECT', 'ASK', 'CONSTRUCT', 'DESCRIBE', 'UNKNOWN']),
  // Triple patterns found in the WHERE clause, each with its own cost estimate.
  patterns: z.array(
    z.object({
      type: z.string(),
      subject: z.string().optional(),
      predicate: z.string().optional(),
      object: z.string().optional(),
      complexity: z.number(),
    })
  ),
  // Raw FILTER expressions (text between the outer parentheses).
  filters: z.array(z.string()),
  joins: z.array(
    z.object({
      type: z.string(),
      variables: z.array(z.string()),
      estimatedCost: z.number(),
    })
  ),
  // Upper-cased aggregate function names, one entry per call.
  aggregations: z.array(z.string()),
  complexity: z.object({
    score: z.number(),
    patternCount: z.number(),
    filterCount: z.number(),
    joinCount: z.number(),
    aggregationCount: z.number(),
    variableCount: z.number(),
    estimatedRows: z.number(),
  }),
  expensiveOperations: z.array(
    z.object({
      type: z.string(),
      cost: z.number(),
      reason: z.string(),
    })
  ),
  timestamp: z.number(),
  // Opaque caller-supplied payload; accepted as-is so existing callers that
  // pass arbitrary values keep validating.
  metadata: z.unknown().optional(),
});
|
|
56
|
+
|
|
57
|
+
/**
 * Query Analyzer for Dark Matter 80/20 optimization.
 *
 * Breaks a SPARQL query string into triple patterns, FILTER expressions, join
 * candidates and aggregate calls, scores the query, and keeps running
 * statistics across every query analyzed by this instance.
 *
 * All extraction is done with lightweight text heuristics (regular expressions
 * and bracket matching); this is not a SPARQL parser, so unusual syntax such
 * as predicate-object lists (`;`) or property paths is not recognized.
 */
export class QueryAnalyzer {
  /**
   * Create a new query analyzer
   * @param {Object} [config] - Configuration
   * @param {number} [config.complexityThreshold=100] - Score at or above which a query counts as complex
   * @param {number} [config.expensiveOperationThreshold=50] - Cost at or above which a join is reported as expensive
   * @param {number} [config.joinCostMultiplier=10] - Cost per occurrence of a shared variable
   * @param {number} [config.filterCostMultiplier=5] - Cost per FILTER
   * @param {number} [config.aggregationCostMultiplier=8] - Cost per aggregate call
   */
  constructor(config = {}) {
    const defaults = {
      complexityThreshold: 100,
      expensiveOperationThreshold: 50,
      joinCostMultiplier: 10,
      filterCostMultiplier: 5,
      aggregationCostMultiplier: 8,
    };

    // Caller values win (including 0 and any extra keys), but an explicit
    // undefined/null must not erase a default: the multipliers feed arithmetic
    // and would otherwise turn every cost into NaN.
    this.config = { ...defaults, ...config };
    for (const [key, fallback] of Object.entries(defaults)) {
      if (this.config[key] === undefined || this.config[key] === null) {
        this.config[key] = fallback;
      }
    }

    this.resetStats();
  }

  /**
   * Analyze a SPARQL query
   * @param {string} query - SPARQL query string
   * @param {string} [queryId] - Optional query identifier; defaults to `query-<timestamp>`
   * @param {Object} [metadata] - Optional metadata, returned unchanged on the result
   * @returns {Object} Query analysis result
   */
  analyze(query, queryId = null, metadata = {}) {
    const analysis = analyzeSPARQLQuery(query);
    const patterns = this._extractPatterns(query, analysis);
    const filters = this._extractFilters(query, analysis);
    const joins = this._identifyJoins(query, analysis);
    const aggregations = this._extractAggregations(query);

    // Calculate complexity score
    const complexity = this._calculateComplexity(patterns, filters, joins, aggregations, analysis);

    // Identify expensive operations
    const expensiveOperations = this._identifyExpensiveOperations(
      patterns,
      filters,
      joins,
      aggregations,
      complexity
    );

    const timestamp = Date.now();
    const result = {
      queryId: queryId || `query-${timestamp}`,
      query,
      type: analysis.type,
      patterns,
      filters,
      joins,
      aggregations,
      complexity,
      expensiveOperations,
      timestamp,
      metadata,
    };

    // Update stats
    this.stats.totalAnalyzed++;
    if (complexity.score >= this.config.complexityThreshold) {
      this.stats.complexQueries++;
    } else {
      this.stats.simpleQueries++;
    }

    // Incremental mean: fold the new score into the previous average.
    this.stats.avgComplexity =
      (this.stats.avgComplexity * (this.stats.totalAnalyzed - 1) + complexity.score) /
      this.stats.totalAnalyzed;

    // Re-attach metadata after validation so it is returned even when the
    // schema does not declare it (undeclared keys are dropped by parse()).
    return { ...QueryAnalysisSchema.parse(result), metadata };
  }

  /**
   * Return the text inside the first `WHERE { ... }` group, honoring nested
   * braces so that content after an inner group (OPTIONAL, UNION, ...) is kept.
   * @param {string} query - SPARQL query
   * @returns {string|null} Group content, or null when there is no WHERE group
   * @private
   */
  _extractWhereClause(query) {
    const opener = /WHERE\s*\{/i.exec(query);
    if (!opener) return null;

    const start = opener.index + opener[0].length;
    let depth = 1;

    for (let i = start; i < query.length; i++) {
      const ch = query[i];
      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) return query.slice(start, i);
      }
    }

    // Unbalanced braces: analyze whatever follows the opening brace.
    return query.slice(start);
  }

  /**
   * Find the index of the `)` that closes a group already opened before `start`.
   * Parentheses inside quoted strings are ignored.
   * @param {string} text - Text to scan
   * @param {number} start - Index of the first character after the opening `(`
   * @returns {number} Index of the closing parenthesis, or -1 if none
   * @private
   */
  _findClosingParen(text, start) {
    let depth = 1;
    let quote = null;

    for (let i = start; i < text.length; i++) {
      const ch = text[i];

      if (quote !== null) {
        if (ch === '\\') {
          i++; // skip the escaped character
        } else if (ch === quote) {
          quote = null;
        }
        continue;
      }

      if (ch === '"' || ch === "'") {
        quote = ch;
      } else if (ch === '(') {
        depth++;
      } else if (ch === ')') {
        depth--;
        if (depth === 0) return i;
      }
    }

    return -1;
  }

  /**
   * Extract triple patterns from query
   *
   * Recognized terms: variables (`?x`, `$x`), IRIs (`<...>`), prefixed names
   * (`foaf:name`), bare words (`a`, numbers) and, in object position, quoted
   * literals with an optional language tag or datatype. A triple ends at `.`,
   * at a closing `}` or at the end of the WHERE group.
   *
   * @param {string} query - SPARQL query
   * @param {Object} _analysis - Basic analysis (unused)
   * @returns {Array} Triple patterns
   * @private
   */
  _extractPatterns(query, _analysis) {
    const patterns = [];

    const whereClause = this._extractWhereClause(query);
    if (whereClause === null) return patterns;

    // Prefixed names are listed before bare words so `foaf:name` is taken whole.
    const term = String.raw`[?$]\w+|<[^<>\s]*>|(?:\w[\w-]*)?:[\w-]*(?:\.[\w-]+)*|\w+`;
    const literal = String.raw`(?:"[^"]*"|'[^']*')(?:@[\w-]+|\^\^(?:<[^<>\s]*>|[\w-]*:[\w-]+))?`;
    // The lookbehind stops a match from starting in the middle of a token.
    const triplePattern = new RegExp(
      String.raw`(?<![\w?$:])(${term})\s+(${term})\s+(${term}|${literal})\s*(?:\.|(?=\})|$)`,
      'g'
    );
    const isVariable = (value) => /^[?$]/.test(value);

    let match;
    while ((match = triplePattern.exec(whereClause)) !== null) {
      const [, subject, predicate, object] = match;

      // Calculate pattern complexity
      let complexity = 1;

      // Variable in subject position = join likely
      if (isVariable(subject)) complexity += 5;

      // Variable in predicate position = very expensive
      if (isVariable(predicate)) complexity += 10;

      // Variable in object position = filter likely
      if (isVariable(object)) complexity += 3;

      patterns.push({
        type: 'triple',
        subject,
        predicate,
        object,
        complexity,
      });
    }

    return patterns;
  }

  /**
   * Extract FILTER clauses
   *
   * Returns the text between the outer parentheses of every `FILTER ( ... )`,
   * matching nested parentheses so calls like `FILTER(regex(?x, "a"))` are
   * returned whole. Empty or unterminated filters are skipped.
   *
   * @param {string} query - SPARQL query
   * @param {Object} _analysis - Basic analysis (unused)
   * @returns {Array} Filters
   * @private
   */
  _extractFilters(query, _analysis) {
    const filters = [];
    const opener = /FILTER\s*\(/gi;
    let match;

    while ((match = opener.exec(query)) !== null) {
      const start = opener.lastIndex;
      const end = this._findClosingParen(query, start);
      if (end === -1) break;

      const expression = query.slice(start, end).trim();
      if (expression.length > 0) filters.push(expression);

      // Continue after this filter so nested FILTERs are not reported twice.
      opener.lastIndex = end + 1;
    }

    return filters;
  }

  /**
   * Identify JOIN operations
   *
   * Heuristic: a variable that occurs at least twice anywhere in the query text
   * is reported as a join; OPTIONAL and UNION keywords add fixed-cost entries.
   *
   * @param {string} query - SPARQL query
   * @param {Object} _analysis - Basic analysis (unused)
   * @returns {Array} Joins
   * @private
   */
  _identifyJoins(query, _analysis) {
    const joins = [];

    // Simple heuristic: if a variable appears multiple times, it's a join
    const variableCounts = new Map();

    for (const variable of extractVariables(query)) {
      // NOTE(review): extractVariables is assumed to return names without the
      // leading sigil; a leading `?`/`$` is stripped in case it does not.
      const bareName = String(variable).replace(/^[?$]/, '');
      if (bareName.length === 0) continue;

      const escaped = bareName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // The lookahead keeps `?a` from also being counted inside `?ab`.
      const occurrences = query.match(new RegExp(`[?$]${escaped}(?!\\w)`, 'g'));
      variableCounts.set(variable, occurrences ? occurrences.length : 0);
    }

    // Identify joins based on shared variables
    for (const [variable, count] of variableCounts.entries()) {
      if (count >= 2) {
        joins.push({
          type: 'variable-join',
          variables: [variable],
          // Estimate join cost based on number of occurrences
          estimatedCost: count * this.config.joinCostMultiplier,
        });
      }
    }

    // Detect OPTIONAL joins (left outer joins); SPARQL keywords are case-insensitive.
    if (/\bOPTIONAL\b/i.test(query)) {
      joins.push({
        type: 'optional-join',
        variables: [],
        estimatedCost: 20, // OPTIONAL is typically expensive
      });
    }

    // Detect UNION (union joins)
    if (/\bUNION\b/i.test(query)) {
      joins.push({
        type: 'union',
        variables: [],
        estimatedCost: 30, // UNION is very expensive
      });
    }

    return joins;
  }

  /**
   * Extract aggregation operations
   * @param {string} query - SPARQL query
   * @returns {Array} Upper-cased aggregate names, one per call, in query order
   * @private
   */
  _extractAggregations(query) {
    const aggregations = [];
    // The lookbehind rejects matches inside longer names (e.g. CHECKSUM, ?count).
    const aggPattern = /(?<![\w?$:])(COUNT|SUM|AVG|MIN|MAX|GROUP_CONCAT|SAMPLE)\s*\(/gi;
    let match;

    while ((match = aggPattern.exec(query)) !== null) {
      aggregations.push(match[1].toUpperCase());
    }

    return aggregations;
  }

  /**
   * Calculate query complexity score
   * @param {Array} patterns - Triple patterns
   * @param {Array} filters - Filters
   * @param {Array} joins - Joins
   * @param {Array} aggregations - Aggregations
   * @param {Object} analysis - Basic analysis (reads hasGroupBy, hasOrderBy, hasDistinct, variables)
   * @returns {Object} Complexity metrics
   * @private
   */
  _calculateComplexity(patterns, filters, joins, aggregations, analysis) {
    const info = analysis || {};

    // Additive base: patterns + filters + joins + aggregations.
    const patternCost = patterns.reduce((sum, p) => sum + p.complexity, 0);
    const filterCost = filters.length * this.config.filterCostMultiplier;
    const joinCost = joins.reduce((sum, j) => sum + j.estimatedCost, 0);
    const aggregationCost = aggregations.length * this.config.aggregationCostMultiplier;

    let score = patternCost + filterCost + joinCost + aggregationCost;

    // Complexity modifiers
    if (info.hasGroupBy) score *= 1.5;
    if (info.hasOrderBy) score *= 1.2;
    if (info.hasDistinct) score *= 1.3;

    // Estimate result rows (used for optimization decisions)
    let estimatedRows = 100; // Base estimate

    if (joins.length > 0) {
      estimatedRows *= Math.pow(10, joins.length); // Cartesian product estimation
    }

    if (filters.length > 0) {
      estimatedRows /= filters.length * 2; // Filters reduce results
    }

    return {
      score: Math.round(score),
      patternCount: patterns.length,
      filterCount: filters.length,
      joinCount: joins.length,
      aggregationCount: aggregations.length,
      variableCount: Array.isArray(info.variables) ? info.variables.length : 0,
      estimatedRows: Math.round(estimatedRows),
    };
  }

  /**
   * Identify expensive operations
   * @param {Array} patterns - Triple patterns
   * @param {Array} filters - Filters
   * @param {Array} joins - Joins
   * @param {Array} aggregations - Aggregations
   * @param {Object} complexity - Complexity metrics
   * @returns {Array} Expensive operations, most costly first
   * @private
   */
  _identifyExpensiveOperations(patterns, filters, joins, aggregations, complexity) {
    const expensive = [];
    const threshold = this.config.expensiveOperationThreshold;

    // Check for expensive patterns (patterns use a fifth of the general threshold)
    for (const pattern of patterns) {
      if (pattern.complexity >= threshold / 5) {
        expensive.push({
          type: 'pattern',
          cost: pattern.complexity,
          reason: `High complexity pattern: ${pattern.subject} ${pattern.predicate} ${pattern.object}`,
        });
      }

      // Variable predicates are very expensive
      if (typeof pattern.predicate === 'string' && /^[?$]/.test(pattern.predicate)) {
        expensive.push({
          type: 'variable-predicate',
          cost: 100,
          reason: `Variable predicate ${pattern.predicate} requires full graph scan`,
        });
      }
    }

    // Check for expensive joins
    for (const join of joins) {
      if (join.estimatedCost >= threshold) {
        expensive.push({
          type: 'join',
          cost: join.estimatedCost,
          reason: `${join.type} with cost ${join.estimatedCost}`,
        });
      }
    }

    // UNION is always expensive
    if (joins.some(j => j.type === 'union')) {
      expensive.push({
        type: 'union',
        cost: 50,
        reason: 'UNION requires multiple query executions',
      });
    }

    // Aggregations are expensive once the estimated input exceeds 1000 rows
    if (aggregations.length > 0 && complexity.estimatedRows > 1000) {
      expensive.push({
        type: 'aggregation',
        cost: aggregations.length * this.config.aggregationCostMultiplier,
        reason: `${aggregations.length} aggregation(s) on ~${complexity.estimatedRows} rows`,
      });
    }

    // Unfiltered queries with high estimated rows
    if (filters.length === 0 && complexity.estimatedRows > 10000) {
      expensive.push({
        type: 'unfiltered',
        cost: 75,
        reason: `No filters with estimated ${complexity.estimatedRows} rows`,
      });
    }

    return expensive.sort((a, b) => b.cost - a.cost);
  }

  /**
   * Get analyzer statistics
   * @returns {Object} Counters plus `complexQueryRatio` (0 when nothing was analyzed)
   */
  getStats() {
    const { totalAnalyzed, complexQueries } = this.stats;
    return {
      ...this.stats,
      complexQueryRatio: totalAnalyzed > 0 ? complexQueries / totalAnalyzed : 0,
    };
  }

  /**
   * Reset statistics
   */
  resetStats() {
    this.stats = {
      totalAnalyzed: 0,
      complexQueries: 0,
      simpleQueries: 0,
      avgComplexity: 0,
    };
  }
}
|
|
432
|
+
|
|
433
|
+
/**
 * Factory wrapper around the `QueryAnalyzer` constructor.
 * @param {Object} [config] - Configuration forwarded unchanged to the constructor
 * @returns {QueryAnalyzer} Newly constructed analyzer
 */
export function createQueryAnalyzer(config = {}) {
  const analyzer = new QueryAnalyzer(config);
  return analyzer;
}
|
|
441
|
+
|
|
442
|
+
export default QueryAnalyzer;
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file SPARQL Query Optimizer - Reorder patterns for efficiency
|
|
3
|
+
* @module @unrdf/dark-matter/query-optimizer
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { analyzeSparqlQuery } from './query-analyzer.mjs';
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* @typedef {import('n3').Store} Store
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Shape of one entry in the `changes` list of an optimization result.
 */
const OptimizationChangeSchema = z.object({
  type: z.string(),
  description: z.string(),
});

/**
 * Optimization result schema: the query before and after rewriting, the list
 * of applied changes, and the estimated speedup multiplier.
 */
const OptimizationResultSchema = z.object({
  originalQuery: z.string(),
  optimizedQuery: z.string(),
  changes: z.array(OptimizationChangeSchema),
  estimatedSpeedup: z.number(),
});
|
|
27
|
+
|
|
28
|
+
/**
 * Optimize SPARQL query by reordering patterns
 *
 * The query text is only rewritten when the pattern order actually changed.
 * If no rewrite was applied (order unchanged, or the WHERE clause could not be
 * rebuilt) the original text is returned with no changes and a speedup of 1,
 * so the result never reports an optimization that did not happen.
 *
 * NOTE(review): `analyzeSparqlQuery` is imported from `./query-analyzer.mjs`
 * and is expected to return `{ patterns: [{ subject, predicate, object,
 * selectivity }] }` — confirm that module exports a function of that shape.
 *
 * @param {string} query - Original SPARQL query
 * @param {Store} [_store] - Optional store for statistics (currently unused)
 * @returns {Object} Optimization result
 *
 * @throws {TypeError} If query is not a string
 *
 * @example
 * const result = optimizeQuery(`
 *   SELECT ?name WHERE {
 *     ?person foaf:knows ?friend .
 *     ?person foaf:name ?name .
 *   }
 * `);
 *
 * console.log('Optimized:', result.optimizedQuery);
 * console.log('Speedup:', result.estimatedSpeedup);
 */
export function optimizeQuery(query, _store = null) {
  if (typeof query !== 'string') {
    throw new TypeError('optimizeQuery: query must be a string');
  }

  const analysis = analyzeSparqlQuery(query);
  const patterns = analysis && Array.isArray(analysis.patterns) ? analysis.patterns : [];
  const changes = [];

  // Strategy: Move most selective patterns first (filter early)
  const optimizedPatterns = optimizePatternOrder(patterns, changes);

  // Rebuild only when something changed; an unchanged order needs no rewrite.
  const rewritten = changes.length > 0 ? rebuildQuery(query, analysis, optimizedPatterns) : query;
  const applied = rewritten !== query;

  const result = {
    originalQuery: query,
    optimizedQuery: rewritten,
    changes: applied ? changes : [],
    estimatedSpeedup: applied ? calculateSpeedup(patterns, optimizedPatterns) : 1,
  };

  return OptimizationResultSchema.parse(result);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Optimize pattern execution order
 *
 * Returns a copy of `patterns` sorted by `selectivity`, highest first. A
 * pattern whose selectivity is missing or not a finite number is ranked as 0,
 * which keeps the comparator consistent; ties keep their original relative
 * order. When the resulting order differs from the input, a `pattern_reorder`
 * entry is appended to `changes`.
 *
 * @param {Array} patterns - Original patterns (not mutated)
 * @param {Array} changes - Changes accumulator (mutated)
 * @returns {Array} Optimized patterns; the input array itself when it has at most one element
 */
function optimizePatternOrder(patterns, changes) {
  if (patterns.length <= 1) {
    return patterns;
  }

  const selectivityOf = (pattern) => {
    const value = pattern == null ? NaN : Number(pattern.selectivity);
    return Number.isFinite(value) ? value : 0;
  };

  // Sort by selectivity (descending) - execute most selective first
  const sorted = [...patterns].sort((a, b) => selectivityOf(b) - selectivityOf(a));

  // Identity comparison: the sort only moves references around.
  const orderChanged = sorted.some((pattern, index) => pattern !== patterns[index]);

  if (orderChanged) {
    changes.push({
      type: 'pattern_reorder',
      description: 'Reordered patterns by selectivity (most selective first)',
    });
  }

  return sorted;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Rebuild query with optimized patterns
 *
 * The WHERE body is replaced by the patterns in their new order, but only when
 * that is provably lossless: every pattern must be found verbatim in the body
 * and nothing else (FILTER, OPTIONAL, BIND, nested groups, ...) may remain once
 * the patterns are removed. In every other case the original query is returned
 * unchanged, because a rewrite would silently drop those constructs.
 *
 * @param {string} originalQuery - Original query
 * @param {Object} analysis - Query analysis (unused; kept for interface stability)
 * @param {Array} optimizedPatterns - Optimized patterns with `subject`, `predicate`, `object`
 * @returns {string} Rebuilt query, or `originalQuery` when no safe rewrite exists
 */
function rebuildQuery(originalQuery, analysis, optimizedPatterns) {
  // Extract query parts
  const whereMatch = originalQuery.match(/(.*WHERE\s*\{)([^}]+)(\}.*)/is);

  if (!whereMatch || !Array.isArray(optimizedPatterns) || optimizedPatterns.length === 0) {
    return originalQuery; // Nothing to rebuild from
  }

  const [, before, body, after] = whereMatch;
  const escapeForRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Remove each pattern from the body once; whatever is left over is content
  // the rewrite would lose.
  let leftover = body;
  for (const { subject, predicate, object } of optimizedPatterns) {
    const statement = new RegExp(
      `(?<![\\w?$:])${escapeForRegExp(subject)}\\s+${escapeForRegExp(predicate)}\\s+${escapeForRegExp(object)}(?![\\w:-])\\s*\\.?`
    );
    if (!statement.test(leftover)) {
      return originalQuery;
    }
    leftover = leftover.replace(statement, ' ');
  }

  if (leftover.trim() !== '') {
    return originalQuery;
  }

  // Rebuild WHERE clause with optimized pattern order
  const patternStrings = optimizedPatterns.map(p => `${p.subject} ${p.predicate} ${p.object}`);
  const newWhereClause = patternStrings.join(' .\n ');

  return `${before}\n ${newWhereClause}\n ${after}`;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Calculate estimated speedup from optimization
 *
 * Cost model: start from 1000 intermediate rows; each pattern costs the
 * current intermediate size and then keeps a fraction `1 - selectivity` of it.
 * The speedup is the original plan cost divided by the optimized plan cost,
 * rounded to two decimals.
 *
 * Selectivity is clamped to [0, 1]; a missing or non-numeric selectivity is
 * treated as 0 (the pattern filters nothing) so the result is always a finite
 * number.
 *
 * @param {Array} original - Original patterns
 * @param {Array} optimized - Optimized patterns
 * @returns {number} Estimated speedup multiplier (1 when there are no patterns)
 */
function calculateSpeedup(original, optimized) {
  const BASE_INTERMEDIATE_SIZE = 1000; // Base estimate

  // Fraction of intermediate results that survive a pattern.
  const retainedFraction = (pattern) => {
    const selectivity = pattern == null ? NaN : Number(pattern.selectivity);
    if (!Number.isFinite(selectivity)) return 1;
    return 1 - Math.min(Math.max(selectivity, 0), 1);
  };

  // Early filtering reduces intermediate result size, and with it the cost of
  // every later pattern.
  const planCost = (patterns) => {
    let cost = 0;
    let intermediateSize = BASE_INTERMEDIATE_SIZE;
    for (const pattern of patterns) {
      cost += intermediateSize;
      intermediateSize *= retainedFraction(pattern); // Results after this pattern
    }
    return cost;
  };

  const originalCost = planCost(original);
  const optimizedCost = planCost(optimized);

  const speedup = originalCost > 0 ? originalCost / Math.max(optimizedCost, 1) : 1.0;
  return Math.round(speedup * 100) / 100;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Suggest indexes for store
 *
 * Produces, in this order per pattern: one `predicate_index` suggestion for
 * each distinct bound predicate and one `subject_predicate_index` suggestion
 * for each distinct bound subject+predicate pair; then one `join_index`
 * suggestion for every join that spans more than two patterns.
 *
 * The store is only validated (it must expose `getQuads`); it is not queried.
 *
 * NOTE(review): `analyzeSparqlQuery` is expected to return
 * `{ patterns: [{ subject, predicate }], joins: [{ variable, patterns }] }` —
 * confirm against the analyzer actually imported. Joins without a `patterns`
 * array are skipped.
 *
 * @param {Store} store - RDF store
 * @param {string} query - SPARQL query
 * @returns {Array<Object>} Index suggestions
 *
 * @throws {TypeError} If store or query is invalid
 *
 * @example
 * const suggestions = suggestIndexes(store, query);
 * suggestions.forEach(s => console.log(s.type, s.priority));
 */
export function suggestIndexes(store, query) {
  if (!store || typeof store.getQuads !== 'function') {
    throw new TypeError('suggestIndexes: store must be a valid Store instance');
  }

  if (typeof query !== 'string') {
    throw new TypeError('suggestIndexes: query must be a string');
  }

  const analysis = analyzeSparqlQuery(query) || {};
  const patterns = Array.isArray(analysis.patterns) ? analysis.patterns : [];
  const joins = Array.isArray(analysis.joins) ? analysis.joins : [];
  const suggestions = [];

  // Track what has already been suggested to avoid duplicates
  const seenPredicates = new Set();
  const seenSubjectPredicates = new Set();

  // Analyze patterns for index opportunities
  for (const pattern of patterns) {
    const cleanPredicate = typeof pattern.predicate === 'string' ? pattern.predicate.trim() : '';
    const hasBoundPredicate = cleanPredicate.length > 0 && !cleanPredicate.startsWith('?');
    if (!hasBoundPredicate) continue;

    // Predicate-specific indexes
    if (!seenPredicates.has(cleanPredicate)) {
      seenPredicates.add(cleanPredicate);
      suggestions.push({
        type: 'predicate_index',
        predicate: cleanPredicate,
        priority: 'high',
        reason: 'Specific predicate queries benefit from indexes',
      });
    }

    // Subject-predicate composite indexes for bound subjects
    const { subject } = pattern;
    if (typeof subject === 'string' && !subject.startsWith('?')) {
      // NUL cannot occur in either term, so it is a safe key separator.
      const compositeKey = `${subject}\u0000${cleanPredicate}`;
      if (!seenSubjectPredicates.has(compositeKey)) {
        seenSubjectPredicates.add(compositeKey);
        suggestions.push({
          type: 'subject_predicate_index',
          subject,
          predicate: cleanPredicate,
          priority: 'medium',
          reason: 'Bound subject+predicate can use composite index',
        });
      }
    }
  }

  // Join-based suggestions
  for (const join of joins) {
    const joinedCount = join && Array.isArray(join.patterns) ? join.patterns.length : 0;
    if (joinedCount > 2) {
      suggestions.push({
        type: 'join_index',
        variable: join.variable,
        priority: 'medium',
        reason: `Variable ${join.variable} joins ${joinedCount} patterns`,
      });
    }
  }

  return suggestions;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Explain optimization changes
 *
 * Analyzes both query strings and reports their pattern counts. Patterns are
 * compared position by position (by JSON serialization) over the shorter of
 * the two lists; the first mismatch adds a single `pattern_order` change.
 *
 * NOTE(review): relies on `analyzeSparqlQuery` returning an object with a
 * `patterns` array — confirm against the analyzer actually imported.
 *
 * @param {string} original - Original query
 * @param {string} optimized - Optimized query
 * @returns {Object} Explanation with `summary`, `originalComplexity`,
 *   `optimizedComplexity` and `changes`
 *
 * @throws {TypeError} If either argument is not a string
 *
 * @example
 * const explanation = explainOptimization(original, optimized);
 * console.log(explanation.summary);
 */
export function explainOptimization(original, optimized) {
  const bothAreStrings = typeof original === 'string' && typeof optimized === 'string';
  if (!bothAreStrings) {
    throw new TypeError('explainOptimization: queries must be strings');
  }

  const beforeAnalysis = analyzeSparqlQuery(original);
  const afterAnalysis = analyzeSparqlQuery(optimized);
  const beforePatterns = beforeAnalysis.patterns;
  const afterPatterns = afterAnalysis.patterns;

  // Only positions present in both lists are compared.
  const comparable = Math.min(beforePatterns.length, afterPatterns.length);
  const reordered = beforePatterns
    .slice(0, comparable)
    .some((pattern, position) => JSON.stringify(pattern) !== JSON.stringify(afterPatterns[position]));

  const changes = [];
  if (reordered) {
    changes.push({
      type: 'pattern_order',
      description: 'Patterns reordered for better selectivity',
      impact: 'Reduces intermediate result set size',
    });
  }

  return {
    summary: 'Query optimization analysis',
    originalComplexity: beforePatterns.length,
    optimizedComplexity: afterPatterns.length,
    changes,
  };
}
|