@unrdf/dark-matter 5.0.0 → 26.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +10 -6
- package/src/dark-matter/query-analyzer.mjs +1 -1
- package/README.md +0 -81
- package/src/dark-matter/critical-path.mjs +0 -367
- package/src/dark-matter/index-advisor.mjs +0 -242
- package/src/dark-matter/index.mjs +0 -244
- package/src/dark-matter/optimizer.mjs +0 -426
- package/src/dark-matter/performance-metrics.mjs +0 -242
- package/src/dark-matter/query-optimizer.mjs +0 -283
- package/src/dark-matter-core.mjs +0 -743
- package/src/index.mjs +0 -60
|
@@ -1,283 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @file SPARQL Query Optimizer - Reorder patterns for efficiency
|
|
3
|
-
* @module @unrdf/dark-matter/query-optimizer
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { z } from 'zod';
|
|
7
|
-
import { analyzeSparqlQuery } from './query-analyzer.mjs';
|
|
8
|
-
|
|
9
|
-
/**
|
|
10
|
-
* @typedef {import('n3').Store} Store
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
/**
 * Shape of a single entry in the `changes` list: a machine-readable `type`
 * plus a human-readable `description`, both strings.
 */
const OptimizationChangeSchema = z.object({
  type: z.string(),
  description: z.string(),
});

/**
 * Optimization result schema.
 *
 * Describes the object handed back by `optimizeQuery`: the query as it was
 * supplied, the rewritten query, the list of applied changes and the
 * estimated speedup as a number.
 */
const OptimizationResultSchema = z.object({
  originalQuery: z.string(),
  optimizedQuery: z.string(),
  changes: z.array(OptimizationChangeSchema),
  estimatedSpeedup: z.number(),
});
|
|
27
|
-
|
|
28
|
-
/**
 * Optimize a SPARQL query by reordering its triple patterns.
 *
 * The query is analyzed with `analyzeSparqlQuery`, its patterns are ordered
 * by `optimizePatternOrder`, and the query text is rewritten only when that
 * step actually recorded a change. When nothing changed the original text is
 * returned verbatim, so `optimizedQuery` never differs from `originalQuery`
 * while `changes` is empty.
 *
 * @param {string} query - Original SPARQL query
 * @param {Store} [_store] - Optional store; accepted for interface
 *   compatibility and currently not read by this function
 * @returns {Object} Optimization result with `originalQuery`,
 *   `optimizedQuery`, `changes` and `estimatedSpeedup`, validated with
 *   `OptimizationResultSchema.parse`
 *
 * @throws {TypeError} If query is not a string
 *
 * @example
 * const result = optimizeQuery(`
 *   SELECT ?name WHERE {
 *     ?person foaf:knows ?friend .
 *     ?person foaf:name ?name .
 *   }
 * `);
 *
 * console.log('Optimized:', result.optimizedQuery);
 * console.log('Speedup:', result.estimatedSpeedup);
 */
export function optimizeQuery(query, _store = null) {
  if (typeof query !== 'string') {
    throw new TypeError('optimizeQuery: query must be a string');
  }

  const analysis = analyzeSparqlQuery(query);
  const changes = [];

  // Strategy: move the most selective patterns first (filter early).
  const optimizedPatterns = optimizePatternOrder(analysis.patterns, changes);

  // Rewriting replaces the whole WHERE body with bare triple patterns, so it
  // is only done when a change was recorded; otherwise the caller gets the
  // query back untouched.
  const optimizedQuery =
    changes.length > 0 ? rebuildQuery(query, analysis, optimizedPatterns) : query;

  const estimatedSpeedup = calculateSpeedup(analysis.patterns, optimizedPatterns);

  return OptimizationResultSchema.parse({
    originalQuery: query,
    optimizedQuery,
    changes,
    estimatedSpeedup,
  });
}
|
|
73
|
-
|
|
74
|
-
/**
 * Order patterns so that the most selective ones come first.
 *
 * Patterns are sorted by their `selectivity` in descending order. A pattern
 * whose `selectivity` is missing or does not convert to a finite number is
 * ranked as 0, so the comparator never returns NaN and the resulting order is
 * well defined. Patterns with equal rank keep their relative order because
 * `Array.prototype.sort` is stable.
 *
 * @param {Array} patterns - Original patterns (not mutated)
 * @param {Array} changes - Changes accumulator; receives one
 *   `pattern_reorder` entry when the sorted order differs from the input
 * @returns {Array} Optimized patterns: the input array itself when it holds
 *   at most one pattern, otherwise a new sorted array
 */
function optimizePatternOrder(patterns, changes) {
  if (patterns.length <= 1) {
    return patterns;
  }

  // Non-numeric selectivity would make `b - a` NaN and the sort inconsistent.
  const rank = (pattern) => {
    const value = Number(pattern.selectivity);
    return Number.isFinite(value) ? value : 0;
  };

  const sorted = [...patterns].sort((a, b) => rank(b) - rank(a));

  const orderChanged = sorted.some((pattern, index) => pattern !== patterns[index]);
  if (orderChanged) {
    changes.push({
      type: 'pattern_reorder',
      description: 'Reordered patterns by selectivity (most selective first)',
    });
  }

  return sorted;
}
|
|
106
|
-
|
|
107
|
-
/**
 * Rebuild a query with its WHERE body replaced by the optimized patterns.
 *
 * The text up to and including `WHERE {` and the text from the closing `}`
 * onwards are kept; the body in between is replaced by the patterns rendered
 * as `subject predicate object`, joined with ` .`.
 *
 * The original query is returned unchanged when a rewrite would be unsafe:
 * - there are no patterns to emit (the body would be erased),
 * - no `WHERE { ... }` clause with a non-empty body is found,
 * - the body contains a nested `{` group; the body match stops at the first
 *   `}`, so replacing it would leave the braces unbalanced.
 *
 * NOTE(review): anything in a flat WHERE body that is not represented in
 * `optimizedPatterns` (for example a FILTER) is not carried over — confirm
 * against what the analyzer reports as patterns.
 *
 * @param {string} originalQuery - Original query
 * @param {Object} analysis - Query analysis (not read here; kept so the
 *   signature stays stable for callers)
 * @param {Array} optimizedPatterns - Optimized patterns, each with
 *   `subject`, `predicate` and `object`
 * @returns {string} Rebuilt query, or `originalQuery` when no safe rewrite
 *   is possible
 */
function rebuildQuery(originalQuery, analysis, optimizedPatterns) {
  if (optimizedPatterns.length === 0) {
    return originalQuery; // Nothing to write; keep the WHERE body intact
  }

  const whereMatch = originalQuery.match(/(.*WHERE\s*\{)([^}]+)(\}.*)/is);
  if (!whereMatch) {
    return originalQuery; // Can't optimize without WHERE clause
  }

  const [, before, body, after] = whereMatch;

  if (body.includes('{')) {
    return originalQuery; // Nested group: a rewrite would unbalance braces
  }

  const newWhereClause = optimizedPatterns
    .map(({ subject, predicate, object }) => `${subject} ${predicate} ${object}`)
    .join(' .\n ');

  return `${before}\n ${newWhereClause}\n ${after}`;
}
|
|
130
|
-
|
|
131
|
-
/**
 * Estimate the cost of evaluating patterns in the given order.
 *
 * Starting from `baseSize` intermediate results, each pattern adds the
 * current intermediate size to the cost and then shrinks that size by the
 * factor `1 - selectivity`. Selectivity is clamped to [0, 1]; a missing or
 * non-numeric value counts as 0 so the estimate stays finite and
 * non-negative.
 *
 * @param {Array} patterns - Patterns in execution order
 * @param {number} [baseSize=1000] - Assumed initial intermediate result size
 * @returns {number} Accumulated cost estimate
 */
function estimatePlanCost(patterns, baseSize = 1000) {
  let cost = 0;
  let intermediateSize = baseSize;

  for (const pattern of patterns) {
    cost += intermediateSize;
    const raw = Number(pattern.selectivity);
    const selectivity = Number.isFinite(raw) ? Math.min(Math.max(raw, 0), 1) : 0;
    intermediateSize *= 1 - selectivity; // Results left after this pattern
  }

  return cost;
}

/**
 * Calculate estimated speedup from optimization.
 *
 * Compares the estimated cost of the original pattern order with that of the
 * optimized order. Earlier filtering reduces the intermediate result size,
 * which lowers the accumulated cost.
 *
 * @param {Array} original - Original patterns
 * @param {Array} optimized - Optimized patterns
 * @returns {number} Estimated speedup multiplier rounded to two decimals;
 *   1 when the original cost is zero (for example, no patterns)
 */
function calculateSpeedup(original, optimized) {
  const originalCost = estimatePlanCost(original);
  const optimizedCost = estimatePlanCost(optimized);

  const speedup = originalCost > 0 ? originalCost / Math.max(optimizedCost, 1) : 1.0;
  return Math.round(speedup * 100) / 100;
}
|
|
163
|
-
|
|
164
|
-
/**
 * Suggest indexes for a store based on the patterns and joins of a query.
 *
 * Three kinds of suggestions are produced, in this order per pattern and
 * then per join:
 * - `predicate_index` (priority `high`) once per distinct bound predicate,
 * - `subject_predicate_index` (priority `medium`) once per distinct pair of
 *   bound subject and bound predicate,
 * - `join_index` (priority `medium`) for every join reported by the analysis
 *   that involves more than two patterns.
 *
 * Subjects and predicates are trimmed before use. A term is treated as bound
 * when it is non-empty and does not start with `?`.
 *
 * @param {Store} store - RDF store; only checked for a `getQuads` method
 * @param {string} query - SPARQL query
 * @returns {Array<Object>} Index suggestions
 *
 * @throws {TypeError} If store or query is invalid
 *
 * @example
 * const suggestions = suggestIndexes(store, query);
 * suggestions.forEach(s => console.log(s.type, s.priority));
 */
export function suggestIndexes(store, query) {
  if (!store || typeof store.getQuads !== 'function') {
    throw new TypeError('suggestIndexes: store must be a valid Store instance');
  }

  if (typeof query !== 'string') {
    throw new TypeError('suggestIndexes: query must be a string');
  }

  const analysis = analyzeSparqlQuery(query);
  const suggestions = [];

  // Track what was already suggested so repeated patterns add nothing.
  const seenPredicates = new Set();
  const seenSubjectPredicates = new Set();

  const isBound = (term) => term.length > 0 && !term.startsWith('?');

  for (const pattern of analysis.patterns) {
    const predicate = pattern.predicate.trim();
    if (!isBound(predicate)) {
      continue; // Neither index kind applies without a bound predicate
    }

    if (!seenPredicates.has(predicate)) {
      seenPredicates.add(predicate);
      suggestions.push({
        type: 'predicate_index',
        predicate,
        priority: 'high',
        reason: 'Specific predicate queries benefit from indexes',
      });
    }

    const subject = pattern.subject.trim();
    if (!isBound(subject)) {
      continue;
    }

    // JSON encoding keeps the pair unambiguous whatever the terms contain.
    const pairKey = JSON.stringify([subject, predicate]);
    if (!seenSubjectPredicates.has(pairKey)) {
      seenSubjectPredicates.add(pairKey);
      suggestions.push({
        type: 'subject_predicate_index',
        subject,
        predicate,
        priority: 'medium',
        reason: 'Bound subject+predicate can use composite index',
      });
    }
  }

  for (const join of analysis.joins) {
    if (join.patterns.length > 2) {
      suggestions.push({
        type: 'join_index',
        variable: join.variable,
        priority: 'medium',
        reason: `Variable ${join.variable} joins ${join.patterns.length} patterns`,
      });
    }
  }

  return suggestions;
}
|
|
233
|
-
|
|
234
|
-
/**
 * Describe how an optimized query differs from its original.
 *
 * Both queries are run through `analyzeSparqlQuery`. The pattern counts are
 * reported as `originalComplexity` and `optimizedComplexity`. Patterns are
 * then compared position by position (by their JSON serialization) over the
 * length the two lists share; the first mismatch adds a single
 * `pattern_order` entry to `changes`.
 *
 * @param {string} original - Original query
 * @param {string} optimized - Optimized query
 * @returns {Object} Explanation with `summary`, `originalComplexity`,
 *   `optimizedComplexity` and `changes`
 *
 * @throws {TypeError} If either query is not a string
 *
 * @example
 * const explanation = explainOptimization(original, optimized);
 * console.log(explanation.summary);
 */
export function explainOptimization(original, optimized) {
  const bothAreStrings = typeof original === 'string' && typeof optimized === 'string';
  if (!bothAreStrings) {
    throw new TypeError('explainOptimization: queries must be strings');
  }

  const originalAnalysis = analyzeSparqlQuery(original);
  const optimizedAnalysis = analyzeSparqlQuery(optimized);
  const before = originalAnalysis.patterns;
  const after = optimizedAnalysis.patterns;

  const explanation = {
    summary: 'Query optimization analysis',
    originalComplexity: before.length,
    optimizedComplexity: after.length,
    changes: [],
  };

  // Only positions present in both lists are compared.
  const sharedLength = Math.min(before.length, after.length);
  let differs = false;
  for (let index = 0; index < sharedLength && !differs; index += 1) {
    differs = JSON.stringify(before[index]) !== JSON.stringify(after[index]);
  }

  if (differs) {
    explanation.changes.push({
      type: 'pattern_order',
      description: 'Patterns reordered for better selectivity',
      impact: 'Reduces intermediate result set size',
    });
  }

  return explanation;
}
|