kollabor 0.4.9__py3-none-any.whl → 0.4.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. agents/__init__.py +2 -0
  2. agents/coder/__init__.py +0 -0
  3. agents/coder/agent.json +4 -0
  4. agents/coder/api-integration.md +2150 -0
  5. agents/coder/cli-pretty.md +765 -0
  6. agents/coder/code-review.md +1092 -0
  7. agents/coder/database-design.md +1525 -0
  8. agents/coder/debugging.md +1102 -0
  9. agents/coder/dependency-management.md +1397 -0
  10. agents/coder/git-workflow.md +1099 -0
  11. agents/coder/refactoring.md +1454 -0
  12. agents/coder/security-hardening.md +1732 -0
  13. agents/coder/system_prompt.md +1448 -0
  14. agents/coder/tdd.md +1367 -0
  15. agents/creative-writer/__init__.py +0 -0
  16. agents/creative-writer/agent.json +4 -0
  17. agents/creative-writer/character-development.md +1852 -0
  18. agents/creative-writer/dialogue-craft.md +1122 -0
  19. agents/creative-writer/plot-structure.md +1073 -0
  20. agents/creative-writer/revision-editing.md +1484 -0
  21. agents/creative-writer/system_prompt.md +690 -0
  22. agents/creative-writer/worldbuilding.md +2049 -0
  23. agents/data-analyst/__init__.py +30 -0
  24. agents/data-analyst/agent.json +4 -0
  25. agents/data-analyst/data-visualization.md +992 -0
  26. agents/data-analyst/exploratory-data-analysis.md +1110 -0
  27. agents/data-analyst/pandas-data-manipulation.md +1081 -0
  28. agents/data-analyst/sql-query-optimization.md +881 -0
  29. agents/data-analyst/statistical-analysis.md +1118 -0
  30. agents/data-analyst/system_prompt.md +928 -0
  31. agents/default/__init__.py +0 -0
  32. agents/default/agent.json +4 -0
  33. agents/default/dead-code.md +794 -0
  34. agents/default/explore-agent-system.md +585 -0
  35. agents/default/system_prompt.md +1448 -0
  36. agents/kollabor/__init__.py +0 -0
  37. agents/kollabor/analyze-plugin-lifecycle.md +175 -0
  38. agents/kollabor/analyze-terminal-rendering.md +388 -0
  39. agents/kollabor/code-review.md +1092 -0
  40. agents/kollabor/debug-mcp-integration.md +521 -0
  41. agents/kollabor/debug-plugin-hooks.md +547 -0
  42. agents/kollabor/debugging.md +1102 -0
  43. agents/kollabor/dependency-management.md +1397 -0
  44. agents/kollabor/git-workflow.md +1099 -0
  45. agents/kollabor/inspect-llm-conversation.md +148 -0
  46. agents/kollabor/monitor-event-bus.md +558 -0
  47. agents/kollabor/profile-performance.md +576 -0
  48. agents/kollabor/refactoring.md +1454 -0
  49. agents/kollabor/system_prompt copy.md +1448 -0
  50. agents/kollabor/system_prompt.md +757 -0
  51. agents/kollabor/trace-command-execution.md +178 -0
  52. agents/kollabor/validate-config.md +879 -0
  53. agents/research/__init__.py +0 -0
  54. agents/research/agent.json +4 -0
  55. agents/research/architecture-mapping.md +1099 -0
  56. agents/research/codebase-analysis.md +1077 -0
  57. agents/research/dependency-audit.md +1027 -0
  58. agents/research/performance-profiling.md +1047 -0
  59. agents/research/security-review.md +1359 -0
  60. agents/research/system_prompt.md +492 -0
  61. agents/technical-writer/__init__.py +0 -0
  62. agents/technical-writer/agent.json +4 -0
  63. agents/technical-writer/api-documentation.md +2328 -0
  64. agents/technical-writer/changelog-management.md +1181 -0
  65. agents/technical-writer/readme-writing.md +1360 -0
  66. agents/technical-writer/style-guide.md +1410 -0
  67. agents/technical-writer/system_prompt.md +653 -0
  68. agents/technical-writer/tutorial-creation.md +1448 -0
  69. core/__init__.py +0 -2
  70. core/application.py +343 -88
  71. core/cli.py +229 -10
  72. core/commands/menu_renderer.py +463 -59
  73. core/commands/registry.py +14 -9
  74. core/commands/system_commands.py +2461 -14
  75. core/config/loader.py +151 -37
  76. core/config/service.py +18 -6
  77. core/events/bus.py +29 -9
  78. core/events/executor.py +205 -75
  79. core/events/models.py +27 -8
  80. core/fullscreen/command_integration.py +20 -24
  81. core/fullscreen/components/__init__.py +10 -1
  82. core/fullscreen/components/matrix_components.py +1 -2
  83. core/fullscreen/components/space_shooter_components.py +654 -0
  84. core/fullscreen/plugin.py +5 -0
  85. core/fullscreen/renderer.py +52 -13
  86. core/fullscreen/session.py +52 -15
  87. core/io/__init__.py +29 -5
  88. core/io/buffer_manager.py +6 -1
  89. core/io/config_status_view.py +7 -29
  90. core/io/core_status_views.py +267 -347
  91. core/io/input/__init__.py +25 -0
  92. core/io/input/command_mode_handler.py +711 -0
  93. core/io/input/display_controller.py +128 -0
  94. core/io/input/hook_registrar.py +286 -0
  95. core/io/input/input_loop_manager.py +421 -0
  96. core/io/input/key_press_handler.py +502 -0
  97. core/io/input/modal_controller.py +1011 -0
  98. core/io/input/paste_processor.py +339 -0
  99. core/io/input/status_modal_renderer.py +184 -0
  100. core/io/input_errors.py +5 -1
  101. core/io/input_handler.py +211 -2452
  102. core/io/key_parser.py +7 -0
  103. core/io/layout.py +15 -3
  104. core/io/message_coordinator.py +111 -2
  105. core/io/message_renderer.py +129 -4
  106. core/io/status_renderer.py +147 -607
  107. core/io/terminal_renderer.py +97 -51
  108. core/io/terminal_state.py +21 -4
  109. core/io/visual_effects.py +816 -165
  110. core/llm/agent_manager.py +1063 -0
  111. core/llm/api_adapters/__init__.py +44 -0
  112. core/llm/api_adapters/anthropic_adapter.py +432 -0
  113. core/llm/api_adapters/base.py +241 -0
  114. core/llm/api_adapters/openai_adapter.py +326 -0
  115. core/llm/api_communication_service.py +167 -113
  116. core/llm/conversation_logger.py +322 -16
  117. core/llm/conversation_manager.py +556 -30
  118. core/llm/file_operations_executor.py +84 -32
  119. core/llm/llm_service.py +934 -103
  120. core/llm/mcp_integration.py +541 -57
  121. core/llm/message_display_service.py +135 -18
  122. core/llm/plugin_sdk.py +1 -2
  123. core/llm/profile_manager.py +1183 -0
  124. core/llm/response_parser.py +274 -56
  125. core/llm/response_processor.py +16 -3
  126. core/llm/tool_executor.py +6 -1
  127. core/logging/__init__.py +2 -0
  128. core/logging/setup.py +34 -6
  129. core/models/resume.py +54 -0
  130. core/plugins/__init__.py +4 -2
  131. core/plugins/base.py +127 -0
  132. core/plugins/collector.py +23 -161
  133. core/plugins/discovery.py +37 -3
  134. core/plugins/factory.py +6 -12
  135. core/plugins/registry.py +5 -17
  136. core/ui/config_widgets.py +128 -28
  137. core/ui/live_modal_renderer.py +2 -1
  138. core/ui/modal_actions.py +5 -0
  139. core/ui/modal_overlay_renderer.py +0 -60
  140. core/ui/modal_renderer.py +268 -7
  141. core/ui/modal_state_manager.py +29 -4
  142. core/ui/widgets/base_widget.py +7 -0
  143. core/updates/__init__.py +10 -0
  144. core/updates/version_check_service.py +348 -0
  145. core/updates/version_comparator.py +103 -0
  146. core/utils/config_utils.py +685 -526
  147. core/utils/plugin_utils.py +1 -1
  148. core/utils/session_naming.py +111 -0
  149. fonts/LICENSE +21 -0
  150. fonts/README.md +46 -0
  151. fonts/SymbolsNerdFont-Regular.ttf +0 -0
  152. fonts/SymbolsNerdFontMono-Regular.ttf +0 -0
  153. fonts/__init__.py +44 -0
  154. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/METADATA +54 -4
  155. kollabor-0.4.15.dist-info/RECORD +228 -0
  156. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/top_level.txt +2 -0
  157. plugins/agent_orchestrator/__init__.py +39 -0
  158. plugins/agent_orchestrator/activity_monitor.py +181 -0
  159. plugins/agent_orchestrator/file_attacher.py +77 -0
  160. plugins/agent_orchestrator/message_injector.py +135 -0
  161. plugins/agent_orchestrator/models.py +48 -0
  162. plugins/agent_orchestrator/orchestrator.py +403 -0
  163. plugins/agent_orchestrator/plugin.py +976 -0
  164. plugins/agent_orchestrator/xml_parser.py +191 -0
  165. plugins/agent_orchestrator_plugin.py +9 -0
  166. plugins/enhanced_input/box_styles.py +1 -0
  167. plugins/enhanced_input/color_engine.py +19 -4
  168. plugins/enhanced_input/config.py +2 -2
  169. plugins/enhanced_input_plugin.py +61 -11
  170. plugins/fullscreen/__init__.py +6 -2
  171. plugins/fullscreen/example_plugin.py +1035 -222
  172. plugins/fullscreen/setup_wizard_plugin.py +592 -0
  173. plugins/fullscreen/space_shooter_plugin.py +131 -0
  174. plugins/hook_monitoring_plugin.py +436 -78
  175. plugins/query_enhancer_plugin.py +66 -30
  176. plugins/resume_conversation_plugin.py +1494 -0
  177. plugins/save_conversation_plugin.py +98 -32
  178. plugins/system_commands_plugin.py +70 -56
  179. plugins/tmux_plugin.py +154 -78
  180. plugins/workflow_enforcement_plugin.py +94 -92
  181. system_prompt/default.md +952 -886
  182. core/io/input_mode_manager.py +0 -402
  183. core/io/modal_interaction_handler.py +0 -315
  184. core/io/raw_input_processor.py +0 -946
  185. core/storage/__init__.py +0 -5
  186. core/storage/state_manager.py +0 -84
  187. core/ui/widget_integration.py +0 -222
  188. core/utils/key_reader.py +0 -171
  189. kollabor-0.4.9.dist-info/RECORD +0 -128
  190. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/WHEEL +0 -0
  191. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/entry_points.txt +0 -0
  192. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,881 @@
1
+ <!-- SQL Query Optimization skill - write efficient database queries and optimize performance -->
2
+
3
+ sql optimization mode: PERFORMANCE-FOCUSED QUERIES
4
+
5
+ when this skill is active, you follow disciplined sql optimization practices.
6
+ this is a comprehensive guide to writing efficient, scalable database queries.
7
+
8
+
9
+ PHASE 0: SQL ENVIRONMENT VERIFICATION
10
+
11
+ before writing ANY sql queries, verify your database environment.
12
+
13
+
14
+ check database connectivity
15
+
16
+ <terminal>python -c "import sqlite3; print('sqlite3 available')" 2>/dev/null || echo "sqlite3 not available"</terminal>
17
+
18
+ if using postgresql:
19
+ <terminal>python -c "import psycopg2; print('psycopg2 available')" 2>/dev/null || pip install psycopg2-binary</terminal>
20
+
21
+ if using mysql:
22
+ <terminal>python -c "import pymysql; print('pymysql available')" 2>/dev/null || pip install pymysql</terminal>
23
+
24
+ verify sqlalchemy:
25
+ <terminal>python -c "import sqlalchemy; print(f'sqlalchemy {sqlalchemy.__version__}')" 2>/dev/null || pip install sqlalchemy</terminal>
26
+
27
+
28
+ check database files
29
+
30
+ <terminal>find . -maxdepth 2 -type f \( -name "*.db" -o -name "*.sqlite" -o -name "*.sql" \) 2>/dev/null | head -10</terminal>
31
+
32
+ list database sizes:
33
+ <terminal>find . -maxdepth 2 -type f \( -name "*.db" -o -name "*.sqlite" \) -exec ls -lh {} \; 2>/dev/null</terminal>
34
+
35
+
36
+ check existing query patterns
37
+
38
+ <terminal>find . -name "*.sql" -type f 2>/dev/null | head -10</terminal>
39
+
40
+ sample existing queries:
41
+ <terminal>find . -name "*.sql" -type f 2>/dev/null -exec head -50 {} \; | head -100</terminal>
42
+
43
+
44
+ check for query logging
45
+
46
+ <terminal>python -c "import logging; print('logging module ready')" 2>/dev/null</terminal>
47
+
48
+ if using sqlalchemy, enable query logging:
49
+ import logging
50
+ logging.basicConfig()
51
+ logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
52
+
53
+
54
+ verify database introspection tools
55
+
56
+ <terminal>python -c "import pandas; print('pandas available for query results')" 2>/dev/null</terminal>
57
+
58
+ verify read_sql:
59
+ <terminal>python -c "import pandas; import sqlite3; con = sqlite3.connect(':memory:'); print('pandas.read_sql available')" 2>/dev/null</terminal>
60
+
61
+
62
+ PHASE 1: SQL OPTIMIZATION MINDSET
63
+
64
+
65
+ understand the data before querying
66
+
67
+ optimization starts with understanding:
68
+ - table sizes and row counts
69
+ - existing indexes
70
+ - column types and distributions
71
+ - foreign key relationships
72
+ - query patterns and frequency
73
+
74
+ measure before optimizing:
75
+ - execution time
76
+ - rows examined
77
+ - index usage
78
+ - memory consumption
79
+
80
+ premature optimization is the root of all evil.
81
+
82
+
83
+ the optimization hierarchy
84
+
85
+ [1] eliminate unnecessary work
86
+ - select only needed columns
87
+ - filter early, filter often
88
+ - avoid cartesian products
89
+
90
+ [2] use indexes effectively
91
+ - index columns used in where, join, order by
92
+ - use composite indexes for multi-column queries
93
+ - avoid function calls on indexed columns
94
+
95
+ [3] minimize data movement
96
+ - use appropriate joins
97
+ - filter before joining when possible
98
+ - use subqueries wisely
99
+
100
+ [4] optimize query structure
101
+ - use exists instead of in for subqueries
102
+ - use union all instead of union
103
+ - avoid select distinct when possible
104
+
105
+
106
+ PHASE 2: QUERY PERFORMANCE ANALYSIS
107
+
108
+
109
+ explain plan analysis
110
+
111
+ sqlite explain:
112
+ import sqlite3
113
+
114
+ conn = sqlite3.connect('database.db')
115
+ cursor = conn.cursor()
116
+ cursor.execute("EXPLAIN QUERY PLAN SELECT * FROM users WHERE id = 1")
117
+ for row in cursor.fetchall():
118
+ print(row)
119
+
120
+ postgresql explain:
121
+ EXPLAIN ANALYZE SELECT * FROM users WHERE id = 1;
122
+
123
+ mysql explain:
124
+ EXPLAIN SELECT * FROM users WHERE id = 1;
125
+
126
+ what to look for:
127
+ - full table scans (bad)
128
+ - index scans (good)
129
+ - index seeks (better)
130
+ - key lookups (context-dependent)
131
+ - sort operations (can be avoided with indexes)
132
+
133
+
134
+ execution time measurement
135
+
136
+ python timing:
137
+ import time
138
+ import sqlite3
139
+ import pandas as pd
140
+
141
+ start = time.time()
142
+ df = pd.read_sql("SELECT * FROM large_table", conn)
143
+ elapsed = time.time() - start
144
+ print(f"Query executed in {elapsed:.2f} seconds")
145
+ print(f"Rows returned: {len(df)}")
146
+
147
+ multiple runs for average:
148
+ times = []
149
+ for i in range(5):
150
+ start = time.time()
151
+ df = pd.read_sql(query, conn)
152
+ times.append(time.time() - start)
153
+
154
+ avg_time = sum(times) / len(times)
155
+ print(f"Average execution time: {avg_time:.2f}s")
156
+
157
+
158
+ rows examined vs returned
159
+
160
+ sqlite:
161
+ SELECT COUNT(*) FROM table; -- total rows
162
+
163
+ postgresql:
164
+ EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM table;
165
+ -- compare the planner's estimated rows= with the actual rows=, and check "Rows Removed by Filter"
166
+
167
+ goal:
168
+ - rows examined should be close to rows returned
169
+ - large difference indicates missing index
170
+
171
+
172
+ PHASE 3: SELECT OPTIMIZATION
173
+
174
+
175
+ select only needed columns
176
+
177
+ bad:
178
+ SELECT * FROM orders
179
+
180
+ good:
181
+ SELECT id, customer_id, order_date, total_amount
182
+ FROM orders
183
+
184
+ why:
185
+ - reduces i/o
186
+ - reduces memory usage
187
+ - reduces network transfer
188
+ - enables index-only scans
189
+
190
+
191
+ avoid select distinct when possible
192
+
193
+ bad:
194
+ SELECT DISTINCT customer_id FROM orders
195
+
196
+ better:
197
+ SELECT customer_id FROM orders GROUP BY customer_id
198
+
199
+ best (if a customers table exists with one row per customer id):
200
+ SELECT id FROM customers WHERE EXISTS (
201
+ SELECT 1 FROM orders WHERE customer_id = customers.id
202
+ )
203
+
204
+
205
+ use limit for testing
206
+
207
+ before running on full dataset:
208
+ SELECT * FROM large_table LIMIT 100
209
+
210
+ verify query logic and performance:
211
+ SELECT * FROM expensive_query LIMIT 1000
212
+
213
+
214
+ avoid function calls in where clause
215
+
216
+ bad (prevents index usage):
217
+ WHERE UPPER(name) = 'JOHN'
218
+ WHERE DATE(created_at) = '2024-01-15'
219
+ WHERE YEAR(created_at) = 2024
220
+
221
+ good:
222
+ WHERE name = 'JOHN' COLLATE NOCASE -- sqlite
223
+ WHERE created_at >= '2024-01-15' AND created_at < '2024-01-16'
224
+ WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01'
225
+
226
+
227
+ PHASE 4: WHERE CLAUSE OPTIMIZATION
228
+
229
+
230
+ filter early, filter often
231
+
232
+ bad:
233
+ SELECT * FROM orders, customers
234
+ WHERE orders.customer_id = customers.id
235
+ AND customers.active = 1
236
+
237
+ good:
238
+ SELECT o.*
239
+ FROM orders o
240
+ INNER JOIN customers c ON o.customer_id = c.id
241
+ WHERE c.active = 1
242
+
243
+
244
+ use indexed columns first
245
+
246
+ in where clause, list indexed columns first (for readability; most optimizers reorder predicates themselves):
247
+ WHERE indexed_column = value AND non_indexed = value
248
+
249
+
250
+ use exists instead of in for subqueries
251
+
252
+ bad (may scan entire table):
253
+ SELECT * FROM orders
254
+ WHERE customer_id IN (SELECT id FROM customers WHERE active = 1)
255
+
256
+ good (stops at first match):
257
+ SELECT * FROM orders o
258
+ WHERE EXISTS (
259
+ SELECT 1 FROM customers c
260
+ WHERE c.id = o.customer_id AND c.active = 1
261
+ )
262
+
263
+
264
+ use between for ranges
265
+
266
+ bad:
267
+ WHERE date >= '2024-01-01' AND date <= '2024-12-31'
268
+
269
+ good:
270
+ WHERE date BETWEEN '2024-01-01' AND '2024-12-31'
271
+
272
+ note: between is inclusive on both ends
273
+
274
+
275
+ PHASE 5: JOIN OPTIMIZATION
276
+
277
+
278
+ choose correct join type
279
+
280
+ inner join:
281
+ - only matching rows
282
+ - fastest when most rows match
283
+ - use when you only want related data
284
+
285
+ left join:
286
+ - all rows from left table
287
+ - often slower, and unmatched rows produce nulls you must handle
288
+ - use when you need unmatched rows
289
+
290
+ cross join:
291
+ - cartesian product
292
+ - very slow, rarely needed
293
+ - avoid unless specifically required
294
+
295
+
296
+ join order matters
297
+
298
+ join smaller tables first:
299
+ SELECT *
300
+ FROM small_table s
301
+ JOIN medium_table m ON s.id = m.small_id
302
+ JOIN large_table l ON m.id = l.medium_id
303
+
304
+ filter before joining:
305
+ SELECT *
306
+ FROM orders o
307
+ INNER JOIN (
308
+ SELECT id, name FROM customers WHERE active = 1
309
+ ) c ON o.customer_id = c.id
310
+
311
+
312
+ use alias for readability
313
+
314
+ good:
315
+ SELECT o.id, o.total, c.name
316
+ FROM orders o
317
+ INNER JOIN customers c ON o.customer_id = c.id
318
+
319
+
320
+ avoid joining on nullable columns
321
+
322
+ nullable columns in joins:
323
+ - prevent index usage
324
+ - slow down queries
325
+ - may produce unexpected nulls
326
+
327
+ use not null columns when possible.
328
+
329
+
330
+ PHASE 6: INDEX OPTIMIZATION
331
+
332
+
333
+ create indexes on query columns
334
+
335
+ columns in where clauses:
336
+ CREATE INDEX idx_orders_customer_id ON orders(customer_id);
337
+
338
+ columns in joins:
339
+ CREATE INDEX idx_orders_customer_id ON orders(customer_id);
340
+ CREATE INDEX idx_customers_id ON customers(id);
341
+
342
+ columns in order by:
343
+ CREATE INDEX idx_orders_date ON orders(order_date);
344
+
345
+
346
+ composite indexes for multiple columns
347
+
348
+ order matters in composite indexes:
349
+ WHERE customer_id = 1 AND order_date > '2024-01-01'
350
+
351
+ create:
352
+ CREATE INDEX idx_orders_customer_date ON orders(customer_id, order_date);
353
+
354
+ note: put equality columns before range columns in the index; the order of conditions in the where clause itself does not matter
355
+
356
+
357
+ covering indexes for common queries
358
+
359
+ avoid table access entirely:
360
+ CREATE INDEX idx_orders_cover ON orders(customer_id, order_date, total_amount);
361
+
362
+ query uses only indexed columns:
363
+ SELECT order_date, total_amount
364
+ FROM orders
365
+ WHERE customer_id = 123;
366
+
367
+ this is an "index-only scan" - very fast.
368
+
369
+
370
+ check index usage
371
+
372
+ sqlite:
373
+ EXPLAIN QUERY PLAN SELECT * FROM orders WHERE customer_id = 1;
374
+ -- look for "USING INDEX"
375
+
376
+ postgresql:
377
+ EXPLAIN ANALYZE SELECT * FROM orders WHERE customer_id = 1;
378
+ -- look for "Index Scan" vs "Seq Scan"
379
+
380
+
381
+ remove unused indexes
382
+
383
+ indexes slow down inserts/updates.
384
+ remove indexes that are never used.
385
+
386
+ postgresql find unused indexes:
387
+ SELECT schemaname, relname, indexrelname, idx_scan
388
+ FROM pg_stat_user_indexes
389
+ WHERE idx_scan = 0;
390
+
391
+
392
+ PHASE 7: AGGREGATION OPTIMIZATION
393
+
394
+
395
+ use group by with indexes
396
+
397
+ query:
398
+ SELECT customer_id, COUNT(*), SUM(total_amount)
399
+ FROM orders
400
+ GROUP BY customer_id;
401
+
402
+ index should be on grouping column:
403
+ CREATE INDEX idx_orders_customer_id ON orders(customer_id);
404
+
405
+
406
+ use having for filtering aggregates
407
+
408
+ bad:
409
+ SELECT customer_id, COUNT(*) as order_count
410
+ FROM orders
411
+ GROUP BY customer_id
412
+ WHERE order_count > 10; -- error: where must come before group by and cannot filter on aggregates
413
+
414
+ good:
415
+ SELECT customer_id, COUNT(*) as order_count
416
+ FROM orders
417
+ GROUP BY customer_id
418
+ HAVING COUNT(*) > 10;
419
+
420
+
421
+ pre-aggregate in subqueries
422
+
423
+ for complex aggregations:
424
+ SELECT
425
+ c.name,
426
+ o.order_count,
427
+ o.total_amount
428
+ FROM customers c
429
+ INNER JOIN (
430
+ SELECT customer_id,
431
+ COUNT(*) as order_count,
432
+ SUM(total_amount) as total_amount
433
+ FROM orders
434
+ GROUP BY customer_id
435
+ HAVING COUNT(*) > 10
436
+ ) o ON c.id = o.customer_id;
437
+
438
+
439
+ window functions vs subqueries
440
+
441
+ use window functions when possible:
442
+ SELECT
443
+ id,
444
+ customer_id,
445
+ total_amount,
446
+ SUM(total_amount) OVER (
447
+ PARTITION BY customer_id
448
+ ORDER BY order_date
449
+ ) as running_total
450
+ FROM orders;
451
+
452
+ better than correlated subqueries.
453
+
454
+
455
+ PHASE 8: SUBQUERY OPTIMIZATION
456
+
457
+
458
+ use exists instead of in
459
+
460
+ bad:
461
+ SELECT * FROM orders o
462
+ WHERE o.customer_id IN (SELECT id FROM customers WHERE active = 1)
463
+
464
+ good:
465
+ SELECT * FROM orders o
466
+ WHERE EXISTS (
467
+ SELECT 1 FROM customers c
468
+ WHERE c.id = o.customer_id AND c.active = 1
469
+ )
470
+
471
+
472
+ use lateral joins (postgresql) instead of correlated subqueries
473
+
474
+ bad:
475
+ SELECT
476
+ c.id,
477
+ c.name,
478
+ (SELECT MAX(total_amount)
479
+ FROM orders o
480
+ WHERE o.customer_id = c.id) as max_order
481
+ FROM customers c
482
+
483
+ good:
484
+ SELECT
485
+ c.id,
486
+ c.name,
487
+ o.max_order
488
+ FROM customers c
489
+ LEFT JOIN LATERAL (
490
+ SELECT MAX(total_amount) as max_order
491
+ FROM orders o
492
+ WHERE o.customer_id = c.id
493
+ ) o ON true
494
+
495
+
496
+ materialize common subqueries
497
+
498
+ if subquery used multiple times:
499
+ WITH customer_totals AS (
500
+ SELECT
501
+ customer_id,
502
+ COUNT(*) as order_count,
503
+ SUM(total_amount) as total_amount
504
+ FROM orders
505
+ GROUP BY customer_id
506
+ )
507
+ SELECT c.name, ct.order_count, ct.total_amount
508
+ FROM customers c
509
+ INNER JOIN customer_totals ct ON c.id = ct.customer_id
510
+ WHERE ct.order_count > 10;
511
+
512
+
513
+ PHASE 9: UNION OPTIMIZATION
514
+
515
+
516
+ use union all instead of union
517
+
518
+ union removes duplicates (expensive):
519
+ SELECT name FROM employees_a
520
+ UNION
521
+ SELECT name FROM employees_b;
522
+
523
+ union all is faster (no duplicate removal):
524
+ SELECT name FROM employees_a
525
+ UNION ALL
526
+ SELECT name FROM employees_b;
527
+
528
+ use union all if you know data doesn't overlap or duplicates are acceptable.
529
+
530
+
531
+ avoid union when possible
532
+
533
+ if unions are from same table, use or:
534
+ SELECT * FROM orders
535
+ WHERE customer_id = 1 OR customer_id = 2;
536
+
537
+ instead of:
538
+ SELECT * FROM orders WHERE customer_id = 1
539
+ UNION
540
+ SELECT * FROM orders WHERE customer_id = 2;
541
+
542
+
543
+ PHASE 10: PAGINATION OPTIMIZATION
544
+
545
+
546
+ avoid offset for deep pagination
547
+
548
+ bad (slow for large offsets):
549
+ SELECT * FROM orders
550
+ ORDER BY order_date DESC
551
+ LIMIT 20 OFFSET 1000;
552
+
553
+ good (keyset pagination):
554
+ SELECT * FROM orders
555
+ WHERE order_date < '2024-01-01' -- last seen date
556
+ ORDER BY order_date DESC
557
+ LIMIT 20;
558
+
559
+ use last row's values for next page.
560
+
561
+
562
+ cursor-based pagination
563
+
564
+ store last id and use for next page:
565
+ -- page 1
566
+ SELECT * FROM orders
567
+ WHERE id > 0
568
+ ORDER BY id
569
+ LIMIT 20;
570
+
571
+ -- page 2 (last_id from page 1)
572
+ SELECT * FROM orders
573
+ WHERE id > 12345
574
+ ORDER BY id
575
+ LIMIT 20;
576
+
577
+
578
+ PHASE 11: DATA TYPE OPTIMIZATION
579
+
580
+
581
+ use appropriate data types
582
+
583
+ sqlite types:
584
+ - INTEGER: numbers, primary keys
585
+ - REAL: floating point
586
+ - TEXT: strings
587
+ - BLOB: binary data
588
+
589
+ postgresql types:
590
+ - SMALLINT: 2-byte integer
591
+ - INTEGER: 4-byte integer
592
+ - BIGINT: 8-byte integer
593
+ - DECIMAL/NUMERIC: precise decimal
594
+ - VARCHAR(n): variable-length string
595
+ - TEXT: unlimited string
596
+ - TIMESTAMP: date/time
597
+ - JSON: json data
598
+
599
+
600
+ use smallest sufficient type
601
+
602
+ bad (overkill):
603
+ BIGINT for ids under 1 million
604
+ VARCHAR(255) for 10-character codes
605
+
606
+ good:
607
+ INTEGER for ids up to 2 billion
608
+ VARCHAR(10) for 10-character codes
609
+
610
+
611
+ PHASE 12: QUERY CACHING
612
+
613
+
614
+ use parameterized queries
615
+
616
+ python with sqlite:
617
+ import sqlite3
618
+
619
+ conn = sqlite3.connect('database.db')
620
+ cursor = conn.cursor()
621
+
622
+ # parameterized query (safe, cacheable)
623
+ cursor.execute(
624
+ "SELECT * FROM orders WHERE customer_id = ?",
625
+ (customer_id,)
626
+ )
627
+
628
+ python with sqlalchemy:
629
+ from sqlalchemy import create_engine, text
630
+
631
+ engine = create_engine('sqlite:///database.db')
632
+
633
+ with engine.connect() as conn:
634
+ result = conn.execute(
635
+ text("SELECT * FROM orders WHERE customer_id = :cust_id"),
636
+ {"cust_id": customer_id}
637
+ )
638
+
639
+
640
+ prepare statements for repeated queries
641
+
642
+ postgresql:
643
+ PREPARE get_orders (INT) AS
644
+ SELECT * FROM orders WHERE customer_id = $1;
645
+
646
+ EXECUTE get_orders(123);
647
+ EXECUTE get_orders(456);
648
+ DEALLOCATE get_orders;
649
+
650
+
651
+ PHASE 13: DATABASE-SPECIFIC OPTIMIZATION
652
+
653
+
654
+ sqlite specific
655
+
656
+ enable wal mode for concurrent access:
657
+ PRAGMA journal_mode = WAL;
658
+
659
+ increase cache size:
660
+ PRAGMA cache_size = -10000; -- negative value = size in KiB (about 10MB of page cache)
661
+
662
+ reduce fsync overhead on writes (safe with wal mode):
663
+ PRAGMA synchronous = NORMAL; -- less durability, more speed
664
+
665
+
666
+ postgresql specific
667
+
668
+ use vacuum analyze after large changes:
669
+ VACUUM ANALYZE orders;
670
+
671
+ use parallel query for large scans:
672
+ SET max_parallel_workers_per_gather = 4;
673
+ SELECT * FROM large_table;
674
+
675
+ use partitioning for large tables:
676
+ CREATE TABLE orders (
677
+ id SERIAL,
678
+ order_date DATE,
679
+ ...
680
+ ) PARTITION BY RANGE (order_date);
681
+
682
+
683
+ mysql specific
684
+
685
+ use explain analyze for detailed plans (mysql 8.0.18+):
686
+ EXPLAIN ANALYZE SELECT * FROM orders;
687
+
688
+ optimize join buffer size:
689
+ SET join_buffer_size = 4194304;
690
+
691
+ use query cache (mysql 5.7 and earlier only; the query cache and SQL_CACHE were removed in mysql 8.0):
692
+ SELECT SQL_CACHE * FROM orders WHERE id = 1;
693
+
694
+
695
+ PHASE 14: COMMON ANTI-PATTERNS
696
+
697
+
698
+ anti-pattern: select *
699
+
700
+ problem:
701
+ - returns unnecessary columns
702
+ - prevents index-only scans
703
+ - increases i/o and memory
704
+
705
+ solution:
706
+ SELECT id, name, email FROM users;
707
+
708
+
709
+ anti-pattern: function in where clause
710
+
711
+ problem:
712
+ WHERE UPPER(name) = 'JOHN'
713
+ WHERE DATE(created_at) = '2024-01-01'
714
+
715
+ solution:
716
+ WHERE name = 'JOHN' COLLATE NOCASE
717
+ WHERE created_at >= '2024-01-01' AND created_at < '2024-01-02'
718
+
719
+
720
+ anti-pattern: order by on non-indexed column
721
+
722
+ problem:
723
+ SELECT * FROM large_table ORDER BY name
724
+ - requires full scan + sort
725
+ - slow on large datasets
726
+
727
+ solution:
728
+ CREATE INDEX idx_name ON large_table(name);
729
+ SELECT * FROM large_table ORDER BY name
730
+
731
+
732
+ anti-pattern: excessive joins
733
+
734
+ problem:
735
+ - joining 10+ tables
736
+ - complex join conditions
737
+ - performance degrades exponentially
738
+
739
+ solution:
740
+ - break into multiple queries
741
+ - use temporary tables
742
+ - use materialized views
743
+
744
+
745
+ anti-pattern: n+1 queries
746
+
747
+ problem:
748
+ for each customer:
749
+ SELECT * FROM orders WHERE customer_id = ?
750
+
751
+ solution:
752
+ SELECT * FROM orders WHERE customer_id IN (1, 2, 3, ...)
753
+
754
+
755
+ PHASE 15: SQL OPTIMIZATION CHECKLIST
756
+
757
+
758
+ before writing query
759
+
760
+ [ ] do you understand the data structure?
761
+ [ ] do you know table sizes?
762
+ [ ] do you know existing indexes?
763
+ [ ] do you know query frequency?
764
+
765
+
766
+ writing the query
767
+
768
+ [ ] select only needed columns
769
+ [ ] filter early in where clause
770
+ [ ] use indexed columns in where/join/order by
771
+ [ ] use appropriate join types
772
+ [ ] avoid functions on indexed columns
773
+ [ ] use exists instead of in for subqueries
774
+
775
+
776
+ after writing query
777
+
778
+ [ ] run explain plan
779
+ [ ] check for full table scans
780
+ [ ] check index usage
781
+ [ ] measure execution time
782
+ [ ] test with sample data
783
+
784
+
785
+ optimization
786
+
787
+ [ ] create missing indexes
788
+ [ ] consider composite indexes
789
+ [ ] consider covering indexes
790
+ [ ] rewrite subqueries as joins
791
+ [ ] use window functions if appropriate
792
+
793
+
794
+ verification
795
+
796
+ [ ] re-run explain plan
797
+ [ ] compare execution times
798
+ [ ] test with realistic data volume
799
+ [ ] verify results are correct
800
+
801
+
802
+ PHASE 16: MANDATORY RULES
803
+
804
+
805
+ while this skill is active, these rules are MANDATORY:
806
+
807
+ [1] ALWAYS RUN EXPLAIN PLAN before optimization
808
+ never optimize without understanding current plan
809
+ measure before and after
810
+
811
+ [2] NEVER USE SELECT * IN PRODUCTION QUERIES
812
+ always specify columns
813
+ prevents unnecessary data transfer
814
+
815
+ [3] ALWAYS FILTER EARLY
816
+ apply where clauses as early as possible
817
+ reduce working set size
818
+
819
+ [4] ALWAYS USE INDEXED COLUMNS IN WHERE/JOIN/ORDER BY
820
+ verify indexes exist
821
+ create if missing
822
+
823
+ [5] NEVER USE FUNCTIONS ON INDEXED COLUMNS IN WHERE CLAUSE
824
+ this prevents index usage
825
+ rewrite to use sargable expressions
826
+
827
+ [6] ALWAYS USE PARAMETERIZED QUERIES
828
+ prevents sql injection
829
+ enables statement and query-plan reuse
830
+
831
+ [7] ALWAYS MEASURE EXECUTION TIME
832
+ optimize based on measurements
833
+ not guesses
834
+
835
+ [8] NEVER IGNORE FULL TABLE SCANS
836
+ investigate why index not used
837
+ fix the problem
838
+
839
+ [9] ALWAYS TEST WITH REALISTIC DATA VOLUMES
840
+ performance on 100 rows differs from 10 million rows
841
+ use production-like data
842
+
843
+ [10] ALWAYS VERIFY CORRECTNESS AFTER OPTIMIZATION
844
+ faster but wrong = useless
845
+ ensure results are identical
846
+
847
+
848
+ FINAL REMINDERS
849
+
850
+
851
+ optimization is iterative
852
+
853
+ start with working query.
854
+ measure performance.
855
+ apply one optimization.
856
+ measure again.
857
+ repeat until acceptable.
858
+
859
+
860
+ data characteristics matter
861
+
862
+ optimizations that work for one dataset
863
+ may not work for another.
864
+ understand your data distribution.
865
+
866
+
867
+ readability matters
868
+
869
+ optimized but unreadable code
870
+ is hard to maintain.
871
+ balance performance with clarity.
872
+
873
+
874
+ document decisions
875
+
876
+ why did you add this index?
877
+ why did you rewrite this query?
878
+ future developers need to know.
879
+
880
+
881
+ now optimize those queries.