@memberjunction/db-auto-doc 2.117.0 → 2.119.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. package/README.md +803 -165
  2. package/bin/run.js +7 -0
  3. package/dist/api/DBAutoDocAPI.d.ts +252 -0
  4. package/dist/api/DBAutoDocAPI.d.ts.map +1 -0
  5. package/dist/api/DBAutoDocAPI.js +530 -0
  6. package/dist/api/DBAutoDocAPI.js.map +1 -0
  7. package/dist/api/index.d.ts +7 -0
  8. package/dist/api/index.d.ts.map +1 -0
  9. package/dist/api/index.js +10 -0
  10. package/dist/api/index.js.map +1 -0
  11. package/dist/commands/analyze.d.ts +6 -4
  12. package/dist/commands/analyze.d.ts.map +1 -1
  13. package/dist/commands/analyze.js +58 -71
  14. package/dist/commands/analyze.js.map +1 -1
  15. package/dist/commands/export.d.ts +14 -4
  16. package/dist/commands/export.d.ts.map +1 -1
  17. package/dist/commands/export.js +156 -61
  18. package/dist/commands/export.js.map +1 -1
  19. package/dist/commands/generate-queries.d.ts +17 -0
  20. package/dist/commands/generate-queries.d.ts.map +1 -0
  21. package/dist/commands/generate-queries.js +182 -0
  22. package/dist/commands/generate-queries.js.map +1 -0
  23. package/dist/commands/init.d.ts +3 -4
  24. package/dist/commands/init.d.ts.map +1 -1
  25. package/dist/commands/init.js +206 -144
  26. package/dist/commands/init.js.map +1 -1
  27. package/dist/commands/reset.d.ts +4 -1
  28. package/dist/commands/reset.d.ts.map +1 -1
  29. package/dist/commands/reset.js +33 -19
  30. package/dist/commands/reset.js.map +1 -1
  31. package/dist/commands/status.d.ts +10 -0
  32. package/dist/commands/status.d.ts.map +1 -0
  33. package/dist/commands/status.js +66 -0
  34. package/dist/commands/status.js.map +1 -0
  35. package/dist/core/AnalysisEngine.d.ts +108 -0
  36. package/dist/core/AnalysisEngine.d.ts.map +1 -0
  37. package/dist/core/AnalysisEngine.js +716 -0
  38. package/dist/core/AnalysisEngine.js.map +1 -0
  39. package/dist/core/AnalysisOrchestrator.d.ts +41 -0
  40. package/dist/core/AnalysisOrchestrator.d.ts.map +1 -0
  41. package/dist/core/AnalysisOrchestrator.js +377 -0
  42. package/dist/core/AnalysisOrchestrator.js.map +1 -0
  43. package/dist/core/BackpropagationEngine.d.ts +32 -0
  44. package/dist/core/BackpropagationEngine.d.ts.map +1 -0
  45. package/dist/core/BackpropagationEngine.js +121 -0
  46. package/dist/core/BackpropagationEngine.js.map +1 -0
  47. package/dist/core/ConvergenceDetector.d.ts +27 -0
  48. package/dist/core/ConvergenceDetector.d.ts.map +1 -0
  49. package/dist/core/ConvergenceDetector.js +92 -0
  50. package/dist/core/ConvergenceDetector.js.map +1 -0
  51. package/dist/core/GuardrailsManager.d.ts +78 -0
  52. package/dist/core/GuardrailsManager.d.ts.map +1 -0
  53. package/dist/core/GuardrailsManager.js +367 -0
  54. package/dist/core/GuardrailsManager.js.map +1 -0
  55. package/dist/core/index.d.ts +7 -0
  56. package/dist/core/index.d.ts.map +1 -0
  57. package/dist/core/index.js +13 -0
  58. package/dist/core/index.js.map +1 -0
  59. package/dist/database/Database.d.ts +56 -0
  60. package/dist/database/Database.d.ts.map +1 -0
  61. package/dist/database/Database.js +172 -0
  62. package/dist/database/Database.js.map +1 -0
  63. package/dist/database/TopologicalSorter.d.ts +25 -0
  64. package/dist/database/TopologicalSorter.d.ts.map +1 -0
  65. package/dist/database/TopologicalSorter.js +150 -0
  66. package/dist/database/TopologicalSorter.js.map +1 -0
  67. package/dist/database/index.d.ts +6 -0
  68. package/dist/database/index.d.ts.map +1 -0
  69. package/dist/database/index.js +14 -0
  70. package/dist/database/index.js.map +1 -0
  71. package/dist/discovery/ColumnStatsCache.d.ts +91 -0
  72. package/dist/discovery/ColumnStatsCache.d.ts.map +1 -0
  73. package/dist/discovery/ColumnStatsCache.js +231 -0
  74. package/dist/discovery/ColumnStatsCache.js.map +1 -0
  75. package/dist/discovery/DiscoveryEngine.d.ts +100 -0
  76. package/dist/discovery/DiscoveryEngine.d.ts.map +1 -0
  77. package/dist/discovery/DiscoveryEngine.js +726 -0
  78. package/dist/discovery/DiscoveryEngine.js.map +1 -0
  79. package/dist/discovery/DiscoveryTriggerAnalyzer.d.ts +57 -0
  80. package/dist/discovery/DiscoveryTriggerAnalyzer.d.ts.map +1 -0
  81. package/dist/discovery/DiscoveryTriggerAnalyzer.js +186 -0
  82. package/dist/discovery/DiscoveryTriggerAnalyzer.js.map +1 -0
  83. package/dist/discovery/FKDetector.d.ts +47 -0
  84. package/dist/discovery/FKDetector.d.ts.map +1 -0
  85. package/dist/discovery/FKDetector.js +317 -0
  86. package/dist/discovery/FKDetector.js.map +1 -0
  87. package/dist/discovery/LLMDiscoveryValidator.d.ts +64 -0
  88. package/dist/discovery/LLMDiscoveryValidator.d.ts.map +1 -0
  89. package/dist/discovery/LLMDiscoveryValidator.js +431 -0
  90. package/dist/discovery/LLMDiscoveryValidator.js.map +1 -0
  91. package/dist/discovery/LLMSanityChecker.d.ts +38 -0
  92. package/dist/discovery/LLMSanityChecker.d.ts.map +1 -0
  93. package/dist/discovery/LLMSanityChecker.js +156 -0
  94. package/dist/discovery/LLMSanityChecker.js.map +1 -0
  95. package/dist/discovery/PKDetector.d.ts +62 -0
  96. package/dist/discovery/PKDetector.d.ts.map +1 -0
  97. package/dist/discovery/PKDetector.js +436 -0
  98. package/dist/discovery/PKDetector.js.map +1 -0
  99. package/dist/discovery/index.d.ts +9 -0
  100. package/dist/discovery/index.d.ts.map +1 -0
  101. package/dist/discovery/index.js +25 -0
  102. package/dist/discovery/index.js.map +1 -0
  103. package/dist/drivers/BaseAutoDocDriver.d.ts +132 -0
  104. package/dist/drivers/BaseAutoDocDriver.d.ts.map +1 -0
  105. package/dist/drivers/BaseAutoDocDriver.js +121 -0
  106. package/dist/drivers/BaseAutoDocDriver.js.map +1 -0
  107. package/dist/drivers/MySQLDriver.d.ts +61 -0
  108. package/dist/drivers/MySQLDriver.d.ts.map +1 -0
  109. package/dist/drivers/MySQLDriver.js +668 -0
  110. package/dist/drivers/MySQLDriver.js.map +1 -0
  111. package/dist/drivers/PostgreSQLDriver.d.ts +65 -0
  112. package/dist/drivers/PostgreSQLDriver.d.ts.map +1 -0
  113. package/dist/drivers/PostgreSQLDriver.js +704 -0
  114. package/dist/drivers/PostgreSQLDriver.js.map +1 -0
  115. package/dist/drivers/SQLServerDriver.d.ts +61 -0
  116. package/dist/drivers/SQLServerDriver.d.ts.map +1 -0
  117. package/dist/drivers/SQLServerDriver.js +667 -0
  118. package/dist/drivers/SQLServerDriver.js.map +1 -0
  119. package/dist/generators/CSVGenerator.d.ts +35 -0
  120. package/dist/generators/CSVGenerator.d.ts.map +1 -0
  121. package/dist/generators/CSVGenerator.js +154 -0
  122. package/dist/generators/CSVGenerator.js.map +1 -0
  123. package/dist/generators/HTMLGenerator.d.ts +29 -0
  124. package/dist/generators/HTMLGenerator.d.ts.map +1 -0
  125. package/dist/generators/HTMLGenerator.js +710 -0
  126. package/dist/generators/HTMLGenerator.js.map +1 -0
  127. package/dist/generators/MarkdownGenerator.d.ts +27 -0
  128. package/dist/generators/MarkdownGenerator.d.ts.map +1 -0
  129. package/dist/generators/MarkdownGenerator.js +361 -0
  130. package/dist/generators/MarkdownGenerator.js.map +1 -0
  131. package/dist/generators/MermaidGenerator.d.ts +35 -0
  132. package/dist/generators/MermaidGenerator.d.ts.map +1 -0
  133. package/dist/generators/MermaidGenerator.js +321 -0
  134. package/dist/generators/MermaidGenerator.js.map +1 -0
  135. package/dist/generators/ReportGenerator.d.ts +22 -0
  136. package/dist/generators/ReportGenerator.d.ts.map +1 -0
  137. package/dist/generators/ReportGenerator.js +176 -0
  138. package/dist/generators/ReportGenerator.js.map +1 -0
  139. package/dist/generators/SQLGenerator.d.ts +31 -0
  140. package/dist/generators/SQLGenerator.d.ts.map +1 -0
  141. package/dist/generators/SQLGenerator.js +168 -0
  142. package/dist/generators/SQLGenerator.js.map +1 -0
  143. package/dist/generators/SampleQueryGenerator.d.ts +64 -0
  144. package/dist/generators/SampleQueryGenerator.d.ts.map +1 -0
  145. package/dist/generators/SampleQueryGenerator.js +500 -0
  146. package/dist/generators/SampleQueryGenerator.js.map +1 -0
  147. package/dist/generators/index.d.ts +10 -0
  148. package/dist/generators/index.d.ts.map +1 -0
  149. package/dist/generators/index.js +19 -0
  150. package/dist/generators/index.js.map +1 -0
  151. package/dist/index.d.ts +11 -20
  152. package/dist/index.d.ts.map +1 -1
  153. package/dist/index.js +19 -20
  154. package/dist/index.js.map +1 -1
  155. package/dist/prompts/PromptEngine.d.ts +65 -0
  156. package/dist/prompts/PromptEngine.d.ts.map +1 -0
  157. package/dist/prompts/PromptEngine.js +305 -0
  158. package/dist/prompts/PromptEngine.js.map +1 -0
  159. package/dist/prompts/PromptFileLoader.d.ts +21 -0
  160. package/dist/prompts/PromptFileLoader.d.ts.map +1 -0
  161. package/dist/prompts/PromptFileLoader.js +74 -0
  162. package/dist/prompts/PromptFileLoader.js.map +1 -0
  163. package/dist/prompts/index.d.ts +6 -0
  164. package/dist/prompts/index.d.ts.map +1 -0
  165. package/dist/prompts/index.js +11 -0
  166. package/dist/prompts/index.js.map +1 -0
  167. package/dist/state/IterationTracker.d.ts +64 -0
  168. package/dist/state/IterationTracker.d.ts.map +1 -0
  169. package/dist/state/IterationTracker.js +136 -0
  170. package/dist/state/IterationTracker.js.map +1 -0
  171. package/dist/state/StateManager.d.ts +79 -0
  172. package/dist/state/StateManager.d.ts.map +1 -0
  173. package/dist/state/StateManager.js +348 -0
  174. package/dist/state/StateManager.js.map +1 -0
  175. package/dist/state/StateValidator.d.ts +24 -0
  176. package/dist/state/StateValidator.d.ts.map +1 -0
  177. package/dist/state/StateValidator.js +147 -0
  178. package/dist/state/StateValidator.js.map +1 -0
  179. package/dist/state/index.d.ts +7 -0
  180. package/dist/state/index.d.ts.map +1 -0
  181. package/dist/state/index.js +13 -0
  182. package/dist/state/index.js.map +1 -0
  183. package/dist/types/analysis.d.ts +76 -0
  184. package/dist/types/analysis.d.ts.map +1 -0
  185. package/dist/types/analysis.js +6 -0
  186. package/dist/types/analysis.js.map +1 -0
  187. package/dist/types/config.d.ts +143 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +7 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/discovery.d.ts +277 -0
  192. package/dist/types/discovery.d.ts.map +1 -0
  193. package/dist/types/discovery.js +7 -0
  194. package/dist/types/discovery.js.map +1 -0
  195. package/dist/types/driver.d.ts +148 -0
  196. package/dist/types/driver.d.ts.map +1 -0
  197. package/dist/types/driver.js +7 -0
  198. package/dist/types/driver.js.map +1 -0
  199. package/dist/types/index.d.ts +8 -0
  200. package/dist/types/index.d.ts.map +1 -0
  201. package/dist/types/index.js +24 -0
  202. package/dist/types/index.js.map +1 -0
  203. package/dist/types/prompts.d.ts +158 -0
  204. package/dist/types/prompts.d.ts.map +1 -0
  205. package/dist/types/prompts.js +6 -0
  206. package/dist/types/prompts.js.map +1 -0
  207. package/dist/types/sample-queries.d.ts +172 -0
  208. package/dist/types/sample-queries.d.ts.map +1 -0
  209. package/dist/types/sample-queries.js +7 -0
  210. package/dist/types/sample-queries.js.map +1 -0
  211. package/dist/types/state.d.ts +291 -0
  212. package/dist/types/state.d.ts.map +1 -0
  213. package/dist/types/state.js +7 -0
  214. package/dist/types/state.js.map +1 -0
  215. package/dist/utils/config-loader.d.ts +29 -0
  216. package/dist/utils/config-loader.d.ts.map +1 -0
  217. package/dist/utils/config-loader.js +163 -0
  218. package/dist/utils/config-loader.js.map +1 -0
  219. package/dist/utils/index.d.ts +5 -0
  220. package/dist/utils/index.d.ts.map +1 -0
  221. package/dist/utils/index.js +9 -0
  222. package/dist/utils/index.js.map +1 -0
  223. package/package.json +28 -3
  224. package/dist/ai/simple-ai-client.d.ts +0 -70
  225. package/dist/ai/simple-ai-client.d.ts.map +0 -1
  226. package/dist/ai/simple-ai-client.js +0 -181
  227. package/dist/ai/simple-ai-client.js.map +0 -1
  228. package/dist/analyzers/analyzer.d.ts +0 -23
  229. package/dist/analyzers/analyzer.d.ts.map +0 -1
  230. package/dist/analyzers/analyzer.js +0 -127
  231. package/dist/analyzers/analyzer.js.map +0 -1
  232. package/dist/cli-old/cli.d.ts +0 -3
  233. package/dist/cli-old/cli.d.ts.map +0 -1
  234. package/dist/cli-old/cli.js +0 -388
  235. package/dist/cli-old/cli.js.map +0 -1
  236. package/dist/commands/review.d.ts +0 -11
  237. package/dist/commands/review.d.ts.map +0 -1
  238. package/dist/commands/review.js +0 -82
  239. package/dist/commands/review.js.map +0 -1
  240. package/dist/database/connection.d.ts +0 -40
  241. package/dist/database/connection.d.ts.map +0 -1
  242. package/dist/database/connection.js +0 -136
  243. package/dist/database/connection.js.map +0 -1
  244. package/dist/database/introspection.d.ts +0 -59
  245. package/dist/database/introspection.d.ts.map +0 -1
  246. package/dist/database/introspection.js +0 -124
  247. package/dist/database/introspection.js.map +0 -1
  248. package/dist/generators/markdown-generator.d.ts +0 -8
  249. package/dist/generators/markdown-generator.d.ts.map +0 -1
  250. package/dist/generators/markdown-generator.js +0 -106
  251. package/dist/generators/markdown-generator.js.map +0 -1
  252. package/dist/generators/sql-generator.d.ts +0 -20
  253. package/dist/generators/sql-generator.d.ts.map +0 -1
  254. package/dist/generators/sql-generator.js +0 -83
  255. package/dist/generators/sql-generator.js.map +0 -1
  256. package/dist/state/state-manager.d.ts +0 -95
  257. package/dist/state/state-manager.d.ts.map +0 -1
  258. package/dist/state/state-manager.js +0 -236
  259. package/dist/state/state-manager.js.map +0 -1
  260. package/dist/types/state-file.d.ts +0 -124
  261. package/dist/types/state-file.d.ts.map +0 -1
  262. package/dist/types/state-file.js +0 -79
  263. package/dist/types/state-file.js.map +0 -1
package/README.md CHANGED
@@ -1,244 +1,882 @@
1
- # Database Auto-Documentation Generator
1
+ # DBAutoDoc - AI-Powered Database Documentation Generator
2
2
 
3
- AI-powered documentation generator for SQL Server databases. Analyzes your database structure, uses AI to generate comprehensive table and column descriptions, and saves them as SQL Server extended properties.
4
-
5
- ## 🚀 **Standalone Tool - No MemberJunction Runtime Required**
6
-
7
- This tool works with **ANY** SQL Server database. You don't need MemberJunction installed or running.
3
+ Automatically generate comprehensive documentation for SQL Server, MySQL, and PostgreSQL databases using AI. DBAutoDoc analyzes your database structure, uses Large Language Models to understand the purpose of tables and columns, and saves descriptions as database metadata (extended properties for SQL Server, comments for MySQL/PostgreSQL).
8
4
 
9
5
  ## Features
10
6
 
11
- - **🤖 AI-Powered**: Uses LLMs (OpenAI, Anthropic, etc.) to generate intelligent descriptions
12
- - **🔄 Human-in-Loop**: Interactive mode to provide context and approve AI-generated descriptions
13
- - **💾 State Management**: JSON state file tracks progress, user input, and AI generations across runs
14
- - **🎯 Incremental**: Only processes new or changed tables on subsequent runs
15
- - **🔍 Smart Analysis**:
16
- - Dependency graph analysis (documents root tables first)
17
- - Pattern detection (lookup tables, bridge tables, audit tables)
18
- - Data profiling (sample data, statistics, pattern recognition)
19
- - Constraint analysis (PKs, FKs, CHECK, UNIQUE)
20
- - **📊 Multiple Outputs**:
21
- - SQL scripts with `sp_addextendedproperty` statements
22
- - Markdown documentation
23
- - Updated state file for next run
7
+ ### Core Capabilities
8
+ - **🤖 AI-Powered Analysis** - Uses OpenAI, Anthropic, Google, or Groq to generate intelligent descriptions
9
+ - **🔄 Iterative Refinement** - Multi-pass analysis with backpropagation for accuracy
10
+ - **📊 Topological Processing** - Analyzes tables in dependency order for better context
11
+ - **📈 Data-Driven** - Leverages cardinality, statistics, and sample data for insights
12
+ - **🎯 Convergence Detection** - Automatically knows when analysis is complete
13
+ - **💾 State Tracking** - Full audit trail of all iterations and reasoning
14
+ - **🔌 Standalone** - Works with any supported database (SQL Server, PostgreSQL, MySQL); no MemberJunction runtime required
15
+
16
+ ### Multi-Database Support
17
+ - **SQL Server** - Full support with extended properties
18
+ - **PostgreSQL** - Complete implementation with COMMENT syntax
19
+ - **MySQL** - Full support with column/table comments
20
+ - **Unified Interface** - Single configuration approach across all databases
21
+
22
+ ### Advanced Features
23
+ - **🔍 Relationship Discovery** - Automatically detect missing primary and foreign keys using statistical analysis and LLM validation
24
+ - **🎯 Sample Query Generation** - Generate reference SQL queries for AI agents with alignment tracking
25
+ - **🛡️ Granular Guardrails** - Multi-level resource controls (run, phase, iteration limits)
26
+ - **⏸️ Resume Capability** - Pause and resume analysis from checkpoint state files
27
+ - **📦 Programmatic API** - Use as a library in your own applications
28
+ - **🔧 Extensible** - Custom database drivers and analysis plugins
29
+
30
+ ### Output Formats
31
+ - **SQL Scripts** - Database-specific metadata scripts (extended properties, comments)
32
+ - **Markdown Documentation** - Human-readable docs with ERD diagrams
33
+ - **HTML Documentation** - Interactive, searchable documentation with embedded CSS/JS
34
+ - **CSV Exports** - Spreadsheet-ready table and column data
35
+ - **Mermaid Diagrams** - Standalone ERD files (.mmd and .html)
36
+ - **Analysis Reports** - Detailed metrics and quality assessments
24
37
 
25
38
  ## Installation
26
39
 
40
+ ### Global Installation (Recommended for DBAs)
41
+
27
42
  ```bash
28
- # Install globally (for standalone use)
29
43
  npm install -g @memberjunction/db-auto-doc
44
+ ```
45
+
46
+ ### Within a MemberJunction Project
47
+
48
+ ```bash
49
+ npm install @memberjunction/db-auto-doc
50
+ ```
30
51
 
31
- # Or use with npx
32
- npx @memberjunction/db-auto-doc
52
+ ### As a Library Dependency
33
53
 
34
- # Or use via MJ CLI (if you have MemberJunction installed)
35
- mj dbdoc --help
54
+ ```bash
55
+ npm install @memberjunction/db-auto-doc --save
36
56
  ```
37
57
 
38
58
  ## Quick Start
39
59
 
40
- ### Standalone CLI
60
+ ### 1. Initialize
41
61
 
42
62
  ```bash
43
- # 1. Initialize project
44
63
  db-auto-doc init
64
+ ```
65
+
66
+ This interactive wizard will:
67
+ - Configure database connection
68
+ - Set up AI provider (OpenAI, Anthropic, Google, or Groq)
69
+ - Configure guardrails and resource limits
70
+ - Optionally add seed context for better analysis
71
+ - Create `config.json`
45
72
 
46
- # 2. Edit .env and add your AI API key
47
- # AI_API_KEY=sk-your-key-here
73
+ ### 2. Analyze
48
74
 
49
- # 3. Analyze database
75
+ ```bash
50
76
  db-auto-doc analyze
77
+ ```
78
+
79
+ This will:
80
+ - Introspect your database structure
81
+ - Analyze data (cardinality, statistics, patterns)
82
+ - Optionally discover missing primary and foreign keys
83
+ - Build dependency graph
84
+ - Run iterative AI analysis with backpropagation
85
+ - Perform sanity checks
86
+ - Save state to `db-doc-state.json`
87
+
88
+ ### 3. Generate Sample Queries (Optional)
89
+
90
+ Generate reference SQL queries for AI agent training:
91
+
92
+ ```bash
93
+ # During analysis (if enabled in config)
94
+ db-auto-doc analyze # Automatically generates queries
51
95
 
52
- # 4. Review results
53
- db-auto-doc review
96
+ # Or generate separately from existing state
97
+ db-auto-doc generate-queries --from-state ./output/run-1/state.json
54
98
 
55
- # 5. Export documentation
56
- db-auto-doc export --format=all
99
+ # With custom settings
100
+ db-auto-doc generate-queries --from-state ./output/run-1/state.json \
101
+ --queries-per-table 10 \
102
+ --max-execution-time 60000 \
103
+ --output-dir ./queries
57
104
  ```
58
105
 
59
- ### Via MJ CLI (MemberJunction Users)
106
+ This generates:
107
+ - **sample-queries.json**: Full query specifications with SQL, metadata, and alignment info
108
+ - **sample-queries-summary.json**: Execution statistics, token usage, and cost breakdown
109
+
110
+ **Configuration Options:**
111
+ ```jsonc
112
+ {
113
+ "analysis": {
114
+ "sampleQueryGeneration": {
115
+ "enabled": true, // Enable sample query generation
116
+ "queriesPerTable": 5, // Number of queries per table
117
+ "maxTables": 10, // Max tables to process (0 = all tables)
118
+ "tokenBudget": 100000, // Token limit (0 = unlimited)
119
+ "maxExecutionTime": 30000, // Query validation timeout (ms)
120
+ "includeMultiQueryPatterns": true, // Generate related query patterns
121
+ "validateAlignment": true, // Validate alignment between queries
122
+ "maxRowsInSample": 10 // Sample result rows to capture
123
+ }
124
+ }
125
+ }
126
+ ```
127
+
128
+ **Key Configuration Settings:**
129
+ - **`maxTables`**: Controls table selection
130
+ - `10` (default) - Generate queries for top 10 most important tables
131
+ - `0` - Generate queries for **all tables** with data
132
+ - Custom value - Generate queries for top N tables
133
+
134
+ - **`tokenBudget`**: Controls LLM token usage and cost
135
+ - `100000` (default) - Limit to 100K tokens (~$0.50-1.00 with GPT-4o)
136
+ - `0` - **Unlimited** token budget (useful with `maxTables: 0`)
137
+ - Custom value - Set specific token limit for cost control
138
+
139
+ **Example Configurations:**
140
+
141
+ *Cost-conscious (default):*
142
+ ```json
143
+ {
144
+ "maxTables": 10,
145
+ "tokenBudget": 100000
146
+ }
147
+ ```
148
+
149
+ *Medium coverage (~25 tables):*
150
+ ```json
151
+ {
152
+ "maxTables": 25,
153
+ "tokenBudget": 500000
154
+ }
155
+ ```
156
+
157
+ *Complete coverage (all tables):*
158
+ ```json
159
+ {
160
+ "maxTables": 0,
161
+ "tokenBudget": 0
162
+ }
163
+ ```
164
+
165
+ **Model Recommendations:**
166
+ - ✅ **GPT-4o** - Best balance of speed, cost, and quality (~$6-10 for 50 tables)
167
+ - ✅ **Claude 3.5 Sonnet** - High quality, good reasoning about alignment
168
+ - ⚠️ **GPT-5** - Very slow (reasoning model), doesn't support JSON format, expensive
169
+ - ⚠️ **Groq** - Fast and cheap but may struggle with complex alignment
170
+
171
+ ### 4. Export
60
172
 
61
173
  ```bash
62
- # Same commands, different prefix
63
- mj dbdoc init
64
- mj dbdoc analyze --schemas=dbo
65
- mj dbdoc review --unapproved-only
66
- mj dbdoc export --approved-only --execute
174
+ db-auto-doc export --sql --markdown --html --csv --mermaid
67
175
  ```
68
176
 
69
- ### Programmatic Usage
177
+ This generates:
178
+ - **SQL Script**: Database-specific metadata statements
179
+ - **Markdown Documentation**: Human-readable docs with ERD links
180
+ - **HTML Documentation**: Interactive searchable documentation
181
+ - **CSV Files**: tables.csv and columns.csv for spreadsheet analysis
182
+ - **Mermaid Diagrams**: erd.mmd and erd.html for visualization
70
183
 
71
- ```typescript
72
- import {
73
- DatabaseConnection,
74
- StateManager,
75
- DatabaseAnalyzer,
76
- SimpleAIClient,
77
- } from '@memberjunction/db-auto-doc';
184
+ Optionally apply directly to database:
78
185
 
79
- // Initialize
80
- const connection = DatabaseConnection.fromEnv();
81
- const stateManager = new StateManager();
82
- const aiClient = new SimpleAIClient();
83
- const analyzer = new DatabaseAnalyzer(connection, stateManager, aiClient);
186
+ ```bash
187
+ db-auto-doc export --sql --apply
188
+ ```
84
189
 
85
- // Analyze
86
- await analyzer.analyze({ schemas: ['dbo'] });
190
+ ### 5. Check Status
87
191
 
88
- // Export
89
- import { SQLGenerator, MarkdownGenerator } from '@memberjunction/db-auto-doc';
90
- const state = stateManager.getState();
91
- const sqlGen = new SQLGenerator();
92
- const sql = sqlGen.generate(state);
192
+ ```bash
193
+ db-auto-doc status
93
194
  ```
94
195
 
95
- ## CLI Commands
196
+ Shows:
197
+ - Analysis progress and phase completion
198
+ - Convergence status
199
+ - Low-confidence tables and columns
200
+ - Token usage, cost, and duration
201
+ - Guardrail status and warnings
96
202
 
97
- ### `db-auto-doc init`
98
- Initialize new documentation project
99
- - Prompts for database connection
100
- - Creates `.env` file
101
- - Creates `db-doc-state.json`
102
- - Optionally asks seed questions
203
+ ### 6. Resume Analysis
103
204
 
104
- ### `db-auto-doc analyze`
105
- Analyze database and generate documentation
106
- - `--interactive` - Ask questions during analysis
107
- - `--incremental` - Only process new/changed tables
108
- - `--schemas <schemas>` - Comma-separated schema list
109
- - `--batch` - Non-interactive mode
205
+ ```bash
206
+ db-auto-doc analyze --resume ./db-doc-state.json
207
+ ```
110
208
 
111
- ### `db-auto-doc review`
112
- Review and approve AI-generated documentation
113
- - `--schema <schema>` - Review specific schema
114
- - `--unapproved-only` - Only show unapproved items
209
+ This resumes a previous analysis from a checkpoint state file, which is useful for:
210
+ - Continuing after hitting guardrail limits
211
+ - Recovering from interruptions
212
+ - Incremental database updates
115
213
 
116
- ### `db-auto-doc export`
117
- Generate output files
118
- - `--format <format>` - sql|markdown|all (default: all)
119
- - `--output <path>` - Output directory
120
- - `--execute` - Execute SQL script (apply to database)
121
- - `--approved-only` - Only export approved items
214
+ ## How It Works
122
215
 
123
- ### `db-auto-doc reset`
124
- Reset state file
125
- - `--all` - Reset entire state file
216
+ ### Topological Analysis
126
217
 
127
- ## Configuration
218
+ DBAutoDoc processes tables in dependency order:
128
219
 
129
- Create a `.env` file:
220
+ ```
221
+ Level 0: Users, Products, Categories (no dependencies)
222
+
223
+ Level 1: Orders (depends on Users), ProductCategories (Products + Categories)
224
+
225
+ Level 2: OrderItems (depends on Orders + Products)
226
+
227
+ Level 3: Shipments (depends on OrderItems)
228
+ ```
130
229
 
131
- ```env
132
- # Database Connection
133
- DB_SERVER=localhost
134
- DB_DATABASE=YourDatabase
135
- DB_USER=sa
136
- DB_PASSWORD=YourPassword
137
- DB_ENCRYPT=true
138
- DB_TRUST_SERVER_CERTIFICATE=true
230
+ Processing in this order allows child tables to benefit from parent table context.
139
231
 
140
- # AI Configuration
141
- AI_PROVIDER=openai
142
- AI_MODEL=gpt-4
143
- AI_API_KEY=sk-your-api-key-here
232
+ ### Relationship Discovery
233
+
234
+ For legacy databases missing primary/foreign key constraints, DBAutoDoc can:
235
+ - **Detect Primary Keys** using statistical analysis (uniqueness, nullability, cardinality)
236
+ - **Find Foreign Keys** using value overlap analysis and naming patterns
237
+ - **Validate with an LLM** that discovered relationships make business sense
238
+ - **Backpropagate** to refine parent table analysis based on child relationships
239
+
240
+ Triggered automatically when:
241
+ - Tables lack primary key constraints
242
+ - Insufficient foreign key relationships detected (below threshold)
243
+
244
+ ### Sample Query Generation
245
+
246
+ DBAutoDoc can generate reference SQL queries for AI agents, solving the **query alignment problem** where multi-query patterns (summary + detail) have inconsistent filtering logic:
247
+
248
+ **The Problem:**
249
+ ```sql
250
+ -- Summary query
251
+ SELECT COUNT(*) FROM Registrations -- All registrations
252
+
253
+ -- Detail query
254
+ SELECT * FROM Registrations WHERE Status='Attended' -- Only attended
255
+
256
+ -- Result: Numbers don't match! Bad UX.
144
257
  ```
145
258
 
146
- ## State File
259
+ **The Solution:**
260
+ DBAutoDoc generates "gold standard" reference queries with:
261
+ - **Explicit Filtering Rules** - Documents filter logic for consistency
262
+ - **Alignment Tracking** - Links related queries via `relatedQueryIds`
263
+ - **Query Patterns** - Summary+Detail, Multi-Entity Drilldown, Time Series, etc.
264
+ - **Validation** - Executes queries and validates results
265
+ - **Few-Shot Training** - Use as examples for AI agent prompting
266
+
267
+ **Two-Prompt Architecture:**
268
+ 1. **Planning Phase** - AI plans which queries to create (lightweight, ~4K tokens)
269
+ 2. **Generation Phase** - AI generates SQL for each query individually (~3K tokens each)
270
+
271
+ This approach prevents JSON truncation issues while maintaining alignment context between related queries.
272
+
273
+ **Use Cases:**
274
+ - Training AI agents (such as Skip) to generate consistent multi-query patterns
275
+ - Creating reference examples for few-shot prompting
276
+ - Documenting common query patterns for your database
277
+ - Validating that related queries use consistent filtering logic
147
278
 
148
- The `db-doc-state.json` file tracks everything:
279
+ ### Backpropagation
280
+
281
+ After analyzing child tables, DBAutoDoc can detect insights about parent tables and trigger re-analysis:
282
+
283
+ ```
284
+ Level 0: "Persons" → Initially thinks: "General contact information"
285
+
286
+ Level 1: "Students" table reveals Persons.Type has values: Student, Teacher, Staff
287
+
288
+ BACKPROPAGATE to Level 0: "Persons" → Revise to: "School personnel with role-based typing"
289
+ ```
290
+
291
+ ### Convergence
292
+
293
+ Analysis stops when:
294
+ 1. **No changes** in last N iterations (stability window)
295
+ 2. **All tables** meet confidence threshold
296
+ 3. **Max iterations** reached
297
+ 4. **Guardrail limits** exceeded (tokens, cost, duration)
298
+
299
+ ### Granular Guardrails
300
+
301
+ Multi-level resource controls ensure analysis stays within bounds:
302
+
303
+ **Run-Level Limits**:
304
+ - `maxTokensPerRun`: Total token budget for entire analysis
305
+ - `maxDurationSeconds`: Maximum wall-clock time
306
+ - `maxCostDollars`: Maximum AI cost
307
+
308
+ **Phase-Level Limits**:
309
+ - `maxTokensPerPhase.discovery`: Budget for relationship discovery
310
+ - `maxTokensPerPhase.analysis`: Budget for description generation
311
+ - `maxTokensPerPhase.sanityChecks`: Budget for validation
312
+
313
+ **Iteration-Level Limits**:
314
+ - `maxTokensPerIteration`: Per-iteration token cap
315
+ - `maxIterationDurationSeconds`: Per-iteration time limit
316
+
317
+ **Warning Thresholds**:
318
+ - Configurable percentage-based warnings (default 80-85%)
319
+ - Early notification before hitting hard limits
320
+
321
+ ### Data Analysis
322
+
323
+ For each column, DBAutoDoc collects:
324
+ - **Cardinality**: Distinct value counts
325
+ - **Statistics**: Min, max, average, standard deviation
326
+ - **Patterns**: Common prefixes, format detection
327
+ - **Value Distribution**: The actual distinct values when a column has low cardinality
328
+ - **Sample Data**: Stratified sampling across value ranges
329
+
330
+ This rich context enables AI to make accurate inferences.
331
+
332
+ ## Configuration
333
+
334
+ ### SQL Server Configuration
149
335
 
150
336
  ```json
151
337
  {
152
- "version": "1.0",
338
+ "version": "1.0.0",
153
339
  "database": {
154
- "server": "localhost",
155
- "database": "MyDatabase"
340
+ "provider": "sqlserver",
341
+ "host": "localhost",
342
+ "database": "MyDatabase",
343
+ "user": "sa",
344
+ "password": "YourPassword",
345
+ "encrypt": true,
346
+ "trustServerCertificate": false
156
347
  },
157
- "seedContext": {
158
- "overallPurpose": "E-commerce platform",
159
- "businessDomains": ["Sales", "Inventory"]
348
+ "ai": {
349
+ "provider": "openai",
350
+ "model": "gpt-4-turbo-preview",
351
+ "apiKey": "sk-...",
352
+ "temperature": 0.1,
353
+ "maxTokens": 8000,
354
+ "effortLevel": 50
160
355
  },
161
- "schemas": {
162
- "dbo": {
163
- "tables": {
164
- "Customers": {
165
- "userNotes": "Merged from Stripe and internal CRM",
166
- "userApproved": true,
167
- "aiGenerated": {
168
- "description": "Primary customer records...",
169
- "confidence": 0.85
170
- },
171
- "finalDescription": "...",
172
- "columns": { }
173
- }
356
+ "analysis": {
357
+ "cardinalityThreshold": 20,
358
+ "sampleSize": 10,
359
+ "includeStatistics": true,
360
+ "includePatternAnalysis": true,
361
+ "convergence": {
362
+ "maxIterations": 50,
363
+ "stabilityWindow": 2,
364
+ "confidenceThreshold": 0.85
365
+ },
366
+ "backpropagation": {
367
+ "enabled": true,
368
+ "maxDepth": 3
369
+ },
370
+ "sanityChecks": {
371
+ "dependencyLevel": true,
372
+ "schemaLevel": true,
373
+ "crossSchema": true
374
+ },
375
+ "sampleQueryGeneration": {
376
+ "enabled": true,
377
+ "queriesPerTable": 5,
378
+ "maxExecutionTime": 30000,
379
+ "includeMultiQueryPatterns": true,
380
+ "validateAlignment": true,
381
+ "tokenBudget": 100000,
382
+ "maxRowsInSample": 10
383
+ },
384
+ "guardrails": {
385
+ "enabled": true,
386
+ "stopOnExceeded": true,
387
+ "maxTokensPerRun": 250000,
388
+ "maxDurationSeconds": 3600,
389
+ "maxCostDollars": 50,
390
+ "maxTokensPerPhase": {
391
+ "discovery": 100000,
392
+ "analysis": 150000,
393
+ "sanityChecks": 50000
394
+ },
395
+ "maxTokensPerIteration": 50000,
396
+ "maxIterationDurationSeconds": 600,
397
+ "warnThresholds": {
398
+ "tokenPercentage": 80,
399
+ "durationPercentage": 80,
400
+ "costPercentage": 80,
401
+ "iterationTokenPercentage": 85,
402
+ "phaseTokenPercentage": 85
403
+ }
404
+ },
405
+ "relationshipDiscovery": {
406
+ "enabled": true,
407
+ "triggers": {
408
+ "runOnMissingPKs": true,
409
+ "runOnInsufficientFKs": true,
410
+ "fkDeficitThreshold": 0.4
411
+ },
412
+ "tokenBudget": {
413
+ "ratioOfTotal": 0.4
414
+ },
415
+ "confidence": {
416
+ "primaryKeyMinimum": 0.7,
417
+ "foreignKeyMinimum": 0.6,
418
+ "llmValidationThreshold": 0.8
419
+ },
420
+ "sampling": {
421
+ "maxRowsPerTable": 1000,
422
+ "valueOverlapSampleSize": 100,
423
+ "statisticalSignificance": 100,
424
+ "compositeKeyMaxColumns": 3
425
+ },
426
+ "patterns": {
427
+ "primaryKeyNames": ["^id$", ".*_id$", "^pk_.*", ".*_key$"],
428
+ "foreignKeyNames": [".*_id$", ".*_fk$", "^fk_.*"]
429
+ },
430
+ "llmValidation": {
431
+ "enabled": true,
432
+ "batchSize": 10
433
+ },
434
+ "backpropagation": {
435
+ "enabled": true,
436
+ "maxIterations": 5
174
437
  }
175
438
  }
176
439
  },
177
- "runHistory": [ ]
440
+ "output": {
441
+ "stateFile": "./db-doc-state.json",
442
+ "outputDir": "./output",
443
+ "sqlFile": "./output/add-descriptions.sql",
444
+ "markdownFile": "./output/database-documentation.md"
445
+ },
446
+ "schemas": {
447
+ "exclude": ["sys", "INFORMATION_SCHEMA"]
448
+ },
449
+ "tables": {
450
+ "exclude": ["sysdiagrams", "__MigrationHistory"]
451
+ }
178
452
  }
179
453
  ```
180
454
 
181
- ## Example Workflow
455
+ ### PostgreSQL Configuration
182
456
 
183
- ```bash
184
- # First run
185
- db-auto-doc init
186
- db-auto-doc analyze --interactive
187
- db-auto-doc review
188
- db-auto-doc export --format=all
457
+ ```json
458
+ {
459
+ "version": "1.0.0",
460
+ "database": {
461
+ "provider": "postgresql",
462
+ "host": "localhost",
463
+ "port": 5432,
464
+ "database": "mydatabase",
465
+ "user": "postgres",
466
+ "password": "YourPassword",
467
+ "ssl": false
468
+ },
469
+ "ai": {
470
+ "provider": "openai",
471
+ "model": "gpt-4-turbo-preview",
472
+ "apiKey": "sk-...",
473
+ "temperature": 0.1,
474
+ "maxTokens": 8000
475
+ },
476
+ "analysis": {
477
+ "cardinalityThreshold": 20,
478
+ "sampleSize": 10,
479
+ "includeStatistics": true,
480
+ "guardrails": {
481
+ "enabled": true,
482
+ "maxTokensPerRun": 250000
483
+ }
484
+ },
485
+ "output": {
486
+ "stateFile": "./db-doc-state.json",
487
+ "outputDir": "./output",
488
+ "sqlFile": "./output/add-descriptions.sql",
489
+ "markdownFile": "./output/database-documentation.md"
490
+ },
491
+ "schemas": {
492
+ "exclude": ["pg_catalog", "information_schema"]
493
+ }
494
+ }
495
+ ```
496
+
497
+ ### MySQL Configuration
498
+
499
+ ```json
500
+ {
501
+ "version": "1.0.0",
502
+ "database": {
503
+ "provider": "mysql",
504
+ "host": "localhost",
505
+ "port": 3306,
506
+ "database": "mydatabase",
507
+ "user": "root",
508
+ "password": "YourPassword"
509
+ },
510
+ "ai": {
511
+ "provider": "openai",
512
+ "model": "gpt-4-turbo-preview",
513
+ "apiKey": "sk-...",
514
+ "temperature": 0.1,
515
+ "maxTokens": 8000
516
+ },
517
+ "analysis": {
518
+ "cardinalityThreshold": 20,
519
+ "sampleSize": 10,
520
+ "includeStatistics": true,
521
+ "guardrails": {
522
+ "enabled": true,
523
+ "maxTokensPerRun": 250000
524
+ }
525
+ },
526
+ "output": {
527
+ "stateFile": "./db-doc-state.json",
528
+ "outputDir": "./output",
529
+ "sqlFile": "./output/add-descriptions.sql",
530
+ "markdownFile": "./output/database-documentation.md"
531
+ },
532
+ "schemas": {
533
+ "exclude": ["mysql", "information_schema", "performance_schema", "sys"]
534
+ }
535
+ }
536
+ ```
189
537
 
190
- # Add context and refine
191
- # Edit db-doc-state.json to add notes
192
- db-auto-doc analyze --incremental
193
- db-auto-doc review --unapproved-only
538
+ ## Supported AI Providers
194
539
 
195
- # Ready for production
196
- db-auto-doc export --approved-only --execute
540
+ DBAutoDoc integrates with MemberJunction's AI provider system, supporting:
541
+
542
+ ### OpenAI
543
+ ```json
544
+ {
545
+ "provider": "OpenAILLM",
546
+ "model": "gpt-4-turbo-preview",
547
+ "apiKey": "sk-..."
548
+ }
197
549
  ```
198
550
 
199
- ## How It Works
551
+ ### Anthropic
552
+ ```json
553
+ {
554
+ "provider": "AnthropicLLM",
555
+ "model": "claude-3-5-sonnet-20241022",
556
+ "apiKey": "sk-ant-..."
557
+ }
558
+ ```
559
+
560
+ ### Google
561
+ ```json
562
+ {
563
+ "provider": "GoogleLLM",
564
+ "model": "gemini-1.5-pro",
565
+ "apiKey": "..."
566
+ }
567
+ ```
568
+
569
+ ### Groq
570
+ ```json
571
+ {
572
+ "provider": "GroqLLM",
573
+ "model": "llama-3.3-70b-versatile",
574
+ "apiKey": "gsk_..."
575
+ }
576
+ ```
577
+
578
+ ### Other Providers
579
+ Any BaseLLM-compatible provider registered with MemberJunction can be used.
200
580
 
201
- 1. **Introspection**: Queries SQL Server system catalogs
202
- 2. **Profiling**: Samples data and analyzes patterns
203
- 3. **AI Generation**: Sends context to LLM for descriptions
204
- 4. **Human Review**: User approves/refines results
205
- 5. **Output**: Generates SQL scripts and markdown docs
206
- 6. **Application**: Optionally executes SQL to add extended properties
581
+ ## State File
582
+
583
+ The `db-doc-state.json` file tracks:
584
+ - All schemas, tables, and columns
585
+ - **Description iterations** with reasoning and confidence
586
+ - **Analysis runs** with metrics (tokens, cost, duration)
587
+ - **Processing logs** for debugging
588
+ - **Relationship discovery results** (primary keys, foreign keys)
589
+ - **Guardrail metrics** (phase and iteration budgets)
590
+
591
+ ### Iteration Tracking
592
+
593
+ Each description has an iteration history:
594
+
595
+ ```json
596
+ {
597
+ "descriptionIterations": [
598
+ {
599
+ "description": "Initial hypothesis...",
600
+ "reasoning": "Based on column names...",
601
+ "generatedAt": "2024-01-15T10:00:00Z",
602
+ "modelUsed": "gpt-4",
603
+ "confidence": 0.75,
604
+ "triggeredBy": "initial"
605
+ },
606
+ {
607
+ "description": "Revised hypothesis...",
608
+ "reasoning": "Child table analysis revealed...",
609
+ "generatedAt": "2024-01-15T10:05:00Z",
610
+ "modelUsed": "gpt-4",
611
+ "confidence": 0.92,
612
+ "triggeredBy": "backpropagation",
613
+ "changedFrom": "Initial hypothesis..."
614
+ }
615
+ ]
616
+ }
617
+ ```
618
+
619
+ ## Programmatic Usage
620
+
621
+ DBAutoDoc can be used as a library with a comprehensive programmatic API:
622
+
623
+ ### Simple API (Recommended)
624
+
625
+ ```typescript
626
+ import { DBAutoDocAPI } from '@memberjunction/db-auto-doc';
627
+
628
+ const api = new DBAutoDocAPI();
629
+
630
+ // Analyze database
631
+ const result = await api.analyze({
632
+ database: {
633
+ provider: 'sqlserver',
634
+ host: 'localhost',
635
+ database: 'MyDB',
636
+ user: 'sa',
637
+ password: 'password'
638
+ },
639
+ ai: {
640
+ provider: 'OpenAILLM',
641
+ model: 'gpt-4-turbo-preview',
642
+ apiKey: 'sk-...'
643
+ },
644
+ analysis: {
645
+ convergence: { maxIterations: 10 },
646
+ guardrails: { maxTokensPerRun: 100000 }
647
+ },
648
+ output: {
649
+ outputDir: './output'
650
+ },
651
+ onProgress: (message, data) => {
652
+ console.log(message, data);
653
+ }
654
+ });
655
+
656
+ // Resume from state file
657
+ const resumed = await api.resume('./db-doc-state.json', {
658
+ analysis: {
659
+ convergence: { maxIterations: 20 }
660
+ }
661
+ });
662
+
663
+ // Export documentation
664
+ const exported = await api.export('./db-doc-state.json', {
665
+ formats: ['sql', 'markdown', 'html', 'csv', 'mermaid'],
666
+ outputDir: './docs',
667
+ applyToDatabase: true
668
+ });
669
+
670
+ // Get analysis status
671
+ const status = await api.getStatus('./db-doc-state.json');
672
+ console.log('Progress:', status.progress);
673
+ console.log('Tokens used:', status.metrics.totalTokens);
674
+ console.log('Estimated cost:', status.metrics.estimatedCost);
675
+ ```
676
+
677
+ ### Advanced API (Full Control)
678
+
679
+ ```typescript
680
+ import {
681
+ ConfigLoader,
682
+ DatabaseConnection,
683
+ Introspector,
684
+ TopologicalSorter,
685
+ StateManager,
686
+ PromptEngine,
687
+ AnalysisEngine,
688
+ GuardrailsManager, IterationTracker,
689
+ SQLGenerator,
690
+ MarkdownGenerator,
691
+ HTMLGenerator,
692
+ CSVGenerator,
693
+ MermaidGenerator
694
+ } from '@memberjunction/db-auto-doc';
695
+
696
+ // Load config
697
+ const config = await ConfigLoader.load('./config.json');
698
+
699
+ // Connect to database
700
+ const db = new DatabaseConnection(config.database);
701
+ await db.connect();
702
+
703
+ // Introspect
704
+ const driver = db.getDriver();
705
+ const introspector = new Introspector(driver);
706
+ const schemas = await introspector.getSchemas(config.schemas, config.tables);
707
+
708
+ // Initialize analysis components
709
+ const promptEngine = new PromptEngine(config.ai, './prompts');
710
+ await promptEngine.initialize();
711
+
712
+ const stateManager = new StateManager(config.output.stateFile);
713
+ const state = stateManager.createInitialState(config.database.database, config.database.host);
714
+ state.schemas = schemas;
715
+
716
+ const guardrails = new GuardrailsManager(config.analysis.guardrails);
717
+ const iterationTracker = new IterationTracker();
718
+
719
+ // Run analysis
720
+ const analysisEngine = new AnalysisEngine(config, promptEngine, stateManager, iterationTracker);
721
+ // ... custom analysis workflow
722
+
723
+ // Generate outputs
724
+ const sqlGen = new SQLGenerator();
725
+ const sql = sqlGen.generate(state, { approvedOnly: false });
726
+
727
+ const mdGen = new MarkdownGenerator();
728
+ const markdown = mdGen.generate(state);
729
+
730
+ const htmlGen = new HTMLGenerator();
731
+ const html = htmlGen.generate(state, { confidenceThreshold: 0.7 });
732
+
733
+ const csvGen = new CSVGenerator();
734
+ const { tables, columns } = csvGen.generate(state);
735
+
736
+ const mermaidGen = new MermaidGenerator();
737
+ const erdDiagram = mermaidGen.generate(state);
738
+ const erdHtml = mermaidGen.generateHtml(state);
739
+ ```
740
+
741
+ ## Cost Estimation
742
+
743
+ Typical costs (will vary by database size and complexity):
744
+
745
+ | Database Size | Tables | Iterations | Tokens | Cost (GPT-4) | Cost (Groq) |
746
+ |---------------|--------|------------|--------|--------------|-------------|
747
+ | Small | 10-20 | 2-3 | ~50K | $0.50 | $0.02 |
748
+ | Medium | 50-100 | 3-5 | ~200K | $2.00 | $0.08 |
749
+ | Large | 200+ | 5-8 | ~500K | $5.00 | $0.20 |
750
+ | Enterprise | 500+ | 8-15 | ~1.5M | $15.00 | $0.60 |
751
+
752
+ **With Relationship Discovery**: Add 25-40% to token/cost estimates for databases with missing constraints.
753
+
754
+ **With Sample Query Generation** (5 queries/table, GPT-4o):
755
+
756
+ | Database Size | Tables | Additional Tokens | Additional Cost |
757
+ |---------------|--------|-------------------|-----------------|
758
+ | Small | 10-20 | ~100K | $0.50-1.00 |
759
+ | Medium | 50-100 | ~500K | $2.50-5.00 |
760
+ | Large | 200+ | ~2M | $10-20 |
761
+
762
+ Note: Sample query generation uses ~6× more API calls than description generation (planning + individual SQL generation for each query), adding ~50% to total token usage.
763
+
764
+ **Guardrails** help control costs by setting hard limits on token usage and runtime.
765
+
766
+ ## Best Practices
767
+
768
+ 1. **Start with guardrails** - Set reasonable token/cost limits to avoid surprises
769
+ 2. **Add seed context** - Helps AI understand database purpose and domain
770
+ 3. **Review low-confidence items** - Focus manual effort where AI is uncertain
771
+ 4. **Use backpropagation** - Improves accuracy significantly
772
+ 5. **Enable relationship discovery** - For legacy databases missing constraints
773
+ 6. **Filter exports** - Use `--confidence-threshold` to only apply high-confidence descriptions
774
+ 7. **Iterate** - Run analysis multiple times if the first pass isn't satisfactory
775
+ 8. **Resume from checkpoints** - Save costs by continuing previous runs
776
+ 9. **Use appropriate models** - Balance cost vs. quality (GPT-4 vs. Groq)
777
+ 10. **Export multiple formats** - HTML for browsing, CSV for analysis, SQL for database
778
+
779
+ ### Sample Query Generation Best Practices
780
+
781
+ 1. **Use GPT-4o or Claude 3.5** - Best balance of quality, speed, and cost
782
+ 2. **Set token budget** - Prevents runaway costs (default: 100K tokens)
783
+ 3. **Start with 5 queries/table** - Good balance of coverage and cost
784
+ 4. **Enable alignment validation** - Ensures related queries use consistent logic
785
+ 5. **Review generated queries** - Verify SQL correctness before using for training
786
+ 6. **Use for few-shot prompting** - Include in AI agent system prompts as examples
787
+ 7. **Generate separately** - Use `generate-queries` command on existing state to avoid re-running full analysis
788
+ 8. **Focus on complex tables** - Skip simple lookup tables to save costs
789
+ 9. **Validate execution** - Set `maxExecutionTime` (in milliseconds) so generated queries are test-run and verified to complete successfully
790
+ 10. **Document patterns** - Use generated queries to document common query patterns for your domain
791
+
792
+ ## Troubleshooting
793
+
794
+ ### "Connection failed"
795
+ - Check `host`, `database`, `user`, and `password` in config
796
+ - Verify database server is running and accessible
797
+ - Check firewall rules and network connectivity
798
+ - For PostgreSQL: verify SSL settings
799
+ - For MySQL: check port and authentication method
800
+
801
+ ### "Analysis not converging"
802
+ - Increase `maxIterations` in config
803
+ - Lower `confidenceThreshold`
804
+ - Add more seed context
805
+ - Check warnings in state file for specific issues
806
+ - Review guardrail limits (may be hitting token budget)
807
+
808
+ ### "High token usage"
809
+ - Enable guardrails with appropriate limits
810
+ - Reduce `maxTokens` per prompt
811
+ - Filter schemas/tables to focus on subset
812
+ - Use cheaper model (Groq instead of GPT-4)
813
+ - Disable relationship discovery if not needed
814
+
815
+ ### "Guardrail limits exceeded"
816
+ - Review metrics in state file
817
+ - Adjust limits upward if budget allows
818
+ - Use `--resume` to continue from checkpoint
819
+ - Focus on specific schemas/tables
820
+ - Reduce iteration count
821
+
822
+ ### "Relationship discovery not finding keys"
823
+ - Check confidence thresholds (may be too high)
824
+ - Review statistical significance settings
825
+ - Enable LLM validation for better accuracy
826
+ - Check naming patterns configuration
827
+ - Verify sample size is adequate
828
+
829
+ ## Documentation
830
+
831
+ Comprehensive documentation is available in the `docs/` folder:
832
+
833
+ - **[USER_GUIDE.md](./docs/USER_GUIDE.md)** - Complete user documentation
834
+ - **[ARCHITECTURE.md](./docs/ARCHITECTURE.md)** - Technical architecture and design
835
+ - **[API_USAGE.md](./docs/API_USAGE.md)** - Programmatic API examples
836
+ - **[GUARDRAILS.md](./docs/GUARDRAILS.md)** - Guardrails system documentation
837
+ - **[CHANGES.md](./docs/CHANGES.md)** - Recent changes and enhancements
207
838
 
208
839
  ## Architecture
209
840
 
210
- Built following the **AI CLI pattern** (similar to `@memberjunction/ai-cli`):
211
- - **oclif-based commands** in `src/commands/` (init, analyze, review, export, reset)
212
- - **Standalone package** with zero MJ runtime dependencies
213
- - **MJCLI integration** via thin delegation commands in `packages/MJCLI/src/commands/dbdoc/`
214
- - **Reusable services** exported for programmatic use
215
- - **State file architecture** enables incremental refinement across runs
841
+ DBAutoDoc uses a sophisticated multi-phase architecture:
216
842
 
217
- ## Future Enhancements
843
+ 1. **Discovery Phase** - Introspection and optional relationship discovery
844
+ 2. **Analysis Phase** - Iterative LLM-based description generation
845
+ 3. **Sanity Check Phase** - Validation and quality assurance
846
+ 4. **Export Phase** - Multi-format documentation generation
218
847
 
219
- Possible future additions (not needed for current functionality):
220
- - Support for more AI providers (Groq, Cerebras, local models)
221
- - PostgreSQL/MySQL versions
222
- - Web UI for review
223
- - Dependency graph visualization
224
- - CI/CD integration examples
225
- - Schema diff detection for automatic re-documentation
848
+ See [ARCHITECTURE.md](./docs/ARCHITECTURE.md) for comprehensive architecture documentation, including:
849
+ - Phase flow diagrams
850
+ - Extension points for customization
851
+ - Database driver development guide
852
+ - LLM intelligence strategy
226
853
 
227
- ## Requirements
854
+ ## Contributing
228
855
 
229
- - Node.js 18+
230
- - SQL Server database access
231
- - OpenAI or Anthropic API key
856
+ DBAutoDoc is part of the MemberJunction project. Contributions welcome!
232
857
 
233
858
  ## License
234
859
 
235
- MIT License - see LICENSE file for details
860
+ MIT
861
+
862
+ ## Demo Databases
863
+
864
+ ### LousyDB - Legacy Database Demo
865
+
866
+ Located in `/Demos/LousyDB/`, this demo showcases **Relationship Discovery** capabilities on a realistic legacy database:
867
+
868
+ - ❌ **Zero metadata** - No PK or FK constraints defined
869
+ - 🔤 **Cryptic naming** - Short abbreviations (`cst`, `ord`, `pmt`)
870
+ - 🔡 **Single-char codes** - Undocumented status values (`A`, `T`, `P`)
871
+ - 💔 **Data quality issues** - Orphaned records, NULLs, duplicates
872
+ - 📊 **20 tables** across 2 schemas with 1000+ rows
236
873
 
237
- ## Support
874
+ Perfect for testing DBAutoDoc's ability to **reverse-engineer** poorly-documented databases.
238
875
 
239
- - GitHub Issues: https://github.com/MemberJunction/MJ/issues
240
- - Documentation: https://docs.memberjunction.org
876
+ See `/Demos/LousyDB/README.md` for details and testing instructions.
241
877
 
242
- ## Credits
878
+ ## Links
243
879
 
244
- Built by the MemberJunction team for the SQL Server community.
880
+ - **GitHub**: https://github.com/MemberJunction/MJ
881
+ - **Documentation**: https://docs.memberjunction.org
882
+ - **Support**: https://github.com/MemberJunction/MJ/issues