@n8n/ai-workflow-builder 0.31.1 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/dist/ai-workflow-builder-agent.service.d.ts +6 -2
  2. package/dist/ai-workflow-builder-agent.service.js +45 -3
  3. package/dist/ai-workflow-builder-agent.service.js.map +1 -1
  4. package/dist/build.tsbuildinfo +1 -1
  5. package/dist/tools/best-practices/data-analysis.d.ts +7 -0
  6. package/dist/tools/best-practices/data-analysis.js +367 -0
  7. package/dist/tools/best-practices/data-analysis.js.map +1 -0
  8. package/dist/tools/best-practices/data-extraction.js +7 -0
  9. package/dist/tools/best-practices/data-extraction.js.map +1 -1
  10. package/dist/tools/best-practices/data-transformation.d.ts +7 -0
  11. package/dist/tools/best-practices/data-transformation.js +181 -0
  12. package/dist/tools/best-practices/data-transformation.js.map +1 -0
  13. package/dist/tools/best-practices/document-processing.d.ts +7 -0
  14. package/dist/tools/best-practices/document-processing.js +324 -0
  15. package/dist/tools/best-practices/document-processing.js.map +1 -0
  16. package/dist/tools/best-practices/enrichment.d.ts +7 -0
  17. package/dist/tools/best-practices/enrichment.js +271 -0
  18. package/dist/tools/best-practices/enrichment.js.map +1 -0
  19. package/dist/tools/best-practices/human-in-the-loop.d.ts +7 -0
  20. package/dist/tools/best-practices/human-in-the-loop.js +268 -0
  21. package/dist/tools/best-practices/human-in-the-loop.js.map +1 -0
  22. package/dist/tools/best-practices/index.js +7 -6
  23. package/dist/tools/best-practices/index.js.map +1 -1
  24. package/dist/tools/best-practices/knowledge-base.d.ts +7 -0
  25. package/dist/tools/best-practices/knowledge-base.js +268 -0
  26. package/dist/tools/best-practices/knowledge-base.js.map +1 -0
  27. package/dist/tools/best-practices/monitoring.d.ts +7 -0
  28. package/dist/tools/best-practices/monitoring.js +178 -0
  29. package/dist/tools/best-practices/monitoring.js.map +1 -0
  30. package/dist/tools/best-practices/notification.d.ts +7 -0
  31. package/dist/tools/best-practices/notification.js +229 -0
  32. package/dist/tools/best-practices/notification.js.map +1 -0
  33. package/dist/tools/best-practices/scheduling.d.ts +7 -0
  34. package/dist/tools/best-practices/scheduling.js +281 -0
  35. package/dist/tools/best-practices/scheduling.js.map +1 -0
  36. package/dist/tools/best-practices/triage.d.ts +7 -0
  37. package/dist/tools/best-practices/triage.js +211 -0
  38. package/dist/tools/best-practices/triage.js.map +1 -0
  39. package/dist/tools/categorize-prompt.tool.js +1 -0
  40. package/dist/tools/categorize-prompt.tool.js.map +1 -1
  41. package/dist/tools/helpers/response.js +2 -0
  42. package/dist/tools/helpers/response.js.map +1 -1
  43. package/dist/tools/prompts/main-agent.prompt.js +9 -1
  44. package/dist/tools/prompts/main-agent.prompt.js.map +1 -1
  45. package/dist/tools/validate-workflow.tool.js +12 -0
  46. package/dist/tools/validate-workflow.tool.js.map +1 -1
  47. package/dist/utils/tool-executor.js +19 -0
  48. package/dist/utils/tool-executor.js.map +1 -1
  49. package/dist/validation/checks/agent-prompt.js +2 -0
  50. package/dist/validation/checks/agent-prompt.js.map +1 -1
  51. package/dist/validation/checks/connections.js +8 -0
  52. package/dist/validation/checks/connections.js.map +1 -1
  53. package/dist/validation/checks/from-ai.js +1 -0
  54. package/dist/validation/checks/from-ai.js.map +1 -1
  55. package/dist/validation/checks/tools.js +2 -0
  56. package/dist/validation/checks/tools.js.map +1 -1
  57. package/dist/validation/checks/trigger.js +2 -0
  58. package/dist/validation/checks/trigger.js.map +1 -1
  59. package/dist/validation/types.d.ts +4 -0
  60. package/dist/validation/types.js +18 -0
  61. package/dist/validation/types.js.map +1 -1
  62. package/dist/workflow-builder-agent.d.ts +5 -2
  63. package/dist/workflow-builder-agent.js +4 -3
  64. package/dist/workflow-builder-agent.js.map +1 -1
  65. package/dist/workflow-state.d.ts +3 -1
  66. package/dist/workflow-state.js +8 -0
  67. package/dist/workflow-state.js.map +1 -1
  68. package/package.json +11 -7
@@ -0,0 +1,268 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.HumanInTheLoopBestPractices = void 0;
4
+ const categorization_1 = require("../../types/categorization");
5
+ class HumanInTheLoopBestPractices {
6
+ technique = categorization_1.WorkflowTechnique.HUMAN_IN_THE_LOOP;
7
+ version = '1.0.0';
8
+ documentation = `# Best Practices: Human-in-the-Loop Workflows
9
+
10
+ ## Workflow Design
11
+
12
+ Structure workflows in three stages: Automation → Human Decision → Resume Processing. The Wait node is the core component for implementing pauses.
13
+
14
+ Break the workflow into clear stages:
15
+ 1. **Initial Automation**: Execute automated steps leading to the decision point (data extraction, AI content generation, etc.)
16
+ 2. **Human Notification**: Send notification with resume URL via preferred channel (Email, Slack, Telegram)
17
+ 3. **Wait for Response**: Configure Wait node with appropriate resume condition
18
+ 4. **Process Decision**: Use IF/Switch nodes to branch based on human input
19
+
20
+ Example pattern:
21
+ - Trigger → Generate Content → Email (with resume URLs) → Wait Node → IF (decision) → Publish/Reject
22
+ - HTTP Request (fetch data) → Filter → Email Manager → Wait (On Webhook) → Switch (approval status) → Database Update
23
+
24
+ CRITICAL: Always include the $execution.resumeUrl in notification messages. This unique URL resumes the specific workflow execution when accessed.
25
+
26
+ ## Wait Node Configuration
27
+
28
+ The Wait node supports four resume conditions:
29
+ - **After Time Interval**: Resume after fixed delay (not for human decisions)
30
+ - **At Specified Time**: Resume at specific date/time (not for human decisions)
31
+ - **On Webhook Call**: Resume when URL is accessed (ideal for link-based approvals)
32
+ - **On Form Submitted**: Resume when user submits n8n-hosted form (best for structured input)
33
+
34
+ For human-in-the-loop, use "On Webhook Call" or "On Form Submitted" modes.
35
+
36
+ ### Webhook Configuration
37
+ - Set HTTP method (GET for simple links, POST for data)
38
+ - Configure authentication if needed (None for private URLs, Basic/Token for security)
39
+ - Enable "Ignore Bots" to prevent email scanners/chat bots from triggering
40
+ - Use Webhook Suffix for multiple wait points in same workflow
41
+
42
+ ### Form Configuration
43
+ - Design form fields directly in Wait node
44
+ - Specify field types, labels, validation
45
+ - Form automatically hosts at resume URL
46
+ - Submitted data merges with workflow context
47
+
48
+ ## Timeout Management
49
+
50
+ Always configure "Limit Wait Time" to prevent infinite waits:
51
+ - Set maximum wait duration (e.g., 48 hours)
52
+ - Or specify absolute deadline date
53
+ - Handle timeout case in workflow logic
54
+ - Check if resumed by timeout (no webhook/form data present)
55
+
56
+ ## Communication Patterns
57
+
58
+ ### Direct Link Method
59
+ Include $execution.resumeUrl directly in messages:
60
+
61
+ Email: Click to [Approve]({{$execution.resumeUrl}}?decision=approve) or [Reject]({{$execution.resumeUrl}}?decision=reject)
62
+ Slack: Please review and click: {{$execution.resumeUrl}}
63
+
64
+ ### Platform Response Method
65
+ For responses within platform (Slack reply, email response):
66
+ - Use separate trigger workflow to catch responses
67
+ - Correlate to correct execution via ID
68
+ - Call resume URL programmatically
69
+ - More complex but keeps interaction native
70
+
71
+ ## Data Handling
72
+
73
+ The Wait node preserves all prior workflow data when resuming:
74
+ - Original context remains available
75
+ - New input data (form/webhook) adds to context
76
+ - Access webhook data via $json after Wait node
77
+ - Query parameters available in $json.query
78
+ - Form fields merge directly into JSON output
79
+
80
+ ## Recommended Nodes
81
+
82
+ ### Wait (n8n-nodes-base.wait)
83
+
84
+ **Purpose**: Core node for pausing workflow execution until human input
85
+
86
+ **Key Settings**:
87
+ - Resume: "On Webhook Call" or "On Form Submitted"
88
+ - Authentication: Configure based on security needs
89
+ - Ignore Bots: Enable to prevent accidental triggers
90
+ - Limit Wait Time: Set timeout to prevent infinite waits
91
+
92
+ **Use Cases**:
93
+ - Approval workflows
94
+ - Data collection from humans
95
+ - Multi-step verification processes
96
+
97
+ **Best Practices**:
98
+ - Always include timeout handling
99
+ - Use unique webhook suffixes for multiple waits
100
+ - Test resume URLs during development
101
+
102
+ ### Email (n8n-nodes-base.emailSend)
103
+
104
+ **Purpose**: Send notifications with resume links to users
105
+
106
+ **Use Cases**:
107
+ - Approval request emails
108
+ - Data verification requests
109
+ - Task assignment notifications
110
+
111
+ **Best Practices**:
112
+ - Include clear call-to-action buttons
113
+ - Embed resume URL as hyperlinks
114
+ - Provide context about the decision needed
115
+
116
+ ### Slack (n8n-nodes-base.slack)
117
+
118
+ **Purpose**: Send notifications via Slack with resume links
119
+
120
+ **Best Practices**:
121
+ - Wrap URLs in <> to prevent unfurling
122
+ - Use clear message formatting
123
+ - Consider using blocks for rich formatting
124
+
125
+ ### Telegram (n8n-nodes-base.telegram)
126
+
127
+ **Purpose**: Send notifications via Telegram
128
+
129
+ **Best Practices**:
130
+ - Enable "Ignore Bots" in Wait node
131
+ - Use inline keyboards for better UX
132
+ - Keep messages concise
133
+
134
+ ### IF (n8n-nodes-base.if)
135
+
136
+ **Purpose**: Branch workflow based on human decision
137
+
138
+ **Use Cases**:
139
+ - Route approved vs rejected items
140
+ - Check if response was received (vs timeout)
141
+ - Validate input data
142
+
143
+ **Best Practices**:
144
+ - Handle all possible decision values
145
+ - Include default/timeout branch
146
+ - Use clear condition names
147
+
148
+ ### Switch (n8n-nodes-base.switch)
149
+
150
+ **Purpose**: Multi-way branching for complex decisions
151
+
152
+ **Use Cases**:
153
+ - Multiple approval levels
154
+ - Various response options
155
+ - Status-based routing
156
+
157
+ ### HTTP Request (n8n-nodes-base.httpRequest)
158
+
159
+ **Purpose**: Call external APIs after human decision
160
+
161
+ **Use Cases**:
162
+ - Update external systems
163
+ - Trigger downstream processes
164
+ - Log decisions to external services
165
+
166
+ ### Database Nodes
167
+
168
+ **MySQL** (n8n-nodes-base.mySql), **Postgres** (n8n-nodes-base.postgres), **MongoDB** (n8n-nodes-base.mongoDb):
169
+ - Store approval history
170
+ - Update record status after decision
171
+ - Log human inputs for audit
172
+
173
+ ### Google Sheets (n8n-nodes-base.googleSheets)
174
+
175
+ **Purpose**: Track decisions and maintain approval logs
176
+
177
+ **Use Cases**:
178
+ - Approval tracking spreadsheets
179
+ - Decision audit logs
180
+ - User response collection
181
+
182
+ ## Common Pitfalls to Avoid
183
+
184
+ ### Accidental Resume Triggers
185
+
186
+ **Problem**: Email clients or chat apps preview links and trigger resume unintentionally.
187
+
188
+ **Solution**:
189
+ - Enable "Ignore Bots" option in Wait node
190
+ - Wrap Slack URLs in <> to prevent unfurling
191
+ - Use authentication for sensitive workflows
192
+ - Educate users to click only when ready
193
+
194
+ ### Missing Timeouts
195
+
196
+ **Problem**: Workflow waits forever if human never responds.
197
+
198
+ **Solution**:
199
+ - Always set "Limit Wait Time" (e.g., 3 days)
200
+ - Handle timeout scenario in workflow logic
201
+ - Send reminder before deadline
202
+ - Escalate to another person if timeout occurs
203
+
204
+ ### Lost Context After Wait
205
+
206
+ **Problem**: Assuming data is lost after Wait node resumes.
207
+
208
+ **Solution**:
209
+ - Wait node preserves all prior data automatically
210
+ - New input merges with existing context
211
+ - Test data structure after resume
212
+ - Use expressions to access both old and new data
213
+
214
+ ### Security Issues with Resume URLs
215
+
216
+ **Problem**: Resume URLs exposed to unauthorized users.
217
+
218
+ **Solution**:
219
+ - Treat resume URLs as secrets
220
+ - Use authentication options for sensitive workflows
221
+ - Send only through private channels
222
+ - Consider IP whitelisting if needed
223
+
224
+ ### Multiple Wait Node Confusion
225
+
226
+ **Problem**: Using same webhook URL for different wait points.
227
+
228
+ **Solution**:
229
+ - Use unique Webhook Suffix for each Wait node
230
+ - Generate fresh $execution.resumeUrl for each wait
231
+ - Label wait points clearly
232
+ - Document which URL corresponds to which decision
233
+
234
+ ### Not Handling All Response Types
235
+
236
+ **Problem**: Only handling expected responses, not edge cases.
237
+
238
+ **Solution**:
239
+ - Handle timeout case explicitly
240
+ - Provide default action for unexpected inputs
241
+ - Validate form data before processing
242
+ - Log all decisions for debugging
243
+
244
+ ### Workflow State Persistence
245
+
246
+ **Problem**: Worrying about resource consumption during wait.
247
+
248
+ **Solution**:
249
+ - Waiting executions are saved to database, not running
250
+ - No worker threads consumed during wait
251
+ - Can have hundreds of paused workflows
252
+ - Survives n8n restarts (state in database)
253
+
254
+ ### Complex Parallel Approvals
255
+
256
+ **Problem**: Need multiple people to approve before continuing.
257
+
258
+ **Solution**:
259
+ - Use separate Wait node per approver
260
+ - Or create sub-workflow per approver
261
+ - Use Merge node to synchronize branches
262
+ - Consider approval tracking in database`;
263
+ getDocumentation() {
264
+ return this.documentation;
265
+ }
266
+ }
267
+ exports.HumanInTheLoopBestPractices = HumanInTheLoopBestPractices;
268
+ //# sourceMappingURL=human-in-the-loop.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"human-in-the-loop.js","sourceRoot":"","sources":["../../../src/tools/best-practices/human-in-the-loop.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,2BAA2B;IAC9B,SAAS,GAAG,kCAAiB,CAAC,iBAAiB,CAAC;IAChD,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yCA8PO,CAAC;IAEzC,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AAvQD,kEAuQC"}
@@ -5,23 +5,24 @@ const categorization_1 = require("../../types/categorization");
5
5
  const chatbot_1 = require("./chatbot");
6
6
  const content_generation_1 = require("./content-generation");
7
7
  const data_extraction_1 = require("./data-extraction");
8
+ const document_processing_1 = require("./document-processing");
8
9
  const form_input_1 = require("./form-input");
9
10
  const scraping_and_research_1 = require("./scraping-and-research");
10
11
  exports.documentation = {
11
12
  [categorization_1.WorkflowTechnique.SCRAPING_AND_RESEARCH]: new scraping_and_research_1.ScrapingAndResearchBestPractices(),
12
13
  [categorization_1.WorkflowTechnique.CHATBOT]: new chatbot_1.ChatbotBestPractices(),
13
14
  [categorization_1.WorkflowTechnique.CONTENT_GENERATION]: new content_generation_1.ContentGenerationBestPractices(),
14
- [categorization_1.WorkflowTechnique.DATA_EXTRACTION]: new data_extraction_1.DataExtractionBestPractices(),
15
- [categorization_1.WorkflowTechnique.FORM_INPUT]: new form_input_1.FormInputBestPractices(),
16
15
  [categorization_1.WorkflowTechnique.DATA_ANALYSIS]: undefined,
16
+ [categorization_1.WorkflowTechnique.DATA_EXTRACTION]: new data_extraction_1.DataExtractionBestPractices(),
17
17
  [categorization_1.WorkflowTechnique.DATA_TRANSFORMATION]: undefined,
18
- [categorization_1.WorkflowTechnique.DOCUMENT_PROCESSING]: undefined,
18
+ [categorization_1.WorkflowTechnique.DOCUMENT_PROCESSING]: new document_processing_1.DocumentProcessingBestPractices(),
19
19
  [categorization_1.WorkflowTechnique.ENRICHMENT]: undefined,
20
- [categorization_1.WorkflowTechnique.HUMAN_IN_THE_LOOP]: undefined,
20
+ [categorization_1.WorkflowTechnique.FORM_INPUT]: new form_input_1.FormInputBestPractices(),
21
21
  [categorization_1.WorkflowTechnique.KNOWLEDGE_BASE]: undefined,
22
- [categorization_1.WorkflowTechnique.MONITORING]: undefined,
23
22
  [categorization_1.WorkflowTechnique.NOTIFICATION]: undefined,
24
- [categorization_1.WorkflowTechnique.SCHEDULING]: undefined,
25
23
  [categorization_1.WorkflowTechnique.TRIAGE]: undefined,
24
+ [categorization_1.WorkflowTechnique.HUMAN_IN_THE_LOOP]: undefined,
25
+ [categorization_1.WorkflowTechnique.MONITORING]: undefined,
26
+ [categorization_1.WorkflowTechnique.SCHEDULING]: undefined,
26
27
  };
27
28
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/tools/best-practices/index.ts"],"names":[],"mappings":";;;AACA,2DAAuF;AAEvF,uCAAiD;AACjD,6DAAsE;AACtE,uDAAgE;AAChE,6CAAsD;AACtD,mEAA2E;AAE9D,QAAA,aAAa,GAAqE;IAC9F,CAAC,kCAAiB,CAAC,qBAAqB,CAAC,EAAE,IAAI,wDAAgC,EAAE;IACjF,CAAC,kCAAiB,CAAC,OAAO,CAAC,EAAE,IAAI,8BAAoB,EAAE;IACvD,CAAC,kCAAiB,CAAC,kBAAkB,CAAC,EAAE,IAAI,mDAA8B,EAAE;IAC5E,CAAC,kCAAiB,CAAC,eAAe,CAAC,EAAE,IAAI,6CAA2B,EAAE;IACtE,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,IAAI,mCAAsB,EAAE;IAG5D,CAAC,kCAAiB,CAAC,aAAa,CAAC,EAAE,SAAS;IAC5C,CAAC,kCAAiB,CAAC,mBAAmB,CAAC,EAAE,SAAS;IAClD,CAAC,kCAAiB,CAAC,mBAAmB,CAAC,EAAE,SAAS;IAClD,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;IACzC,CAAC,kCAAiB,CAAC,iBAAiB,CAAC,EAAE,SAAS;IAChD,CAAC,kCAAiB,CAAC,cAAc,CAAC,EAAE,SAAS;IAC7C,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;IACzC,CAAC,kCAAiB,CAAC,YAAY,CAAC,EAAE,SAAS;IAC3C,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;IACzC,CAAC,kCAAiB,CAAC,MAAM,CAAC,EAAE,SAAS;CACrC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/tools/best-practices/index.ts"],"names":[],"mappings":";;;AACA,2DAAuF;AAEvF,uCAAiD;AACjD,6DAAsE;AAEtE,uDAAgE;AAEhE,+DAAwE;AAExE,6CAAsD;AAKtD,mEAA2E;AAI9D,QAAA,aAAa,GAAqE;IAC9F,CAAC,kCAAiB,CAAC,qBAAqB,CAAC,EAAE,IAAI,wDAAgC,EAAE;IACjF,CAAC,kCAAiB,CAAC,OAAO,CAAC,EAAE,IAAI,8BAAoB,EAAE;IACvD,CAAC,kCAAiB,CAAC,kBAAkB,CAAC,EAAE,IAAI,mDAA8B,EAAE;IAC5E,CAAC,kCAAiB,CAAC,aAAa,CAAC,EAAE,SAAS;IAC5C,CAAC,kCAAiB,CAAC,eAAe,CAAC,EAAE,IAAI,6CAA2B,EAAE;IACtE,CAAC,kCAAiB,CAAC,mBAAmB,CAAC,EAAE,SAAS;IAClD,CAAC,kCAAiB,CAAC,mBAAmB,CAAC,EAAE,IAAI,qDAA+B,EAAE;IAC9E,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;IACzC,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,IAAI,mCAAsB,EAAE;IAC5D,CAAC,kCAAiB,CAAC,cAAc,CAAC,EAAE,SAAS;IAC7C,CAAC,kCAAiB,CAAC,YAAY,CAAC,EAAE,SAAS;IAC3C,CAAC,kCAAiB,CAAC,MAAM,CAAC,EAAE,SAAS;IACrC,CAAC,kCAAiB,CAAC,iBAAiB,CAAC,EAAE,SAAS;IAChD,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;IACzC,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;CACzC,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { BestPracticesDocument } from '../../types/best-practices';
2
+ export declare class KnowledgeBaseBestPractices implements BestPracticesDocument {
3
+ readonly technique: "knowledge_base";
4
+ readonly version = "1.0.0";
5
+ private readonly documentation;
6
+ getDocumentation(): string;
7
+ }
@@ -0,0 +1,268 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.KnowledgeBaseBestPractices = void 0;
4
+ const categorization_1 = require("../../types/categorization");
5
+ class KnowledgeBaseBestPractices {
6
+ technique = categorization_1.WorkflowTechnique.KNOWLEDGE_BASE;
7
+ version = '1.0.0';
8
+ documentation = `# Best Practices: Knowledge Base Workflows
9
+
10
+ ## Workflow Design
11
+
12
+ ### Architecture Pattern
13
+ - **Separate Workflows**: Split into two distinct parts:
14
+ - **Ingestion Workflow**: Processes and indexes documents into vector database (triggered on new content or schedule)
15
+ - **Query Workflow**: Retrieves relevant information and generates answers (triggered by user queries)
16
+ - **Modular Design**: Use Execute Workflow node to call query workflow from multiple channels (chat, API, Slack, etc.)
17
+
18
+ ### Trigger Strategy
19
+ - **Ingestion Triggers**: File Watchers (Google Drive, S3), Schedule triggers for periodic re-indexing
20
+ - **Query Triggers**: Chat Trigger, Webhook, Slack Trigger based on input channel
21
+
22
+ ### Data Type Handling
23
+ - Use Switch/If nodes or Code node to route different file types to appropriate extraction branches
24
+ - Separate processing paths for PDFs, databases, web pages, etc.
25
+
26
+ ## Core Processing Pipeline
27
+
28
+ ### Document Processing
29
+ 1. **Fetch Documents**: Google Drive/Dropbox/S3 nodes, HTTP Request node, Database nodes
30
+ 2. **Load & Split**: Default Data Loader → Recursive Character Text Splitter
31
+ - Chunk size: 500-1000 characters (~200 tokens)
32
+ - Overlap: 10-15% to preserve context
33
+ 3. **Generate Embeddings**: Embeddings node (OpenAI/HuggingFace/Cohere)
34
+ - **Critical**: Use same model for indexing and queries
35
+ - Example: text-embedding-ada-002 (1536 dimensions)
36
+
37
+ ### Vector Store Configuration
38
+ - **Insert Mode**:
39
+ - Use upsert with unique IDs (document ID + chunk number)
40
+ - Include metadata (source, title, page number)
41
+ - Clear namespace option for complete replacement
42
+ - **Query Mode**:
43
+ - Top-K limit: 3-5 results typically optimal
44
+ - Apply similarity score threshold to filter irrelevant matches
45
+
46
+ ### LLM Integration
47
+ - **Agent Approach**: AI Agent node with Vector Store Tool
48
+ - Configure clear tool description: "Company Knowledge Base – use this to find relevant policy documents"
49
+ - Connect Window Buffer Memory for conversation history
50
+ - **Direct Query**: Vector Store (Get Many) → OpenAI Chat Model with crafted prompt
51
+ - **System Prompt**: "Answer using only the information from our knowledge base. If you don't find an answer in the provided documents, say you don't know."
52
+ - **Temperature**: 0-0.3 for factual accuracy
53
+
54
+ ## Recommended Nodes
55
+
56
+ ### Document Handling
57
+
58
+ **Google Drive** (n8n-nodes-base.googleDrive):
59
+ - Purpose: File triggers and retrieval from Google Drive
60
+ - Use cases: Monitor folders for new documents, fetch specific files
61
+ - Best practices: Use triggers for automatic ingestion, handle file types appropriately
62
+
63
+ **HTTP Request** (n8n-nodes-base.httpRequest):
64
+ - Purpose: Fetch documents from URLs/APIs
65
+ - Use cases: Pull content from web pages, download files from APIs
66
+ - Best practices: Handle authentication, check response formats
67
+
68
+ **Notion** (n8n-nodes-base.notion):
69
+ - Purpose: Retrieve content from Notion databases and pages
70
+ - Use cases: Index company wikis, documentation in Notion
71
+ - Best practices: Use appropriate API version, handle nested content
72
+
73
+ **Postgres** (n8n-nodes-base.postgres):
74
+ - Purpose: Query database content for indexing
75
+ - Use cases: Index structured data, retrieve records for embedding
76
+ - Best practices: Use efficient queries, batch large datasets
77
+
78
+ ### AI Processing Chain
79
+
80
+ **Document Default Data Loader** (@n8n/n8n-nodes-langchain.documentDefaultDataLoader):
81
+ - Purpose: Load documents into LangChain format
82
+ - Use cases: Initial document processing, format conversion
83
+ - Best practices: Handle various document types, preserve metadata
84
+
85
+ **Text Splitter Recursive Character** (@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter):
86
+ - Purpose: Split documents into manageable chunks
87
+ - Configuration:
88
+ - Chunk size: 500-1000 characters (~200 tokens)
89
+ - Overlap: 10-15% to preserve context
90
+ - Best practices: Test chunk sizes for optimal retrieval quality, ensure context preservation
91
+
92
+ **Embeddings OpenAI** (@n8n/n8n-nodes-langchain.embeddingsOpenAi):
93
+ - Purpose: Generate vector embeddings for text
94
+ - Model options:
95
+ - text-embedding-3-small (newer, cost-effective)
96
+ - text-embedding-ada-002 (1536 dimensions, widely used)
97
+ - **Critical**: Use same model for indexing and queries
98
+ - Best practices: Choose model based on quality/cost tradeoffs, maintain consistency
99
+
100
+ ### Vector Stores
101
+
102
+ **Vector Store Pinecone** (@n8n/n8n-nodes-langchain.vectorStorePinecone):
103
+ - Purpose: Pinecone vector database integration
104
+ - Use cases: Production knowledge bases, scalable deployments
105
+ - Best practices: Use namespaces for organization, set appropriate index dimensions
106
+
107
+ **Vector Store Qdrant** (@n8n/n8n-nodes-langchain.vectorStoreQdrant):
108
+ - Purpose: Qdrant vector database integration
109
+ - Use cases: Self-hosted vector storage, high-performance search
110
+ - Best practices: Configure collections properly, use filters for metadata
111
+
112
+ **Vector Store Supabase** (@n8n/n8n-nodes-langchain.vectorStoreSupabase):
113
+ - Purpose: Supabase pgvector integration
114
+ - Use cases: PostgreSQL-based vector storage, integrated with existing Supabase projects
115
+ - Best practices: Ensure pgvector extension is enabled, use proper indexing
116
+
117
+ **Vector Store In Memory** (@n8n/n8n-nodes-langchain.vectorStoreInMemory):
118
+ - Purpose: In-memory vector storage for testing
119
+ - Use cases: Development, testing, small datasets
120
+ - Best practices: Not for production, data lost on restart
121
+
122
+ ### Agent & LLM
123
+
124
+ **AI Agent** (@n8n/n8n-nodes-langchain.agent):
125
+ - Purpose: Orchestrate tool use and LLM interactions
126
+ - Configuration: Connect Vector Store Tool, add memory
127
+ - Best practices: Configure clear tool descriptions, use appropriate prompts
128
+
129
+ **Tool Vector Store** (@n8n/n8n-nodes-langchain.toolVectorStore):
130
+ - Purpose: Vector store tool for agents
131
+ - Configuration: "Company Knowledge Base – use this to find relevant policy documents"
132
+ - Best practices: Use descriptive tool names, set appropriate retrieval limits (3-5 results)
133
+
134
+ **OpenAI** (@n8n/n8n-nodes-langchain.openAi):
135
+ - Purpose: Chat model for generating responses
136
+ - Configuration:
137
+ - Temperature: 0-0.3 for factual Q&A
138
+ - System prompt: "Answer using only the information from our knowledge base"
139
+ - Best practices: Use low temperature for accuracy, instruct to admit when unsure
140
+
141
+ **Memory Window Buffer** (@n8n/n8n-nodes-langchain.memoryBufferWindow):
142
+ - Purpose: Maintain conversation history
143
+ - Configuration: 3-5 message turns typically sufficient
144
+ - Best practices: Balance context preservation with token limits
145
+
146
+ ### Utility
147
+
148
+ **Switch** (n8n-nodes-base.switch):
149
+ - Purpose: Route by file type or content type
150
+ - Use cases: Different processing for PDFs vs text vs images
151
+ - Best practices: Always define default case, use clear conditions
152
+
153
+ **Execute Workflow** (n8n-nodes-base.executeWorkflow):
154
+ - Purpose: Call sub-workflows for modular design
155
+ - Use cases: Reuse query workflow across channels, separate ingestion logic
156
+ - Best practices: Design for reusability, pass appropriate parameters
157
+
158
+ ## Common Pitfalls to Avoid
159
+
160
+ ### Critical Mistakes
161
+
162
+ **Inconsistent Embeddings**:
163
+ - **Problem**: Using different embedding models for indexing vs queries breaks semantic search
164
+ - **Solution**: Always use the same model throughout (e.g., text-embedding-ada-002 for both)
165
+ - Document which model is used in workflow description
166
+
167
+ **Vector Dimension Mismatch**:
168
+ - **Problem**: Index dimensions don't match embedding model output, causing errors
169
+ - **Solution**: Ensure vector store index dimensions match embedding model output exactly
170
+ - Common: ada-002 = 1536 dimensions, text-embedding-3-small = 1536 dimensions
171
+
172
+ **Missing Updates**:
173
+ - **Problem**: Not updating or removing outdated vectors leads to conflicting information
174
+ - **Solution**: Implement update/delete mechanisms with unique IDs
175
+ - Use document ID + chunk number as unique identifier
176
+ - Schedule regular re-indexing for changing content
177
+
178
+ **Treating Vector DB as Full Database**:
179
+ - **Problem**: Using vector stores for general data storage instead of semantic search
180
+ - **Solution**: Vector DBs are for semantic search only, not bulk data storage
181
+ - Store full documents in traditional databases, only embeddings in vector store
182
+
183
+ ### Performance Issues
184
+
185
+ **Oversized Chunks**:
186
+ - **Problem**: Large chunks dilute relevance and exceed token limits
187
+ - **Solution**: Keep chunks to 500-1000 characters (~200 tokens)
188
+ - Test different sizes to find optimal retrieval quality
189
+
190
+ **Undersized Chunks**:
191
+ - **Problem**: Too small chunks lose necessary context
192
+ - **Solution**: Ensure chunks have sufficient context to be meaningful
193
+ - Use 10-15% overlap between chunks
194
+
195
+ **Too Many Retrieved Documents**:
196
+ - **Problem**: Retrieving 10+ documents overwhelms LLM and reduces accuracy
197
+ - **Solution**: Limit to 3-5 results for optimal quality
198
+ - Use similarity thresholds to filter irrelevant matches
199
+
200
+ **UI Overload**:
201
+ - **Problem**: Indexing thousands of chunks freezes workflow editor
202
+ - **Solution**: Run large indexing jobs in production mode, not editor
203
+ - Consider batch processing for very large datasets
204
+
205
+ ### Configuration Errors
206
+
207
+ **No Metadata**:
208
+ - **Problem**: Missing source/date metadata makes results less interpretable
209
+ - **Solution**: Always include metadata (source, title, page number, date)
210
+ - Helps users understand context of retrieved information
211
+
212
+ **No Unique IDs**:
213
+ - **Problem**: Can't update specific documents, causes duplicates
214
+ - **Solution**: Use document ID + chunk number as unique identifier
215
+ - Enables targeted updates and deletions
216
+
217
+ **High Temperature**:
218
+ - **Problem**: Creative temperature settings cause hallucinations in factual Q&A
219
+ - **Solution**: Use temperature 0-0.3 for factual responses
220
+ - Higher temperatures (0.7-1.0) only for creative tasks
221
+
222
+ **Generic Tool Descriptions**:
223
+ - **Problem**: Vague descriptions cause agents to misuse tools
224
+ - **Solution**: Use specific, descriptive tool names
225
+ - Good: "Company HR Policy Knowledge Base"
226
+ - Bad: "Knowledge base"
227
+
228
+ ### Data Management
229
+
230
+ **Stale Data**:
231
+ - **Problem**: Outdated information in knowledge base leads to wrong answers
232
+ - **Solution**: Schedule regular re-indexing or implement change detection
233
+ - Use document timestamps to track freshness
234
+
235
+ **No Namespace Separation**:
236
+ - **Problem**: Mixing unrelated domains in same index reduces accuracy
237
+ - **Solution**: Use namespaces to separate different knowledge domains
238
+ - Example: "hr-policies", "technical-docs", "customer-faqs"
239
+
240
+ **Ignoring Token Limits**:
241
+ - **Problem**: Combined length of query + context + response exceeds model limits
242
+ - **Solution**: Monitor total token usage, limit context appropriately
243
+ - GPT-4: 8k/32k tokens, GPT-3.5: 4k/16k tokens
244
+
245
+ **Security Gaps**:
246
+ - **Problem**: Sending sensitive data without access control or encryption
247
+ - **Solution**: Implement proper access controls, use secure connections
248
+ - Consider data classification and access restrictions
249
+
250
+ ## Best Practices Summary
251
+
252
+ 1. **Always use consistent embedding models** throughout the pipeline
253
+ 2. **Design modular workflows** for reusability across channels
254
+ 3. **Include metadata** for better context and filtering
255
+ 4. **Implement proper update/delete mechanisms** with unique IDs
256
+ 5. **Test chunk sizes** for optimal retrieval quality (500-1000 characters)
257
+ 6. **Run large indexing operations** in production mode
258
+ 7. **Set appropriate retrieval limits** (3-5 results) and similarity thresholds
259
+ 8. **Use low temperature** (0-0.3) for factual responses
260
+ 9. **Secure sensitive data** with proper access controls
261
+ 10. **Monitor and update** regularly to prevent stale information
262
+ `;
263
+ getDocumentation() {
264
+ return this.documentation;
265
+ }
266
+ }
267
+ exports.KnowledgeBaseBestPractices = KnowledgeBaseBestPractices;
268
+ //# sourceMappingURL=knowledge-base.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"knowledge-base.js","sourceRoot":"","sources":["../../../src/tools/best-practices/knowledge-base.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,0BAA0B;IAC7B,SAAS,GAAG,kCAAiB,CAAC,cAAc,CAAC;IAC7C,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8PjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AAvQD,gEAuQC"}
@@ -0,0 +1,7 @@
1
+ import type { BestPracticesDocument } from '../../types/best-practices';
2
+ export declare class MonitoringBestPractices implements BestPracticesDocument {
3
+ readonly technique: "monitoring";
4
+ readonly version = "1.0.0";
5
+ private readonly documentation;
6
+ getDocumentation(): string;
7
+ }