@n8n/ai-workflow-builder 0.31.2 → 0.32.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-workflow-builder-agent.service.d.ts +6 -2
- package/dist/ai-workflow-builder-agent.service.js +45 -3
- package/dist/ai-workflow-builder-agent.service.js.map +1 -1
- package/dist/build.tsbuildinfo +1 -1
- package/dist/tools/best-practices/data-analysis.d.ts +7 -0
- package/dist/tools/best-practices/data-analysis.js +367 -0
- package/dist/tools/best-practices/data-analysis.js.map +1 -0
- package/dist/tools/best-practices/data-extraction.js +7 -0
- package/dist/tools/best-practices/data-extraction.js.map +1 -1
- package/dist/tools/best-practices/data-transformation.d.ts +7 -0
- package/dist/tools/best-practices/data-transformation.js +181 -0
- package/dist/tools/best-practices/data-transformation.js.map +1 -0
- package/dist/tools/best-practices/document-processing.d.ts +7 -0
- package/dist/tools/best-practices/document-processing.js +324 -0
- package/dist/tools/best-practices/document-processing.js.map +1 -0
- package/dist/tools/best-practices/enrichment.d.ts +7 -0
- package/dist/tools/best-practices/enrichment.js +271 -0
- package/dist/tools/best-practices/enrichment.js.map +1 -0
- package/dist/tools/best-practices/human-in-the-loop.d.ts +7 -0
- package/dist/tools/best-practices/human-in-the-loop.js +268 -0
- package/dist/tools/best-practices/human-in-the-loop.js.map +1 -0
- package/dist/tools/best-practices/index.js +7 -6
- package/dist/tools/best-practices/index.js.map +1 -1
- package/dist/tools/best-practices/knowledge-base.d.ts +7 -0
- package/dist/tools/best-practices/knowledge-base.js +268 -0
- package/dist/tools/best-practices/knowledge-base.js.map +1 -0
- package/dist/tools/best-practices/monitoring.d.ts +7 -0
- package/dist/tools/best-practices/monitoring.js +178 -0
- package/dist/tools/best-practices/monitoring.js.map +1 -0
- package/dist/tools/best-practices/notification.d.ts +7 -0
- package/dist/tools/best-practices/notification.js +229 -0
- package/dist/tools/best-practices/notification.js.map +1 -0
- package/dist/tools/best-practices/scheduling.d.ts +7 -0
- package/dist/tools/best-practices/scheduling.js +281 -0
- package/dist/tools/best-practices/scheduling.js.map +1 -0
- package/dist/tools/best-practices/triage.d.ts +7 -0
- package/dist/tools/best-practices/triage.js +211 -0
- package/dist/tools/best-practices/triage.js.map +1 -0
- package/dist/tools/categorize-prompt.tool.js +1 -0
- package/dist/tools/categorize-prompt.tool.js.map +1 -1
- package/dist/tools/helpers/response.js +2 -0
- package/dist/tools/helpers/response.js.map +1 -1
- package/dist/tools/prompts/main-agent.prompt.js +9 -1
- package/dist/tools/prompts/main-agent.prompt.js.map +1 -1
- package/dist/tools/validate-workflow.tool.js +12 -0
- package/dist/tools/validate-workflow.tool.js.map +1 -1
- package/dist/utils/tool-executor.js +19 -0
- package/dist/utils/tool-executor.js.map +1 -1
- package/dist/validation/checks/agent-prompt.js +2 -0
- package/dist/validation/checks/agent-prompt.js.map +1 -1
- package/dist/validation/checks/connections.js +8 -0
- package/dist/validation/checks/connections.js.map +1 -1
- package/dist/validation/checks/from-ai.js +1 -0
- package/dist/validation/checks/from-ai.js.map +1 -1
- package/dist/validation/checks/tools.js +2 -0
- package/dist/validation/checks/tools.js.map +1 -1
- package/dist/validation/checks/trigger.js +2 -0
- package/dist/validation/checks/trigger.js.map +1 -1
- package/dist/validation/types.d.ts +4 -0
- package/dist/validation/types.js +18 -0
- package/dist/validation/types.js.map +1 -1
- package/dist/workflow-builder-agent.d.ts +5 -2
- package/dist/workflow-builder-agent.js +4 -3
- package/dist/workflow-builder-agent.js.map +1 -1
- package/dist/workflow-state.d.ts +3 -1
- package/dist/workflow-state.js +8 -0
- package/dist/workflow-state.js.map +1 -1
- package/package.json +11 -7
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.HumanInTheLoopBestPractices = void 0;
|
|
4
|
+
const categorization_1 = require("../../types/categorization");
|
|
5
|
+
class HumanInTheLoopBestPractices {
|
|
6
|
+
technique = categorization_1.WorkflowTechnique.HUMAN_IN_THE_LOOP;
|
|
7
|
+
version = '1.0.0';
|
|
8
|
+
documentation = `# Best Practices: Human-in-the-Loop Workflows
|
|
9
|
+
|
|
10
|
+
## Workflow Design
|
|
11
|
+
|
|
12
|
+
Structure workflows in three stages: Automation → Human Decision → Resume Processing. The Wait node is the core component for implementing pauses.
|
|
13
|
+
|
|
14
|
+
Break the workflow into clear stages:
|
|
15
|
+
1. **Initial Automation**: Execute automated steps leading to the decision point (data extraction, AI content generation, etc.)
|
|
16
|
+
2. **Human Notification**: Send notification with resume URL via preferred channel (Email, Slack, Telegram)
|
|
17
|
+
3. **Wait for Response**: Configure Wait node with appropriate resume condition
|
|
18
|
+
4. **Process Decision**: Use IF/Switch nodes to branch based on human input
|
|
19
|
+
|
|
20
|
+
Example pattern:
|
|
21
|
+
- Trigger → Generate Content → Email (with resume URLs) → Wait Node → IF (decision) → Publish/Reject
|
|
22
|
+
- HTTP Request (fetch data) → Filter → Email Manager → Wait (On Webhook) → Switch (approval status) → Database Update
|
|
23
|
+
|
|
24
|
+
CRITICAL: Always include the $execution.resumeUrl in notification messages. This unique URL resumes the specific workflow execution when accessed.
|
|
25
|
+
|
|
26
|
+
## Wait Node Configuration
|
|
27
|
+
|
|
28
|
+
The Wait node supports four resume conditions:
|
|
29
|
+
- **After Time Interval**: Resume after fixed delay (not for human decisions)
|
|
30
|
+
- **At Specified Time**: Resume at specific date/time (not for human decisions)
|
|
31
|
+
- **On Webhook Call**: Resume when URL is accessed (ideal for link-based approvals)
|
|
32
|
+
- **On Form Submitted**: Resume when user submits n8n-hosted form (best for structured input)
|
|
33
|
+
|
|
34
|
+
For human-in-the-loop, use "On Webhook Call" or "On Form Submitted" modes.
|
|
35
|
+
|
|
36
|
+
### Webhook Configuration
|
|
37
|
+
- Set HTTP method (GET for simple links, POST for data)
|
|
38
|
+
- Configure authentication if needed (None for private URLs, Basic/Token for security)
|
|
39
|
+
- Enable "Ignore Bots" to prevent email scanners/chat bots from triggering
|
|
40
|
+
- Use Webhook Suffix for multiple wait points in same workflow
|
|
41
|
+
|
|
42
|
+
### Form Configuration
|
|
43
|
+
- Design form fields directly in Wait node
|
|
44
|
+
- Specify field types, labels, validation
|
|
45
|
+
- Form automatically hosts at resume URL
|
|
46
|
+
- Submitted data merges with workflow context
|
|
47
|
+
|
|
48
|
+
## Timeout Management
|
|
49
|
+
|
|
50
|
+
Always configure "Limit Wait Time" to prevent infinite waits:
|
|
51
|
+
- Set maximum wait duration (e.g., 48 hours)
|
|
52
|
+
- Or specify absolute deadline date
|
|
53
|
+
- Handle timeout case in workflow logic
|
|
54
|
+
- Check if resumed by timeout (no webhook/form data present)
|
|
55
|
+
|
|
56
|
+
## Communication Patterns
|
|
57
|
+
|
|
58
|
+
### Direct Link Method
|
|
59
|
+
Include $execution.resumeUrl directly in messages:
|
|
60
|
+
|
|
61
|
+
Email: Click to [Approve]({{$execution.resumeUrl}}?decision=approve) or [Reject]({{$execution.resumeUrl}}?decision=reject)
|
|
62
|
+
Slack: Please review and click: {{$execution.resumeUrl}}
|
|
63
|
+
|
|
64
|
+
### Platform Response Method
|
|
65
|
+
For responses within platform (Slack reply, email response):
|
|
66
|
+
- Use separate trigger workflow to catch responses
|
|
67
|
+
- Correlate to correct execution via ID
|
|
68
|
+
- Call resume URL programmatically
|
|
69
|
+
- More complex but keeps interaction native
|
|
70
|
+
|
|
71
|
+
## Data Handling
|
|
72
|
+
|
|
73
|
+
The Wait node preserves all prior workflow data when resuming:
|
|
74
|
+
- Original context remains available
|
|
75
|
+
- New input data (form/webhook) adds to context
|
|
76
|
+
- Access webhook data via $json after Wait node
|
|
77
|
+
- Query parameters available in $json.query
|
|
78
|
+
- Form fields merge directly into JSON output
|
|
79
|
+
|
|
80
|
+
## Recommended Nodes
|
|
81
|
+
|
|
82
|
+
### Wait (n8n-nodes-base.wait)
|
|
83
|
+
|
|
84
|
+
**Purpose**: Core node for pausing workflow execution until human input
|
|
85
|
+
|
|
86
|
+
**Key Settings**:
|
|
87
|
+
- Resume: "On Webhook Call" or "On Form Submitted"
|
|
88
|
+
- Authentication: Configure based on security needs
|
|
89
|
+
- Ignore Bots: Enable to prevent accidental triggers
|
|
90
|
+
- Limit Wait Time: Set timeout to prevent infinite waits
|
|
91
|
+
|
|
92
|
+
**Use Cases**:
|
|
93
|
+
- Approval workflows
|
|
94
|
+
- Data collection from humans
|
|
95
|
+
- Multi-step verification processes
|
|
96
|
+
|
|
97
|
+
**Best Practices**:
|
|
98
|
+
- Always include timeout handling
|
|
99
|
+
- Use unique webhook suffixes for multiple waits
|
|
100
|
+
- Test resume URLs during development
|
|
101
|
+
|
|
102
|
+
### Email (n8n-nodes-base.emailSend)
|
|
103
|
+
|
|
104
|
+
**Purpose**: Send notifications with resume links to users
|
|
105
|
+
|
|
106
|
+
**Use Cases**:
|
|
107
|
+
- Approval request emails
|
|
108
|
+
- Data verification requests
|
|
109
|
+
- Task assignment notifications
|
|
110
|
+
|
|
111
|
+
**Best Practices**:
|
|
112
|
+
- Include clear call-to-action buttons
|
|
113
|
+
- Embed resume URL as hyperlinks
|
|
114
|
+
- Provide context about the decision needed
|
|
115
|
+
|
|
116
|
+
### Slack (n8n-nodes-base.slack)
|
|
117
|
+
|
|
118
|
+
**Purpose**: Send notifications via Slack with resume links
|
|
119
|
+
|
|
120
|
+
**Best Practices**:
|
|
121
|
+
- Wrap URLs in <> to prevent unfurling
|
|
122
|
+
- Use clear message formatting
|
|
123
|
+
- Consider using blocks for rich formatting
|
|
124
|
+
|
|
125
|
+
### Telegram (n8n-nodes-base.telegram)
|
|
126
|
+
|
|
127
|
+
**Purpose**: Send notifications via Telegram
|
|
128
|
+
|
|
129
|
+
**Best Practices**:
|
|
130
|
+
- Enable "Ignore Bots" in Wait node
|
|
131
|
+
- Use inline keyboards for better UX
|
|
132
|
+
- Keep messages concise
|
|
133
|
+
|
|
134
|
+
### IF (n8n-nodes-base.if)
|
|
135
|
+
|
|
136
|
+
**Purpose**: Branch workflow based on human decision
|
|
137
|
+
|
|
138
|
+
**Use Cases**:
|
|
139
|
+
- Route approved vs rejected items
|
|
140
|
+
- Check if response was received (vs timeout)
|
|
141
|
+
- Validate input data
|
|
142
|
+
|
|
143
|
+
**Best Practices**:
|
|
144
|
+
- Handle all possible decision values
|
|
145
|
+
- Include default/timeout branch
|
|
146
|
+
- Use clear condition names
|
|
147
|
+
|
|
148
|
+
### Switch (n8n-nodes-base.switch)
|
|
149
|
+
|
|
150
|
+
**Purpose**: Multi-way branching for complex decisions
|
|
151
|
+
|
|
152
|
+
**Use Cases**:
|
|
153
|
+
- Multiple approval levels
|
|
154
|
+
- Various response options
|
|
155
|
+
- Status-based routing
|
|
156
|
+
|
|
157
|
+
### HTTP Request (n8n-nodes-base.httpRequest)
|
|
158
|
+
|
|
159
|
+
**Purpose**: Call external APIs after human decision
|
|
160
|
+
|
|
161
|
+
**Use Cases**:
|
|
162
|
+
- Update external systems
|
|
163
|
+
- Trigger downstream processes
|
|
164
|
+
- Log decisions to external services
|
|
165
|
+
|
|
166
|
+
### Database Nodes
|
|
167
|
+
|
|
168
|
+
**MySQL** (n8n-nodes-base.mySql), **Postgres** (n8n-nodes-base.postgres), **MongoDB** (n8n-nodes-base.mongoDb):
|
|
169
|
+
- Store approval history
|
|
170
|
+
- Update record status after decision
|
|
171
|
+
- Log human inputs for audit
|
|
172
|
+
|
|
173
|
+
### Google Sheets (n8n-nodes-base.googleSheets)
|
|
174
|
+
|
|
175
|
+
**Purpose**: Track decisions and maintain approval logs
|
|
176
|
+
|
|
177
|
+
**Use Cases**:
|
|
178
|
+
- Approval tracking spreadsheets
|
|
179
|
+
- Decision audit logs
|
|
180
|
+
- User response collection
|
|
181
|
+
|
|
182
|
+
## Common Pitfalls to Avoid
|
|
183
|
+
|
|
184
|
+
### Accidental Resume Triggers
|
|
185
|
+
|
|
186
|
+
**Problem**: Email clients or chat apps preview links and trigger resume unintentionally.
|
|
187
|
+
|
|
188
|
+
**Solution**:
|
|
189
|
+
- Enable "Ignore Bots" option in Wait node
|
|
190
|
+
- Wrap Slack URLs in <> to prevent unfurling
|
|
191
|
+
- Use authentication for sensitive workflows
|
|
192
|
+
- Educate users to click only when ready
|
|
193
|
+
|
|
194
|
+
### Missing Timeouts
|
|
195
|
+
|
|
196
|
+
**Problem**: Workflow waits forever if human never responds.
|
|
197
|
+
|
|
198
|
+
**Solution**:
|
|
199
|
+
- Always set "Limit Wait Time" (e.g., 3 days)
|
|
200
|
+
- Handle timeout scenario in workflow logic
|
|
201
|
+
- Send reminder before deadline
|
|
202
|
+
- Escalate to another person if timeout occurs
|
|
203
|
+
|
|
204
|
+
### Lost Context After Wait
|
|
205
|
+
|
|
206
|
+
**Problem**: Assuming data is lost after Wait node resumes.
|
|
207
|
+
|
|
208
|
+
**Solution**:
|
|
209
|
+
- Wait node preserves all prior data automatically
|
|
210
|
+
- New input merges with existing context
|
|
211
|
+
- Test data structure after resume
|
|
212
|
+
- Use expressions to access both old and new data
|
|
213
|
+
|
|
214
|
+
### Security Issues with Resume URLs
|
|
215
|
+
|
|
216
|
+
**Problem**: Resume URLs exposed to unauthorized users.
|
|
217
|
+
|
|
218
|
+
**Solution**:
|
|
219
|
+
- Treat resume URLs as secrets
|
|
220
|
+
- Use authentication options for sensitive workflows
|
|
221
|
+
- Send only through private channels
|
|
222
|
+
- Consider IP whitelisting if needed
|
|
223
|
+
|
|
224
|
+
### Multiple Wait Node Confusion
|
|
225
|
+
|
|
226
|
+
**Problem**: Using same webhook URL for different wait points.
|
|
227
|
+
|
|
228
|
+
**Solution**:
|
|
229
|
+
- Use unique Webhook Suffix for each Wait node
|
|
230
|
+
- Generate fresh $execution.resumeUrl for each wait
|
|
231
|
+
- Label wait points clearly
|
|
232
|
+
- Document which URL corresponds to which decision
|
|
233
|
+
|
|
234
|
+
### Not Handling All Response Types
|
|
235
|
+
|
|
236
|
+
**Problem**: Only handling expected responses, not edge cases.
|
|
237
|
+
|
|
238
|
+
**Solution**:
|
|
239
|
+
- Handle timeout case explicitly
|
|
240
|
+
- Provide default action for unexpected inputs
|
|
241
|
+
- Validate form data before processing
|
|
242
|
+
- Log all decisions for debugging
|
|
243
|
+
|
|
244
|
+
### Workflow State Persistence
|
|
245
|
+
|
|
246
|
+
**Problem**: Worrying about resource consumption during wait.
|
|
247
|
+
|
|
248
|
+
**Solution**:
|
|
249
|
+
- Waiting executions are saved to database, not running
|
|
250
|
+
- No worker threads consumed during wait
|
|
251
|
+
- Can have hundreds of paused workflows
|
|
252
|
+
- Survives n8n restarts (state in database)
|
|
253
|
+
|
|
254
|
+
### Complex Parallel Approvals
|
|
255
|
+
|
|
256
|
+
**Problem**: Need multiple people to approve before continuing.
|
|
257
|
+
|
|
258
|
+
**Solution**:
|
|
259
|
+
- Use separate Wait node per approver
|
|
260
|
+
- Or create sub-workflow per approver
|
|
261
|
+
- Use Merge node to synchronize branches
|
|
262
|
+
- Consider approval tracking in database`;
|
|
263
|
+
getDocumentation() {
|
|
264
|
+
return this.documentation;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
exports.HumanInTheLoopBestPractices = HumanInTheLoopBestPractices;
|
|
268
|
+
//# sourceMappingURL=human-in-the-loop.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"human-in-the-loop.js","sourceRoot":"","sources":["../../../src/tools/best-practices/human-in-the-loop.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,2BAA2B;IAC9B,SAAS,GAAG,kCAAiB,CAAC,iBAAiB,CAAC;IAChD,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yCA8PO,CAAC;IAEzC,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AAvQD,kEAuQC"}
|
|
@@ -5,23 +5,24 @@ const categorization_1 = require("../../types/categorization");
|
|
|
5
5
|
const chatbot_1 = require("./chatbot");
|
|
6
6
|
const content_generation_1 = require("./content-generation");
|
|
7
7
|
const data_extraction_1 = require("./data-extraction");
|
|
8
|
+
const document_processing_1 = require("./document-processing");
|
|
8
9
|
const form_input_1 = require("./form-input");
|
|
9
10
|
const scraping_and_research_1 = require("./scraping-and-research");
|
|
10
11
|
exports.documentation = {
|
|
11
12
|
[categorization_1.WorkflowTechnique.SCRAPING_AND_RESEARCH]: new scraping_and_research_1.ScrapingAndResearchBestPractices(),
|
|
12
13
|
[categorization_1.WorkflowTechnique.CHATBOT]: new chatbot_1.ChatbotBestPractices(),
|
|
13
14
|
[categorization_1.WorkflowTechnique.CONTENT_GENERATION]: new content_generation_1.ContentGenerationBestPractices(),
|
|
14
|
-
[categorization_1.WorkflowTechnique.DATA_EXTRACTION]: new data_extraction_1.DataExtractionBestPractices(),
|
|
15
|
-
[categorization_1.WorkflowTechnique.FORM_INPUT]: new form_input_1.FormInputBestPractices(),
|
|
16
15
|
[categorization_1.WorkflowTechnique.DATA_ANALYSIS]: undefined,
|
|
16
|
+
[categorization_1.WorkflowTechnique.DATA_EXTRACTION]: new data_extraction_1.DataExtractionBestPractices(),
|
|
17
17
|
[categorization_1.WorkflowTechnique.DATA_TRANSFORMATION]: undefined,
|
|
18
|
-
[categorization_1.WorkflowTechnique.DOCUMENT_PROCESSING]: undefined,
|
|
18
|
+
[categorization_1.WorkflowTechnique.DOCUMENT_PROCESSING]: new document_processing_1.DocumentProcessingBestPractices(),
|
|
19
19
|
[categorization_1.WorkflowTechnique.ENRICHMENT]: undefined,
|
|
20
|
-
[categorization_1.WorkflowTechnique.HUMAN_IN_THE_LOOP]: undefined,
|
|
20
|
+
[categorization_1.WorkflowTechnique.FORM_INPUT]: new form_input_1.FormInputBestPractices(),
|
|
21
21
|
[categorization_1.WorkflowTechnique.KNOWLEDGE_BASE]: undefined,
|
|
22
|
-
[categorization_1.WorkflowTechnique.MONITORING]: undefined,
|
|
23
22
|
[categorization_1.WorkflowTechnique.NOTIFICATION]: undefined,
|
|
24
|
-
[categorization_1.WorkflowTechnique.SCHEDULING]: undefined,
|
|
25
23
|
[categorization_1.WorkflowTechnique.TRIAGE]: undefined,
|
|
24
|
+
[categorization_1.WorkflowTechnique.HUMAN_IN_THE_LOOP]: undefined,
|
|
25
|
+
[categorization_1.WorkflowTechnique.MONITORING]: undefined,
|
|
26
|
+
[categorization_1.WorkflowTechnique.SCHEDULING]: undefined,
|
|
26
27
|
};
|
|
27
28
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/tools/best-practices/index.ts"],"names":[],"mappings":";;;AACA,2DAAuF;AAEvF,uCAAiD;AACjD,6DAAsE;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/tools/best-practices/index.ts"],"names":[],"mappings":";;;AACA,2DAAuF;AAEvF,uCAAiD;AACjD,6DAAsE;AAEtE,uDAAgE;AAEhE,+DAAwE;AAExE,6CAAsD;AAKtD,mEAA2E;AAI9D,QAAA,aAAa,GAAqE;IAC9F,CAAC,kCAAiB,CAAC,qBAAqB,CAAC,EAAE,IAAI,wDAAgC,EAAE;IACjF,CAAC,kCAAiB,CAAC,OAAO,CAAC,EAAE,IAAI,8BAAoB,EAAE;IACvD,CAAC,kCAAiB,CAAC,kBAAkB,CAAC,EAAE,IAAI,mDAA8B,EAAE;IAC5E,CAAC,kCAAiB,CAAC,aAAa,CAAC,EAAE,SAAS;IAC5C,CAAC,kCAAiB,CAAC,eAAe,CAAC,EAAE,IAAI,6CAA2B,EAAE;IACtE,CAAC,kCAAiB,CAAC,mBAAmB,CAAC,EAAE,SAAS;IAClD,CAAC,kCAAiB,CAAC,mBAAmB,CAAC,EAAE,IAAI,qDAA+B,EAAE;IAC9E,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;IACzC,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,IAAI,mCAAsB,EAAE;IAC5D,CAAC,kCAAiB,CAAC,cAAc,CAAC,EAAE,SAAS;IAC7C,CAAC,kCAAiB,CAAC,YAAY,CAAC,EAAE,SAAS;IAC3C,CAAC,kCAAiB,CAAC,MAAM,CAAC,EAAE,SAAS;IACrC,CAAC,kCAAiB,CAAC,iBAAiB,CAAC,EAAE,SAAS;IAChD,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;IACzC,CAAC,kCAAiB,CAAC,UAAU,CAAC,EAAE,SAAS;CACzC,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { BestPracticesDocument } from '../../types/best-practices';
|
|
2
|
+
export declare class KnowledgeBaseBestPractices implements BestPracticesDocument {
|
|
3
|
+
readonly technique: "knowledge_base";
|
|
4
|
+
readonly version = "1.0.0";
|
|
5
|
+
private readonly documentation;
|
|
6
|
+
getDocumentation(): string;
|
|
7
|
+
}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.KnowledgeBaseBestPractices = void 0;
|
|
4
|
+
const categorization_1 = require("../../types/categorization");
|
|
5
|
+
class KnowledgeBaseBestPractices {
|
|
6
|
+
technique = categorization_1.WorkflowTechnique.KNOWLEDGE_BASE;
|
|
7
|
+
version = '1.0.0';
|
|
8
|
+
documentation = `# Best Practices: Knowledge Base Workflows
|
|
9
|
+
|
|
10
|
+
## Workflow Design
|
|
11
|
+
|
|
12
|
+
### Architecture Pattern
|
|
13
|
+
- **Separate Workflows**: Split into two distinct parts:
|
|
14
|
+
- **Ingestion Workflow**: Processes and indexes documents into vector database (triggered on new content or schedule)
|
|
15
|
+
- **Query Workflow**: Retrieves relevant information and generates answers (triggered by user queries)
|
|
16
|
+
- **Modular Design**: Use Execute Workflow node to call query workflow from multiple channels (chat, API, Slack, etc.)
|
|
17
|
+
|
|
18
|
+
### Trigger Strategy
|
|
19
|
+
- **Ingestion Triggers**: File Watchers (Google Drive, S3), Schedule triggers for periodic re-indexing
|
|
20
|
+
- **Query Triggers**: Chat Trigger, Webhook, Slack Trigger based on input channel
|
|
21
|
+
|
|
22
|
+
### Data Type Handling
|
|
23
|
+
- Use Switch/If nodes or Code node to route different file types to appropriate extraction branches
|
|
24
|
+
- Separate processing paths for PDFs, databases, web pages, etc.
|
|
25
|
+
|
|
26
|
+
## Core Processing Pipeline
|
|
27
|
+
|
|
28
|
+
### Document Processing
|
|
29
|
+
1. **Fetch Documents**: Google Drive/Dropbox/S3 nodes, HTTP Request node, Database nodes
|
|
30
|
+
2. **Load & Split**: Default Data Loader → Recursive Character Text Splitter
|
|
31
|
+
- Chunk size: 500-1000 characters (~200 tokens)
|
|
32
|
+
- Overlap: 10-15% to preserve context
|
|
33
|
+
3. **Generate Embeddings**: Embeddings node (OpenAI/HuggingFace/Cohere)
|
|
34
|
+
- **Critical**: Use same model for indexing and queries
|
|
35
|
+
- Example: text-embedding-ada-002 (1536 dimensions)
|
|
36
|
+
|
|
37
|
+
### Vector Store Configuration
|
|
38
|
+
- **Insert Mode**:
|
|
39
|
+
- Use upsert with unique IDs (document ID + chunk number)
|
|
40
|
+
- Include metadata (source, title, page number)
|
|
41
|
+
- Clear namespace option for complete replacement
|
|
42
|
+
- **Query Mode**:
|
|
43
|
+
- Top-K limit: 3-5 results typically optimal
|
|
44
|
+
- Apply similarity score threshold to filter irrelevant matches
|
|
45
|
+
|
|
46
|
+
### LLM Integration
|
|
47
|
+
- **Agent Approach**: AI Agent node with Vector Store Tool
|
|
48
|
+
- Configure clear tool description: "Company Knowledge Base – use this to find relevant policy documents"
|
|
49
|
+
- Connect Window Buffer Memory for conversation history
|
|
50
|
+
- **Direct Query**: Vector Store (Get Many) → OpenAI Chat Model with crafted prompt
|
|
51
|
+
- **System Prompt**: "Answer using only the information from our knowledge base. If you don't find an answer in the provided documents, say you don't know."
|
|
52
|
+
- **Temperature**: 0-0.3 for factual accuracy
|
|
53
|
+
|
|
54
|
+
## Recommended Nodes
|
|
55
|
+
|
|
56
|
+
### Document Handling
|
|
57
|
+
|
|
58
|
+
**Google Drive** (n8n-nodes-base.googleDrive):
|
|
59
|
+
- Purpose: File triggers and retrieval from Google Drive
|
|
60
|
+
- Use cases: Monitor folders for new documents, fetch specific files
|
|
61
|
+
- Best practices: Use triggers for automatic ingestion, handle file types appropriately
|
|
62
|
+
|
|
63
|
+
**HTTP Request** (n8n-nodes-base.httpRequest):
|
|
64
|
+
- Purpose: Fetch documents from URLs/APIs
|
|
65
|
+
- Use cases: Pull content from web pages, download files from APIs
|
|
66
|
+
- Best practices: Handle authentication, check response formats
|
|
67
|
+
|
|
68
|
+
**Notion** (n8n-nodes-base.notion):
|
|
69
|
+
- Purpose: Retrieve content from Notion databases and pages
|
|
70
|
+
- Use cases: Index company wikis, documentation in Notion
|
|
71
|
+
- Best practices: Use appropriate API version, handle nested content
|
|
72
|
+
|
|
73
|
+
**Postgres** (n8n-nodes-base.postgres):
|
|
74
|
+
- Purpose: Query database content for indexing
|
|
75
|
+
- Use cases: Index structured data, retrieve records for embedding
|
|
76
|
+
- Best practices: Use efficient queries, batch large datasets
|
|
77
|
+
|
|
78
|
+
### AI Processing Chain
|
|
79
|
+
|
|
80
|
+
**Document Default Data Loader** (@n8n/n8n-nodes-langchain.documentDefaultDataLoader):
|
|
81
|
+
- Purpose: Load documents into LangChain format
|
|
82
|
+
- Use cases: Initial document processing, format conversion
|
|
83
|
+
- Best practices: Handle various document types, preserve metadata
|
|
84
|
+
|
|
85
|
+
**Text Splitter Recursive Character** (@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter):
|
|
86
|
+
- Purpose: Split documents into manageable chunks
|
|
87
|
+
- Configuration:
|
|
88
|
+
- Chunk size: 500-1000 characters (~200 tokens)
|
|
89
|
+
- Overlap: 10-15% to preserve context
|
|
90
|
+
- Best practices: Test chunk sizes for optimal retrieval quality, ensure context preservation
|
|
91
|
+
|
|
92
|
+
**Embeddings OpenAI** (@n8n/n8n-nodes-langchain.embeddingsOpenAi):
|
|
93
|
+
- Purpose: Generate vector embeddings for text
|
|
94
|
+
- Model options:
|
|
95
|
+
- text-embedding-3-small (newer, cost-effective)
|
|
96
|
+
- text-embedding-ada-002 (1536 dimensions, widely used)
|
|
97
|
+
- **Critical**: Use same model for indexing and queries
|
|
98
|
+
- Best practices: Choose model based on quality/cost tradeoffs, maintain consistency
|
|
99
|
+
|
|
100
|
+
### Vector Stores
|
|
101
|
+
|
|
102
|
+
**Vector Store Pinecone** (@n8n/n8n-nodes-langchain.vectorStorePinecone):
|
|
103
|
+
- Purpose: Pinecone vector database integration
|
|
104
|
+
- Use cases: Production knowledge bases, scalable deployments
|
|
105
|
+
- Best practices: Use namespaces for organization, set appropriate index dimensions
|
|
106
|
+
|
|
107
|
+
**Vector Store Qdrant** (@n8n/n8n-nodes-langchain.vectorStoreQdrant):
|
|
108
|
+
- Purpose: Qdrant vector database integration
|
|
109
|
+
- Use cases: Self-hosted vector storage, high-performance search
|
|
110
|
+
- Best practices: Configure collections properly, use filters for metadata
|
|
111
|
+
|
|
112
|
+
**Vector Store Supabase** (@n8n/n8n-nodes-langchain.vectorStoreSupabase):
|
|
113
|
+
- Purpose: Supabase pgvector integration
|
|
114
|
+
- Use cases: PostgreSQL-based vector storage, integrated with existing Supabase projects
|
|
115
|
+
- Best practices: Ensure pgvector extension is enabled, use proper indexing
|
|
116
|
+
|
|
117
|
+
**Vector Store In Memory** (@n8n/n8n-nodes-langchain.vectorStoreInMemory):
|
|
118
|
+
- Purpose: In-memory vector storage for testing
|
|
119
|
+
- Use cases: Development, testing, small datasets
|
|
120
|
+
- Best practices: Not for production, data lost on restart
|
|
121
|
+
|
|
122
|
+
### Agent & LLM
|
|
123
|
+
|
|
124
|
+
**AI Agent** (@n8n/n8n-nodes-langchain.agent):
|
|
125
|
+
- Purpose: Orchestrate tool use and LLM interactions
|
|
126
|
+
- Configuration: Connect Vector Store Tool, add memory
|
|
127
|
+
- Best practices: Configure clear tool descriptions, use appropriate prompts
|
|
128
|
+
|
|
129
|
+
**Tool Vector Store** (@n8n/n8n-nodes-langchain.toolVectorStore):
|
|
130
|
+
- Purpose: Vector store tool for agents
|
|
131
|
+
- Configuration: "Company Knowledge Base – use this to find relevant policy documents"
|
|
132
|
+
- Best practices: Use descriptive tool names, set appropriate retrieval limits (3-5 results)
|
|
133
|
+
|
|
134
|
+
**OpenAI** (@n8n/n8n-nodes-langchain.openAi):
|
|
135
|
+
- Purpose: Chat model for generating responses
|
|
136
|
+
- Configuration:
|
|
137
|
+
- Temperature: 0-0.3 for factual Q&A
|
|
138
|
+
- System prompt: "Answer using only the information from our knowledge base"
|
|
139
|
+
- Best practices: Use low temperature for accuracy, instruct to admit when unsure
|
|
140
|
+
|
|
141
|
+
**Memory Window Buffer** (@n8n/n8n-nodes-langchain.memoryBufferWindow):
|
|
142
|
+
- Purpose: Maintain conversation history
|
|
143
|
+
- Configuration: 3-5 message turns typically sufficient
|
|
144
|
+
- Best practices: Balance context preservation with token limits
|
|
145
|
+
|
|
146
|
+
### Utility
|
|
147
|
+
|
|
148
|
+
**Switch** (n8n-nodes-base.switch):
|
|
149
|
+
- Purpose: Route by file type or content type
|
|
150
|
+
- Use cases: Different processing for PDFs vs text vs images
|
|
151
|
+
- Best practices: Always define default case, use clear conditions
|
|
152
|
+
|
|
153
|
+
**Execute Workflow** (n8n-nodes-base.executeWorkflow):
|
|
154
|
+
- Purpose: Call sub-workflows for modular design
|
|
155
|
+
- Use cases: Reuse query workflow across channels, separate ingestion logic
|
|
156
|
+
- Best practices: Design for reusability, pass appropriate parameters
|
|
157
|
+
|
|
158
|
+
## Common Pitfalls to Avoid
|
|
159
|
+
|
|
160
|
+
### Critical Mistakes
|
|
161
|
+
|
|
162
|
+
**Inconsistent Embeddings**:
|
|
163
|
+
- **Problem**: Using different embedding models for indexing vs queries breaks semantic search
|
|
164
|
+
- **Solution**: Always use the same model throughout (e.g., text-embedding-ada-002 for both)
|
|
165
|
+
- Document which model is used in workflow description
|
|
166
|
+
|
|
167
|
+
**Vector Dimension Mismatch**:
|
|
168
|
+
- **Problem**: Index dimensions don't match embedding model output, causing errors
|
|
169
|
+
- **Solution**: Ensure vector store index dimensions match embedding model output exactly
|
|
170
|
+
- Common: ada-002 = 1536 dimensions, text-embedding-3-small = 1536 dimensions
|
|
171
|
+
|
|
172
|
+
**Missing Updates**:
|
|
173
|
+
- **Problem**: Not updating or removing outdated vectors leads to conflicting information
|
|
174
|
+
- **Solution**: Implement update/delete mechanisms with unique IDs
|
|
175
|
+
- Use document ID + chunk number as unique identifier
|
|
176
|
+
- Schedule regular re-indexing for changing content
|
|
177
|
+
|
|
178
|
+
**Treating Vector DB as Full Database**:
|
|
179
|
+
- **Problem**: Using vector stores for general data storage instead of semantic search
|
|
180
|
+
- **Solution**: Vector DBs are for semantic search only, not bulk data storage
|
|
181
|
+
- Store full documents in traditional databases, only embeddings in vector store
|
|
182
|
+
|
|
183
|
+
### Performance Issues
|
|
184
|
+
|
|
185
|
+
**Oversized Chunks**:
|
|
186
|
+
- **Problem**: Large chunks dilute relevance and exceed token limits
|
|
187
|
+
- **Solution**: Keep chunks to 500-1000 characters (~200 tokens)
|
|
188
|
+
- Test different sizes to find optimal retrieval quality
|
|
189
|
+
|
|
190
|
+
**Undersized Chunks**:
|
|
191
|
+
- **Problem**: Too small chunks lose necessary context
|
|
192
|
+
- **Solution**: Ensure chunks have sufficient context to be meaningful
|
|
193
|
+
- Use 10-15% overlap between chunks
|
|
194
|
+
|
|
195
|
+
**Too Many Retrieved Documents**:
|
|
196
|
+
- **Problem**: Retrieving 10+ documents overwhelms LLM and reduces accuracy
|
|
197
|
+
- **Solution**: Limit to 3-5 results for optimal quality
|
|
198
|
+
- Use similarity thresholds to filter irrelevant matches
|
|
199
|
+
|
|
200
|
+
**UI Overload**:
|
|
201
|
+
- **Problem**: Indexing thousands of chunks freezes workflow editor
|
|
202
|
+
- **Solution**: Run large indexing jobs in production mode, not editor
|
|
203
|
+
- Consider batch processing for very large datasets
|
|
204
|
+
|
|
205
|
+
### Configuration Errors
|
|
206
|
+
|
|
207
|
+
**No Metadata**:
|
|
208
|
+
- **Problem**: Missing source/date metadata makes results less interpretable
|
|
209
|
+
- **Solution**: Always include metadata (source, title, page number, date)
|
|
210
|
+
- Helps users understand context of retrieved information
|
|
211
|
+
|
|
212
|
+
**No Unique IDs**:
|
|
213
|
+
- **Problem**: Can't update specific documents, causes duplicates
|
|
214
|
+
- **Solution**: Use document ID + chunk number as unique identifier
|
|
215
|
+
- Enables targeted updates and deletions
|
|
216
|
+
|
|
217
|
+
**High Temperature**:
|
|
218
|
+
- **Problem**: Creative temperature settings cause hallucinations in factual Q&A
|
|
219
|
+
- **Solution**: Use temperature 0-0.3 for factual responses
|
|
220
|
+
- Higher temperatures (0.7-1.0) only for creative tasks
|
|
221
|
+
|
|
222
|
+
**Generic Tool Descriptions**:
|
|
223
|
+
- **Problem**: Vague descriptions cause agents to misuse tools
|
|
224
|
+
- **Solution**: Use specific, descriptive tool names
|
|
225
|
+
- Good: "Company HR Policy Knowledge Base"
|
|
226
|
+
- Bad: "Knowledge base"
|
|
227
|
+
|
|
228
|
+
### Data Management
|
|
229
|
+
|
|
230
|
+
**Stale Data**:
|
|
231
|
+
- **Problem**: Outdated information in knowledge base leads to wrong answers
|
|
232
|
+
- **Solution**: Schedule regular re-indexing or implement change detection
|
|
233
|
+
- Use document timestamps to track freshness
|
|
234
|
+
|
|
235
|
+
**No Namespace Separation**:
|
|
236
|
+
- **Problem**: Mixing unrelated domains in same index reduces accuracy
|
|
237
|
+
- **Solution**: Use namespaces to separate different knowledge domains
|
|
238
|
+
- Example: "hr-policies", "technical-docs", "customer-faqs"
|
|
239
|
+
|
|
240
|
+
**Ignoring Token Limits**:
|
|
241
|
+
- **Problem**: Combined length of query + context + response exceeds model limits
|
|
242
|
+
- **Solution**: Monitor total token usage, limit context appropriately
|
|
243
|
+
- GPT-4: 8k/32k tokens, GPT-3.5: 4k/16k tokens
|
|
244
|
+
|
|
245
|
+
**Security Gaps**:
|
|
246
|
+
- **Problem**: Sending sensitive data without access control or encryption
|
|
247
|
+
- **Solution**: Implement proper access controls, use secure connections
|
|
248
|
+
- Consider data classification and access restrictions
|
|
249
|
+
|
|
250
|
+
## Best Practices Summary
|
|
251
|
+
|
|
252
|
+
1. **Always use consistent embedding models** throughout the pipeline
|
|
253
|
+
2. **Design modular workflows** for reusability across channels
|
|
254
|
+
3. **Include metadata** for better context and filtering
|
|
255
|
+
4. **Implement proper update/delete mechanisms** with unique IDs
|
|
256
|
+
5. **Test chunk sizes** for optimal retrieval quality (500-1000 characters)
|
|
257
|
+
6. **Run large indexing operations** in production mode
|
|
258
|
+
7. **Set appropriate retrieval limits** (3-5 results) and similarity thresholds
|
|
259
|
+
8. **Use low temperature** (0-0.3) for factual responses
|
|
260
|
+
9. **Secure sensitive data** with proper access controls
|
|
261
|
+
10. **Monitor and update** regularly to prevent stale information
|
|
262
|
+
`;
|
|
263
|
+
getDocumentation() {
|
|
264
|
+
return this.documentation;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
exports.KnowledgeBaseBestPractices = KnowledgeBaseBestPractices;
|
|
268
|
+
//# sourceMappingURL=knowledge-base.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-base.js","sourceRoot":"","sources":["../../../src/tools/best-practices/knowledge-base.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,0BAA0B;IAC7B,SAAS,GAAG,kCAAiB,CAAC,cAAc,CAAC;IAC7C,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8PjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AAvQD,gEAuQC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { BestPracticesDocument } from '../../types/best-practices';
|
|
2
|
+
export declare class MonitoringBestPractices implements BestPracticesDocument {
|
|
3
|
+
readonly technique: "monitoring";
|
|
4
|
+
readonly version = "1.0.0";
|
|
5
|
+
private readonly documentation;
|
|
6
|
+
getDocumentation(): string;
|
|
7
|
+
}
|