@n8n/ai-workflow-builder 0.31.1 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/dist/ai-workflow-builder-agent.service.d.ts +6 -2
  2. package/dist/ai-workflow-builder-agent.service.js +45 -3
  3. package/dist/ai-workflow-builder-agent.service.js.map +1 -1
  4. package/dist/build.tsbuildinfo +1 -1
  5. package/dist/tools/best-practices/data-analysis.d.ts +7 -0
  6. package/dist/tools/best-practices/data-analysis.js +367 -0
  7. package/dist/tools/best-practices/data-analysis.js.map +1 -0
  8. package/dist/tools/best-practices/data-extraction.js +7 -0
  9. package/dist/tools/best-practices/data-extraction.js.map +1 -1
  10. package/dist/tools/best-practices/data-transformation.d.ts +7 -0
  11. package/dist/tools/best-practices/data-transformation.js +181 -0
  12. package/dist/tools/best-practices/data-transformation.js.map +1 -0
  13. package/dist/tools/best-practices/document-processing.d.ts +7 -0
  14. package/dist/tools/best-practices/document-processing.js +324 -0
  15. package/dist/tools/best-practices/document-processing.js.map +1 -0
  16. package/dist/tools/best-practices/enrichment.d.ts +7 -0
  17. package/dist/tools/best-practices/enrichment.js +271 -0
  18. package/dist/tools/best-practices/enrichment.js.map +1 -0
  19. package/dist/tools/best-practices/human-in-the-loop.d.ts +7 -0
  20. package/dist/tools/best-practices/human-in-the-loop.js +268 -0
  21. package/dist/tools/best-practices/human-in-the-loop.js.map +1 -0
  22. package/dist/tools/best-practices/index.js +7 -6
  23. package/dist/tools/best-practices/index.js.map +1 -1
  24. package/dist/tools/best-practices/knowledge-base.d.ts +7 -0
  25. package/dist/tools/best-practices/knowledge-base.js +268 -0
  26. package/dist/tools/best-practices/knowledge-base.js.map +1 -0
  27. package/dist/tools/best-practices/monitoring.d.ts +7 -0
  28. package/dist/tools/best-practices/monitoring.js +178 -0
  29. package/dist/tools/best-practices/monitoring.js.map +1 -0
  30. package/dist/tools/best-practices/notification.d.ts +7 -0
  31. package/dist/tools/best-practices/notification.js +229 -0
  32. package/dist/tools/best-practices/notification.js.map +1 -0
  33. package/dist/tools/best-practices/scheduling.d.ts +7 -0
  34. package/dist/tools/best-practices/scheduling.js +281 -0
  35. package/dist/tools/best-practices/scheduling.js.map +1 -0
  36. package/dist/tools/best-practices/triage.d.ts +7 -0
  37. package/dist/tools/best-practices/triage.js +211 -0
  38. package/dist/tools/best-practices/triage.js.map +1 -0
  39. package/dist/tools/categorize-prompt.tool.js +1 -0
  40. package/dist/tools/categorize-prompt.tool.js.map +1 -1
  41. package/dist/tools/helpers/response.js +2 -0
  42. package/dist/tools/helpers/response.js.map +1 -1
  43. package/dist/tools/prompts/main-agent.prompt.js +9 -1
  44. package/dist/tools/prompts/main-agent.prompt.js.map +1 -1
  45. package/dist/tools/validate-workflow.tool.js +12 -0
  46. package/dist/tools/validate-workflow.tool.js.map +1 -1
  47. package/dist/utils/tool-executor.js +19 -0
  48. package/dist/utils/tool-executor.js.map +1 -1
  49. package/dist/validation/checks/agent-prompt.js +2 -0
  50. package/dist/validation/checks/agent-prompt.js.map +1 -1
  51. package/dist/validation/checks/connections.js +8 -0
  52. package/dist/validation/checks/connections.js.map +1 -1
  53. package/dist/validation/checks/from-ai.js +1 -0
  54. package/dist/validation/checks/from-ai.js.map +1 -1
  55. package/dist/validation/checks/tools.js +2 -0
  56. package/dist/validation/checks/tools.js.map +1 -1
  57. package/dist/validation/checks/trigger.js +2 -0
  58. package/dist/validation/checks/trigger.js.map +1 -1
  59. package/dist/validation/types.d.ts +4 -0
  60. package/dist/validation/types.js +18 -0
  61. package/dist/validation/types.js.map +1 -1
  62. package/dist/workflow-builder-agent.d.ts +5 -2
  63. package/dist/workflow-builder-agent.js +4 -3
  64. package/dist/workflow-builder-agent.js.map +1 -1
  65. package/dist/workflow-state.d.ts +3 -1
  66. package/dist/workflow-state.js +8 -0
  67. package/dist/workflow-state.js.map +1 -1
  68. package/package.json +11 -7
@@ -0,0 +1,324 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.DocumentProcessingBestPractices = void 0;
+ const categorization_1 = require("../../types/categorization");
+ class DocumentProcessingBestPractices {
+ technique = categorization_1.WorkflowTechnique.DOCUMENT_PROCESSING;
+ version = '1.0.0';
+ documentation = `# Best Practices: Document Processing Workflows
+
+ ## Workflow Design
+
+ Document processing workflows extract and act on content from files like PDFs, images, Word documents, and spreadsheets. Design your workflow following these core patterns:
+
+ ### Core Architecture Pattern
+ Trigger → Capture Binary → Extract Text → Parse/Transform → Route to Destination → Notify
+
+ ### Common Flow Patterns
+
+ **Simple Document Processing:**
+ - Gmail Trigger → Check file type → Extract from File → DataTable → Slack notification
+ - Best for: Basic text-based PDFs with straightforward data extraction
+
+ **Complex Document Processing with AI:**
+ - Webhook → File Type Check → OCR (if image) → AI Extract → Validate → CRM Update → Multiple notifications
+ - Best for: Varied document formats requiring intelligent parsing
+
+ **Batch Document Processing:**
+ - Main workflow: Schedule Trigger → Fetch Files → Split In Batches → Sub-workflow → Merge Results → Bulk Update
+ - Sub-workflow: When Executed by Another Workflow → Process result
+ - Best for: High-volume processing with API rate limits
+
+ **Multi-Source Document Aggregation:**
+ - Multiple Triggers (Email + Drive + Webhook) → Set common fields → Standardize → Process → Store
+ - Best for: Documents from various channels needing unified processing
+
+ ### Branching Strategy
+
+ Always branch early based on document characteristics:
+ - **File Type Branching**: Use IF/Switch nodes immediately after ingestion to route PDFs vs images vs spreadsheets
+ - **Provider Branching**: Route documents to provider-specific processing (e.g., different invoice formats)
+ - **Quality Branching**: Separate high-confidence extractions from those needing manual review
+
+ ## Binary Data Management
+ Documents in n8n are handled as binary data that must be carefully preserved throughout the workflow.
+
+ ### Referencing Binary Data from Other Nodes
+ When you need to reference binary data from a previous node, use this syntax:
+ - Expression: '{{ $('Node Name').item.binary.property_name }}' or '{{ $binary.property_name }}' for the previous node's item
+ - Example for Gmail attachments: '{{ $('Gmail Trigger').item.binary.attachment_0 }}' or '{{ $binary.attachment_0 }}' for the previous node's item
+ - Example for webhook data: '{{ $('Webhook').item.binary.data }}' or '{{ $binary.data }}' for the previous node's item
+ - Important: The property name depends on how the previous node names the binary data
+
+ ### Preserving Binary Data
+ - Many nodes (Code, Edit Fields, AI nodes) output JSON and drop binary data by default
+ - Use parallel branches: one for processing, one to preserve the original file
+ - Rejoin branches with Merge node in pass-through mode
+ - Alternative: Configure nodes to keep binary (e.g., Edit Fields node's "Include Other Input Fields" option ON)
+
+ ### Memory Optimization
+ For high-volume processing:
+ - Process files sequentially or in small batches
+ - Drop unnecessary binary data after extraction to free memory
+
+ ## Text Extraction Strategy
+
+ Choose extraction method based on document type and content:
+
+ ### Critical: File Type Detection
+ **ALWAYS check the file type before using the Extract from File node** (unless the file type is already known):
+ - Use an IF node to check file extension or MIME type first
+ - The Extract from File node has multiple operations, each for a specific file type:
+ - "Extract from PDF" for PDF files
+ - "Extract from MS Excel" for Excel files (.xlsx, .xls)
+ - "Extract from MS Word" for Word documents (.docx, .doc)
+ - "Extract from CSV" for CSV files
+ - "Extract from HTML" for HTML files
+ - "Extract from RTF" for Rich Text Format files
+ - "Extract from Text File" for plain text files
+ - Using the wrong operation will result in errors or empty output
+
+ ### Decision Tree for Extraction
+ 1. **Check file type** → Route to appropriate extraction method
+ 2. **Scanned image/PDF?** → Use OCR service (OCR.space, AWS Textract, Google Vision)
+ 3. **Structured invoice/receipt?** → Use specialized parser (Mindee) or AI extraction
+ 4. **Text-based document?** → Use Extract from File with the correct operation for that file type
+
+ ### Fallback Strategy
+ Always implement a fallback for extraction failures, for example as sketched below:
+ - Check if text extraction returns empty
+ - If empty, automatically route to OCR
+ - If OCR fails, send to manual review queue
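+
+ For illustration, the empty-result check can live in a Code node placed right after Extract from File (a minimal sketch, not tied to any specific template; it assumes the extracted text arrives under the "text" key and uses an example flag name needsOcr for the IF node that routes to OCR):
+
+     // Code node ("Run Once for All Items"): flag items whose extraction came back empty
+     return $input.all().map((item) => {
+       const text = (item.json.text || '').toString().trim();
+       return {
+         json: { ...item.json, needsOcr: text.length === 0 },
+         binary: item.binary, // keep the original file so the OCR branch can still read it
+       };
+     });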
+
+ ## Data Parsing & Classification
+
+ ### AI-Powered Extraction Pattern
+ For varied or complex documents:
+
+ Option 1 - Using Document Loader (Recommended for binary files):
+ 1. Pass binary data directly to Document Loader node (set Data Source to "Binary")
+ 2. Connect to AI Agent or LLM Chain for processing
+ 3. Use Structured Output Parser to ensure consistent JSON
+ 4. Validate extracted fields before processing
+
+ Option 2 - Using text extraction:
+ 1. Extract raw text using Extract from File or OCR
+ 2. Pass to AI Agent or LLM Chain with structured prompt
+ 3. Use Structured Output Parser to ensure consistent JSON
+ 4. Validate extracted fields before processing
+
+ Example system prompt structure:
+ "Extract the following fields from the document: [field list]. Return as JSON with this schema: [schema example]"
+
+ ### Document Classification Flow
+ Classify before processing for better accuracy:
+ 1. Initial AI classification (Invoice vs Receipt vs Contract)
+ 2. Route to specialized sub-workflow based on type
+ 3. Use type-specific prompts and validation rules
+ 4. This reduces errors and improves extraction quality
+
+ ## Error Handling Strategy
+
+ Build resilience at every step:
+
+ ### Validation Checkpoints
+ - After extraction: Verify the text is not empty
+ - After AI parsing: Validate the JSON schema
+ - Before database insert: Check required fields
+ - After API calls: Verify a success response
+
+ ## Performance Optimization
+
+ ### Batch Processing Strategy
+ - Use the Split In Batches node: process 5-10 files at a time
+ - Implement delays between batches for rate-limited APIs
+ - Monitor memory usage and adjust batch size accordingly
+
+ ## Recommended Nodes
+
+ ### Triggers & Input
+
+ **Gmail Trigger (n8n-nodes-base.gmailTrigger)**
+ Purpose: Monitor Gmail for emails with attachments (Recommended over IMAP)
+ Advantages: Real-time processing, simpler authentication, better integration with Google Workspace
+ Critical Configuration for Attachments:
+ - **MUST set "Simplify" to FALSE** - otherwise attachments won't be available
+ - **MUST set "Download Attachments" to TRUE** to retrieve files
+ - Set appropriate label filters
+ - Set "Property Prefix Name" (e.g., "attachment_") - attachments will be named with this prefix plus an index
+ - Important: Its binary data is referenced as "attachment_0", "attachment_1", etc., NOT "data"
+
+ **Email Read (IMAP) (n8n-nodes-base.emailReadImap)**
+ Purpose: Alternative email fetching when there's no specialized node for the email provider
+ Configuration:
+ - Enable "Download Attachments" to retrieve files
+ - Set "Property Prefix Name" (e.g., "attachment_") - attachments will be named with this prefix plus an index
+ - Important: The binary data is referenced as "attachment_0", "attachment_1", etc., NOT "data"
+
+ **HTTP Webhook (n8n-nodes-base.webhook)**
+ Purpose: Receive file uploads from web forms
+ Configuration: Enable "Raw Body" for binary data
+
+ **Google Drive Trigger (n8n-nodes-base.googleDriveTrigger)**
+ Purpose: Monitor folders for new documents
+ Configuration: Set appropriate folder and file type filters
+
+ ### Text Extraction
+
+ **Extract from File (n8n-nodes-base.extractFromFile)**
+ Purpose: Extract text from various file formats using format-specific operations
+ Critical: ALWAYS check the file type first with an IF or Switch node, then select the correct operation (Extract from PDF, Extract from MS Excel, etc.)
+ Output: Extracted text is returned under the "text" key in JSON (e.g., access with {{ $json.text }})
+ Pitfalls: Returns empty output for scanned documents - always check and fall back to OCR; using the wrong operation causes errors
+
+ **AWS Textract (n8n-nodes-base.awsTextract)**
+ Purpose: Advanced OCR with table and form detection
+ Best for: Structured documents like invoices and forms
+
+ **Mindee (n8n-nodes-base.mindee)**
+ Purpose: Specialized invoice and receipt parsing
+ Returns: Structured JSON with line items, totals, dates
+
+ ### Data Processing
+
+ **AI Agent (@n8n/n8n-nodes-langchain.agent)**
+ Purpose: Intelligent document parsing and decision making
+ Configuration: Include structured output tools for consistent results
+
+ **LLM Chain (@n8n/n8n-nodes-langchain.chainLlm)**
+ Purpose: Document classification and data extraction
+ Use with: Structured Output Parser for JSON consistency
+
+ **Document Loader (@n8n/n8n-nodes-langchain.documentLoader)**
+ Purpose: Load and process documents directly from binary data for AI processing
+ Critical: Use the "Binary" data source option to handle binary files directly - no need to convert to JSON first
+ Configuration: Select "Binary" as Data Source, specify the binary property name (by default "data", unless renamed by a previous node)
+ Best for: Direct document processing in AI workflows without intermediate extraction steps
+
+ **Structured Output Parser (@n8n/n8n-nodes-langchain.outputParserStructured)**
+ Purpose: Ensure AI outputs match the expected JSON schema
+ Critical for: Database inserts and API calls
+
+ ### Vector Storage (for RAG/Semantic Search)
+ **Simple Vector Store (@n8n/n8n-nodes-langchain.vectorStore) - RECOMMENDED**
+ Purpose: Easy-to-setup vector storage for document embeddings
+ Advantages:
+ - No external dependencies or API keys required
+ - Works out of the box with local storage
+ - Perfect for prototyping and small to medium datasets
+ Configuration: Just connect and use - no complex setup needed
+ Best for: Most document processing workflows that need semantic search
+
+ ### Flow Control
+
+ **Split In Batches (n8n-nodes-base.splitInBatches)**
+ Purpose: Process multiple documents in controlled batches
+ Configuration: Set batch size based on API limits and memory
+ Outputs (in order):
+ - Output 0 "done": Executes after all batches are processed - use for final aggregation or notifications
+ - Output 1 "loop": Connect processing nodes here - executes for each batch
+ Important: Connect processing logic to the second output (loop), completion logic to the first output (done)
+
+ **Merge (n8n-nodes-base.merge)**
+ Purpose: Combine data from multiple branches that need to execute together
+ Critical: The Merge node waits for ALL input branches to complete - do NOT use for independent/optional branches
+ Modes: Use "Pass Through" to preserve binary from one branch
+
+ **Edit Fields (Set) (n8n-nodes-base.set)**
+ Purpose: Better choice for combining data from separate/independent branches
+ Use for: Adding fields from different sources, preserving binary while adding processed data
+ Configuration: Set common fields and use "Include Other Input Fields" OFF to preserve existing data including binary
+
+ **Execute Workflow Trigger (n8n-nodes-base.executeWorkflowTrigger)**
+ Purpose: Start point for sub-workflows that are called by other workflows
+ Configuration: Automatically receives data from the calling workflow, including binary data
+ Best practice: Use for modular workflow design, heavy processing tasks, or reusable workflow components
+ Pairing: Must be used with an Execute Workflow node in the parent workflow
+
+ **Execute Workflow (n8n-nodes-base.executeWorkflow)**
+ Purpose: Call and execute another workflow from within the current workflow
+ Critical configurations:
+ - Workflow ID: Use the expression "{{ $workflow.id }}" to reference sub-workflows in the same workflow
+ - Choose execution mode: "Run Once for All Items" or "Run Once for Each Item"
+ - Binary data is automatically passed to the sub-workflow
+ Best practice: Use for delegating heavy processing, creating reusable modules, or managing memory in large batch operations
+
+ ### Data Destinations
+
+ **DataTable (n8n-nodes-base.dataTable)**
+ Purpose: Store extracted data in n8n's built-in data tables
+ Operations: Insert, Update, Select rows without external dependencies
+ Best for: Self-contained workflows that don't require external storage
+
+ **Google Sheets (n8n-nodes-base.googleSheets)**
+ Purpose: Log extracted data in an external spreadsheet
+ Operations: Use "Append" for new rows, "Update" with a key column for existing rows
+ Best for: Collaborative review and manual data validation
+
+ **Database Nodes**
+ - Postgres (n8n-nodes-base.postgres)
+ - MySQL (n8n-nodes-base.mySql)
+ - MongoDB (n8n-nodes-base.mongoDb)
+ Purpose: Store structured extraction results in production databases
+ Best Practice: Validate the data schema before inserting
+
+ ### Utilities
+
+ **IF/Switch (n8n-nodes-base.if, n8n-nodes-base.switch)**
+ Purpose: Route based on file type, extraction quality, or classification
+
+ **Function/Code (n8n-nodes-base.function, n8n-nodes-base.code)**
+ Purpose: Custom validation, data transformation, or regex extraction
+
+ **HTTP Request (n8n-nodes-base.httpRequest)**
+ Purpose: Call external OCR APIs (OCR.space, Google Vision, Mistral OCR)
+ Configuration: Set "Response Format: File" for downloads
+ Critical: NEVER set API keys directly in the request - the user can set credentials from the UI for secure API key management
+
+ ## Common Pitfalls to Avoid
+
+ ### Binary Data Loss
+
+ **Problem**: The binary file "disappears" after processing nodes
+ **Solution**:
+ - Use a Merge node to reattach binary data after processing
+ - Or configure nodes to explicitly keep binary data
+ - In Code nodes: copy items[0].binary to the output (see the sketch below)
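+
+ A minimal Code node sketch of that tip ("Run Once for All Items"; the processedAt field is only an example of a JSON change, and the incoming binary property names are passed through unchanged):
+
+     // Re-attach the incoming binary data so downstream nodes still see the original file
+     return $input.all().map((item) => ({
+       json: { ...item.json, processedAt: new Date().toISOString() },
+       binary: item.binary, // without this line the file would be dropped
+     }));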
+
+ ### Incorrect OCR Fallback
+
+ **Problem**: Not detecting when text extraction fails
+ **Solution**:
+ - Always check if the extraction result is empty
+ - Implement automatic OCR fallback for scanned documents
+ - Don't assume all PDFs have extractable text
+
+ ### API Format Mismatches
+
+ **Problem**: Sending files in the wrong format to APIs
+ **Solution**:
+ - Check if the API needs multipart/form-data vs Base64
+ - Use "Extract from File" and "Convert to File" for format conversion
+
+ ### Memory Overload
+
+ **Problem**: The workflow crashes with large or multiple files
+ **Solution**:
+ - Process files sequentially or in small batches
+ - Enable filesystem mode for binary data storage
+ - Drop unnecessary data after extraction
+ - Create a sub-workflow in the same workflow using "When Executed by Another Workflow" and "Execute Workflow". Delegate the heavy part of the workflow to the sub-workflow.
+
+ ### Duplicate Processing
+
+ **Problem**: The same documents are processed repeatedly
+ **Solution**:
+ - Configure email triggers to mark messages as read
+ - Use "unseen" filters for email fetching
+ - Implement deduplication logic based on file hash or name`;
+ getDocumentation() {
+ return this.documentation;
+ }
+ }
+ exports.DocumentProcessingBestPractices = DocumentProcessingBestPractices;
+ //# sourceMappingURL=document-processing.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"document-processing.js","sourceRoot":"","sources":["../../../src/tools/best-practices/document-processing.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,+BAA+B;IAClC,SAAS,GAAG,kCAAiB,CAAC,mBAAmB,CAAC;IAClD,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2DAsTyB,CAAC;IAE3D,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AA/TD,0EA+TC"}
@@ -0,0 +1,7 @@
+ import type { BestPracticesDocument } from '../../types/best-practices';
+ export declare class EnrichmentBestPractices implements BestPracticesDocument {
+ readonly technique: "enrichment";
+ readonly version = "1.0.0";
+ private readonly documentation;
+ getDocumentation(): string;
+ }
@@ -0,0 +1,271 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.EnrichmentBestPractices = void 0;
+ const categorization_1 = require("../../types/categorization");
+ class EnrichmentBestPractices {
+ technique = categorization_1.WorkflowTechnique.ENRICHMENT;
+ version = '1.0.0';
+ documentation = `# Best Practices: Data Enrichment Workflows
+
+ ## Workflow Design
+
+ ### Core Principles
+ - Start with data retrieval and validation before enrichment
+ - Process data incrementally to avoid overwhelming APIs
+ - Always include error handling for failed enrichments
+ - Design for reusability with sub-workflows where appropriate
+
+ ### Architecture Pattern
+ 1. **Input Stage**: Validate and prepare incoming data
+ 2. **Enrichment Stage**: Parallel or sequential API calls based on dependencies
+ 3. **Transformation Stage**: Normalize and merge enriched data
+ 4. **Output Stage**: Format and deliver enriched results
+
+ ## Data Enrichment Guidelines
+
+ ### 1. Input Validation
+ **Always validate incoming data before enrichment**
+ - Use IF node (n8n-nodes-base.if) to check for required fields
+ - Implement Set node (n8n-nodes-base.set) to standardize data format
+ - Add Code node (n8n-nodes-base.code) for complex validation logic
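+
+ For illustration, complex validation in a Code node might look like the sketch below (the required field names and the isValid/missingFields flags are examples only):
+
+     // Code node: mark items that are missing required fields so an IF node can
+     // route them away from the enrichment calls
+     const required = ['email', 'company'];
+     return $input.all().map((item) => {
+       const missing = required.filter((field) => {
+         const value = item.json[field];
+         return value === undefined || value === null || value === '';
+       });
+       return { json: { ...item.json, isValid: missing.length === 0, missingFields: missing } };
+     });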
+
+ ### 2. API Rate Limiting
+ **Respect external service limits**
+ - Implement Wait node (n8n-nodes-base.wait) between batch requests
+ - Use SplitInBatches node (n8n-nodes-base.splitInBatches) for large datasets
+ - Set batch size: 10-50 items depending on API limits
+ - Add delay: 1-2 seconds between batches
+
+ ### 3. Error Handling
+ **Build resilient enrichment flows**
+ - Wrap API calls in Try/Catch pattern using Error Trigger node
+ - Use StopAndError node (n8n-nodes-base.stopAndError) for critical failures
+ - Implement fallback enrichment sources with Switch node (n8n-nodes-base.switch)
+ - Log failures to database or file for later retry
+
+ ### 4. Data Merging
+ **Combine enriched data effectively**
+ - Use Merge node (n8n-nodes-base.merge) with "Merge By Key" mode
+ - Specify unique identifiers for accurate matching
+ - Handle missing enrichment data with default values
+ - Preserve original data alongside enrichments
+
+ ### 5. Caching Strategy
+ **Minimize redundant API calls**
+ - Check cache before making external requests
+ - Use Redis node (n8n-nodes-base.redis) or database for caching
+ - Set appropriate TTL values:
+ - Static data: 7-30 days
+ - Dynamic data: 1-24 hours
+ - Real-time data: No caching
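+
+ As an illustration of the check-before-request idea (the key format, lookup field, and TTL values are examples, not prescriptions), a Code node can derive a cache key and TTL before the Redis or database lookup:
+
+     // Code node: build a cache key and TTL per item; a cache "get" on cacheKey can then
+     // decide whether the enrichment HTTP Request is needed at all
+     const TTL_SECONDS = { static: 7 * 24 * 3600, dynamic: 3600 };
+     return $input.all().map((item) => {
+       const domain = (item.json.domain || '').toString().toLowerCase();
+       return {
+         json: {
+           ...item.json,
+           cacheKey: 'enrich:company:' + domain,
+           cacheTtl: TTL_SECONDS.static,
+         },
+       };
+     });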
+
+ ### 6. Field Mapping
+ **Standardize enriched data structure**
+ - Use Set node to rename fields consistently
+ - Remove unnecessary fields with unset operations
+ - Apply data transformations in Code node for complex mappings
+ - Document field mappings in workflow description
+
+ ### 7. Quality Scoring
+ **Assess enrichment quality**
+ - Add confidence scores to enriched fields
+ - Track enrichment source for each field
+ - Implement validation rules for enriched data
+ - Flag incomplete or suspicious enrichments
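+
+ A minimal sketch of attaching confidence and provenance metadata in a Code node (the source label and threshold are illustrative values):
+
+     // Code node: record where each enrichment came from and flag low-confidence results
+     return $input.all().map((item) => {
+       const confidence = typeof item.json.confidence === 'number' ? item.json.confidence : 0;
+       return {
+         json: {
+           ...item.json,
+           enrichmentSource: 'clearbit',
+           needsReview: confidence < 0.7,
+         },
+       };
+     });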
+
+ ## Recommended Nodes
+
+ ### Essential Nodes
+
+ **HTTP Request** (n8n-nodes-base.httpRequest):
+ - Purpose: Primary enrichment API calls
+ - Use cases: Call external APIs for data enrichment
+ - Best practices: Configure proper authentication, handle timeouts
+
+ **Merge** (n8n-nodes-base.merge):
+ - Purpose: Combine original and enriched data
+ - Modes: Merge By Key, Merge By Index, Append
+ - Best practices: Use unique identifiers for matching, handle missing data
+
+ **Set** (n8n-nodes-base.set):
+ - Purpose: Transform and standardize data
+ - Use cases: Rename fields, remove unnecessary data, add metadata
+ - Best practices: Use "Keep Only Set" carefully, document transformations
+
+ **IF** (n8n-nodes-base.if):
+ - Purpose: Conditional enrichment logic
+ - Use cases: Validate required fields, route based on data quality
+ - Best practices: Check for null values, validate data types
+
+ **SplitInBatches** (n8n-nodes-base.splitInBatches):
+ - Purpose: Process large datasets in chunks
+ - Use cases: Handle datasets with 100+ items
+ - Best practices: Set appropriate batch size (10-50 items), add delays
+
+ ### Enrichment Sources
+
+ **Clearbit** (n8n-nodes-base.clearbit):
+ - Purpose: Company and person enrichment
+ - Use cases: Enrich email addresses with company data, get person details
+ - Best practices: Handle rate limits, cache results
+
+ **Hunter** (n8n-nodes-base.hunter):
+ - Purpose: Email finder and verification
+ - Use cases: Find email addresses, verify email validity
+ - Best practices: Respect API quotas, handle verification failures
+
+ **Brandfetch** (n8n-nodes-base.Brandfetch):
+ - Purpose: Company branding data
+ - Use cases: Get company logos, colors, brand assets
+ - Best practices: Cache brand data, handle missing brands
+
+ **OpenAI** (@n8n/n8n-nodes-langchain.openAi):
+ - Purpose: AI-powered data enrichment
+ - Use cases: Extract insights, classify data, generate descriptions
+ - Best practices: Minimize token usage, batch similar requests
+
+ **Google Sheets** (n8n-nodes-base.googleSheets):
+ - Purpose: Lookup table enrichment
+ - Use cases: Reference data enrichment, mapping tables
+ - Best practices: Use efficient lookup methods, cache sheet data
+
+ ### Utility Nodes
+
+ **Code** (n8n-nodes-base.code):
+ - Purpose: Custom enrichment logic
+ - Use cases: Complex transformations, custom algorithms
+ - Best practices: Keep code modular, handle errors gracefully
+
+ **Wait** (n8n-nodes-base.wait):
+ - Purpose: Rate limiting delays
+ - Use cases: Add delays between API calls, implement backoff
+ - Best practices: Use appropriate delay values (1-2 seconds)
+
+ **DateTime** (n8n-nodes-base.dateTime):
+ - Purpose: Timestamp handling
+ - Use cases: Add enrichment timestamps, calculate ages
+ - Best practices: Use consistent timezone handling
+
+ **Redis** (n8n-nodes-base.redis):
+ - Purpose: Caching layer
+ - Use cases: Cache enrichment results, track processed items
+ - Best practices: Set appropriate TTL, handle cache misses
+
+ **Error Trigger** (n8n-nodes-base.errorTrigger):
+ - Purpose: Error handling workflow
+ - Use cases: Global error handling, logging failures
+ - Best practices: Implement retry logic, alert on critical failures
+
+ **Switch** (n8n-nodes-base.switch):
+ - Purpose: Route based on enrichment results
+ - Use cases: Fallback enrichment sources, quality-based routing
+ - Best practices: Always define default case
+
+ **Stop and Error** (n8n-nodes-base.stopAndError):
+ - Purpose: Halt workflow on critical failures
+ - Use cases: Stop processing on invalid data, critical API failures
+ - Best practices: Use for unrecoverable errors only
+
+ ## Common Pitfalls to Avoid
+
+ ### Performance Issues
+
+ **Problem**: Enriching all fields for every record
+ - **Solution**: Only enrich fields that are actually needed
+ - Profile your workflow to identify bottlenecks
+ - Use conditional enrichment based on data needs
+
+ **Problem**: Sequential processing of independent enrichments
+ - **Solution**: Use parallel branches for non-dependent enrichments
+ - Split workflow into parallel paths
+ - Merge results after parallel processing
+
+ **Problem**: No batching for large datasets
+ - **Solution**: Always use SplitInBatches for >100 items
+ - Set appropriate batch sizes (10-50 items)
+ - Add delays between batches
+
+ ### Data Quality Problems
+
+ **Problem**: Overwriting original data with enrichments
+ - **Solution**: Preserve original data and add enriched fields separately
+ - Use Set node to add new fields without removing original ones
+ - Document which fields are enriched
+
+ **Problem**: Not handling null or missing enrichment results
+ - **Solution**: Implement fallback values and error flags
+ - Use IF nodes to check for empty results
+ - Add default values for missing enrichments
+
+ **Problem**: Mixing data types in enriched fields
+ - **Solution**: Enforce consistent data types through validation
+ - Convert types explicitly in Set or Code nodes
+ - Document expected data types
+
+ ### Resource Management
+
+ **Problem**: No rate limiting on external APIs
+ - **Solution**: Implement delays and respect API quotas
+ - Use Wait node between API calls
+ - Monitor API usage and adjust delays
+
+ **Problem**: Infinite retry loops on failures
+ - **Solution**: Set maximum retry attempts (typically 3)
+ - Use exponential backoff for retries
+ - Log failed attempts for manual review
+
+ **Problem**: No caching of expensive enrichments
+ - **Solution**: Cache results with appropriate expiration times
+ - Use Redis or database for caching
+ - Set TTL based on data freshness requirements
+
+ ### Workflow Design Flaws
+
+ **Problem**: Single point of failure for entire enrichment
+ - **Solution**: Use error boundaries and continue on failure options
+ - Enable "Continue on Fail" for non-critical enrichments
+ - Implement Error Trigger workflow
+
+ **Problem**: Hard-coded API keys in workflows
+ - **Solution**: Use credentials and environment variables
+ - Store sensitive data in n8n credentials system
+ - Never commit credentials in workflow JSON
+
+ **Problem**: No monitoring or logging of enrichment quality
+ - **Solution**: Add metrics collection and alerting
+ - Log enrichment success/failure rates
+ - Track enrichment coverage and quality
+
+ ### Common Error Scenarios
+
+ **API Rate Limits**:
+ - Implement exponential backoff
+ - Add Wait nodes with increasing delays
+ - Use SplitInBatches to control request rate
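+
+ For illustration, the backoff delay can be computed in a Code node and read by a Wait node through an expression (base delay, cap, and field names are example choices):
+
+     // Code node: compute the next delay from the retry attempt number; a Wait node can
+     // use an expression like {{ $json.waitSeconds }} for its wait amount
+     return $input.all().map((item) => {
+       const attempt = Number(item.json.retryAttempt || 0);
+       const waitSeconds = Math.min(60, Math.pow(2, attempt)); // 1, 2, 4, 8... capped at 60
+       return { json: { ...item.json, retryAttempt: attempt + 1, waitSeconds } };
+     });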
+
+ **Invalid API Responses**:
+ - Validate response structure before processing
+ - Use IF nodes to check response format
+ - Log unexpected responses for debugging
+
+ **Timeout Issues**:
+ - Set reasonable timeout values (10-30s)
+ - Use shorter timeouts for non-critical enrichments
+ - Implement retry logic for timeouts
+
+ **Data Mismatches**:
+ - Use fuzzy matching for lookups
+ - Normalize data before matching
+ - Handle missing keys gracefully
+
+ **Duplicate Enrichments**:
+ - Implement deduplication logic
+ - Check cache before enriching
+ - Use unique identifiers for tracking
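+
+ A minimal deduplication sketch in a Code node (the email identifier is only an example; use whatever uniquely identifies your records):
+
+     // Code node: keep only the first occurrence of each identifier so the same record
+     // is not enriched twice in one run
+     const seen = new Set();
+     const unique = [];
+     for (const item of $input.all()) {
+       const id = (item.json.email || '').toString().toLowerCase();
+       if (id && !seen.has(id)) {
+         seen.add(id);
+         unique.push(item);
+       }
+     }
+     return unique;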
+ `;
+ getDocumentation() {
+ return this.documentation;
+ }
+ }
+ exports.EnrichmentBestPractices = EnrichmentBestPractices;
+ //# sourceMappingURL=enrichment.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"enrichment.js","sourceRoot":"","sources":["../../../src/tools/best-practices/enrichment.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,uBAAuB;IAC1B,SAAS,GAAG,kCAAiB,CAAC,UAAU,CAAC;IACzC,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiQjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AA1QD,0DA0QC"}
@@ -0,0 +1,7 @@
+ import type { BestPracticesDocument } from '../../types/best-practices';
+ export declare class HumanInTheLoopBestPractices implements BestPracticesDocument {
+ readonly technique: "human_in_the_loop";
+ readonly version = "1.0.0";
+ private readonly documentation;
+ getDocumentation(): string;
+ }