@n8n/ai-workflow-builder 0.31.2 → 0.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/dist/ai-workflow-builder-agent.service.d.ts +6 -2
  2. package/dist/ai-workflow-builder-agent.service.js +45 -3
  3. package/dist/ai-workflow-builder-agent.service.js.map +1 -1
  4. package/dist/build.tsbuildinfo +1 -1
  5. package/dist/tools/best-practices/data-analysis.d.ts +7 -0
  6. package/dist/tools/best-practices/data-analysis.js +367 -0
  7. package/dist/tools/best-practices/data-analysis.js.map +1 -0
  8. package/dist/tools/best-practices/data-extraction.js +7 -0
  9. package/dist/tools/best-practices/data-extraction.js.map +1 -1
  10. package/dist/tools/best-practices/data-transformation.d.ts +7 -0
  11. package/dist/tools/best-practices/data-transformation.js +181 -0
  12. package/dist/tools/best-practices/data-transformation.js.map +1 -0
  13. package/dist/tools/best-practices/document-processing.d.ts +7 -0
  14. package/dist/tools/best-practices/document-processing.js +324 -0
  15. package/dist/tools/best-practices/document-processing.js.map +1 -0
  16. package/dist/tools/best-practices/enrichment.d.ts +7 -0
  17. package/dist/tools/best-practices/enrichment.js +271 -0
  18. package/dist/tools/best-practices/enrichment.js.map +1 -0
  19. package/dist/tools/best-practices/human-in-the-loop.d.ts +7 -0
  20. package/dist/tools/best-practices/human-in-the-loop.js +268 -0
  21. package/dist/tools/best-practices/human-in-the-loop.js.map +1 -0
  22. package/dist/tools/best-practices/index.js +7 -6
  23. package/dist/tools/best-practices/index.js.map +1 -1
  24. package/dist/tools/best-practices/knowledge-base.d.ts +7 -0
  25. package/dist/tools/best-practices/knowledge-base.js +268 -0
  26. package/dist/tools/best-practices/knowledge-base.js.map +1 -0
  27. package/dist/tools/best-practices/monitoring.d.ts +7 -0
  28. package/dist/tools/best-practices/monitoring.js +178 -0
  29. package/dist/tools/best-practices/monitoring.js.map +1 -0
  30. package/dist/tools/best-practices/notification.d.ts +7 -0
  31. package/dist/tools/best-practices/notification.js +229 -0
  32. package/dist/tools/best-practices/notification.js.map +1 -0
  33. package/dist/tools/best-practices/scheduling.d.ts +7 -0
  34. package/dist/tools/best-practices/scheduling.js +281 -0
  35. package/dist/tools/best-practices/scheduling.js.map +1 -0
  36. package/dist/tools/best-practices/triage.d.ts +7 -0
  37. package/dist/tools/best-practices/triage.js +211 -0
  38. package/dist/tools/best-practices/triage.js.map +1 -0
  39. package/dist/tools/categorize-prompt.tool.js +1 -0
  40. package/dist/tools/categorize-prompt.tool.js.map +1 -1
  41. package/dist/tools/helpers/response.js +2 -0
  42. package/dist/tools/helpers/response.js.map +1 -1
  43. package/dist/tools/prompts/main-agent.prompt.js +9 -1
  44. package/dist/tools/prompts/main-agent.prompt.js.map +1 -1
  45. package/dist/tools/validate-workflow.tool.js +12 -0
  46. package/dist/tools/validate-workflow.tool.js.map +1 -1
  47. package/dist/utils/tool-executor.js +19 -0
  48. package/dist/utils/tool-executor.js.map +1 -1
  49. package/dist/validation/checks/agent-prompt.js +2 -0
  50. package/dist/validation/checks/agent-prompt.js.map +1 -1
  51. package/dist/validation/checks/connections.js +8 -0
  52. package/dist/validation/checks/connections.js.map +1 -1
  53. package/dist/validation/checks/from-ai.js +1 -0
  54. package/dist/validation/checks/from-ai.js.map +1 -1
  55. package/dist/validation/checks/tools.js +2 -0
  56. package/dist/validation/checks/tools.js.map +1 -1
  57. package/dist/validation/checks/trigger.js +2 -0
  58. package/dist/validation/checks/trigger.js.map +1 -1
  59. package/dist/validation/types.d.ts +4 -0
  60. package/dist/validation/types.js +18 -0
  61. package/dist/validation/types.js.map +1 -1
  62. package/dist/workflow-builder-agent.d.ts +5 -2
  63. package/dist/workflow-builder-agent.js +4 -3
  64. package/dist/workflow-builder-agent.js.map +1 -1
  65. package/dist/workflow-state.d.ts +3 -1
  66. package/dist/workflow-state.js +8 -0
  67. package/dist/workflow-state.js.map +1 -1
  68. package/package.json +11 -7
@@ -0,0 +1,7 @@
1
+ import type { BestPracticesDocument } from '../../types/best-practices';
2
+ export declare class DataAnalysisBestPractices implements BestPracticesDocument {
3
+ readonly technique: "data_analysis";
4
+ readonly version = "1.0.0";
5
+ private readonly documentation;
6
+ getDocumentation(): string;
7
+ }
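Based on the declaration above, each best-practices technique appears to be exposed as a small class whose `getDocumentation()` method returns the markdown embedded in the compiled file shown next. A minimal consumption sketch (the require path and CommonJS usage are assumptions for illustration, not shown in this diff):

```javascript
// Hypothetical consumer code; not part of the package diff.
// Assumes the compiled CommonJS output under dist/ as listed in the file table above.
const {
	DataAnalysisBestPractices,
} = require('@n8n/ai-workflow-builder/dist/tools/best-practices/data-analysis');

const practices = new DataAnalysisBestPractices();
console.log(practices.technique); // "data_analysis"
console.log(practices.version); // "1.0.0"
console.log(practices.getDocumentation().slice(0, 80)); // start of the markdown document
```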
@@ -0,0 +1,367 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DataAnalysisBestPractices = void 0;
4
+ const categorization_1 = require("../../types/categorization");
5
+ class DataAnalysisBestPractices {
6
+ technique = categorization_1.WorkflowTechnique.DATA_ANALYSIS;
7
+ version = '1.0.0';
8
+ documentation = `# Best Practices: Data Analysis Workflows
9
+
10
+ ## Workflow Design
11
+
12
+ Structure workflows following the Input → Transform → Output pattern. Use clear node naming (e.g., "Fetch Sales Data", "Calculate Averages", "IF High Variance?") to document the flow.
13
+
14
+ Start with appropriate triggers:
15
+ - Manual Trigger for on-demand analysis
16
+ - Cron/Schedule Trigger for periodic analysis (daily/weekly reports)
17
+ - Webhook Trigger for event-driven analysis
18
+
19
+ Break complex workflows into modular sub-workflows using the Execute Workflow node for reusable components like "Outlier Detection" or "Data Preparation".
20
+
21
+ CRITICAL: For large datasets, use Split In Batches node to process items in chunks (e.g., 100 at a time) to prevent memory issues. Always test with realistic data volumes.
22
+
23
+ Example pattern:
24
+ - Trigger → HTTP Request (fetch data) → Spreadsheet File (parse CSV) → Set (clean fields) → Filter (remove nulls) → Code (analyze) → HTML (format report) → Email (send results)
25
+
26
+ ## Data Preparation Strategy
27
+
28
+ 1. **Fetch Data**: Use dedicated integration nodes or HTTP Request for APIs. Import cURL commands directly into the HTTP Request node for complex APIs.
29
+ 2. **Parse & Convert**: Convert to JSON using Spreadsheet File node for CSV/Excel. Enable "Convert types where required" on condition nodes.
30
+ 3. **Clean Data**: Use the Set node with "Keep Only Set" enabled to drop unused fields, and the Filter node to remove null values or focus on data subsets.
31
+ 4. **Merge/Enrich**: Use Merge node by key or index to combine multiple sources. Choose correct merge mode to avoid mismatched items.
32
+
33
+ ## Analysis Implementation
34
+
35
+ Use Function node (not Function Item) when analysis needs all items as a whole (calculating totals, finding trends). Function Item operates per item only.
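As an illustration of the whole-dataset point above, a Code node set to "Run Once for All Items" (the modern replacement for the Function node) might contain something like the sketch below; the `amount` field and the 3-sigma outlier rule are assumptions for the example, not part of this package:

```javascript
// Sketch only: a Code node in "Run Once for All Items" mode.
// Assumes each incoming item has a numeric `amount` field (hypothetical).
const values = items
	.map((item) => Number(item.json.amount))
	.filter((v) => !Number.isNaN(v));

if (values.length === 0) {
	return [{ json: { error: 'No numeric amount values found' } }];
}

const mean = values.reduce((sum, v) => sum + v, 0) / values.length;
const variance = values.reduce((sum, v) => sum + (v - mean) ** 2, 0) / values.length;
const stdDev = Math.sqrt(variance);

// Return a single summary item plus an outlier flag on each original item.
return [
	{ json: { mean, stdDev, count: values.length } },
	...items.map((item) => ({
		json: {
			...item.json,
			isOutlier: Math.abs(Number(item.json.amount) - mean) > 3 * stdDev,
		},
	})),
];
```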
36
+
37
+ For AI-powered analysis, filter irrelevant content first to minimize tokens. Batch data into single prompts when possible.
38
+
39
+ Always pin data after external calls to test downstream logic without re-fetching. This saves API costs and speeds development.
40
+
41
+ ## Output & Integration
42
+
43
+ Format results appropriately:
44
+ - HTML/Markdown nodes for reports
45
+ - Set node to prepare specific output fields (totalSales, anomalyCount)
46
+ - Database nodes to store analysis history
47
+ - Webhook Response for API-triggered workflows
48
+
49
+ Use conditional branches (IF nodes) for post-analysis actions:
50
+ - Create tasks if anomalies detected
51
+ - Send alerts for critical thresholds
52
+ - Avoid infinite loops by using proper conditions
53
+
54
+ ## Recommended Nodes
55
+
56
+ ### HTTP Request (n8n-nodes-base.httpRequest)
57
+
58
+ **Purpose**: Fetch datasets from URLs or APIs
59
+
60
+ **Use Cases**:
61
+ - Pull data from REST APIs for analysis
62
+ - Fetch CSV/JSON files from URLs
63
+ - Query external data sources
64
+
65
+ **Best Practices**:
66
+ - Import cURL commands for complex requests
67
+ - Use authentication credentials properly
68
+ - Handle pagination for large datasets
69
+
70
+ ### Spreadsheet File (n8n-nodes-base.spreadsheetFile)
71
+
72
+ **Purpose**: Parse CSV/Excel files into JSON items for processing
73
+
74
+ **Use Cases**:
75
+ - Import CSV data from file uploads
76
+ - Process Excel spreadsheets
77
+ - Convert tabular data to JSON
78
+
79
+ **Best Practices**:
80
+ - Specify correct file format
81
+ - Handle header rows properly
82
+ - Test with various file encodings
83
+
84
+ ### Set / Edit Fields (n8n-nodes-base.set)
85
+
86
+ **Purpose**: Clean data, select relevant fields, rename columns, convert data types
87
+
88
+ **Key Setting**: "Keep Only Set" - drops all fields not explicitly defined
89
+
90
+ **Use Cases**:
91
+ - Remove unused columns to reduce data size
92
+ - Rename fields to standardized names
93
+ - Convert data types (string to number)
94
+ - Add calculated fields
95
+
96
+ **Best Practices**:
97
+ - Enable "Keep Only Set" to drop unused data
98
+ - Always verify output structure
99
+ - Use expressions for calculated fields
100
+
101
+ ### Filter (n8n-nodes-base.filter)
102
+
103
+ **Purpose**: Remove unwanted items based on conditions
104
+
105
+ **Use Cases**:
106
+ - Remove null values
107
+ - Filter outliers before analysis
108
+ - Focus on specific data subsets
109
+
110
+ **Best Practices**:
111
+ - Filter early to reduce processing load
112
+ - Use multiple conditions when needed
113
+ - Document filter logic clearly
114
+
115
+ ### IF (n8n-nodes-base.if)
116
+
117
+ **Purpose**: Branch workflow based on analysis results
118
+
119
+ **Use Cases**:
120
+ - Route anomalies vs normal data
121
+ - Trigger alerts for threshold breaches
122
+ - Create conditional outputs
123
+
124
+ **Best Practices**:
125
+ - Enable "Convert types where required" for comparisons
126
+ - Use clear condition names
127
+ - Handle both true and false branches
128
+
129
+ ### Code / Function (n8n-nodes-base.function)
130
+
131
+ **Purpose**: Custom JavaScript for calculations, statistics, anomaly detection
132
+
133
+ **Use Cases**:
134
+ - Calculate statistical measures (mean, median, std dev)
135
+ - Detect outliers and anomalies
136
+ - Perform complex transformations
137
+ - Implement custom algorithms
138
+
139
+ **Best Practices**:
140
+ - Use Function node (not Function Item) for whole-dataset operations
141
+ - Return proper data structure: \`return items\`
142
+ - Add comments to explain logic
143
+ - Test with edge cases
144
+
145
+ **Note**: Consider using the newer Code node (n8n-nodes-base.code) as Function node is deprecated.
146
+
147
+ ### Aggregate (n8n-nodes-base.aggregate)
148
+
149
+ **Purpose**: Group items, gather values into arrays, count occurrences by category
150
+
151
+ **Use Cases**:
152
+ - Group sales by region
153
+ - Count items by category
154
+ - Calculate sums and averages per group
155
+
156
+ **Best Practices**:
157
+ - Choose appropriate aggregation function
158
+ - Use grouping keys effectively
159
+ - Use it to simplify common statistical calculations instead of custom code
160
+
161
+ ### Split In Batches (n8n-nodes-base.splitInBatches)
162
+
163
+ **Purpose**: Process large datasets in chunks to prevent memory overload
164
+
165
+ **Use Cases**:
166
+ - Handle datasets with 1000+ items
167
+ - Process API results in batches
168
+ - Prevent workflow timeouts
169
+
170
+ **Best Practices**:
171
+ - Set appropriate batch size (e.g., 100 items)
172
+ - Test with realistic data volumes
173
+ - Use loop logic properly
174
+
175
+ ### Merge (n8n-nodes-base.merge)
176
+
177
+ **Purpose**: Combine data from multiple sources by key/index
178
+
179
+ **Modes**:
180
+ - Merge by Key: Join data on a shared field, like a database join
181
+ - Merge by Index: Combine parallel data streams
182
+ - Wait mode: Synchronize parallel branches
183
+
184
+ **Use Cases**:
185
+ - Join customer data with transaction data
186
+ - Combine multiple API responses
187
+ - Enrich data from multiple sources
188
+
189
+ **Best Practices**:
190
+ - Choose correct merge mode
191
+ - Ensure matching keys exist
192
+ - Handle missing data gracefully
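As a rough illustration of what "Merge by Key" means semantically, the plain JavaScript sketch below joins two arrays on a shared `id` field. This is illustrative only, not the Merge node's implementation, and the data and field names are hypothetical:

```javascript
// Illustration of merge-by-key semantics (hypothetical data).
const customers = [{ id: 1, name: 'Acme' }, { id: 2, name: 'Globex' }];
const orders = [{ id: 1, total: 120 }, { id: 3, total: 80 }];

const ordersById = new Map(orders.map((o) => [o.id, o]));
const merged = customers.map((c) => ({ ...c, ...(ordersById.get(c.id) ?? {}) }));

console.log(merged);
// [{ id: 1, name: 'Acme', total: 120 }, { id: 2, name: 'Globex' }]
// Items without a matching key keep only their original fields, which is why
// the guidance above stresses ensuring matching keys and handling missing data.
```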
193
+
194
+ ### Database Nodes
195
+
196
+ **MySQL** (n8n-nodes-base.mySql):
197
+ - Purpose: Query MySQL databases for analysis data
198
+ - Use cases: Fetch historical data, store results
199
+
200
+ **Postgres** (n8n-nodes-base.postgres):
201
+ - Purpose: Query PostgreSQL databases
202
+ - Use cases: Complex analytical queries, time-series data
203
+
204
+ **MongoDB** (n8n-nodes-base.mongoDb):
205
+ - Purpose: Query NoSQL document databases
206
+ - Use cases: Unstructured data analysis, JSON documents
207
+
208
+ **Best Practices**:
209
+ - Use parameterized queries for security
210
+ - Query source data efficiently with proper indexes
211
+ - Store analysis results for historical tracking
212
+ - Use appropriate data types
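A minimal sketch of a parameterized, pre-aggregated query, shown here with the node-postgres (`pg`) client for illustration; the table, columns, and connection settings are assumptions, and the n8n Postgres node supports parameterized queries in an equivalent way:

```javascript
// Illustration only: parameterized + aggregated query using the pg client.
// Table and column names are hypothetical; adjust connection settings as needed.
const { Client } = require('pg');

async function fetchRegionTotals(since) {
	const client = new Client({ connectionString: process.env.DATABASE_URL });
	await client.connect();
	try {
		// Aggregate at the source and pass values as parameters,
		// never by concatenating user input into the SQL string.
		const result = await client.query(
			'SELECT region, SUM(amount) AS total FROM sales WHERE created_at >= $1 GROUP BY region',
			[since],
		);
		return result.rows;
	} finally {
		await client.end();
	}
}
```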
213
+
214
+ ### Google Sheets (n8n-nodes-base.googleSheets)
215
+
216
+ **Purpose**: Read/write spreadsheet data
217
+
218
+ **Use Cases**:
219
+ - Import data for analysis
220
+ - Append summary statistics
221
+ - Build analysis logs
222
+ - Share results with stakeholders
223
+
224
+ **Best Practices**:
225
+ - Use range specifications efficiently
226
+ - Handle large sheets with batching
227
+ - Consider API rate limits
228
+
229
+ ### AI Agent (@n8n/n8n-nodes-langchain.agent)
230
+
231
+ **Purpose**: Leverage AI for text analysis, sentiment detection, complex pattern recognition
232
+
233
+ **Use Cases**:
234
+ - Sentiment analysis of customer feedback
235
+ - Text classification
236
+ - Extract insights from unstructured data
237
+ - Natural language processing
238
+
239
+ **Best Practices**:
240
+ - Filter irrelevant content first to minimize tokens
241
+ - Batch data into single prompts when possible
242
+ - Use structured output for consistency
243
+ - Consider API costs and latency
244
+
245
+ ### HTML (n8n-nodes-base.html)
246
+
247
+ **Purpose**: Generate formatted reports with tables and styling
248
+
249
+ **Use Cases**:
250
+ - Create analysis reports
251
+ - Build dashboards
252
+ - Color-code data quality scores
253
+ - Format tables with results
254
+
255
+ **Best Practices**:
256
+ - Use templates for consistent formatting
257
+ - Include visualizations where helpful
258
+ - Make reports mobile-friendly
259
+
260
+ ### Email (n8n-nodes-base.emailSend)
261
+
262
+ **Purpose**: Send analysis reports to stakeholders automatically
263
+
264
+ **Use Cases**:
265
+ - Scheduled report delivery
266
+ - Alert notifications
267
+ - Share findings with teams
268
+
269
+ **Best Practices**:
270
+ - Use clear subject lines
271
+ - Include summary in email body
272
+ - Attach detailed reports when needed
273
+ - Schedule appropriately
274
+
275
+ ## Common Pitfalls to Avoid
276
+
277
+ ### Data Type Mismatches
278
+
279
+ **Problem**: JSON data types matter. Comparing string vs number yields incorrect results. The comparison "5" > "10" is lexicographically true (wrong for numbers).
280
+
281
+ **Solution**:
282
+ - Always convert data types before comparisons
283
+ - Use Number() or parseInt() for numeric operations
284
+ - Enable "Convert types where required" on IF nodes
285
+ - Validate data types early in workflow
286
+ - Use proper type casting in Code nodes
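The lexicographic trap described above is easy to reproduce; a small sketch of the check-and-cast pattern usable in a Code node or any other JavaScript context (values and field names are hypothetical):

```javascript
// The pitfall: JavaScript compares strings lexicographically.
console.log('5' > '10'); // true  ('5' sorts after '1')
console.log(Number('5') > Number('10')); // false (numeric comparison, as intended)

// Defensive casting before a threshold check.
const raw = '42'; // e.g., a value that arrived as a string
const total = Number(raw);
if (Number.isNaN(total)) {
	throw new Error('Expected a numeric value');
}
const exceedsThreshold = total > 100; // false, compared as numbers
console.log(exceedsThreshold);
```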
287
+
288
+ ### Memory Issues with Large Datasets
289
+
290
+ **Problem**: Processing thousands of items at once can crash workflows or cause timeouts.
291
+
292
+ **Solution**:
293
+ - Use Split In Batches node for datasets over 100 items
294
+ - Set appropriate batch sizes (50-100 items)
295
+ - Test with realistic data volumes during development
296
+ - Monitor memory usage in production
297
+ - Consider sub-workflows for complex processing
298
+
299
+ ### Not Pinning Data During Development
300
+
301
+ **Problem**: Re-fetching data from APIs repeatedly wastes time and costs money during development.
302
+
303
+ **Solution**:
304
+ - Always pin data after external calls
305
+ - Test downstream logic without re-fetching
306
+ - Pinning saves API costs and speeds up development
307
+ - Use manual triggers to control execution
308
+
309
+ ### Incorrect Aggregation Logic
310
+
311
+ **Problem**: Using Function Item instead of Function node for whole-dataset calculations.
312
+
313
+ **Solution**:
314
+ - Use Function node (not Function Item) for totals, averages, trends
315
+ - Function Item operates per item only
316
+ - Understand the difference between item-level and dataset-level operations
317
+ - Use Aggregate node for common operations
318
+
319
+ ### Missing Data Cleaning
320
+
321
+ **Problem**: Null values, inconsistent formats, or outliers skew analysis results.
322
+
323
+ **Solution**:
324
+ - Use Filter node to remove null values
325
+ - Standardize data formats early
326
+ - Handle missing data explicitly
327
+ - Use Set node to clean and normalize fields
328
+ - Document data quality assumptions
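Where the Set and Filter nodes are not flexible enough, a Code node can apply the same cleaning rules explicitly. A sketch with assumed field names, not part of the package:

```javascript
// Sketch: normalize and drop incomplete records in a Code node ("Run Once for All Items").
// The `email` and `amount` fields are hypothetical.
return items
	.filter((item) => item.json.email != null && item.json.amount != null)
	.map((item) => ({
		json: {
			...item.json,
			email: String(item.json.email).trim().toLowerCase(),
			amount: Number(item.json.amount),
		},
	}));
```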
329
+
330
+ ### Poor Error Handling
331
+
332
+ **Problem**: Failed API calls or data issues break the entire workflow.
333
+
334
+ **Solution**:
335
+ - Use "Continue on Fail" setting appropriately
336
+ - Add IF nodes to check for empty datasets
337
+ - Implement error logging
338
+ - Use Error Trigger workflows for global handling
339
+ - Validate data quality before analysis
340
+
341
+ ### Hardcoded Values
342
+
343
+ **Problem**: Thresholds, API endpoints, or configuration values are hardcoded in nodes.
344
+
345
+ **Solution**:
346
+ - Store configuration in environment variables
347
+ - Use Google Sheets or databases for thresholds
348
+ - Externalizing configuration makes workflows reusable and maintainable
349
+ - It also lets non-technical users adjust parameters
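One hedged way to externalize a threshold, assuming the n8n instance permits environment access, is the `$env` variable available in expressions and the Code node; the variable and field names below are hypothetical:

```javascript
// Sketch: read a threshold from an environment variable instead of hardcoding it.
// ANOMALY_THRESHOLD is a hypothetical variable; access may be restricted by instance settings.
const threshold = Number($env.ANOMALY_THRESHOLD ?? 3);

return items.map((item) => ({
	json: { ...item.json, isAnomaly: Number(item.json.score) > threshold },
}));
```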
350
+
351
+ ### Inefficient Query Patterns
352
+
353
+ **Problem**: Fetching entire datasets when only summary data is needed.
354
+
355
+ **Solution**:
356
+ - Use database aggregation functions
357
+ - Filter data at the source
358
+ - Use appropriate indexes
359
+ - Implement pagination for large results
360
+ - Consider pre-aggregated views
361
+ `;
362
+ getDocumentation() {
363
+ return this.documentation;
364
+ }
365
+ }
366
+ exports.DataAnalysisBestPractices = DataAnalysisBestPractices;
367
+ //# sourceMappingURL=data-analysis.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"data-analysis.js","sourceRoot":"","sources":["../../../src/tools/best-practices/data-analysis.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,yBAAyB;IAC5B,SAAS,GAAG,kCAAiB,CAAC,aAAa,CAAC;IAC5C,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiWjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AA1WD,8DA0WC"}
@@ -15,6 +15,13 @@ Use Information Extractor or AI nodes for extracting structured data from unstru
15
15
 
16
16
  For binary data, ensure you use nodes like Extract From File to handle files properly.
17
17
 
18
+ ### Referencing Binary Data from Other Nodes
19
+ When you need to reference binary data from a previous node, use this syntax:
20
+ - Expression: '{{ $('Node Name').item.binary.property_name }}', or {{ $binary.property_name }} when the binary data comes from the immediately previous node
21
+ - Example for Gmail attachments: '{{ $('Gmail Trigger').item.binary.attachment_0 }}', or {{ $binary.attachment_0 }} when referencing the previous item
22
+ - Example for webhook data: '{{ $('Webhook').item.binary.data }}', or {{ $binary.data }} when referencing the previous item
23
+ - Important: The property name depends on how the previous node names the binary data
24
+
18
25
  ## Data Structure & Type Management
19
26
 
20
27
  Normalize data structure early in your workflow. Use transformation nodes like Split Out, Aggregate, or Set to ensure your data matches n8n's expected structure: an array of objects with a json key.
@@ -1 +1 @@
1
- {"version":3,"file":"data-extraction.js","sourceRoot":"","sources":["../../../src/tools/best-practices/data-extraction.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,2BAA2B;IAC9B,SAAS,GAAG,kCAAiB,CAAC,eAAe,CAAC;IAC9C,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2FjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AApGD,kEAoGC"}
1
+ {"version":3,"file":"data-extraction.js","sourceRoot":"","sources":["../../../src/tools/best-practices/data-extraction.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,2BAA2B;IAC9B,SAAS,GAAG,kCAAiB,CAAC,eAAe,CAAC;IAC9C,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAkGjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AA3GD,kEA2GC"}
@@ -0,0 +1,7 @@
1
+ import type { BestPracticesDocument } from '../../types/best-practices';
2
+ export declare class DataTransformationBestPractices implements BestPracticesDocument {
3
+ readonly technique: "data_transformation";
4
+ readonly version = "1.0.0";
5
+ private readonly documentation;
6
+ getDocumentation(): string;
7
+ }
@@ -0,0 +1,181 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DataTransformationBestPractices = void 0;
4
+ const categorization_1 = require("../../types/categorization");
5
+ class DataTransformationBestPractices {
6
+ technique = categorization_1.WorkflowTechnique.DATA_TRANSFORMATION;
7
+ version = '1.0.0';
8
+ documentation = `# Best Practices: Data Transformation
9
+
10
+ ## Workflow Design
11
+
12
+ ### Core Principles
13
+ - **Structure**: Always follow Input → Transform → Output pattern
14
+ - **Modularity**: Break complex workflows into sub-workflows using Execute Workflow node
15
+ - **Optimization**: Filter and reduce data early to improve performance
16
+ - **Documentation**: Use descriptive node names and sticky notes for clarity
17
+ - **Testing**: Test with edge cases (empty data, missing fields, special characters)
18
+
19
+ ### Design Best Practices
20
+ - Plan transformation requirements in plain language before building
21
+ - Keep main workflows to ~5 nodes, offload details to sub-workflows
22
+ - Process inexpensive transformations first (especially data reduction)
23
+ - Use Modular Design: Create reusable sub-workflows for common tasks like "Data Cleaning" or "Error Handler"
24
+ - Batch large datasets using Split In Batches node to prevent timeouts
25
+
26
+ ## Error Handling
27
+ - **Validate Early**: Use IF node at workflow start to check required fields and data types
28
+ - **Error Outputs**: Connect red error output connectors to logging/notification chains
29
+ - **Continue on Fail**: Enable in node settings to flag errors without breaking workflow
30
+ - **Global Error Workflow**: Create separate workflow with Error Trigger node as safety net
31
+ - **Logging**: Log key events with context (which record failed, error message, etc.)
32
+
33
+ ## Recommended Nodes
34
+
35
+ ### Essential Transformation Nodes
36
+
37
+ #### Edit Fields (Set) (n8n-nodes-base.set)
38
+
39
+ **Purpose**: Create, modify, rename fields; change data types
40
+
41
+ **Key Setting**: "Keep Only Set" - drops all fields not explicitly defined (default: disabled)
42
+
43
+ **Use Cases**:
44
+ - Extract specific columns
45
+ - Add calculated fields
46
+ - Convert data types (string to number)
47
+ - Format dates using expressions
48
+
49
+ **Pitfalls**:
50
+ - Not understanding "Keep Only Set" behavior can lead to data loss
51
+ - Enabled: Drops all fields not explicitly defined (data loss risk)
52
+ - Disabled: Carries forward all fields (potential bloat)
53
+ - Always verify output structure after configuration
54
+
55
+ #### IF/Filter Nodes
56
+
57
+ **IF Node** (n8n-nodes-base.if):
58
+ - **Purpose**: Conditional processing and routing
59
+ - **Best Practice**: Use early to validate inputs and remove bad data
60
+ - **Example**: Check if required fields exist before processing
61
+
62
+ **Filter Node** (n8n-nodes-base.filter):
63
+ - **Purpose**: Filter items based on conditions
64
+ - **Best Practice**: Use early in workflow to reduce data volume
65
+
66
+ #### Merge Node (n8n-nodes-base.merge)
67
+
68
+ **Purpose**: Combine two data streams
69
+
70
+ **Modes**:
71
+ - Merge by Key (like database join)
72
+ - Merge by Index
73
+ - Append
74
+
75
+ **Pitfalls**:
76
+ - **Missing Keys**: Trying to merge on non-existent fields
77
+ - **Field Name Mismatch**: Different field names in sources
78
+ - **Solution**: Use Set node to normalize field names before merging
79
+
80
+ #### Code Node (n8n-nodes-base.code)
81
+
82
+ **When to Use**: Complex transformations impossible with built-in nodes
83
+
84
+ **Execution Modes**:
85
+ - "Run Once per Item": Process each item independently
86
+ - "Run Once for All Items": Access entire dataset (for aggregation)
87
+
88
+ **Return Format**: Must return array of objects with json property
89
+ \`\`\`javascript
90
+ return items; // or return [{ json: {...} }];
91
+ \`\`\`
92
+
93
+ **Pitfalls**:
94
+ - Wrong return format: Not returning array of objects with json property
95
+ - Overly complex: Stuffing entire workflow logic in one Code node
96
+ - Keep code nodes focused on single transformation aspect
97
+
98
+ #### Summarize Node (n8n-nodes-base.summarize)
99
+
100
+ **Purpose**: Pivot table-style aggregations (count, sum, average, min/max)
101
+
102
+ **Configuration**:
103
+ - Fields to Summarize: Choose aggregation function
104
+ - Fields to Split By: Grouping keys
105
+
106
+ **Output**: Single item with summary or multiple items per group
107
+
108
+ ### Data Restructuring Nodes
109
+
110
+ - **Split Out** (n8n-nodes-base.splitOut): Convert single item with array into multiple items
111
+ - **Aggregate** (n8n-nodes-base.aggregate): Combine multiple items into one
112
+ - **Remove Duplicates** (n8n-nodes-base.removeDuplicates): Delete duplicate items based on field criteria
113
+ - **Sort** (n8n-nodes-base.sort): Order items alphabetically/numerically
114
+ - **Limit** (n8n-nodes-base.limit): Trim to maximum number of items
115
+
116
+ ### Batch Processing
117
+
118
+ **Split In Batches** (n8n-nodes-base.splitInBatches):
119
+ - **Purpose**: Process large datasets in chunks
120
+ - **Use When**: Handling 100+ items with expensive operations (API calls, AI)
121
+
122
+ ### Workflow Orchestration
123
+
124
+ **Execute Workflow** (n8n-nodes-base.executeWorkflow):
125
+ - **Purpose**: Call sub-workflows for modular design
126
+ - **Best Practice**: Create reusable sub-workflows for common tasks like "Data Cleaning" or "Error Handler"
127
+
128
+ **Error Trigger** (n8n-nodes-base.errorTrigger):
129
+ - **Purpose**: Create global error handling workflow
130
+ - **Best Practice**: Use as safety net to catch all workflow errors
131
+
132
+ ## Common Pitfalls to Avoid
133
+
134
+ ### Critical Mistakes
135
+
136
+ #### Set Node Issues
137
+ - **Mistake**: Not understanding "Keep Only Set" behavior
138
+ - Enabled: Drops all fields not explicitly defined (data loss risk)
139
+ - Disabled: Carries forward all fields (potential bloat)
140
+ - **Solution**: Always verify output structure after configuration
141
+
142
+ #### Code Node Errors
143
+ - **Wrong Return Format**: Not returning array of objects with json property
144
+ - **Fix**: Always return \`items\` or \`[{ json: {...} }]\`
145
+ - **Overly Complex**: Stuffing entire workflow logic in one Code node
146
+ - **Fix**: Keep code nodes focused on single transformation aspect
147
+
148
+ #### Merge Node Problems
149
+ - **Missing Keys**: Trying to merge on non-existent fields
150
+ - **Fix**: Validate both inputs have matching key fields
151
+ - **Field Name Mismatch**: Different field names in sources
152
+ - **Fix**: Use Set node to normalize field names before merging
153
+
154
+ ### General Workflow Issues
155
+ - **No Error Handling**: Workflow crashes on unexpected data
156
+ - **Fix**: Add IF nodes for validation, use error outputs
157
+ - **Hard-coded Values**: URLs, credentials, config in nodes
158
+ - **Fix**: Use environment variables or config nodes
159
+ - **Poor Naming**: Generic names like "Set1", "Function1"
160
+ - **Fix**: Use descriptive names: "Clean Customer Data", "Calculate Totals"
161
+ - **Missing Documentation**: No comments or descriptions
162
+ - **Fix**: Add sticky notes, node descriptions, code comments
163
+
164
+ ### Performance Pitfalls
165
+ - Processing large datasets without batching → timeouts
166
+ - Not filtering early → unnecessary processing overhead
167
+ - Excessive node chaining → visual clutter and slow execution
168
+ - Not using sub-workflows → unmaintainable monolithic workflows
169
+
170
+ ### Data Validation Pitfalls
171
+ - Assuming input data is always perfect
172
+ - Not handling empty/null values
173
+ - Ignoring data type mismatches
174
+ - Missing edge case handling (special characters, empty arrays)
175
+ `;
176
+ getDocumentation() {
177
+ return this.documentation;
178
+ }
179
+ }
180
+ exports.DataTransformationBestPractices = DataTransformationBestPractices;
181
+ //# sourceMappingURL=data-transformation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"data-transformation.js","sourceRoot":"","sources":["../../../src/tools/best-practices/data-transformation.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,+BAA+B;IAClC,SAAS,GAAG,kCAAiB,CAAC,mBAAmB,CAAC;IAClD,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAuKjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AAhLD,0EAgLC"}
@@ -0,0 +1,7 @@
1
+ import type { BestPracticesDocument } from '../../types/best-practices';
2
+ export declare class DocumentProcessingBestPractices implements BestPracticesDocument {
3
+ readonly technique: "document_processing";
4
+ readonly version = "1.0.0";
5
+ private readonly documentation;
6
+ getDocumentation(): string;
7
+ }