@n8n/ai-workflow-builder 0.31.1 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-workflow-builder-agent.service.d.ts +6 -2
- package/dist/ai-workflow-builder-agent.service.js +45 -3
- package/dist/ai-workflow-builder-agent.service.js.map +1 -1
- package/dist/build.tsbuildinfo +1 -1
- package/dist/tools/best-practices/data-analysis.d.ts +7 -0
- package/dist/tools/best-practices/data-analysis.js +367 -0
- package/dist/tools/best-practices/data-analysis.js.map +1 -0
- package/dist/tools/best-practices/data-extraction.js +7 -0
- package/dist/tools/best-practices/data-extraction.js.map +1 -1
- package/dist/tools/best-practices/data-transformation.d.ts +7 -0
- package/dist/tools/best-practices/data-transformation.js +181 -0
- package/dist/tools/best-practices/data-transformation.js.map +1 -0
- package/dist/tools/best-practices/document-processing.d.ts +7 -0
- package/dist/tools/best-practices/document-processing.js +324 -0
- package/dist/tools/best-practices/document-processing.js.map +1 -0
- package/dist/tools/best-practices/enrichment.d.ts +7 -0
- package/dist/tools/best-practices/enrichment.js +271 -0
- package/dist/tools/best-practices/enrichment.js.map +1 -0
- package/dist/tools/best-practices/human-in-the-loop.d.ts +7 -0
- package/dist/tools/best-practices/human-in-the-loop.js +268 -0
- package/dist/tools/best-practices/human-in-the-loop.js.map +1 -0
- package/dist/tools/best-practices/index.js +7 -6
- package/dist/tools/best-practices/index.js.map +1 -1
- package/dist/tools/best-practices/knowledge-base.d.ts +7 -0
- package/dist/tools/best-practices/knowledge-base.js +268 -0
- package/dist/tools/best-practices/knowledge-base.js.map +1 -0
- package/dist/tools/best-practices/monitoring.d.ts +7 -0
- package/dist/tools/best-practices/monitoring.js +178 -0
- package/dist/tools/best-practices/monitoring.js.map +1 -0
- package/dist/tools/best-practices/notification.d.ts +7 -0
- package/dist/tools/best-practices/notification.js +229 -0
- package/dist/tools/best-practices/notification.js.map +1 -0
- package/dist/tools/best-practices/scheduling.d.ts +7 -0
- package/dist/tools/best-practices/scheduling.js +281 -0
- package/dist/tools/best-practices/scheduling.js.map +1 -0
- package/dist/tools/best-practices/triage.d.ts +7 -0
- package/dist/tools/best-practices/triage.js +211 -0
- package/dist/tools/best-practices/triage.js.map +1 -0
- package/dist/tools/categorize-prompt.tool.js +1 -0
- package/dist/tools/categorize-prompt.tool.js.map +1 -1
- package/dist/tools/helpers/response.js +2 -0
- package/dist/tools/helpers/response.js.map +1 -1
- package/dist/tools/prompts/main-agent.prompt.js +9 -1
- package/dist/tools/prompts/main-agent.prompt.js.map +1 -1
- package/dist/tools/validate-workflow.tool.js +12 -0
- package/dist/tools/validate-workflow.tool.js.map +1 -1
- package/dist/utils/tool-executor.js +19 -0
- package/dist/utils/tool-executor.js.map +1 -1
- package/dist/validation/checks/agent-prompt.js +2 -0
- package/dist/validation/checks/agent-prompt.js.map +1 -1
- package/dist/validation/checks/connections.js +8 -0
- package/dist/validation/checks/connections.js.map +1 -1
- package/dist/validation/checks/from-ai.js +1 -0
- package/dist/validation/checks/from-ai.js.map +1 -1
- package/dist/validation/checks/tools.js +2 -0
- package/dist/validation/checks/tools.js.map +1 -1
- package/dist/validation/checks/trigger.js +2 -0
- package/dist/validation/checks/trigger.js.map +1 -1
- package/dist/validation/types.d.ts +4 -0
- package/dist/validation/types.js +18 -0
- package/dist/validation/types.js.map +1 -1
- package/dist/workflow-builder-agent.d.ts +5 -2
- package/dist/workflow-builder-agent.js +4 -3
- package/dist/workflow-builder-agent.js.map +1 -1
- package/dist/workflow-state.d.ts +3 -1
- package/dist/workflow-state.js +8 -0
- package/dist/workflow-state.js.map +1 -1
- package/package.json +11 -7
package/dist/tools/best-practices/data-analysis.d.ts
@@ -0,0 +1,7 @@
+import type { BestPracticesDocument } from '../../types/best-practices';
+export declare class DataAnalysisBestPractices implements BestPracticesDocument {
+    readonly technique: "data_analysis";
+    readonly version = "1.0.0";
+    private readonly documentation;
+    getDocumentation(): string;
+}
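
To make the new declaration concrete, here is a hypothetical consumer sketch (not part of the diff); the deep `dist` require path is an assumption about how the package exposes these files.

```javascript
// Hypothetical consumer of the new class; not code from this package.
// The deep dist path is an assumption; the package may not export it publicly.
const {
	DataAnalysisBestPractices,
} = require('@n8n/ai-workflow-builder/dist/tools/best-practices/data-analysis');

const practices = new DataAnalysisBestPractices();
console.log(practices.technique);          // "data_analysis"
console.log(practices.version);            // "1.0.0"
console.log(practices.getDocumentation()); // the full best-practices markdown
```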
package/dist/tools/best-practices/data-analysis.js
@@ -0,0 +1,367 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.DataAnalysisBestPractices = void 0;
+const categorization_1 = require("../../types/categorization");
+class DataAnalysisBestPractices {
+    technique = categorization_1.WorkflowTechnique.DATA_ANALYSIS;
+    version = '1.0.0';
+    documentation = `# Best Practices: Data Analysis Workflows
+
+## Workflow Design
+
+Structure workflows following the Input → Transform → Output pattern. Use clear node naming (e.g., "Fetch Sales Data", "Calculate Averages", "IF High Variance?") to document the flow.
+
+Start with appropriate triggers:
+- Manual Trigger for on-demand analysis
+- Cron/Schedule Trigger for periodic analysis (daily/weekly reports)
+- Webhook Trigger for event-driven analysis
+
+Break complex workflows into modular sub-workflows using the Execute Workflow node for reusable components like "Outlier Detection" or "Data Preparation".
+
+CRITICAL: For large datasets, use Split In Batches node to process items in chunks (e.g., 100 at a time) to prevent memory issues. Always test with realistic data volumes.
+
+Example pattern:
+- Trigger → HTTP Request (fetch data) → Spreadsheet File (parse CSV) → Set (clean fields) → Filter (remove nulls) → Code (analyze) → HTML (format report) → Email (send results)
+
+## Data Preparation Strategy
+
+1. **Fetch Data**: Use dedicated integration nodes or HTTP Request for APIs. Import cURL commands directly to HTTP node for complex APIs.
+2. **Parse & Convert**: Convert to JSON using Spreadsheet File node for CSV/Excel. Enable "Convert types where required" on condition nodes.
+3. **Clean Data**: Use Set node with "Keep Only Set" enabled to drop unused fields. Filter node for removing null values or focusing on data subsets.
+4. **Merge/Enrich**: Use Merge node by key or index to combine multiple sources. Choose correct merge mode to avoid mismatched items.
+
+## Analysis Implementation
+
+Use Function node (not Function Item) when analysis needs all items as a whole (calculating totals, finding trends). Function Item operates per item only.
+
+For AI-powered analysis, filter irrelevant content first to minimize tokens. Batch data into single prompts when possible.
+
+Always pin data after external calls to test downstream logic without re-fetching. This saves API costs and speeds development.
+
+## Output & Integration
+
+Format results appropriately:
+- HTML/Markdown nodes for reports
+- Set node to prepare specific output fields (totalSales, anomalyCount)
+- Database nodes to store analysis history
+- Webhook Response for API-triggered workflows
+
+Use conditional branches (IF nodes) for post-analysis actions:
+- Create tasks if anomalies detected
+- Send alerts for critical thresholds
+- Avoid infinite loops by using proper conditions
+
+## Recommended Nodes
+
+### HTTP Request (n8n-nodes-base.httpRequest)
+
+**Purpose**: Fetch datasets from URLs or APIs
+
+**Use Cases**:
+- Pull data from REST APIs for analysis
+- Fetch CSV/JSON files from URLs
+- Query external data sources
+
+**Best Practices**:
+- Import cURL commands for complex requests
+- Use authentication credentials properly
+- Handle pagination for large datasets
+
+### Spreadsheet File (n8n-nodes-base.spreadsheetFile)
+
+**Purpose**: Parse CSV/Excel files into JSON items for processing
+
+**Use Cases**:
+- Import CSV data from file uploads
+- Process Excel spreadsheets
+- Convert tabular data to JSON
+
+**Best Practices**:
+- Specify correct file format
+- Handle header rows properly
+- Test with various file encodings
+
+### Set / Edit Fields (n8n-nodes-base.set)
+
+**Purpose**: Clean data, select relevant fields, rename columns, convert data types
+
+**Key Setting**: "Keep Only Set" - drops all fields not explicitly defined
+
+**Use Cases**:
+- Remove unused columns to reduce data size
+- Rename fields to standardized names
+- Convert data types (string to number)
+- Add calculated fields
+
+**Best Practices**:
+- Enable "Keep Only Set" to drop unused data
+- Always verify output structure
+- Use expressions for calculated fields
+
+### Filter (n8n-nodes-base.filter)
+
+**Purpose**: Remove unwanted items based on conditions
+
+**Use Cases**:
+- Remove null values
+- Filter outliers before analysis
+- Focus on specific data subsets
+
+**Best Practices**:
+- Filter early to reduce processing load
+- Use multiple conditions when needed
+- Document filter logic clearly
+
+### IF (n8n-nodes-base.if)
+
+**Purpose**: Branch workflow based on analysis results
+
+**Use Cases**:
+- Route anomalies vs normal data
+- Trigger alerts for threshold breaches
+- Create conditional outputs
+
+**Best Practices**:
+- Enable "Convert types where required" for comparisons
+- Use clear condition names
+- Handle both true and false branches
+
+### Code / Function (n8n-nodes-base.function)
+
+**Purpose**: Custom JavaScript for calculations, statistics, anomaly detection
+
+**Use Cases**:
+- Calculate statistical measures (mean, median, std dev)
+- Detect outliers and anomalies
+- Perform complex transformations
+- Implement custom algorithms
+
+**Best Practices**:
+- Use Function node (not Function Item) for whole-dataset operations
+- Return proper data structure: \`return items\`
+- Add comments to explain logic
+- Test with edge cases
+
+**Note**: Consider using the newer Code node (n8n-nodes-base.code) as Function node is deprecated.
+
+### Aggregate (n8n-nodes-base.aggregate)
+
+**Purpose**: Group items, gather values into arrays, count occurrences by category
+
+**Use Cases**:
+- Group sales by region
+- Count items by category
+- Calculate sums and averages per group
+
+**Best Practices**:
+- Choose appropriate aggregation function
+- Use grouping keys effectively
+- Simplifies statistical calculations
+
+### Split In Batches (n8n-nodes-base.splitInBatches)
+
+**Purpose**: Process large datasets in chunks to prevent memory overload
+
+**Use Cases**:
+- Handle datasets with 1000+ items
+- Process API results in batches
+- Prevent workflow timeouts
+
+**Best Practices**:
+- Set appropriate batch size (e.g., 100 items)
+- Test with realistic data volumes
+- Use loop logic properly
+
+### Merge (n8n-nodes-base.merge)
+
+**Purpose**: Combine data from multiple sources by key/index
+
+**Modes**:
+- Merge by Key: Join data like database operations
+- Merge by Index: Combine parallel data streams
+- Wait mode: Synchronize parallel branches
+
+**Use Cases**:
+- Join customer data with transaction data
+- Combine multiple API responses
+- Enrich data from multiple sources
+
+**Best Practices**:
+- Choose correct merge mode
+- Ensure matching keys exist
+- Handle missing data gracefully
+
+### Database Nodes
+
+**MySQL** (n8n-nodes-base.mySql):
+- Purpose: Query MySQL databases for analysis data
+- Use cases: Fetch historical data, store results
+
+**Postgres** (n8n-nodes-base.postgres):
+- Purpose: Query PostgreSQL databases
+- Use cases: Complex analytical queries, time-series data
+
+**MongoDB** (n8n-nodes-base.mongoDb):
+- Purpose: Query NoSQL document databases
+- Use cases: Unstructured data analysis, JSON documents
+
+**Best Practices**:
+- Use parameterized queries for security
+- Query source data efficiently with proper indexes
+- Store analysis results for historical tracking
+- Use appropriate data types
+
+### Google Sheets (n8n-nodes-base.googleSheets)
+
+**Purpose**: Read/write spreadsheet data
+
+**Use Cases**:
+- Import data for analysis
+- Append summary statistics
+- Build analysis logs
+- Share results with stakeholders
+
+**Best Practices**:
+- Use range specifications efficiently
+- Handle large sheets with batching
+- Consider API rate limits
+
+### AI Agent (@n8n/n8n-nodes-langchain.agent)
+
+**Purpose**: Leverage AI for text analysis, sentiment detection, complex pattern recognition
+
+**Use Cases**:
+- Sentiment analysis of customer feedback
+- Text classification
+- Extract insights from unstructured data
+- Natural language processing
+
+**Best Practices**:
+- Filter irrelevant content first to minimize tokens
+- Batch data into single prompts when possible
+- Use structured output for consistency
+- Consider API costs and latency
+
+### HTML (n8n-nodes-base.html)
+
+**Purpose**: Generate formatted reports with tables and styling
+
+**Use Cases**:
+- Create analysis reports
+- Build dashboards
+- Color-code data quality scores
+- Format tables with results
+
+**Best Practices**:
+- Use templates for consistent formatting
+- Include visualizations where helpful
+- Make reports mobile-friendly
+
+### Email (n8n-nodes-base.emailSend)
+
+**Purpose**: Send analysis reports to stakeholders automatically
+
+**Use Cases**:
+- Scheduled report delivery
+- Alert notifications
+- Share findings with teams
+
+**Best Practices**:
+- Use clear subject lines
+- Include summary in email body
+- Attach detailed reports when needed
+- Schedule appropriately
+
+## Common Pitfalls to Avoid
+
+### Data Type Mismatches
+
+**Problem**: JSON data types matter. Comparing string vs number yields incorrect results. The comparison "5" > "10" is lexicographically true (wrong for numbers).
+
+**Solution**:
+- Always convert data types before comparisons
+- Use Number() or parseInt() for numeric operations
+- Enable "Convert types where required" on IF nodes
+- Validate data types early in workflow
+- Use proper type casting in Code nodes
+
+### Memory Issues with Large Datasets
+
+**Problem**: Processing thousands of items at once can crash workflows or timeout.
+
+**Solution**:
+- Use Split In Batches node for datasets over 100 items
+- Set appropriate batch sizes (50-100 items)
+- Test with realistic data volumes during development
+- Monitor memory usage in production
+- Consider sub-workflows for complex processing
+
+### Not Pinning Data During Development
+
+**Problem**: Re-fetching data from APIs repeatedly wastes time and costs money during development.
+
+**Solution**:
+- Always pin data after external calls
+- Test downstream logic without re-fetching
+- Saves API costs and speeds development
+- Use manual triggers to control execution
+
+### Incorrect Aggregation Logic
+
+**Problem**: Using Function Item instead of Function node for whole-dataset calculations.
+
+**Solution**:
+- Use Function node (not Function Item) for totals, averages, trends
+- Function Item operates per item only
+- Understand the difference between item-level and dataset-level operations
+- Use Aggregate node for common operations
+
+### Missing Data Cleaning
+
+**Problem**: Null values, inconsistent formats, or outliers skew analysis results.
+
+**Solution**:
+- Use Filter node to remove null values
+- Standardize data formats early
+- Handle missing data explicitly
+- Use Set node to clean and normalize fields
+- Document data quality assumptions
+
+### Poor Error Handling
+
+**Problem**: Failed API calls or data issues break the entire workflow.
+
+**Solution**:
+- Use "Continue on Fail" setting appropriately
+- Add IF nodes to check for empty datasets
+- Implement error logging
+- Use Error Trigger workflows for global handling
+- Validate data quality before analysis
+
+### Hardcoded Values
+
+**Problem**: Thresholds, API endpoints, or configuration values are hardcoded in nodes.
+
+**Solution**:
+- Store configuration in environment variables
+- Use Google Sheets or databases for thresholds
+- Makes workflows reusable and maintainable
+- Enable non-technical users to adjust parameters
+
+### Inefficient Query Patterns
+
+**Problem**: Fetching entire datasets when only summary data is needed.
+
+**Solution**:
+- Use database aggregation functions
+- Filter data at the source
+- Use appropriate indexes
+- Implement pagination for large results
+- Consider pre-aggregated views
+`;
+    getDocumentation() {
+        return this.documentation;
+    }
+}
+exports.DataAnalysisBestPractices = DataAnalysisBestPractices;
+//# sourceMappingURL=data-analysis.js.map
package/dist/tools/best-practices/data-analysis.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"data-analysis.js","sourceRoot":"","sources":["../../../src/tools/best-practices/data-analysis.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,yBAAyB;IAC5B,SAAS,GAAG,kCAAiB,CAAC,aAAa,CAAC;IAC5C,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiWjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AA1WD,8DA0WC"}
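
The documentation above stresses three mechanics worth seeing together: cast types early, compute whole-dataset statistics in a single Code node run, and return an array of objects with a `json` key. A minimal sketch, using `items` as the package's own Code-node example does; the numeric `value` field is a hypothetical input, not something this package defines.

```javascript
// n8n Code node, "Run Once for All Items" mode. `value` is a hypothetical
// input field used only for illustration.
const values = items
	.map((item) => Number(item.json.value)) // cast early, per the type-mismatch advice
	.filter((v) => !Number.isNaN(v)); // drop nulls/non-numerics before analysis

const mean = values.reduce((sum, v) => sum + v, 0) / values.length;
const stdDev = Math.sqrt(
	values.reduce((sum, v) => sum + (v - mean) ** 2, 0) / values.length,
);
const anomalyCount = values.filter((v) => Math.abs(v - mean) > 2 * stdDev).length;

// Return format the documentation requires: an array of objects with a json key.
return [{ json: { mean, stdDev, anomalyCount } }];
```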
package/dist/tools/best-practices/data-extraction.js
@@ -15,6 +15,13 @@ Use Information Extractor or AI nodes for extracting structured data from unstru
 
 For binary data, ensure you use nodes like Extract From File to handle files properly.
 
+### Referencing Binary Data from Other Nodes
+When you need to reference binary data from a previous node, use this syntax:
+- Expression: '{{ $('Node Name').item.binary.property_name }}' or {{ $binary.property_name }} if previous item
+- Example for Gmail attachments: '{{ $('Gmail Trigger').item.binary.attachment_0 }}' or {{ $binary.attachment_0 }} if previous item
+- Example for webhook data: '{{ $('Webhook').item.binary.data }}' or {{ $binary.data }} if previous item
+- Important: The property name depends on how the previous node names the binary data
+
 ## Data Structure & Type Management
 
 Normalize data structure early in your workflow. Use transformation nodes like Split Out, Aggregate, or Set to ensure your data matches n8n's expected structure: an array of objects with a json key.
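
The new binary-data section notes that the property name depends on the upstream node. As a hedged companion sketch, a Code node can simply list whichever binary properties arrive, which helps when you are unsure what name to reference in an expression:

```javascript
// n8n Code node sketch: report which binary properties each incoming item
// carries, since the property name is set by the upstream node.
return items.map((item) => ({
	json: { binaryProperties: Object.keys(item.binary ?? {}) },
	binary: item.binary, // pass binary data through unchanged
}));
```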
package/dist/tools/best-practices/data-extraction.js.map
@@ -1 +1 @@
-{"version":3,"file":"data-extraction.js","sourceRoot":"","sources":["../../../src/tools/best-practices/data-extraction.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,2BAA2B;IAC9B,SAAS,GAAG,kCAAiB,CAAC,eAAe,CAAC;IAC9C,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG
+{"version":3,"file":"data-extraction.js","sourceRoot":"","sources":["../../../src/tools/best-practices/data-extraction.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,2BAA2B;IAC9B,SAAS,GAAG,kCAAiB,CAAC,eAAe,CAAC;IAC9C,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAkGjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AA3GD,kEA2GC"}
package/dist/tools/best-practices/data-transformation.d.ts
@@ -0,0 +1,7 @@
+import type { BestPracticesDocument } from '../../types/best-practices';
+export declare class DataTransformationBestPractices implements BestPracticesDocument {
+    readonly technique: "data_transformation";
+    readonly version = "1.0.0";
+    private readonly documentation;
+    getDocumentation(): string;
+}
package/dist/tools/best-practices/data-transformation.js
@@ -0,0 +1,181 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.DataTransformationBestPractices = void 0;
+const categorization_1 = require("../../types/categorization");
+class DataTransformationBestPractices {
+    technique = categorization_1.WorkflowTechnique.DATA_TRANSFORMATION;
+    version = '1.0.0';
+    documentation = `# Best Practices: Data Transformation
+
+## Workflow Design
+
+### Core Principles
+- **Structure**: Always follow Input → Transform → Output pattern
+- **Modularity**: Break complex workflows into sub-workflows using Execute Workflow node
+- **Optimization**: Filter and reduce data early to improve performance
+- **Documentation**: Use descriptive node names and sticky notes for clarity
+- **Testing**: Test with edge cases (empty data, missing fields, special characters)
+
+### Design Best Practices
+- Plan transformation requirements in plain language before building
+- Keep main workflows to ~5 nodes, offload details to sub-workflows
+- Process inexpensive transformations first (especially data reduction)
+- Use Modular Design: Create reusable sub-workflows for common tasks like "Data Cleaning" or "Error Handler"
+- Batch large datasets using Split In Batches node to prevent timeouts
+
+## Error Handling
+- **Validate Early**: Use IF node at workflow start to check required fields and data types
+- **Error Outputs**: Connect red error output connectors to logging/notification chains
+- **Continue on Fail**: Enable in node settings to flag errors without breaking workflow
+- **Global Error Workflow**: Create separate workflow with Error Trigger node as safety net
+- **Logging**: Log key events with context (which record failed, error message, etc.)
+
+## Recommended Nodes
+
+### Essential Transformation Nodes
+
+#### Edit Fields (Set) (n8n-nodes-base.set)
+
+**Purpose**: Create, modify, rename fields; change data types
+
+**Key Setting**: "Keep Only Set" - drops all fields not explicitly defined (default: disabled)
+
+**Use Cases**:
+- Extract specific columns
+- Add calculated fields
+- Convert data types (string to number)
+- Format dates using expressions
+
+**Pitfalls**:
+- Not understanding "Keep Only Set" behavior can lead to data loss
+- Enabled: Drops all fields not explicitly defined (data loss risk)
+- Disabled: Carries forward all fields (potential bloat)
+- Always verify output structure after configuration
+
+#### IF/Filter Nodes
+
+**IF Node** (n8n-nodes-base.if):
+- **Purpose**: Conditional processing and routing
+- **Best Practice**: Use early to validate inputs and remove bad data
+- **Example**: Check if required fields exist before processing
+
+**Filter Node** (n8n-nodes-base.filter):
+- **Purpose**: Filter items based on conditions
+- **Best Practice**: Use early in workflow to reduce data volume
+
+#### Merge Node (n8n-nodes-base.merge)
+
+**Purpose**: Combine two data streams
+
+**Modes**:
+- Merge by Key (like database join)
+- Merge by Index
+- Append
+
+**Pitfalls**:
+- **Missing Keys**: Trying to merge on non-existent fields
+- **Field Name Mismatch**: Different field names in sources
+- **Solution**: Use Set node to normalize field names before merging
+
+#### Code Node (n8n-nodes-base.code)
+
+**When to Use**: Complex transformations impossible with built-in nodes
+
+**Execution Modes**:
+- "Run Once per Item": Process each item independently
+- "Run Once for All Items": Access entire dataset (for aggregation)
+
+**Return Format**: Must return array of objects with json property
+\`\`\`javascript
+return items; // or return [{ json: {...} }];
+\`\`\`
+
+**Pitfalls**:
+- Wrong return format: Not returning array of objects with json property
+- Overly complex: Stuffing entire workflow logic in one Code node
+- Keep code nodes focused on single transformation aspect
+
+#### Summarize Node (n8n-nodes-base.summarize)
+
+**Purpose**: Pivot table-style aggregations (count, sum, average, min/max)
+
+**Configuration**:
+- Fields to Summarize: Choose aggregation function
+- Fields to Split By: Grouping keys
+
+**Output**: Single item with summary or multiple items per group
+
+### Data Restructuring Nodes
+
+- **Split Out** (n8n-nodes-base.splitOut): Convert single item with array into multiple items
+- **Aggregate** (n8n-nodes-base.aggregate): Combine multiple items into one
+- **Remove Duplicates** (n8n-nodes-base.removeDuplicates): Delete duplicate items based on field criteria
+- **Sort** (n8n-nodes-base.sort): Order items alphabetically/numerically
+- **Limit** (n8n-nodes-base.limit): Trim to maximum number of items
+
+### Batch Processing
+
+**Split In Batches** (n8n-nodes-base.splitInBatches):
+- **Purpose**: Process large datasets in chunks
+- **Use When**: Handling 100+ items with expensive operations (API calls, AI)
+
+### Workflow Orchestration
+
+**Execute Workflow** (n8n-nodes-base.executeWorkflow):
+- **Purpose**: Call sub-workflows for modular design
+- **Best Practice**: Create reusable sub-workflows for common tasks like "Data Cleaning" or "Error Handler"
+
+**Error Trigger** (n8n-nodes-base.errorTrigger):
+- **Purpose**: Create global error handling workflow
+- **Best Practice**: Use as safety net to catch all workflow errors
+
+## Common Pitfalls to Avoid
+
+### Critical Mistakes
+
+#### Set Node Issues
+- **Mistake**: Not understanding "Keep Only Set" behavior
+- Enabled: Drops all fields not explicitly defined (data loss risk)
+- Disabled: Carries forward all fields (potential bloat)
+- **Solution**: Always verify output structure after configuration
+
+#### Code Node Errors
+- **Wrong Return Format**: Not returning array of objects with json property
+- **Fix**: Always return \`items\` or \`[{ json: {...} }]\`
+- **Overly Complex**: Stuffing entire workflow logic in one Code node
+- **Fix**: Keep code nodes focused on single transformation aspect
+
+#### Merge Node Problems
+- **Missing Keys**: Trying to merge on non-existent fields
+- **Fix**: Validate both inputs have matching key fields
+- **Field Name Mismatch**: Different field names in sources
+- **Fix**: Use Set node to normalize field names before merging
+
+### General Workflow Issues
+- **No Error Handling**: Workflow crashes on unexpected data
+- **Fix**: Add IF nodes for validation, use error outputs
+- **Hard-coded Values**: URLs, credentials, config in nodes
+- **Fix**: Use environment variables or config nodes
+- **Poor Naming**: Generic names like "Set1", "Function1"
+- **Fix**: Use descriptive names: "Clean Customer Data", "Calculate Totals"
+- **Missing Documentation**: No comments or descriptions
+- **Fix**: Add sticky notes, node descriptions, code comments
+
+### Performance Pitfalls
+- Processing large datasets without batching → timeouts
+- Not filtering early → unnecessary processing overhead
+- Excessive node chaining → visual clutter and slow execution
+- Not using sub-workflows → unmaintainable monolithic workflows
+
+### Data Validation Pitfalls
+- Assuming input data is always perfect
+- Not handling empty/null values
+- Ignoring data type mismatches
+- Missing edge case handling (special characters, empty arrays)
+`;
+    getDocumentation() {
+        return this.documentation;
+    }
+}
+exports.DataTransformationBestPractices = DataTransformationBestPractices;
+//# sourceMappingURL=data-transformation.js.map
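
The Code node section above contrasts the two execution modes and warns about the return format. As a per-item counterpart to the earlier all-items sketch, this uses n8n's documented `$input.item`; the `price` and `quantity` fields are hypothetical inputs for illustration only.

```javascript
// n8n Code node, "Run Once per Item" mode. price/quantity are hypothetical
// input fields used only for illustration.
const item = $input.item;
item.json.total = Number(item.json.price) * Number(item.json.quantity); // cast before arithmetic
return item; // per-item mode returns a single item, not an array
```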
package/dist/tools/best-practices/data-transformation.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"data-transformation.js","sourceRoot":"","sources":["../../../src/tools/best-practices/data-transformation.ts"],"names":[],"mappings":";;;AACA,2DAA2D;AAE3D,MAAa,+BAA+B;IAClC,SAAS,GAAG,kCAAiB,CAAC,mBAAmB,CAAC;IAClD,OAAO,GAAG,OAAO,CAAC;IAEV,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAuKjC,CAAC;IAED,gBAAgB;QACf,OAAO,IAAI,CAAC,aAAa,CAAC;IAC3B,CAAC;CACD;AAhLD,0EAgLC"}
package/dist/tools/best-practices/document-processing.d.ts
@@ -0,0 +1,7 @@
+import type { BestPracticesDocument } from '../../types/best-practices';
+export declare class DocumentProcessingBestPractices implements BestPracticesDocument {
+    readonly technique: "document_processing";
+    readonly version = "1.0.0";
+    private readonly documentation;
+    getDocumentation(): string;
+}