@pagecrawl/n8n-nodes-pagecrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 PageCrawl.io
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,227 @@
1
+ # n8n-nodes-pagecrawl
2
+
3
+ This is an n8n community node that provides integration with [PageCrawl.io](https://pagecrawl.io) for website monitoring and change detection.
4
+
5
+ ## Installation
6
+
7
+ ### Community Node (Recommended)
8
+
9
+ You can install this node directly in n8n:
10
+
11
+ 1. Go to **Settings** > **Community Nodes**
12
+ 2. Search for `@pagecrawl/n8n-nodes-pagecrawl`
13
+ 3. Click **Install**
14
+
15
+ ### Manual Installation
16
+
17
+ ```bash
18
+ npm install @pagecrawl/n8n-nodes-pagecrawl
19
+ ```
20
+
21
+ ### Local Development
22
+
23
+ For development and testing:
24
+
25
+ ```bash
26
+ # Clone the repository
27
+ git clone https://github.com/pagecrawl/n8n-nodes-pagecrawl.git
28
+ cd n8n-nodes-pagecrawl
29
+
30
+ # Install dependencies
31
+ npm install
32
+
33
+ # Build the node
34
+ npm run build
35
+
36
+ # Run n8n with the node
37
+ npm run dev
38
+ ```
39
+
40
+ ## Authentication
41
+
42
+ To use this node, you'll need a PageCrawl.io API token.
43
+
44
+ > **Note:** API access requires a paid PageCrawl.io plan. Free accounts do not have API access.
45
+
46
+ 1. Sign up or log in to [PageCrawl.io](https://pagecrawl.io)
47
+ 2. Upgrade to a paid plan if you haven't already
48
+ 3. Go to **Settings > API**
49
+ 4. Copy your API token
50
+ 5. In n8n, create new PageCrawl credentials and paste your token
51
+
52
+ ## Available Nodes
53
+
54
+ ### PageCrawl Node
55
+
56
+ The main node for interacting with PageCrawl.io API, supporting the following resources:
57
+
58
+ #### Page Operations
59
+ - **List All Pages** - Get all tracked pages
60
+ - **Get Page** - Get specific page configuration
61
+ - **Create Page** - Create new tracked page with full configuration
62
+ - **Create Simple Page** - Quick page creation with minimal options
63
+ - **Update Page** - Update existing page configuration
64
+ - **Delete Page** - Remove a tracked page
65
+ - **Run Check Now** - Trigger an immediate check for a page
66
+
67
+ #### Check Operations (History)
68
+ - **Get History** - Retrieve check history for a page
69
+ - **Get Text Diff HTML** - Get text differences as HTML
70
+ - **Get Text Diff Image** - Get text differences as an image
71
+ - **Get Text Diff Markdown** - Get text differences as Markdown
72
+
73
+ #### Screenshot Operations
74
+ - **Get Latest Screenshot** - Get the most recent full-page screenshot
75
+ - **Get Latest Screenshot Diff** - Visual diff of latest vs previous
76
+ - **Get Check Screenshot** - Screenshot for specific check
77
+ - **Get Check Screenshot Diff** - Visual diff for specific check
78
+
79
+ #### Webhook Operations
80
+ - **List Webhooks** - Get all configured webhooks
81
+ - **Create Webhook** - Set up a new webhook
82
+ - **Update Webhook** - Modify webhook configuration
83
+ - **Delete Webhook** - Remove a webhook
84
+ - **Test Webhook** - Send test notification
85
+
86
+ ### PageCrawl Trigger Node
87
+
88
+ Webhook trigger node that receives real-time notifications when changes are detected.
89
+
90
+ Features:
91
+ - Automatic webhook registration/deregistration
92
+ - Configurable payload fields
93
+ - Event filtering (changes, errors)
94
+ - Simplified output option
95
+
96
+ ## Example Workflows
97
+
98
+ ### 1. Monitor Website and Send Email on Change
99
+
100
+ ```
101
+ [PageCrawl Trigger] → [Gmail Send Email]
102
+ ```
103
+
104
+ Configure the trigger to monitor specific pages and send notifications via email when changes are detected.
105
+
106
+ ### 2. Track Price Changes
107
+
108
+ ```
109
+ [Schedule Trigger] → [PageCrawl Get Page] → [IF Price Changed] → [Slack Message]
110
+ ```
111
+
112
+ Periodically check product prices and notify via Slack when they change.
113
+
114
+ ### 3. Archive Website Screenshots
115
+
116
+ ```
117
+ [Schedule Trigger] → [PageCrawl Get Screenshot] → [Google Drive Upload]
118
+ ```
119
+
120
+ Automatically save website screenshots to Google Drive for compliance or archival.
121
+
122
+ ### 4. Sync Changes to Database
123
+
124
+ ```
125
+ [PageCrawl Trigger] → [MySQL Insert]
126
+ ```
127
+
128
+ Store all detected changes in a database for analysis and reporting.
129
+
130
+ ## Configuration Options
131
+
132
+ ### Page Tracking Configuration
133
+
134
+ - **URL**: The webpage to monitor
135
+ - **Elements**: Specific page elements to track (CSS/XPath selectors)
136
+ - **Frequency**: How often to check (3 minutes to weekly)
137
+ - **Location**: Server location for checks (US, UK, CA, DE)
138
+ - **Authentication**: HTTP Basic auth support
139
+ - **Actions**: Pre-check actions (scroll, click, wait, etc.)
140
+ - **Rules**: Conditional notifications based on content
141
+
142
+ ### Notification Settings
143
+
144
+ - **Channels**: Email, Slack, Discord, Teams, Telegram
145
+ - **Rules**: Text difference, content contains, number comparisons
146
+ - **Advanced**: Headers, proxies, user agent customization
147
+
148
+ ## API Rate Limits
149
+
150
+ - Default: 60 requests per minute
151
+ - Contact support for higher limits
152
+ - Rate limit errors return HTTP 429
153
+
154
+ ## Common Use Cases
155
+
156
+ 1. **E-commerce Price Monitoring** - Track competitor prices and stock levels
157
+ 2. **Content Updates** - Monitor news sites, blogs, or documentation
158
+ 3. **Compliance Monitoring** - Ensure website content meets requirements
159
+ 4. **SEO Tracking** - Monitor meta tags, titles, and content changes
160
+ 5. **Security Monitoring** - Detect unauthorized website changes
161
+ 6. **Data Extraction** - Regular scraping of structured data
162
+
163
+ ## Error Handling
164
+
165
+ The node includes comprehensive error handling:
166
+ - Validation errors (HTTP 422) with detailed messages
167
+ - Rate limiting (HTTP 429) with retry guidance
168
+ - Authentication errors with clear instructions
169
+ - Network errors with appropriate retry logic
170
+
171
+ ## Support
172
+
173
+ - **Documentation**: [PageCrawl.io Docs](https://pagecrawl.io/docs)
174
+ - **API Reference**: [API Documentation](https://pagecrawl.io/docs/api)
175
+ - **Issues**: [GitHub Issues](https://github.com/pagecrawl/n8n-nodes-pagecrawl/issues)
176
+ - **Support**: support@pagecrawl.io
177
+
178
+ ## License
179
+
180
+ MIT - See LICENSE file for details
181
+
182
+ ## Contributing
183
+
184
+ Contributions are welcome! Please feel free to submit a Pull Request.
185
+
186
+ 1. Fork the repository
187
+ 2. Create your feature branch
188
+ 3. Commit your changes
189
+ 4. Push to the branch
190
+ 5. Open a Pull Request
191
+
192
+ ## Development
193
+
194
+ ### Building
195
+
196
+ ```bash
197
+ npm run build
198
+ ```
199
+
200
+ ### Testing
201
+
202
+ ```bash
203
+ # Run linter
204
+ npm run lint
205
+
206
+ # Fix linting issues
207
+ npm run lint:fix
208
+
209
+ # Test in n8n
210
+ npm run dev
211
+ ```
212
+
213
+ ### Publishing
214
+
215
+ ```bash
216
+ npm version patch
217
+ npm publish
218
+ ```
219
+
220
+ ## Changelog
221
+
222
+ ### 0.1.0
223
+ - Initial release
224
+ - Full API coverage for Pages, Checks, Screenshots, and Webhooks
225
+ - Trigger node for real-time notifications
226
+ - Comprehensive error handling
227
+ - TypeScript implementation
@@ -0,0 +1,9 @@
1
+ import { IAuthenticateGeneric, ICredentialTestRequest, ICredentialType, INodeProperties } from 'n8n-workflow';
2
+ export declare class PageCrawlApi implements ICredentialType { // Type declaration for the PageCrawl.io credential class (an n8n-workflow ICredentialType)
3
+ name: string; // credential type identifier
4
+ displayName: string; // human-readable label for the credential
5
+ documentationUrl: string; // link to the API documentation
6
+ properties: INodeProperties[]; // input fields the credential collects
7
+ authenticate: IAuthenticateGeneric; // describes how the credential is applied to outgoing requests
8
+ test: ICredentialTestRequest; // request definition used to test the credential
9
+ }
@@ -0,0 +1,57 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.PageCrawlApi = void 0;
4
+ class PageCrawlApi {
5
+ constructor() {
6
+ this.name = 'pageCrawlApi';
7
+ this.displayName = 'PageCrawl.io API';
8
+ this.documentationUrl = 'https://pagecrawl.io/docs/api';
9
+ this.properties = [
10
+ {
11
+ displayName: 'API Token',
12
+ name: 'apiToken',
13
+ type: 'string',
14
+ typeOptions: {
15
+ password: true,
16
+ },
17
+ default: '',
18
+ required: true,
19
+ description: 'Your PageCrawl.io API token. API access requires a paid plan. You can find your token in Settings > API.',
20
+ },
21
+ {
22
+ displayName: 'Base URL',
23
+ name: 'baseUrl',
24
+ type: 'string',
25
+ default: 'https://pagecrawl.io',
26
+ description: 'The base URL for the PageCrawl.io API',
27
+ hint: 'Use https://pagecrawl.io for production',
28
+ },
29
+ ];
30
+ this.authenticate = {
31
+ type: 'generic',
32
+ properties: {
33
+ headers: {
34
+ Authorization: '=Bearer {{$credentials.apiToken}}',
35
+ },
36
+ },
37
+ };
38
+ this.test = {
39
+ request: {
40
+ baseURL: '={{$credentials.baseUrl}}',
41
+ url: '/api/user',
42
+ method: 'GET',
43
+ },
44
+ rules: [
45
+ {
46
+ type: 'responseSuccessBody',
47
+ properties: {
48
+ key: 'id',
49
+ value: undefined,
50
+ message: 'Authentication successful',
51
+ },
52
+ },
53
+ ],
54
+ };
55
+ }
56
+ }
57
+ exports.PageCrawlApi = PageCrawlApi;
@@ -0,0 +1,24 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 60 60" width="60" height="60">
2
+ <!-- Background circle -->
3
+ <circle cx="30" cy="30" r="28" fill="#2955c3" stroke="#1e3d8f" stroke-width="2"/>
4
+
5
+ <!-- Page icon -->
6
+ <g transform="translate(15, 12)">
7
+ <!-- Page background -->
8
+ <rect x="0" y="0" width="22" height="28" rx="2" fill="white"/>
9
+
10
+ <!-- Page lines representing content -->
11
+ <rect x="4" y="6" width="14" height="2" rx="1" fill="#2955c3"/>
12
+ <rect x="4" y="11" width="14" height="2" rx="1" fill="#2955c3"/>
13
+ <rect x="4" y="16" width="10" height="2" rx="1" fill="#2955c3"/>
14
+
15
+ <!-- Change indicator (dot) -->
16
+ <circle cx="16" cy="22" r="3" fill="#4ade80"/>
17
+ </g>
18
+
19
+ <!-- Magnifying glass for monitoring -->
20
+ <g transform="translate(30, 30)">
21
+ <circle cx="0" cy="0" r="7" fill="none" stroke="white" stroke-width="2.5"/>
22
+ <line x1="5" y1="5" x2="10" y2="10" stroke="white" stroke-width="2.5" stroke-linecap="round"/>
23
+ </g>
24
+ </svg>
@@ -0,0 +1,5 @@
1
+ import { IExecuteFunctions, INodeExecutionData, INodeType, INodeTypeDescription } from 'n8n-workflow';
2
+ export declare class PageCrawl implements INodeType { // Type declaration for the PageCrawl node class (an n8n-workflow INodeType)
3
+ description: INodeTypeDescription; // node description metadata
4
+ execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]>; // async entry point; must be invoked with an IExecuteFunctions context as `this`
5
+ }