@tummycrypt/acuity-middleware 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/build-paper.yml +39 -0
- package/.github/workflows/ci.yml +37 -0
- package/Dockerfile +53 -0
- package/README.md +103 -0
- package/docs/blog-post.mdx +240 -0
- package/docs/paper/IEEEtran.bst +2409 -0
- package/docs/paper/IEEEtran.cls +6347 -0
- package/docs/paper/acuity-middleware-paper.tex +375 -0
- package/docs/paper/balance.sty +87 -0
- package/docs/paper/references.bib +231 -0
- package/docs/paper.md +400 -0
- package/flake.nix +32 -0
- package/modal-app.py +82 -0
- package/package.json +48 -0
- package/src/adapters/acuity-scraper.ts +543 -0
- package/src/adapters/types.ts +193 -0
- package/src/core/types.ts +325 -0
- package/src/index.ts +75 -0
- package/src/middleware/acuity-wizard.ts +456 -0
- package/src/middleware/browser-service.ts +183 -0
- package/src/middleware/errors.ts +70 -0
- package/src/middleware/index.ts +80 -0
- package/src/middleware/remote-adapter.ts +246 -0
- package/src/middleware/selectors.ts +308 -0
- package/src/middleware/server.ts +372 -0
- package/src/middleware/steps/bypass-payment.ts +226 -0
- package/src/middleware/steps/extract.ts +174 -0
- package/src/middleware/steps/fill-form.ts +359 -0
- package/src/middleware/steps/index.ts +27 -0
- package/src/middleware/steps/navigate.ts +537 -0
- package/src/middleware/steps/read-availability.ts +399 -0
- package/src/middleware/steps/read-slots.ts +405 -0
- package/src/middleware/steps/submit.ts +168 -0
- package/src/server.ts +5 -0
- package/tsconfig.json +25 -0
package/flake.nix
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Nix flake providing a reproducible development shell for the middleware:
# Node 22, pnpm, and Nix-packaged Playwright browsers.
{
  description = "Acuity middleware dev shell";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    # One devShell per default platform (x86_64-linux, aarch64-darwin, ...).
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
      in
      {
        devShells.default = pkgs.mkShell {
          buildInputs = with pkgs; [
            nodejs_22
            nodePackages.pnpm
            # Pre-built browser bundles from nixpkgs so Playwright never
            # needs to download its own copies.
            playwright-driver.browsers
          ];

          # PLAYWRIGHT_BROWSERS_PATH points Playwright at the Nix store copy;
          # the skip flag stops `playwright install` from fetching duplicates.
          shellHook = ''
            export PLAYWRIGHT_BROWSERS_PATH="${pkgs.playwright-driver.browsers}"
            export PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
            echo "acuity-middleware dev shell"
            echo " node $(node --version)"
            echo " pnpm $(pnpm --version)"
          '';
        };
      }
    );
}
|
package/modal-app.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Modal Labs deployment for the scheduling middleware server.
|
|
3
|
+
|
|
4
|
+
Runs the Node.js middleware server with Playwright + Chromium
|
|
5
|
+
inside a Modal container with GPU-free compute.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
modal deploy modal-app.py # Deploy to Modal
|
|
9
|
+
modal serve modal-app.py # Local dev with hot reload
|
|
10
|
+
modal run modal-app.py # One-shot test run
|
|
11
|
+
|
|
12
|
+
Environment variables (set in Modal dashboard or .env):
|
|
13
|
+
AUTH_TOKEN - Required Bearer token for all endpoints
|
|
14
|
+
ACUITY_BASE_URL - Acuity scheduling URL
|
|
15
|
+
ACUITY_BYPASS_COUPON - 100% gift certificate code
|
|
16
|
+
PLAYWRIGHT_HEADLESS - Browser headless mode (default: true)
|
|
17
|
+
PLAYWRIGHT_TIMEOUT - Page timeout in ms (default: 30000)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import modal
|
|
21
|
+
|
|
22
|
+
app = modal.App("scheduling-middleware")
|
|
23
|
+
|
|
24
|
+
# Base image: Playwright's official image with Chromium pre-installed
|
|
25
|
+
image = (
|
|
26
|
+
modal.Image.from_registry(
|
|
27
|
+
"mcr.microsoft.com/playwright:v1.58.2-noble",
|
|
28
|
+
add_python="3.12",
|
|
29
|
+
)
|
|
30
|
+
.run_commands(
|
|
31
|
+
# Remove Node 24 from Playwright image, install Node 22 LTS
|
|
32
|
+
"apt-get remove -y nodejs || true",
|
|
33
|
+
"rm -f /usr/local/bin/node /usr/local/bin/npm /usr/local/bin/npx",
|
|
34
|
+
"curl -fsSL https://deb.nodesource.com/setup_22.x | bash -",
|
|
35
|
+
"apt-get install -y nodejs",
|
|
36
|
+
"node --version",
|
|
37
|
+
"corepack enable && corepack prepare pnpm@9.15.9 --activate",
|
|
38
|
+
"apt-get clean && rm -rf /var/lib/apt/lists/*",
|
|
39
|
+
)
|
|
40
|
+
.add_local_file("package.json", "/app/package.json", copy=True)
|
|
41
|
+
.add_local_dir("src", "/app/src", copy=True)
|
|
42
|
+
.add_local_file("tsconfig.json", "/app/tsconfig.json", copy=True)
|
|
43
|
+
.run_commands(
|
|
44
|
+
# Install all deps then compile TS → JS with esbuild (bundler handles fp-ts resolution)
|
|
45
|
+
"cd /app && pnpm install --no-frozen-lockfile",
|
|
46
|
+
"cd /app && pnpm add esbuild",
|
|
47
|
+
# Bundle middleware server to a single JS file with all deps inlined
|
|
48
|
+
"cd /app && npx esbuild src/middleware/server.ts"
|
|
49
|
+
" --bundle --platform=node --format=esm --outfile=dist/server.mjs"
|
|
50
|
+
" --external:playwright-core --external:playwright"
|
|
51
|
+
" --external:@playwright/test",
|
|
52
|
+
"ls -la /app/dist/server.mjs",
|
|
53
|
+
)
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@app.function(
|
|
58
|
+
image=image,
|
|
59
|
+
# No GPU needed - browser automation only
|
|
60
|
+
cpu=2.0,
|
|
61
|
+
memory=2048,
|
|
62
|
+
# Keep warm for low latency (1 container always ready)
|
|
63
|
+
min_containers=1,
|
|
64
|
+
# 5 minute timeout (wizard can take up to 60s per booking)
|
|
65
|
+
timeout=300,
|
|
66
|
+
secrets=[modal.Secret.from_name("scheduling-middleware-secrets")],
|
|
67
|
+
)
|
|
68
|
+
@modal.concurrent(max_inputs=1)
|
|
69
|
+
@modal.web_server(port=3001, startup_timeout=30)
|
|
70
|
+
def server():
|
|
71
|
+
import subprocess
|
|
72
|
+
|
|
73
|
+
subprocess.Popen(
|
|
74
|
+
["node", "dist/server.mjs"],
|
|
75
|
+
cwd="/app",
|
|
76
|
+
env={
|
|
77
|
+
**__import__("os").environ,
|
|
78
|
+
"NODE_ENV": "production",
|
|
79
|
+
"PORT": "3001",
|
|
80
|
+
"PLAYWRIGHT_HEADLESS": "true",
|
|
81
|
+
},
|
|
82
|
+
)
|
package/package.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@tummycrypt/acuity-middleware",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Playwright-based Acuity booking middleware server",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/Jesssullivan/acuity-middleware.git"
|
|
10
|
+
},
|
|
11
|
+
"exports": {
|
|
12
|
+
".": {
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"import": "./dist/index.js"
|
|
15
|
+
},
|
|
16
|
+
"./server": {
|
|
17
|
+
"types": "./dist/server.d.ts",
|
|
18
|
+
"import": "./dist/server.js"
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"main": "./dist/index.js",
|
|
22
|
+
"types": "./dist/index.d.ts",
|
|
23
|
+
"scripts": {
|
|
24
|
+
"build": "tsc",
|
|
25
|
+
"start": "node dist/middleware/server.js",
|
|
26
|
+
"dev": "tsx src/middleware/server.ts",
|
|
27
|
+
"test": "vitest run",
|
|
28
|
+
"typecheck": "tsc --noEmit",
|
|
29
|
+
"clean": "rm -rf dist"
|
|
30
|
+
},
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"playwright-core": "^1.58.1",
|
|
33
|
+
"effect": "^3.19.14",
|
|
34
|
+
"fp-ts": "^2.16.9"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"typescript": "^5.9.0",
|
|
38
|
+
"tsx": "^4.0.0",
|
|
39
|
+
"vitest": "^4.0.0",
|
|
40
|
+
"@types/node": "^22.0.0"
|
|
41
|
+
},
|
|
42
|
+
"publishConfig": {
|
|
43
|
+
"access": "public"
|
|
44
|
+
},
|
|
45
|
+
"engines": {
|
|
46
|
+
"node": ">=20.0.0"
|
|
47
|
+
}
|
|
48
|
+
}
|
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Acuity Scraper Adapter
|
|
3
|
+
* Extracts scheduling data from public Acuity pages without API access
|
|
4
|
+
*
|
|
5
|
+
* Uses Playwright for reliable browser automation and handles:
|
|
6
|
+
* - Service (appointment type) extraction
|
|
7
|
+
* - Availability dates
|
|
8
|
+
* - Time slots
|
|
9
|
+
* - Provider information
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { Browser, Page } from 'playwright-core';
|
|
13
|
+
|
|
14
|
+
const getChromium = async (): Promise<typeof import('playwright-core').chromium> => {
|
|
15
|
+
try {
|
|
16
|
+
const pw = await import('playwright-core');
|
|
17
|
+
return pw.chromium;
|
|
18
|
+
} catch {
|
|
19
|
+
throw new Error(
|
|
20
|
+
'playwright-core is required for the scraper adapter. Install with: pnpm add playwright-core'
|
|
21
|
+
);
|
|
22
|
+
}
|
|
23
|
+
};
|
|
24
|
+
import * as E from 'fp-ts/Either';
|
|
25
|
+
import * as TE from 'fp-ts/TaskEither';
|
|
26
|
+
import { pipe } from 'fp-ts/function';
|
|
27
|
+
import type { Service, Provider, TimeSlot, AcuityError, InfrastructureError } from '../core/types.js';
|
|
28
|
+
import { Errors } from '../core/types.js';
|
|
29
|
+
|
|
30
|
+
// =============================================================================
// TYPES
// =============================================================================

/** Configuration for {@link AcuityScraper}. Only `baseUrl` is required. */
export interface ScraperConfig {
  /** Base URL for the Acuity scheduling page */
  baseUrl: string;
  /** Browser launch options (default: true) */
  headless?: boolean;
  /** Request timeout in milliseconds (default: 30000) */
  timeout?: number;
  /** User agent string (default: a desktop Chrome UA) */
  userAgent?: string;
  /** Path to Chromium executable (for Lambda/serverless) */
  executablePath?: string;
  /** Additional browser launch args */
  launchArgs?: string[];
}

/** A service (appointment type) extracted from the public scheduling page. */
export interface ScrapedService {
  /** Acuity appointmentType id parsed from the link href, or a generated fallback. */
  id: string;
  name: string;
  description: string;
  /** Duration in minutes; defaults to 60 when the page shows none. */
  duration: number;
  /** Price in integer cents (e.g. "$150.00" -> 15000); 0 when not shown. */
  price: number;
  /** Category heading, only populated by the category-based page layout. */
  category?: string;
  /** Raw href of the service link on the page. */
  href: string;
}

/**
 * Dates with availability for one service.
 * NOTE(review): not produced by any method in this file — presumably consumed
 * elsewhere in the package; verify before removing.
 */
export interface ScrapedAvailability {
  dates: string[];
  serviceId: string;
  providerId?: string;
}

/** One time slot scraped from the day view of the scheduling page. */
export interface ScrapedTimeSlot {
  /** Human-readable label as rendered on the page (button/item text). */
  time: string;
  /** Machine-readable value from data-time/data-datetime; may be empty. */
  datetime: string;
  available: boolean;
}
|
|
70
|
+
|
|
71
|
+
// =============================================================================
|
|
72
|
+
// SCRAPER CLASS
|
|
73
|
+
// =============================================================================
|
|
74
|
+
|
|
75
|
+
export class AcuityScraper {
|
|
76
|
+
private config: ScraperConfig & { headless: boolean; timeout: number; userAgent: string };
|
|
77
|
+
private browser: Browser | null = null;
|
|
78
|
+
|
|
79
|
+
constructor(config: ScraperConfig) {
|
|
80
|
+
this.config = {
|
|
81
|
+
headless: true,
|
|
82
|
+
timeout: 30000,
|
|
83
|
+
userAgent:
|
|
84
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
85
|
+
...config,
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Initialize browser instance
|
|
91
|
+
*/
|
|
92
|
+
async init(): Promise<void> {
|
|
93
|
+
if (!this.browser) {
|
|
94
|
+
const chromium = await getChromium();
|
|
95
|
+
this.browser = await chromium.launch({
|
|
96
|
+
headless: this.config.headless,
|
|
97
|
+
executablePath: this.config.executablePath,
|
|
98
|
+
args: this.config.launchArgs,
|
|
99
|
+
});
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Close browser instance
|
|
105
|
+
*/
|
|
106
|
+
async close(): Promise<void> {
|
|
107
|
+
if (this.browser) {
|
|
108
|
+
await this.browser.close();
|
|
109
|
+
this.browser = null;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Create a new page with standard configuration
|
|
115
|
+
*/
|
|
116
|
+
private async createPage(): Promise<Page> {
|
|
117
|
+
if (!this.browser) {
|
|
118
|
+
await this.init();
|
|
119
|
+
}
|
|
120
|
+
const page = await this.browser!.newPage({
|
|
121
|
+
userAgent: this.config.userAgent,
|
|
122
|
+
});
|
|
123
|
+
page.setDefaultTimeout(this.config.timeout);
|
|
124
|
+
return page;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
/**
|
|
128
|
+
* Extract all services (appointment types) from the scheduling page
|
|
129
|
+
*/
|
|
130
|
+
async scrapeServices(): Promise<E.Either<AcuityError | InfrastructureError, ScrapedService[]>> {
|
|
131
|
+
let page: Page | null = null;
|
|
132
|
+
|
|
133
|
+
try {
|
|
134
|
+
page = await this.createPage();
|
|
135
|
+
await page.goto(this.config.baseUrl, { waitUntil: 'networkidle' });
|
|
136
|
+
|
|
137
|
+
// Wait for appointment types to load
|
|
138
|
+
await page.waitForSelector('.select-item, .appointment-type-item, [data-testid="appointment-type"]', {
|
|
139
|
+
timeout: 10000,
|
|
140
|
+
}).catch(() => {
|
|
141
|
+
// Some Acuity pages use different selectors
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
// Try multiple selector patterns for robustness
|
|
145
|
+
const services = await page.evaluate(() => {
|
|
146
|
+
const results: ScrapedService[] = [];
|
|
147
|
+
|
|
148
|
+
// Pattern 1: Standard select-item layout
|
|
149
|
+
const selectItems = document.querySelectorAll('.select-item');
|
|
150
|
+
selectItems.forEach((item) => {
|
|
151
|
+
const link = item.querySelector('a');
|
|
152
|
+
const nameEl = item.querySelector('.appointment-type-name, .type-name, h3');
|
|
153
|
+
const descEl = item.querySelector('.type-description, .description, p');
|
|
154
|
+
const durationEl = item.querySelector('.duration, .time-duration');
|
|
155
|
+
const priceEl = item.querySelector('.price, .cost');
|
|
156
|
+
|
|
157
|
+
if (nameEl && link) {
|
|
158
|
+
// Extract appointment type ID from href
|
|
159
|
+
const href = link.getAttribute('href') || '';
|
|
160
|
+
const idMatch = href.match(/appointmentType=(\d+)/);
|
|
161
|
+
const id = idMatch ? idMatch[1] : `generated-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
162
|
+
|
|
163
|
+
// Parse duration (e.g., "60 minutes" -> 60)
|
|
164
|
+
const durationText = durationEl?.textContent?.trim() || '';
|
|
165
|
+
const durationMatch = durationText.match(/(\d+)/);
|
|
166
|
+
const duration = durationMatch ? parseInt(durationMatch[1], 10) : 60;
|
|
167
|
+
|
|
168
|
+
// Parse price (e.g., "$150.00" -> 15000 cents)
|
|
169
|
+
const priceText = priceEl?.textContent?.trim() || '';
|
|
170
|
+
const priceMatch = priceText.match(/\$?([\d,.]+)/);
|
|
171
|
+
const price = priceMatch ? Math.round(parseFloat(priceMatch[1].replace(',', '')) * 100) : 0;
|
|
172
|
+
|
|
173
|
+
results.push({
|
|
174
|
+
id,
|
|
175
|
+
name: nameEl.textContent?.trim() || 'Unknown Service',
|
|
176
|
+
description: descEl?.textContent?.trim() || '',
|
|
177
|
+
duration,
|
|
178
|
+
price,
|
|
179
|
+
category: undefined,
|
|
180
|
+
href,
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
// Pattern 2: Category-based layout
|
|
186
|
+
if (results.length === 0) {
|
|
187
|
+
const categories = document.querySelectorAll('.category-group, .appointment-category');
|
|
188
|
+
categories.forEach((category) => {
|
|
189
|
+
const categoryName = category.querySelector('.category-name, h2')?.textContent?.trim();
|
|
190
|
+
const items = category.querySelectorAll('.appointment-type, .type-item');
|
|
191
|
+
|
|
192
|
+
items.forEach((item) => {
|
|
193
|
+
const link = item.querySelector('a');
|
|
194
|
+
const nameEl = item.querySelector('.name, .title');
|
|
195
|
+
const descEl = item.querySelector('.description');
|
|
196
|
+
const durationEl = item.querySelector('.duration');
|
|
197
|
+
const priceEl = item.querySelector('.price');
|
|
198
|
+
|
|
199
|
+
if (nameEl) {
|
|
200
|
+
const href = link?.getAttribute('href') || '';
|
|
201
|
+
const idMatch = href.match(/appointmentType=(\d+)/);
|
|
202
|
+
const id = idMatch ? idMatch[1] : `generated-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
203
|
+
|
|
204
|
+
const durationText = durationEl?.textContent?.trim() || '';
|
|
205
|
+
const durationMatch = durationText.match(/(\d+)/);
|
|
206
|
+
const duration = durationMatch ? parseInt(durationMatch[1], 10) : 60;
|
|
207
|
+
|
|
208
|
+
const priceText = priceEl?.textContent?.trim() || '';
|
|
209
|
+
const priceMatch = priceText.match(/\$?([\d,.]+)/);
|
|
210
|
+
const price = priceMatch ? Math.round(parseFloat(priceMatch[1].replace(',', '')) * 100) : 0;
|
|
211
|
+
|
|
212
|
+
results.push({
|
|
213
|
+
id,
|
|
214
|
+
name: nameEl.textContent?.trim() || 'Unknown Service',
|
|
215
|
+
description: descEl?.textContent?.trim() || '',
|
|
216
|
+
duration,
|
|
217
|
+
price,
|
|
218
|
+
category: categoryName,
|
|
219
|
+
href,
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
});
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
return results;
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
await page.close();
|
|
230
|
+
return E.right(services);
|
|
231
|
+
} catch (error) {
|
|
232
|
+
if (page) await page.close().catch(() => {});
|
|
233
|
+
|
|
234
|
+
if (error instanceof Error) {
|
|
235
|
+
if (error.message.includes('net::') || error.message.includes('timeout')) {
|
|
236
|
+
return E.left(
|
|
237
|
+
Errors.infrastructure('NETWORK', `Failed to load Acuity page: ${error.message}`, error)
|
|
238
|
+
);
|
|
239
|
+
}
|
|
240
|
+
return E.left(
|
|
241
|
+
Errors.acuity('SCRAPE_FAILED', `Failed to scrape services: ${error.message}`)
|
|
242
|
+
);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
return E.left(
|
|
246
|
+
Errors.acuity('SCRAPE_FAILED', 'Unknown error during scraping')
|
|
247
|
+
);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* Extract available dates for a specific service
|
|
253
|
+
*/
|
|
254
|
+
async scrapeAvailableDates(
|
|
255
|
+
serviceId: string,
|
|
256
|
+
month?: string
|
|
257
|
+
): Promise<E.Either<AcuityError | InfrastructureError, string[]>> {
|
|
258
|
+
let page: Page | null = null;
|
|
259
|
+
|
|
260
|
+
try {
|
|
261
|
+
page = await this.createPage();
|
|
262
|
+
|
|
263
|
+
// Navigate to service-specific page
|
|
264
|
+
const url = new URL(this.config.baseUrl);
|
|
265
|
+
url.searchParams.set('appointmentType', serviceId);
|
|
266
|
+
if (month) {
|
|
267
|
+
// Format: YYYY-MM
|
|
268
|
+
url.searchParams.set('month', month);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
await page.goto(url.toString(), { waitUntil: 'networkidle' });
|
|
272
|
+
|
|
273
|
+
// Wait for calendar to load
|
|
274
|
+
await page.waitForSelector('.scheduleday, .calendar-day, [data-date]', {
|
|
275
|
+
timeout: 10000,
|
|
276
|
+
}).catch(() => {});
|
|
277
|
+
|
|
278
|
+
const dates = await page.evaluate(() => {
|
|
279
|
+
const results: string[] = [];
|
|
280
|
+
|
|
281
|
+
// Pattern 1: scheduleday with activeday class
|
|
282
|
+
const activeDays = document.querySelectorAll('.scheduleday.activeday, .calendar-day.available');
|
|
283
|
+
activeDays.forEach((day) => {
|
|
284
|
+
const date = day.getAttribute('data-date');
|
|
285
|
+
if (date) {
|
|
286
|
+
results.push(date);
|
|
287
|
+
}
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
// Pattern 2: data-available attribute
|
|
291
|
+
if (results.length === 0) {
|
|
292
|
+
const availableDays = document.querySelectorAll('[data-available="true"], [data-has-slots="true"]');
|
|
293
|
+
availableDays.forEach((day) => {
|
|
294
|
+
const date = day.getAttribute('data-date');
|
|
295
|
+
if (date) {
|
|
296
|
+
results.push(date);
|
|
297
|
+
}
|
|
298
|
+
});
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
return results;
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
await page.close();
|
|
305
|
+
return E.right(dates);
|
|
306
|
+
} catch (error) {
|
|
307
|
+
if (page) await page.close().catch(() => {});
|
|
308
|
+
|
|
309
|
+
if (error instanceof Error) {
|
|
310
|
+
return E.left(
|
|
311
|
+
Errors.acuity('SCRAPE_FAILED', `Failed to scrape available dates: ${error.message}`)
|
|
312
|
+
);
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
return E.left(
|
|
316
|
+
Errors.acuity('SCRAPE_FAILED', 'Unknown error during date scraping')
|
|
317
|
+
);
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
/**
|
|
322
|
+
* Extract available time slots for a specific date
|
|
323
|
+
*/
|
|
324
|
+
async scrapeTimeSlots(
|
|
325
|
+
serviceId: string,
|
|
326
|
+
date: string
|
|
327
|
+
): Promise<E.Either<AcuityError | InfrastructureError, ScrapedTimeSlot[]>> {
|
|
328
|
+
let page: Page | null = null;
|
|
329
|
+
|
|
330
|
+
try {
|
|
331
|
+
page = await this.createPage();
|
|
332
|
+
|
|
333
|
+
// Navigate to service page
|
|
334
|
+
const url = new URL(this.config.baseUrl);
|
|
335
|
+
url.searchParams.set('appointmentType', serviceId);
|
|
336
|
+
|
|
337
|
+
await page.goto(url.toString(), { waitUntil: 'networkidle' });
|
|
338
|
+
|
|
339
|
+
// Wait for calendar and click the date
|
|
340
|
+
await page.waitForSelector('.scheduleday, .calendar-day', { timeout: 10000 }).catch(() => {});
|
|
341
|
+
|
|
342
|
+
// Click the specific date
|
|
343
|
+
const dateSelector = `[data-date="${date}"], .scheduleday[data-date="${date}"]`;
|
|
344
|
+
await page.click(dateSelector).catch(() => {
|
|
345
|
+
// Date might not be clickable or use different mechanism
|
|
346
|
+
});
|
|
347
|
+
|
|
348
|
+
// Wait for time slots to load
|
|
349
|
+
await page.waitForSelector('.time-selection, .time-slot, [data-time]', {
|
|
350
|
+
timeout: 10000,
|
|
351
|
+
}).catch(() => {});
|
|
352
|
+
|
|
353
|
+
const slots = await page.evaluate(() => {
|
|
354
|
+
const results: ScrapedTimeSlot[] = [];
|
|
355
|
+
|
|
356
|
+
// Pattern 1: time-selection buttons
|
|
357
|
+
const timeButtons = document.querySelectorAll('.time-selection button, .time-slot');
|
|
358
|
+
timeButtons.forEach((btn) => {
|
|
359
|
+
const timeText = btn.textContent?.trim() || '';
|
|
360
|
+
const datetime = btn.getAttribute('data-time') || btn.getAttribute('data-datetime') || '';
|
|
361
|
+
const isDisabled = btn.hasAttribute('disabled') || btn.classList.contains('disabled');
|
|
362
|
+
|
|
363
|
+
if (timeText || datetime) {
|
|
364
|
+
results.push({
|
|
365
|
+
time: timeText,
|
|
366
|
+
datetime,
|
|
367
|
+
available: !isDisabled,
|
|
368
|
+
});
|
|
369
|
+
}
|
|
370
|
+
});
|
|
371
|
+
|
|
372
|
+
// Pattern 2: list items with time data
|
|
373
|
+
if (results.length === 0) {
|
|
374
|
+
const timeItems = document.querySelectorAll('[data-time], .available-time');
|
|
375
|
+
timeItems.forEach((item) => {
|
|
376
|
+
const timeText = item.textContent?.trim() || '';
|
|
377
|
+
const datetime = item.getAttribute('data-time') || '';
|
|
378
|
+
|
|
379
|
+
results.push({
|
|
380
|
+
time: timeText,
|
|
381
|
+
datetime,
|
|
382
|
+
available: true,
|
|
383
|
+
});
|
|
384
|
+
});
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
return results;
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
await page.close();
|
|
391
|
+
return E.right(slots);
|
|
392
|
+
} catch (error) {
|
|
393
|
+
if (page) await page.close().catch(() => {});
|
|
394
|
+
|
|
395
|
+
if (error instanceof Error) {
|
|
396
|
+
return E.left(
|
|
397
|
+
Errors.acuity('SCRAPE_FAILED', `Failed to scrape time slots: ${error.message}`)
|
|
398
|
+
);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
return E.left(
|
|
402
|
+
Errors.acuity('SCRAPE_FAILED', 'Unknown error during time slot scraping')
|
|
403
|
+
);
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// =============================================================================
|
|
409
|
+
// TASK EITHER WRAPPERS
|
|
410
|
+
// =============================================================================
|
|
411
|
+
|
|
412
|
+
/**
|
|
413
|
+
* Create a scraper instance with TaskEither wrapper
|
|
414
|
+
*/
|
|
415
|
+
export const createScraperAdapter = (config: ScraperConfig) => {
|
|
416
|
+
const scraper = new AcuityScraper(config);
|
|
417
|
+
|
|
418
|
+
return {
|
|
419
|
+
/**
|
|
420
|
+
* Get all services
|
|
421
|
+
*/
|
|
422
|
+
getServices: (): TE.TaskEither<AcuityError | InfrastructureError, Service[]> =>
|
|
423
|
+
pipe(
|
|
424
|
+
TE.tryCatch(
|
|
425
|
+
() => scraper.scrapeServices(),
|
|
426
|
+
(error) =>
|
|
427
|
+
Errors.acuity('SCRAPE_FAILED', error instanceof Error ? error.message : 'Unknown error')
|
|
428
|
+
),
|
|
429
|
+
TE.flatMap((result) => TE.fromEither(result)),
|
|
430
|
+
TE.map((scraped) =>
|
|
431
|
+
scraped.map(
|
|
432
|
+
(s): Service => ({
|
|
433
|
+
id: s.id,
|
|
434
|
+
name: s.name,
|
|
435
|
+
description: s.description,
|
|
436
|
+
duration: s.duration,
|
|
437
|
+
price: s.price,
|
|
438
|
+
currency: 'USD',
|
|
439
|
+
category: s.category,
|
|
440
|
+
active: true,
|
|
441
|
+
})
|
|
442
|
+
)
|
|
443
|
+
)
|
|
444
|
+
),
|
|
445
|
+
|
|
446
|
+
/**
|
|
447
|
+
* Get available dates for a service
|
|
448
|
+
*/
|
|
449
|
+
getAvailableDates: (
|
|
450
|
+
serviceId: string,
|
|
451
|
+
month?: string
|
|
452
|
+
): TE.TaskEither<AcuityError | InfrastructureError, string[]> =>
|
|
453
|
+
pipe(
|
|
454
|
+
TE.tryCatch(
|
|
455
|
+
() => scraper.scrapeAvailableDates(serviceId, month),
|
|
456
|
+
(error) =>
|
|
457
|
+
Errors.acuity('SCRAPE_FAILED', error instanceof Error ? error.message : 'Unknown error')
|
|
458
|
+
),
|
|
459
|
+
TE.flatMap((result) => TE.fromEither(result))
|
|
460
|
+
),
|
|
461
|
+
|
|
462
|
+
/**
|
|
463
|
+
* Get available time slots for a date
|
|
464
|
+
*/
|
|
465
|
+
getTimeSlots: (
|
|
466
|
+
serviceId: string,
|
|
467
|
+
date: string
|
|
468
|
+
): TE.TaskEither<AcuityError | InfrastructureError, TimeSlot[]> =>
|
|
469
|
+
pipe(
|
|
470
|
+
TE.tryCatch(
|
|
471
|
+
() => scraper.scrapeTimeSlots(serviceId, date),
|
|
472
|
+
(error) =>
|
|
473
|
+
Errors.acuity('SCRAPE_FAILED', error instanceof Error ? error.message : 'Unknown error')
|
|
474
|
+
),
|
|
475
|
+
TE.flatMap((result) => TE.fromEither(result)),
|
|
476
|
+
TE.map((slots) =>
|
|
477
|
+
slots
|
|
478
|
+
.filter((s) => s.available)
|
|
479
|
+
.map(
|
|
480
|
+
(s): TimeSlot => ({
|
|
481
|
+
datetime: s.datetime || `${date}T${s.time}`,
|
|
482
|
+
available: s.available,
|
|
483
|
+
})
|
|
484
|
+
)
|
|
485
|
+
)
|
|
486
|
+
),
|
|
487
|
+
|
|
488
|
+
/**
|
|
489
|
+
* Initialize browser
|
|
490
|
+
*/
|
|
491
|
+
init: () => scraper.init(),
|
|
492
|
+
|
|
493
|
+
/**
|
|
494
|
+
* Close browser
|
|
495
|
+
*/
|
|
496
|
+
close: () => scraper.close(),
|
|
497
|
+
|
|
498
|
+
/**
|
|
499
|
+
* Get the underlying scraper instance
|
|
500
|
+
*/
|
|
501
|
+
getScraper: () => scraper,
|
|
502
|
+
};
|
|
503
|
+
};
|
|
504
|
+
|
|
505
|
+
// =============================================================================
|
|
506
|
+
// CONVENIENCE FUNCTIONS
|
|
507
|
+
// =============================================================================
|
|
508
|
+
|
|
509
|
+
/**
|
|
510
|
+
* One-shot scrape of all services (opens and closes browser)
|
|
511
|
+
*/
|
|
512
|
+
export const scrapeServicesOnce = async (
|
|
513
|
+
baseUrl: string
|
|
514
|
+
): Promise<E.Either<AcuityError | InfrastructureError, Service[]>> => {
|
|
515
|
+
const adapter = createScraperAdapter({ baseUrl });
|
|
516
|
+
|
|
517
|
+
try {
|
|
518
|
+
await adapter.init();
|
|
519
|
+
const result = await adapter.getServices()();
|
|
520
|
+
return result;
|
|
521
|
+
} finally {
|
|
522
|
+
await adapter.close();
|
|
523
|
+
}
|
|
524
|
+
};
|
|
525
|
+
|
|
526
|
+
/**
|
|
527
|
+
* One-shot scrape of availability (opens and closes browser)
|
|
528
|
+
*/
|
|
529
|
+
export const scrapeAvailabilityOnce = async (
|
|
530
|
+
baseUrl: string,
|
|
531
|
+
serviceId: string,
|
|
532
|
+
date: string
|
|
533
|
+
): Promise<E.Either<AcuityError | InfrastructureError, TimeSlot[]>> => {
|
|
534
|
+
const adapter = createScraperAdapter({ baseUrl });
|
|
535
|
+
|
|
536
|
+
try {
|
|
537
|
+
await adapter.init();
|
|
538
|
+
const result = await adapter.getTimeSlots(serviceId, date)();
|
|
539
|
+
return result;
|
|
540
|
+
} finally {
|
|
541
|
+
await adapter.close();
|
|
542
|
+
}
|
|
543
|
+
};
|