ozon-grabber 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +140 -0
  2. package/dist/index.js +842 -0
  3. package/package.json +46 -0
package/README.md ADDED
@@ -0,0 +1,140 @@
1
+ # Ozon Orders History CLI
2
+
3
+ A CLI utility that exports Ozon order history from a browser session where the user is already logged in.
4
+
5
+ The tool opens order pages by template:
6
+ `https://www.ozon.ru/my/orderdetails/?order=<userId>-<orderNumber>&selectedTab=archive`
7
+ and iterates order numbers in ascending order until it hits a missing or in-progress order.
8
+
9
+ ## Installation & Run
10
+
11
+ ### Using npx (recommended)
12
+
13
+ You can run the tool without local installation:
14
+ ```bash
15
+ npx ozon-grabber start --user-id <userId> --start-order 0001
16
+ ```
17
+
18
+ ### Local installation
19
+
20
+ ```bash
21
+ cd cli
22
+ npm install
23
+ npm run build
24
+ node dist/index.js start --user-id <userId> --start-order 0001
25
+ ```
26
+
27
+ ## Requirements
28
+
29
+ - Node.js 20+
30
+ - Chrome/Chromium running with the remote debugging port `9222` enabled
31
+ - The user is already authenticated in Ozon in that browser
32
+
33
+ Example Chrome launch (macOS):
34
+ ```bash
35
+ /Applications/Google\ Chrome\ Beta.app/Contents/MacOS/Google\ Chrome\ Beta \
36
+ --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-beta-mcp
37
+
38
+ ```
39
+
40
+ ## Publication to npm
41
+
42
+ 1. **Login to npm** (if not already):
43
+ ```bash
44
+ npm login
45
+ ```
46
+ 2. **Verify build**:
47
+ ```bash
48
+ npm run build
49
+ ```
50
+ 3. **Publish**:
51
+ ```bash
52
+ npm publish --access public
53
+ ```
54
+
55
+ ## Run examples
56
+
57
+ ```bash
58
+ # Start from order 0001
59
+ npx ozon-grabber start --user-id 30588125 --start-order 0001
60
+ ```
61
+
62
+ Or with a limited number of orders:
63
+ ```bash
64
+ npx ozon-grabber start --user-id 30588125 --start-order 0001 --max-orders 5
65
+ ```
66
+
67
+ Enable backend upload and start from the backend-provided order id:
68
+ ```bash
69
+ npx ozon-grabber start --user-id 30588125 --backend
70
+ ```
71
+ When backend upload is enabled, each order is submitted immediately after parsing (fail-fast on upload errors).
72
+
73
+ Fetch the next order id from the backend:
74
+ ```bash
75
+ npx ozon-grabber backend-next
76
+ ```
77
+ This command uses the backend configuration options listed below.
78
+
79
+ ## Parameters
80
+
81
+ Required:
82
+ - `--user-id <string>`: user identifier (e.g., `30588125`)
83
+ Optional:
84
+ - `--start-order <string>`: starting order number (e.g., `0001`); required when backend is disabled, and must be exactly four digits when `--backend` is enabled
85
+ - `--backend`: enable backend upload and backend-based start order resolution
86
+
87
+ Options:
88
+ - `--verbose`: verbose navigation and parsing logs
89
+ - `--max-orders <n>`: limit the number of orders to scan (debugging)
90
+ - `--page-load-timeout-ms <ms>`: how long to wait for order page widgets before declaring a missing order (default: `20000`)
91
+ - `--output <path>`: output file path; if the extension is `.csv`, CSV is used, otherwise JSON. When omitted, the JSON result is printed to stdout
92
+ - `--backend-url <url>`: backend base URL (env: `BACKEND_URL`, default: `http://localhost:3015`)
93
+ - `--backend-api-key <token>`: backend API key (env: `BACKEND_API_KEY`, default: `local-dev`)
94
+
95
+ ## Output format (JSON)
96
+
97
+ ```json
98
+ {
99
+ "userId": "30588125",
100
+ "startOrder": "0001",
101
+ "scannedOrders": 3,
102
+ "stopReason": "in-progress",
103
+ "stopOrderNumber": "0004",
104
+ "orders": [
105
+ {
106
+ "orderId": "30588125-0001",
107
+ "orderNumber": "0001",
108
+ "orderDate": "2026-01-09",
109
+ "items": [
110
+ {
111
+ "title": "...",
112
+ "price": "...",
113
+ "imageUrl": "..."
114
+ }
115
+ ]
116
+ }
117
+ ]
118
+ }
119
+ ```
120
+
121
+ `stopReason` can be:
122
+ - `in-progress` - shipment widget text indicates the order is still in progress
123
+ - `missing` - neither the shipment widget nor the order title appeared after the load wait
124
+ - `max-orders` - `--max-orders` limit reached
125
+
126
+ ## How it works
127
+
128
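+ - Connects to an already-open browser via `chrome-devtools-mcp`, which the CLI launches with `npx chrome-devtools-mcp@latest` and points at `http://127.0.0.1:9222`.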
129
+ - Reuses a single tab and navigates to each order URL.
130
+ - Parses items inside `data-widget="shipmentWidget"`.
131
+ - Reads the order header from `data-widget="titleWithTimer"` and parses `orderDate` in `YYYY-MM-DD` (UTC).
132
+ - Waits for `shipmentWidget` or `titleWithTimer` before declaring a missing order (logs show wait duration in verbose mode).
133
+ - Stops at the first in-progress or missing order.
134
+ - When backend upload is enabled, submits items per order using `orderNumber` as the backend `orderId` plus the `userId` field.
135
+ - The backend also downloads each item's `imageUrl`, stores it under `backend/photos/`, and replaces the stored value with that relative path; use `POST /photos/download` to refresh older rows that still reference remote URLs.
136
+
137
+ ## Common issues
138
+
139
+ - No data / empty results: make sure you're logged in to Ozon in the same browser.
140
+ - DevTools connection error: ensure Chrome is started with `--remote-debugging-port=9222`.
package/dist/index.js ADDED
@@ -0,0 +1,842 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/index.ts
4
+ import fs from "fs/promises";
5
+ import path from "path";
6
+ import yargs from "yargs";
7
+ import { hideBin } from "yargs/helpers";
8
+
9
+ // src/backend-client.ts
10
/**
 * Backend base URL used when neither `--backend-url` nor `BACKEND_URL` is given.
 * Points at the local machine, matching the default advertised by the
 * `--backend-url` help text, so order data and the API key are never sent to
 * an arbitrary LAN host by default.
 */
var DEFAULT_BACKEND_URL = "http://localhost:3015";
/** API key used when neither `--backend-api-key` nor `BACKEND_API_KEY` is given. */
var DEFAULT_API_KEY = "local-dev";
/** Returns true for any non-null value whose `typeof` is "object" (arrays included). */
var isRecord = (value) => value !== null && typeof value === "object";
13
/**
 * Validates a backend base URL and returns it as `origin + path` without
 * trailing slashes.
 *
 * @param {string} rawUrl - URL as supplied by the user or environment.
 * @returns {string} The normalized base URL.
 * @throws {Error} If the URL is empty, unparsable, not http(s), or carries a
 *   query string or hash fragment.
 */
var normalizeBackendUrl = (rawUrl) => {
  const candidate = rawUrl.trim();
  if (candidate === "") {
    throw new Error("Backend URL must not be empty.");
  }

  let url;
  try {
    url = new URL(candidate);
  } catch {
    throw new Error(`Invalid backend URL: ${candidate}.`);
  }

  const isHttp = ["http:", "https:"].includes(url.protocol);
  if (!isHttp) {
    throw new Error(`Backend URL must use http or https: ${candidate}.`);
  }

  const hasQueryOrHash = url.search !== "" || url.hash !== "";
  if (hasQueryOrHash) {
    throw new Error(`Backend URL must not include query or hash: ${candidate}.`);
  }

  // A bare "/" path collapses to "", so the result is just the origin.
  const basePath = url.pathname.replace(/\/+$/, "");
  return `${url.origin}${basePath}`;
};
33
/**
 * Joins a base URL and a path with exactly one slash at the seam.
 * Only a single trailing slash is removed from the base.
 *
 * @param {string} baseUrl - Backend base URL.
 * @param {string} path2 - Endpoint path, with or without a leading slash.
 * @returns {string} The combined URL.
 */
var buildUrl = (baseUrl, path2) => {
  const base = baseUrl.replace(/\/$/, "");
  const route = path2.charAt(0) === "/" ? path2 : `/${path2}`;
  return base + route;
};
38
/**
 * Parses a response body as JSON.
 *
 * @param {string} text - Raw response body.
 * @param {string} context - Prefix for error messages (e.g. "GET /orders/next").
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} If the body is blank or is not valid JSON.
 */
var parseJsonPayload = (text, context) => {
  const isBlank = text.trim() === "";
  if (isBlank) {
    throw new Error(`${context} returned an empty response body.`);
  }

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error(`${context} returned invalid JSON.`);
  }
  return parsed;
};
48
/**
 * Sends an authenticated request to the backend and returns the parsed JSON body.
 *
 * @param {{ baseUrl: string, apiKey: string }} config - Backend connection settings.
 * @param {string} path2 - Endpoint path, joined to `config.baseUrl` via `buildUrl`.
 * @param {{ method?: string, body?: unknown }} [options] - HTTP method (default
 *   "GET") and an optional body that is serialized as JSON.
 * @returns {Promise<unknown>} The parsed JSON response.
 * @throws {Error} If the backend is unreachable (original error kept as `cause`),
 *   responds with a non-2xx status, or returns an empty or invalid JSON body.
 */
var requestJson = async (config, path2, options) => {
  const method = options?.method ?? "GET";
  const url = buildUrl(config.baseUrl, path2);
  const headers = {
    "x-api-key": config.apiKey
  };
  let body;
  if (options?.body !== void 0) {
    headers["content-type"] = "application/json";
    body = JSON.stringify(options.body);
  }

  let response;
  try {
    response = await fetch(url, { method, headers, body });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Node's fetch reports network failures as a generic "fetch failed" and keeps
    // the actionable reason (ECONNREFUSED, ENOTFOUND, ...) in `error.cause`.
    const reason = error instanceof Error && error.cause instanceof Error ? ` (${error.cause.message})` : "";
    throw new Error(`Failed to reach backend at ${url}: ${message}${reason}`, { cause: error });
  }

  const responseText = await response.text();
  if (!response.ok) {
    const detail = responseText.trim().length > 0 ? ` Response: ${responseText}` : "";
    throw new Error(
      `Backend request failed (${response.status} ${response.statusText}).${detail}`
    );
  }
  return parseJsonPayload(responseText, `${method} ${path2}`);
};
75
/**
 * Extracts and validates `nextOrderId` from a backend payload.
 *
 * @param {unknown} payload - Parsed backend response.
 * @returns {string} The trimmed, four-digit order id.
 * @throws {Error} If the field is missing, not a string, or not exactly four digits.
 */
var parseNextOrderId = (payload) => {
  const raw = isRecord(payload) ? payload.nextOrderId : void 0;
  if (typeof raw !== "string") {
    throw new Error("Backend response missing nextOrderId.");
  }

  const nextOrderId = raw.trim();
  const isFourDigits = /^\d{4}$/.test(nextOrderId);
  if (!isFourDigits) {
    throw new Error(`Backend nextOrderId is invalid: ${raw}.`);
  }
  return nextOrderId;
};
85
/**
 * Resolves backend settings with precedence: CLI argument, then environment
 * variable, then built-in default.
 *
 * @param {{ backendUrl?: string, backendApiKey?: string }} args - CLI arguments.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {{ baseUrl: string, apiKey: string }} Normalized backend configuration.
 * @throws {Error} If the resolved URL fails `normalizeBackendUrl` validation.
 */
var resolveBackendConfig = (args, env = process.env) => {
  const { backendUrl, backendApiKey } = args;
  const baseUrl = normalizeBackendUrl(backendUrl ?? env.BACKEND_URL ?? DEFAULT_BACKEND_URL);
  const apiKey = backendApiKey ?? env.BACKEND_API_KEY ?? DEFAULT_API_KEY;
  return { baseUrl, apiKey };
};
93
/**
 * Fetches `/orders/next` from the backend and returns the validated order id.
 *
 * @param {{ baseUrl: string, apiKey: string }} config - Backend connection settings.
 * @returns {Promise<string>} The next order id as validated by `parseNextOrderId`.
 */
var getNextOrderId = async (config) => parseNextOrderId(await requestJson(config, "/orders/next"));
97
/**
 * POSTs order items to the backend `/items` endpoint.
 * An empty list resolves to `[]` without making a request.
 *
 * @param {{ baseUrl: string, apiKey: string }} config - Backend connection settings.
 * @param {Array<object>} items - Items to upload.
 * @returns {Promise<unknown>} The backend's parsed response, or `[]` for no items.
 */
var submitItems = async (config, items) => {
  const hasItems = items.length > 0;
  return hasItems ? requestJson(config, "/items", { method: "POST", body: items }) : [];
};
106
+
107
+ // src/mcp.ts
108
+ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
109
+ import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
110
/** Address of the Chrome remote-debugging endpoint used when none is supplied. */
var DEFAULT_BROWSER_URL = "http://127.0.0.1:9222";
/** Module-level flag: set once `openPage` has used the `new_page` tool. */
var hasOpenedPage = false;
/**
 * Builds the default argument list for launching the chrome-devtools-mcp server.
 *
 * @param {string} browserUrl - Remote-debugging URL of the target browser.
 * @returns {string[]} Package specifier followed by the `--browser-url` flag.
 */
var defaultChromeDevtoolsArgs = (browserUrl) => {
  const browserFlag = `--browser-url=${browserUrl}`;
  return ["chrome-devtools-mcp@latest", browserFlag];
};
116
/**
 * Starts the chrome-devtools-mcp server over stdio, connects an MCP client to
 * it, and lists the tools it exposes.
 *
 * @param {object} [options] - Optional overrides.
 * @param {string} [options.browserUrl] - Remote-debugging URL (default `DEFAULT_BROWSER_URL`).
 * @param {string} [options.command] - Launcher command (default "npx").
 * @param {string[]} [options.args] - Launcher arguments (default from `defaultChromeDevtoolsArgs`).
 * @param {string} [options.clientName] - MCP client name (default "ozon-grabber").
 * @param {string} [options.clientVersion] - MCP client version (default "0.1.0").
 * @returns {Promise<{ client: object, tools: Array<object> }>} Connected client and its tool list.
 */
var connectChromeDevtoolsClient = async (options = {}) => {
  const browserUrl = options.browserUrl ?? DEFAULT_BROWSER_URL;
  const transport = new StdioClientTransport({
    command: options.command ?? "npx",
    args: options.args ?? defaultChromeDevtoolsArgs(browserUrl)
  });

  const clientInfo = {
    name: options.clientName ?? "ozon-grabber",
    version: options.clientVersion ?? "0.1.0"
  };
  const client = new Client(clientInfo, { capabilities: {} });
  await client.connect(transport);

  // listTools may yield either a bare array or an object wrapping it in `tools`.
  const listing = await client.listTools();
  const tools = Array.isArray(listing) ? listing : listing.tools ?? [];
  return { client, tools };
};
133
/**
 * Finds the actual name of a tool, tolerating namespace prefixes.
 * An exact match wins; otherwise the first tool whose name ends with
 * `.target`, `/target`, `:target` or `__target` is used.
 *
 * @param {Array<{ name: string }>} tools - Tools reported by the MCP server.
 * @param {string} target - Unprefixed tool name to look for.
 * @returns {string} The matching tool name.
 * @throws {Error} If no tool matches; the message lists all available names, sorted.
 */
var resolveToolName = (tools, target) => {
  const names = tools.map((tool) => tool.name);
  if (names.includes(target)) {
    return target;
  }

  const separators = [".", "/", ":", "__"];
  const prefixed = names.find((name) => separators.some((separator) => name.endsWith(`${separator}${target}`)));
  if (prefixed) {
    return prefixed;
  }

  const available = [...names].sort().join(", ");
  throw new Error(`Tool not found: ${target}. Available tools: ${available}`);
};
151
/**
 * Loads a URL in the browser. The first call uses the `new_page` tool when the
 * server offers one; every later call (or every call, if `new_page` is absent)
 * uses `navigate_page`.
 *
 * @param {{ callTool: Function }} client - Connected MCP client.
 * @param {Array<{ name: string }>} tools - Tools reported by the MCP server.
 * @param {string} url - Page to open.
 * @returns {Promise<unknown>} The tool call result.
 */
var openPage = async (client, tools, url) => {
  const supportsNewPage = tools.some((tool) => tool.name.includes("new_page"));
  const newPageTool = supportsNewPage ? resolveToolName(tools, "new_page") : void 0;
  const shouldOpenTab = Boolean(newPageTool) && !hasOpenedPage;

  if (!shouldOpenTab) {
    const navigateTool = resolveToolName(tools, "navigate_page");
    return await client.callTool({
      name: navigateTool,
      arguments: { type: "url", url }
    });
  }

  // The flag is flipped before the call, so a failed first attempt is not retried with new_page.
  hasOpenedPage = true;
  return await client.callTool({
    name: newPageTool,
    arguments: { url }
  });
};
166
+
167
+ // src/order-items.ts
168
/**
 * Extracts and parses the first fenced json code block found in `text`.
 * Falls back to returning `text` unchanged when there is no such block, the
 * block is empty, or its content is not valid JSON.
 *
 * @param {string} text - Tool output text.
 * @returns {unknown} The parsed JSON value, or the original text.
 */
var parseJsonFromText = (text) => {
  const fenced = text.match(/```json\n([\s\S]*?)\n```/);
  const jsonText = fenced ? fenced[1] : void 0;
  if (!jsonText) {
    return text;
  }

  try {
    return JSON.parse(jsonText);
  } catch {
    return text;
  }
};
183
/** Returns true for any non-null value whose `typeof` is "object" (arrays included). */
var isRecord2 = (value) => typeof value === "object" && value !== null;
/** Returns true when `value` is an object carrying a string `text` property. */
var isTextBlock = (value) => {
  if (!isRecord2(value)) {
    return false;
  }
  return typeof value.text === "string";
};
185
/**
 * Converts a tool result's `content` into a usable value.
 * For an array, the non-empty `text` fields are joined with newlines and passed
 * to `parseJsonFromText`; an array without text, or any non-array, is returned as is.
 *
 * @param {unknown} content - The `content` field of a tool call result.
 * @returns {unknown} Parsed JSON, raw text, or the original content.
 */
var unwrapToolContent = (content) => {
  if (!Array.isArray(content)) {
    return content;
  }

  const pieces = [];
  content.forEach((entry) => {
    if (isTextBlock(entry) && entry.text.length > 0) {
      pieces.push(entry.text);
    }
  });

  const text = pieces.join("\n");
  return text.length === 0 ? content : parseJsonFromText(text);
};
195
/**
 * Runs a script in the currently open order page and returns the items it finds
 * inside the `shipmentWidget` blocks.
 *
 * The in-page script text is a runtime string and is reproduced verbatim, which
 * is why it is left unformatted.
 *
 * @param {{ callTool: Function }} client - Connected MCP client.
 * @param {Array<{ name: string }>} tools - Tools reported by the MCP server.
 * @returns {Promise<Array<{ title: string, price: string | null, imageUrl: string | undefined }>>}
 *   Items with a non-empty title; an empty array if the script result is not an array.
 */
var extractOrderItems = async (client, tools) => {
  const evaluateTool = resolveToolName(tools, "evaluate_script");
  const response = await client.callTool({
    name: evaluateTool,
    arguments: {
      function: `async () => {
const normalize = (value) => String(value ?? '').replace(/\\s+/g, ' ').trim();
const textOf = (el) => normalize(el.textContent);
const isPriceText = (text) => /\u20BD|\\b\u0440\u0443\u0431\\.?\\b/i.test(text);
const isShowMoreText = (text) => /\u041F\u043E\u043A\u0430\u0437\u0430\u0442\u044C\\s+\u0435\u0449[\u0435\u0451]/i.test(text);

const shipments = Array.from(document.querySelectorAll('[data-widget="shipmentWidget"]'));
if (shipments.length === 0) return [];

const clickShowMore = async () => {
let clicked = false;
shipments.forEach((shipment) => {
const buttons = Array.from(shipment.querySelectorAll('button'))
.filter((btn) => isShowMoreText(textOf(btn)));
buttons.forEach((btn) => {
btn.scrollIntoView({ block: 'center', inline: 'center' });
btn.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true, view: window }));
clicked = true;
});
});
if (clicked) {
await new Promise((resolve) => setTimeout(resolve, 350));
}
return clicked;
};

for (let i = 0; i < 4; i += 1) {
const clicked = await clickShowMore();
if (!clicked) break;
}

const findItemRoot = (titleEl, shipment) => {
let current = titleEl;
let depth = 0;
while (current && current !== shipment && depth < 8) {
const text = textOf(current);
const hasPrice = Array.from(current.querySelectorAll('span'))
.some((span) => isPriceText(textOf(span)));
const hasImage = Boolean(current.querySelector('img'));
if (hasPrice && hasImage && text.length > 0) {
return current;
}
current = current.parentElement;
depth += 1;
}
return titleEl.closest('div') || titleEl;
};

const pickPrice = (root) => {
const spans = Array.from(root.querySelectorAll('span'));
const match = spans.map((span) => textOf(span)).find((text) => isPriceText(text));
return match || null;
};

const pickImageUrl = (root) => {
const img = root.querySelector('img');
if (!img) return null;
return img.currentSrc || img.getAttribute('src');
};

const items = [];
shipments.forEach((shipment) => {
const titleNodes = Array.from(shipment.querySelectorAll('span.tsCompact500Medium'));
titleNodes.forEach((titleNode) => {
const title = textOf(titleNode);
if (!title) return;

const root = findItemRoot(titleNode, shipment);
items.push({
title,
price: pickPrice(root),
imageUrl: pickImageUrl(root)
});
});
});

const seen = new Set();
return items.filter((item) => {
const key = [item.title, item.price || '', item.imageUrl || ''].join('::');
if (seen.has(key)) return false;
seen.add(key);
return true;
});
}`
    }
  });

  const payload = unwrapToolContent(response.content);
  if (!Array.isArray(payload)) {
    return [];
  }

  // Keep only object entries with a non-empty string title; coerce the other fields.
  const items = [];
  for (const entry of payload) {
    if (!isRecord2(entry)) {
      continue;
    }
    const { title, price, imageUrl } = entry;
    if (typeof title !== "string" || title.length === 0) {
      continue;
    }
    items.push({
      title,
      price: typeof price === "string" ? price : null,
      imageUrl: typeof imageUrl === "string" ? imageUrl : void 0
    });
  }
  return items;
};
301
+
302
+ // src/order-loop.ts
303
/** Order details page; the order id is appended as the `order` query parameter. */
var DEFAULT_ORDER_URL = "https://www.ozon.ru/my/orderdetails/";
/** Default wait (ms) for the first order page widgets to appear. */
var DEFAULT_PAGE_LOAD_TIMEOUT_MS = 20000;
/** Default pause (ms) before re-checking a page that showed no shipment widget. */
var DEFAULT_PAGE_LOAD_RETRY_DELAY_MS = 2000;
/** Interval (ms) between widget presence polls. */
var DEFAULT_WIDGET_POLL_INTERVAL_MS = 500;
/** Upper bound (ms) for the follow-up wait on the shipment widget. */
var DEFAULT_SHIPMENT_WIDGET_TIMEOUT_MS = 8000;
/** Upper bound (ms) for the follow-up wait on the title widget. */
var DEFAULT_TITLE_WIDGET_TIMEOUT_MS = 8000;
/** Collapses every whitespace run to a single space and trims both ends. */
var normalizeText = (value) => {
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
};
310
/**
 * Parses the first fenced json code block in `text`; returns `text` itself when
 * no block is present, the block is empty, or parsing fails.
 *
 * @param {string} text - Tool output text.
 * @returns {unknown} The parsed JSON value, or the original text.
 */
var parseJsonFromText2 = (text) => {
  const found = text.match(/```json\n([\s\S]*?)\n```/);
  if (found === null) {
    return text;
  }

  const [, jsonText] = found;
  let parsed = text;
  if (jsonText) {
    try {
      parsed = JSON.parse(jsonText);
    } catch {
      parsed = text;
    }
  }
  return parsed;
};
325
/** Returns true for any non-null value whose `typeof` is "object" (arrays included). */
var isRecord3 = (value) => typeof value === "object" && value !== null;
/** Returns true when `value` is an object carrying a string `text` property. */
var isTextBlock2 = (value) => (isRecord3(value) ? typeof value.text === "string" : false);
327
/**
 * Converts a tool result's `content` into a usable value: for an array, joins
 * the non-empty `text` fields with newlines and hands them to
 * `parseJsonFromText2`. Arrays without text and non-arrays are returned unchanged.
 *
 * @param {unknown} content - The `content` field of a tool call result.
 * @returns {unknown} Parsed JSON, raw text, or the original content.
 */
var unwrapToolContent2 = (content) => {
  if (!Array.isArray(content)) {
    return content;
  }

  const text = content
    .reduce((parts, entry) => {
      if (isTextBlock2(entry) && entry.text !== "") {
        parts.push(entry.text);
      }
      return parts;
    }, [])
    .join("\n");

  return text === "" ? content : parseJsonFromText2(text);
};
337
/** Russian month names in the genitive case (as used in dates) mapped to month numbers 1-12. */
var RU_MONTHS = {
  "\u044F\u043D\u0432\u0430\u0440\u044F": 1, // января
  "\u0444\u0435\u0432\u0440\u0430\u043B\u044F": 2, // февраля
  "\u043C\u0430\u0440\u0442\u0430": 3, // марта
  "\u0430\u043F\u0440\u0435\u043B\u044F": 4, // апреля
  "\u043C\u0430\u044F": 5, // мая
  "\u0438\u044E\u043D\u044F": 6, // июня
  "\u0438\u044E\u043B\u044F": 7, // июля
  "\u0430\u0432\u0433\u0443\u0441\u0442\u0430": 8, // августа
  "\u0441\u0435\u043D\u0442\u044F\u0431\u0440\u044F": 9, // сентября
  "\u043E\u043A\u0442\u044F\u0431\u0440\u044F": 10, // октября
  "\u043D\u043E\u044F\u0431\u0440\u044F": 11, // ноября
  "\u0434\u0435\u043A\u0430\u0431\u0440\u044F": 12 // декабря
};
351
/**
 * Formats a calendar date as `YYYY-MM-DD`, rejecting dates that do not exist.
 *
 * @param {number} year - Full year.
 * @param {number} month - Month number, 1-12.
 * @param {number} day - Day of month.
 * @returns {string} The ISO date string.
 * @throws {Error} If the components do not survive a round trip through a UTC
 *   `Date` (e.g. 30 February).
 */
var formatIsoDate = (year, month, day) => {
  const date = new Date(Date.UTC(year, month - 1, day));
  const roundTrips =
    date.getUTCFullYear() === year &&
    date.getUTCMonth() + 1 === month &&
    date.getUTCDate() === day;
  if (!roundTrips) {
    throw new Error(`Invalid order date: ${year}-${month}-${day}.`);
  }
  return date.toISOString().slice(0, 10);
};
358
/**
 * Extracts the order date from an order page title.
 *
 * Two title shapes are recognised (case-insensitively):
 *   - "заказ от DD.MM.YYYY" - the year is taken from the title;
 *   - "заказ от D <month name>" - no year in the title, so it is inferred from `now`.
 *
 * For the year-less form the current UTC year is used, unless that would put
 * the order more than one day in the future - an order cannot be placed in the
 * future, so it must belong to the previous year (e.g. a "25 декабря" order
 * scanned in January). The one-day tolerance keeps "today" correct for users
 * whose local date is already ahead of the UTC date.
 *
 * @param {string} titleText - Raw title text from the page.
 * @param {Date} [now] - Reference time used to infer a missing year.
 * @returns {string} The order date as `YYYY-MM-DD`.
 * @throws {Error} If no date is found, the month name is unknown, or the date
 *   does not exist.
 */
var parseOrderDateFromTitle = (titleText, now = /* @__PURE__ */ new Date()) => {
  const normalized = normalizeText(titleText);

  const explicitMatch = normalized.match(/заказ от\s*(\d{1,2})\.(\d{1,2})\.(\d{4})/i);
  if (explicitMatch) {
    const [, dayText, monthText, yearText] = explicitMatch;
    return formatIsoDate(
      Number.parseInt(yearText, 10),
      Number.parseInt(monthText, 10),
      Number.parseInt(dayText, 10)
    );
  }

  // Month names are looked up in lower case with "ё" folded to "е".
  const textLower = normalized.toLowerCase().replace(/ё/g, "\u0435");
  const monthMatch = textLower.match(/заказ от\s*(\d{1,2})\s+([а-я]+)/i);
  if (!monthMatch) {
    throw new Error(`Failed to parse order date from title: ${titleText}.`);
  }

  const [, dayText, monthName] = monthMatch;
  const day = Number.parseInt(dayText, 10);
  const month = RU_MONTHS[monthName];
  if (!month) {
    throw new Error(`Unknown month name in order title: ${monthName}.`);
  }

  const ONE_DAY_MS = 24 * 60 * 60 * 1000;
  const currentYear = now.getUTCFullYear();
  const candidateTime = Date.UTC(currentYear, month - 1, day);
  const isInFuture = candidateTime - now.getTime() > ONE_DAY_MS;
  return formatIsoDate(isInFuture ? currentYear - 1 : currentYear, month, day);
};
391
/**
 * Parses the starting order number while remembering its zero-padded width.
 *
 * @param {string} startOrder - Order number such as "0008".
 * @returns {{ width: number, numeric: number }} Character count of the trimmed
 *   input and its numeric value.
 * @throws {Error} If the trimmed input is not made of digits only.
 */
var parseStartOrder = (startOrder) => {
  const digits = startOrder.trim();
  const isDigitsOnly = /^\d+$/.test(digits);
  if (!isDigitsOnly) {
    throw new Error(`Invalid start order: ${startOrder}. Expected digits only.`);
  }

  const numeric = Number.parseInt(digits, 10);
  if (Number.isNaN(numeric)) {
    throw new Error(`Invalid start order: ${startOrder}.`);
  }
  return { width: digits.length, numeric };
};
402
/** Renders an order number left-padded with zeros to at least `width` characters. */
var formatOrderNumber = (orderNumber, width) => {
  const digits = String(orderNumber);
  return digits.padStart(width, "0");
};
/** Builds the archive-tab order details URL for a `<userId>-<orderNumber>` id. */
var buildOrderUrl = (orderId) => {
  const encodedId = encodeURIComponent(orderId);
  return `${DEFAULT_ORDER_URL}?order=${encodedId}&selectedTab=archive`;
};
404
/**
 * Lower-case phrases that are matched against shipment widget text to detect
 * an order that is still in progress.
 */
var IN_PROGRESS_MARKERS = [
  "\u043C\u043E\u0436\u043D\u043E \u0437\u0430\u0431\u0438\u0440\u0430\u0442\u044C", // можно забирать
  "\u043F\u0435\u0440\u0435\u0434\u0430\u0451\u0442\u0441\u044F \u0432 \u0434\u043E\u0441\u0442\u0430\u0432\u043A\u0443", // передаётся в доставку
  "\u043F\u0435\u0440\u0435\u0434\u0430\u0435\u0442\u0441\u044F \u0432 \u0434\u043E\u0441\u0442\u0430\u0432\u043A\u0443", // передается в доставку
  "\u0432 \u0441\u043B\u0443\u0436\u0431\u0435 \u0434\u043E\u0441\u0442\u0430\u0432\u043A\u0438", // в службе доставки
  "\u0432 \u0441\u0431\u043E\u0440\u043A\u0435", // в сборке
  "\u0432 \u043F\u0443\u0442\u0438" // в пути
];
/** Lower-case phrase ("получен") that marks a shipment as completed. */
var COMPLETED_MARKER = "\u043F\u043E\u043B\u0443\u0447\u0435\u043D";
413
/**
 * Reads the shipment widgets of the open order page and classifies the order.
 *
 * A shipment counts as in progress when its normalized, lower-cased text
 * contains one of `IN_PROGRESS_MARKERS` and does not contain `COMPLETED_MARKER`.
 * The in-page script text is a runtime string and is reproduced verbatim.
 *
 * @param {{ callTool: Function }} client - Connected MCP client.
 * @param {Array<{ name: string }>} tools - Tools reported by the MCP server.
 * @returns {Promise<{ hasItemsBlock: boolean, shipmentTexts: string[], isInProgress: boolean }>}
 *   Page state; all-negative defaults when the script result is not an object.
 */
var getOrderPageState = async (client, tools) => {
  const evaluateTool = resolveToolName(tools, "evaluate_script");
  const response = await client.callTool({
    name: evaluateTool,
    arguments: {
      function: `() => {
const shipments = Array.from(document.querySelectorAll('[data-widget="shipmentWidget"]'));
const shipmentTexts = shipments
.map((shipment) => shipment.textContent || '')
.filter((text) => text.trim().length > 0);
return {
hasItemsBlock: shipments.length > 0,
shipmentTexts: shipmentTexts.map((text) => String(text))
};
}`
    }
  });

  const payload = unwrapToolContent2(response.content);
  if (!isRecord3(payload)) {
    return { hasItemsBlock: false, shipmentTexts: [], isInProgress: false };
  }

  const rawTexts = Array.isArray(payload.shipmentTexts) ? payload.shipmentTexts : [];
  const shipmentTexts = rawTexts
    .filter((entry) => typeof entry === "string")
    .map((text) => normalizeText(text).toLowerCase());

  const isShipmentInProgress = (text) =>
    !text.includes(COMPLETED_MARKER) && IN_PROGRESS_MARKERS.some((marker) => text.includes(marker));

  return {
    hasItemsBlock: Boolean(payload.hasItemsBlock),
    shipmentTexts,
    isInProgress: shipmentTexts.some(isShipmentInProgress)
  };
};
449
/**
 * Reads the text of the `titleWithTimer` widget on the open order page.
 * The in-page script text is a runtime string and is reproduced verbatim.
 *
 * @param {{ callTool: Function }} client - Connected MCP client.
 * @param {Array<{ name: string }>} tools - Tools reported by the MCP server.
 * @returns {Promise<string | null>} The whitespace-normalized title, or `null`
 *   when the script result is not a string or is blank.
 */
var getOrderTitleText = async (client, tools) => {
  const evaluateTool = resolveToolName(tools, "evaluate_script");
  const response = await client.callTool({
    name: evaluateTool,
    arguments: {
      function: `() => {
const widget = document.querySelector('[data-widget="titleWithTimer"]');
if (!widget) return null;
const span = widget.querySelector('span');
const text = span ? span.textContent : widget.textContent;
return text ? String(text) : null;
}`
    }
  });

  const payload = unwrapToolContent2(response.content);
  if (typeof payload !== "string") {
    return null;
  }
  const title = normalizeText(payload);
  return title === "" ? null : title;
};
470
/**
 * Emits a log line only when `options.verbose` is truthy, through
 * `options.logger` if provided and `console.log` otherwise.
 *
 * @param {{ verbose?: boolean, logger?: (message: string) => void } | undefined} options - Logging settings.
 * @param {string} message - Text to log.
 */
var logMessage = (options, message) => {
  if (options?.verbose) {
    if (!options.logger) {
      console.log(message);
    } else {
      options.logger(message);
    }
  }
};
480
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
var sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
481
/**
 * Checks whether the `shipmentWidget` and `titleWithTimer` widgets are present
 * on the open page. The in-page script text is a runtime string and is
 * reproduced verbatim.
 *
 * @param {{ callTool: Function }} client - Connected MCP client.
 * @param {Array<{ name: string }>} tools - Tools reported by the MCP server.
 * @returns {Promise<{ hasShipmentWidget: boolean, hasTitleWithTimer: boolean }>}
 *   Presence flags; both false when the script result is not an object.
 */
var getOrderPageWidgetState = async (client, tools) => {
  const evaluateTool = resolveToolName(tools, "evaluate_script");
  const response = await client.callTool({
    name: evaluateTool,
    arguments: {
      function: `() => {
const shipmentWidget = document.querySelector('[data-widget="shipmentWidget"]');
const titleWidget = document.querySelector('[data-widget="titleWithTimer"]');
return {
hasShipmentWidget: Boolean(shipmentWidget),
hasTitleWithTimer: Boolean(titleWidget)
};
}`
    }
  });

  const payload = unwrapToolContent2(response.content);
  const flags = isRecord3(payload) ? payload : {};
  return {
    hasShipmentWidget: Boolean(flags.hasShipmentWidget),
    hasTitleWithTimer: Boolean(flags.hasTitleWithTimer)
  };
};
505
/**
 * Polls the page's widget state until `predicate` accepts it or `timeoutMs`
 * elapses. The state is always checked at least once.
 *
 * @param {{ callTool: Function }} client - Connected MCP client.
 * @param {Array<{ name: string }>} tools - Tools reported by the MCP server.
 * @param {object} options - Logging settings forwarded to `logMessage`.
 * @param {string} description - Human-readable name of what is awaited (for logs).
 * @param {number} timeoutMs - Maximum time to keep polling.
 * @param {(state: { hasShipmentWidget: boolean, hasTitleWithTimer: boolean }) => boolean} predicate
 *   Returns true once the awaited widgets are present.
 * @returns {Promise<{ state: object, elapsedMs: number, attempts: number, timedOut: boolean }>}
 *   Last observed state plus timing details.
 */
var waitForOrderPageWidgets = async (client, tools, options, description, timeoutMs, predicate) => {
  const startedAt = Date.now();
  const deadline = startedAt + timeoutMs;
  let attempts = 0;
  let state = {
    hasShipmentWidget: false,
    hasTitleWithTimer: false
  };
  logMessage(options, `Waiting for ${description} (timeout ${timeoutMs}ms).`);

  for (;;) {
    attempts += 1;
    state = await getOrderPageWidgetState(client, tools);
    if (predicate(state)) {
      break;
    }
    const remaining = deadline - Date.now();
    if (remaining <= 0) {
      break;
    }
    // Never sleep past the deadline.
    await sleep(Math.min(DEFAULT_WIDGET_POLL_INTERVAL_MS, remaining));
  }

  const elapsedMs = Date.now() - startedAt;
  const timedOut = !predicate(state);
  logMessage(
    options,
    `Wait for ${description} finished in ${elapsedMs}ms (shipmentWidget=${state.hasShipmentWidget}, titleWithTimer=${state.hasTitleWithTimer}, attempts=${attempts}, timedOut=${timedOut}).`
  );
  return { state, elapsedMs, attempts, timedOut };
};
540
/**
 * Walks order pages in ascending order-number sequence, collecting each
 * completed order until a stop condition is met.
 *
 * Stop conditions (reported as `stopReason`):
 *   - "max-orders"  - `options.maxOrders` orders have been collected;
 *   - "missing"     - neither a shipment widget nor a title was found;
 *   - "in-progress" - a shipment is still in progress.
 *
 * @param {{ callTool: Function }} client - Connected MCP client.
 * @param {Array<{ name: string }>} tools - Tools reported by the MCP server.
 * @param {string} userId - Ozon user id; combined with the order number into the order id.
 * @param {string} startOrder - First order number, digits only (e.g. "0008").
 * @param {object} [options] - Scan settings.
 * @param {number} [options.maxOrders] - Stop after collecting this many orders.
 * @param {number} [options.pageLoadTimeoutMs] - Wait for the first widgets to appear.
 * @param {number} [options.pageLoadRetryDelayMs] - Pause before re-checking for the shipment widget.
 * @param {(order: object) => unknown} [options.onOrder] - Awaited for each order before it is recorded;
 *   a rejection aborts the scan.
 * @param {boolean} [options.verbose] - Enables progress logging.
 * @param {(message: string) => void} [options.logger] - Log sink used when verbose.
 * @returns {Promise<{ orders: object[], scannedOrders: number, stopReason: string, stopOrderNumber: string }>}
 *   Collected orders and where/why the scan stopped.
 * @throws {Error} If an order page has a shipment widget but no title text, or
 *   its title cannot be parsed into a date.
 */
var scanOrders = async (client, tools, userId, startOrder, options = {}) => {
  const { width, numeric } = parseStartOrder(startOrder);
  const pageLoadTimeoutMs = options.pageLoadTimeoutMs ?? DEFAULT_PAGE_LOAD_TIMEOUT_MS;
  const pageLoadRetryDelayMs = options.pageLoadRetryDelayMs ?? DEFAULT_PAGE_LOAD_RETRY_DELAY_MS;
  // Follow-up waits never exceed the user-configured page load timeout.
  const shipmentWidgetTimeoutMs = Math.min(pageLoadTimeoutMs, DEFAULT_SHIPMENT_WIDGET_TIMEOUT_MS);
  const titleWidgetTimeoutMs = Math.min(pageLoadTimeoutMs, DEFAULT_TITLE_WIDGET_TIMEOUT_MS);

  const orders = [];
  const stopWith = (stopReason, stopOrderNumber) => ({
    orders,
    scannedOrders: orders.length,
    stopReason,
    stopOrderNumber
  });
  const waitFor = (description, timeoutMs, predicate) =>
    waitForOrderPageWidgets(client, tools, options, description, timeoutMs, predicate);

  for (let current = numeric; ; current += 1) {
    const orderNumber = formatOrderNumber(current, width);
    if (options.maxOrders !== void 0 && orders.length >= options.maxOrders) {
      return stopWith("max-orders", orderNumber);
    }

    const orderId = `${userId}-${orderNumber}`;
    const url = buildOrderUrl(orderId);
    logMessage(options, `Opening order ${orderId}`);
    await openPage(client, tools, url);
    await waitFor(
      "order page widgets",
      pageLoadTimeoutMs,
      (state) => state.hasShipmentWidget || state.hasTitleWithTimer
    );

    let pageState = await getOrderPageState(client, tools);
    if (!pageState.hasItemsBlock) {
      // Give the shipment widget one more chance before falling back to the title.
      logMessage(
        options,
        `No shipment widgets detected for ${orderId}, retrying after ${pageLoadRetryDelayMs}ms.`
      );
      if (pageLoadRetryDelayMs > 0) {
        await sleep(pageLoadRetryDelayMs);
      }
      await waitFor("shipmentWidget", shipmentWidgetTimeoutMs, (state) => state.hasShipmentWidget);
      pageState = await getOrderPageState(client, tools);
    }

    let titleText = null;
    if (!pageState.hasItemsBlock) {
      titleText = await getOrderTitleText(client, tools);
      if (!titleText) {
        return stopWith("missing", orderNumber);
      }
      logMessage(options, `Shipment widget still missing for ${orderId}, continuing with title.`);
    }

    if (pageState.isInProgress) {
      logMessage(options, `Order ${orderId} is in progress, stopping scan.`);
      return stopWith("in-progress", orderNumber);
    }

    if (!titleText) {
      titleText = await getOrderTitleText(client, tools);
    }
    if (!titleText) {
      logMessage(
        options,
        `TitleWithTimer missing for ${orderId}, waiting up to ${titleWidgetTimeoutMs}ms.`
      );
      await waitFor("titleWithTimer", titleWidgetTimeoutMs, (state) => state.hasTitleWithTimer);
      titleText = await getOrderTitleText(client, tools);
    }
    if (!titleText) {
      throw new Error(`Order ${orderId} is missing titleWithTimer text.`);
    }

    const orderDate = parseOrderDateFromTitle(titleText);
    const items = await extractOrderItems(client, tools);
    const order = { orderId, orderNumber, orderDate, items };
    // The callback runs before the order is recorded, so a failure leaves `orders` untouched.
    if (options.onOrder) {
      await options.onOrder(order);
    }
    orders.push(order);
  }
};
648
+
649
+ // src/index.ts
650
/**
 * Chooses the output format from the output file extension.
 *
 * @param {string | undefined} outputPath - Destination path, if any.
 * @returns {"csv" | "json"} "csv" only for a `.csv` extension (case-insensitive);
 *   "json" otherwise, including when no path is given.
 */
var resolveOutputFormat = (outputPath) => {
  const isCsv = Boolean(outputPath) && path.extname(outputPath).toLowerCase() === ".csv";
  return isCsv ? "csv" : "json";
};
660
/**
 * Renders a value as a single CSV field.
 * `null`/`undefined` become an empty field. Fields containing a double quote,
 * comma, carriage return or line feed are wrapped in double quotes with inner
 * quotes doubled, so a stray "\r" in scraped text cannot split a row.
 *
 * @param {unknown} value - Value to render.
 * @returns {string} The CSV-safe field text.
 */
var escapeCsvValue = (value) => {
  const text = value === null || value === void 0 ? "" : String(value);
  if (/[",\r\n]/.test(text)) {
    return `"${text.replace(/"/g, '""')}"`;
  }
  return text;
};
667
/**
 * Flattens the scan result into CSV text with one row per order item.
 * Orders without items contribute no rows; there is no trailing newline.
 *
 * @param {{ userId: string, orders: Array<{ orderId: string, orderNumber: string, orderDate: string, items: Array<object> }> }} output
 *   Scan result.
 * @returns {string} Header line followed by the item rows, joined with "\n".
 */
var toCsv = (output) => {
  const columns = ["userId", "orderId", "orderNumber", "orderDate", "title", "price", "imageUrl"];
  const lines = [columns.join(",")];

  for (const order of output.orders) {
    for (const item of order.items) {
      const cells = [
        output.userId,
        order.orderId,
        order.orderNumber,
        order.orderDate,
        item.title,
        item.price,
        item.imageUrl ?? ""
      ];
      lines.push(cells.map((cell) => escapeCsvValue(cell)).join(","));
    }
  }
  return lines.join("\n");
};
686
/** Serializes the scan result as JSON indented with two spaces. */
var toJson = (output) => {
  const indent = 2;
  return JSON.stringify(output, null, indent);
};
687
/**
 * Delivers the rendered result: to stdout when no path is given, otherwise to
 * the file at `outputPath` (UTF-8), creating missing parent directories.
 *
 * @param {string} payload - Rendered JSON or CSV text.
 * @param {string | undefined} outputPath - Destination file, if any.
 * @returns {Promise<void>}
 */
var writeOutput = async (payload, outputPath) => {
  if (outputPath) {
    const parentDir = path.dirname(outputPath);
    // "." means the current directory, which needs no creation.
    if (parentDir !== ".") {
      await fs.mkdir(parentDir, { recursive: true });
    }
    await fs.writeFile(outputPath, payload, "utf-8");
    return;
  }
  console.log(payload);
};
698
/**
 * Registers the backend connection options shared by the backend-aware commands.
 *
 * @param {{ option: Function }} builder - yargs command builder.
 * @returns {object} The builder returned by the last `option` call.
 */
var addBackendOptions = (builder) => {
  const withUrl = builder.option("backend-url", {
    type: "string",
    describe: "Backend base URL (env: BACKEND_URL, default: http://localhost:3015)"
  });
  return withUrl.option("backend-api-key", {
    type: "string",
    describe: "Backend API key (env: BACKEND_API_KEY, default: local-dev)"
  });
};
705
/**
 * Command-line interface definition.
 *
 * Commands:
 *   - `ping`         - prints "pong";
 *   - `backend-next` - prints the next order id reported by the backend;
 *   - `start`        - scans order pages and writes the result as JSON or CSV.
 *
 * Every handler reports failures on stderr and sets `process.exitCode = 1`
 * instead of throwing.
 */
var cli = yargs(hideBin(process.argv)).scriptName("ozon-grabber").command(
  "ping",
  "CLI health check",
  () => void 0,
  async () => {
    console.log("pong");
  }
).command(
  "backend-next",
  "Fetch next orderId from backend",
  (builder) => addBackendOptions(builder),
  async (argv) => {
    try {
      const config = resolveBackendConfig({
        backendUrl: argv.backendUrl,
        backendApiKey: argv.backendApiKey
      });
      const nextOrderId = await getNextOrderId(config);
      console.log(nextOrderId);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(message);
      process.exitCode = 1;
    }
  }
).command(
  "start",
  "Collect order history",
  (builder) => {
    const configured = builder.option("user-id", {
      type: "string",
      demandOption: true,
      describe: "Ozon user identifier"
    }).option("start-order", {
      type: "string",
      describe: "Starting order number (e.g., 0008)"
    }).option("max-orders", {
      type: "number",
      describe: "Maximum number of orders to scan"
    }).option("page-load-timeout-ms", {
      type: "number",
      default: 2e4,
      describe: "Timeout in ms to wait for order page widgets before marking missing"
    }).option("output", {
      type: "string",
      describe: "Output file path"
    }).option("verbose", {
      type: "boolean",
      default: false,
      describe: "Show verbose logs"
    }).option("backend", {
      type: "boolean",
      default: false,
      describe: "Enable backend upload and next-order resolution"
    });
    return addBackendOptions(configured);
  },
  async (argv) => {
    try {
      const userId = argv.userId;
      const maxOrders = argv.maxOrders;
      const pageLoadTimeoutMs = argv.pageLoadTimeoutMs;
      const outputPath = argv.output;
      const verbose = argv.verbose;

      // A number option that cannot be parsed arrives as NaN. A NaN limit would
      // never be reached, and a NaN timeout would make the widget wait poll forever.
      if (maxOrders !== void 0 && (Number.isNaN(maxOrders) || maxOrders < 0)) {
        throw new Error(`--max-orders must be a non-negative number: ${maxOrders}.`);
      }
      if (Number.isNaN(pageLoadTimeoutMs) || pageLoadTimeoutMs < 0) {
        throw new Error(`--page-load-timeout-ms must be a non-negative number: ${pageLoadTimeoutMs}.`);
      }

      const useBackend = Boolean(argv.backend);
      const backendConfig = useBackend ? resolveBackendConfig({
        backendUrl: argv.backendUrl,
        backendApiKey: argv.backendApiKey
      }) : null;
      // An explicit --start-order wins; otherwise the backend (if enabled) decides.
      const resolvedStartOrder = argv.startOrder ?? (backendConfig ? await getNextOrderId(backendConfig) : void 0);
      if (!resolvedStartOrder) {
        throw new Error("Starting order number is required when backend is disabled.");
      }
      if (backendConfig && !/^\d{4}$/.test(resolvedStartOrder.trim())) {
        throw new Error(
          `Starting order must be a 4-digit string when using backend: ${resolvedStartOrder}.`
        );
      }
      console.log(`Starting orderId: ${resolvedStartOrder}`);
      const { client, tools } = await connectChromeDevtoolsClient();
      try {
        const summary = await scanOrders(client, tools, userId, resolvedStartOrder, {
          maxOrders,
          pageLoadTimeoutMs,
          verbose,
          logger: (message) => console.log(message),
          // With the backend enabled each order is uploaded as soon as it is parsed;
          // an upload failure is rethrown, which aborts the scan.
          onOrder: backendConfig ? async (order) => {
            const items = order.items.map((item) => ({
              orderId: order.orderNumber,
              userId,
              orderDate: order.orderDate,
              title: item.title,
              price: item.price,
              imageUrl: item.imageUrl
            }));
            if (items.length === 0) {
              console.log(`Order ${order.orderId} has no items to submit.`);
              return;
            }
            try {
              await submitItems(backendConfig, items);
              console.log(
                `Submitted order ${order.orderId} (${items.length} items) to backend.`
              );
            } catch (error) {
              const message = error instanceof Error ? error.message : String(error);
              console.error(`Failed to submit order ${order.orderId}: ${message}`);
              throw error;
            }
          } : void 0
        });
        console.log(`Scanned orders: ${summary.scannedOrders}`);
        const output = {
          userId,
          startOrder: resolvedStartOrder,
          scannedOrders: summary.scannedOrders,
          stopReason: summary.stopReason,
          stopOrderNumber: summary.stopOrderNumber,
          orders: summary.orders
        };
        const format = resolveOutputFormat(outputPath);
        const payload = format === "csv" ? toCsv(output) : toJson(output);
        await writeOutput(payload, outputPath);
      } finally {
        // Always release the MCP client, even when the scan fails.
        await client.close();
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(message);
      process.exitCode = 1;
    }
  }
).option("verbose", {
  type: "boolean",
  default: false,
  describe: "Show verbose logs"
}).strict().demandCommand(1, "Specify a command").help();
cli.parse();
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "ozon-grabber",
3
+ "version": "0.1.0",
4
+ "description": "Ozon order history grabber CLI",
5
+ "repository": {
6
+ "type": "git",
7
+ "url": "git+https://github.com/isachivka/ozon-orders-history.git",
8
+ "directory": "cli"
9
+ },
10
+ "keywords": [
11
+ "ozon",
12
+ "grabber",
13
+ "orders",
14
+ "history",
15
+ "cli"
16
+ ],
17
+ "author": "Igor Sachivka",
18
+ "bugs": {
19
+ "url": "https://github.com/isachivka/ozon-orders-history/issues"
20
+ },
21
+ "homepage": "https://github.com/isachivka/ozon-orders-history/tree/main/cli#readme",
22
+ "license": "MIT",
23
+ "type": "module",
24
+ "bin": {
25
+ "ozon-grabber": "dist/index.js"
26
+ },
27
+ "files": [
28
+ "dist"
29
+ ],
30
+ "scripts": {
31
+ "build": "npm run typecheck && tsup src/index.ts --format esm --target es2022 --out-dir dist --clean",
32
+ "dev": "tsup src/index.ts --format esm --target es2022 --out-dir dist --watch",
33
+ "lint": "npm run typecheck",
34
+ "typecheck": "tsc -p tsconfig.json --noEmit"
35
+ },
36
+ "dependencies": {
37
+ "@modelcontextprotocol/sdk": "^1.0.4",
38
+ "yargs": "^17.7.2"
39
+ },
40
+ "devDependencies": {
41
+ "@types/node": "^20.11.30",
42
+ "@types/yargs": "^17.0.33",
43
+ "tsup": "^8.0.2",
44
+ "typescript": "^5.4.5"
45
+ }
46
+ }