apify 4.0.0-beta.12 → 4.0.0-beta.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -48
- package/dist/actor.d.ts +157 -61
- package/dist/actor.js +278 -91
- package/dist/apify_storage_client.d.ts +54 -0
- package/dist/apify_storage_client.js +152 -0
- package/dist/charging.d.ts +43 -2
- package/dist/charging.js +196 -54
- package/dist/configuration.d.ts +79 -132
- package/dist/configuration.js +114 -141
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -2
- package/dist/input-schemas.d.ts +7 -0
- package/dist/input-schemas.js +58 -0
- package/dist/key_value_store.d.ts +8 -4
- package/dist/key_value_store.js +19 -11
- package/dist/platform_event_manager.d.ts +0 -1
- package/dist/platform_event_manager.js +5 -5
- package/dist/proxy_configuration.d.ts +41 -44
- package/dist/proxy_configuration.js +65 -103
- package/dist/storage.d.ts +58 -0
- package/dist/storage.js +79 -0
- package/dist/utils.d.ts +0 -1
- package/dist/utils.js +2 -4
- package/package.json +123 -73
- package/.turbo/turbo-build.log +0 -26
- package/.turbo/turbo-copy.log +0 -4
- package/dist/LICENSE.md +0 -201
- package/dist/README.md +0 -98
- package/dist/actor.d.ts.map +0 -1
- package/dist/actor.js.map +0 -1
- package/dist/charging.d.ts.map +0 -1
- package/dist/charging.js.map +0 -1
- package/dist/configuration.d.ts.map +0 -1
- package/dist/configuration.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/key_value_store.d.ts.map +0 -1
- package/dist/key_value_store.js.map +0 -1
- package/dist/package.json +0 -75
- package/dist/platform_event_manager.d.ts.map +0 -1
- package/dist/platform_event_manager.js.map +0 -1
- package/dist/proxy_configuration.d.ts.map +0 -1
- package/dist/proxy_configuration.js.map +0 -1
- package/dist/utils.d.ts.map +0 -1
- package/dist/utils.js.map +0 -1
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
2
|
+
import type { CreateDatasetClientOptions, CreateKeyValueStoreClientOptions, CreateRequestQueueClientOptions, DatasetClient, KeyValueStoreClient, RequestQueueClient, StorageClient } from '@crawlee/types';
|
|
3
|
+
import type { ApifyClient } from 'apify-client';
|
|
4
|
+
import { type ChargeResult, type ChargingManager } from './charging.js';
|
|
5
|
+
import type { Configuration } from './configuration.js';
|
|
6
|
+
type StorageType = 'Dataset' | 'KeyValueStore' | 'RequestQueue';
|
|
7
|
+
/** Marks a dataset client whose `pushItems` charges for pay-per-event. @internal */
|
|
8
|
+
export declare const USES_PUSH_DATA_INTERCEPTION: unique symbol;
|
|
9
|
+
/**
|
|
10
|
+
* Context of a single `Actor.pushData()` call, shared with the intercepted
|
|
11
|
+
* `pushItems()` calls so they can (1) know which event to charge and
|
|
12
|
+
* (2) aggregate the {@link ChargeResult} across the multiple `pushItems()`
|
|
13
|
+
* calls a single `pushData()` may trigger (Crawlee batches large pushes).
|
|
14
|
+
*/
|
|
15
|
+
export interface PpeAwarePushDataContext {
|
|
16
|
+
/** Name of the event to charge for the pushed items; `undefined` when no explicit event was given. */
eventName: string | undefined;
|
|
17
|
+
/** Charge result aggregated over the `pushItems()` calls made so far; unset until the first one completes. */
chargeResult?: ChargeResult;
|
|
18
|
+
}
|
|
19
|
+
export declare const pushDataChargingContext: AsyncLocalStorage<PpeAwarePushDataContext>;
|
|
20
|
+
/**
|
|
21
|
+
* Bridges `apify-client`'s synchronous resource accessors (`dataset(id)`,
|
|
22
|
+
* `keyValueStore(id)`, `requestQueue(id, options?)`) to crawlee v4's
|
|
23
|
+
* `StorageClient` interface (async factory methods accepting either an `id`
|
|
24
|
+
* or a `name`).
|
|
25
|
+
*
|
|
26
|
+
* For the run's default dataset it transparently swaps in a charging-aware
|
|
27
|
+
* dataset client (pay-per-event on `Actor.pushData()`), provided a charging
|
|
28
|
+
* manager is supplied and a default-dataset-item price is configured.
|
|
29
|
+
*
|
|
30
|
+
* `storageExists()` lets `Dataset.open(idOrName)` resolve a string to an id
|
|
31
|
+
* first (when one exists on the platform) and fall back to a name otherwise —
|
|
32
|
+
* otherwise crawlee's `resolveStorageIdentifier` treats every string as a name
|
|
33
|
+
* and the SDK would silently create a new storage named like the passed id.
|
|
34
|
+
*/
|
|
35
|
+
export declare class ApifyStorageClient implements StorageClient {
|
|
36
|
+
/** The `apify-client` instance every storage client is obtained from. */
private readonly client;
|
|
37
|
+
/** Optional configuration; the default storage ids are read from it. */
private readonly config?;
|
|
38
|
+
/** Optional getter for the charging manager; when absent no charging-aware dataset client is created. */
private readonly getChargingManager?;
|
|
39
|
+
/** Stores the given client, configuration and charging-manager getter on the instance. */
constructor(client: ApifyClient, config?: Configuration | undefined, getChargingManager?: (() => ChargingManager) | undefined);
|
|
40
|
+
/** Fetches the storage through the client and resolves `true` only when the returned record's `id` equals the given `id`. */
storageExists(id: string, type: StorageType): Promise<boolean>;
|
|
41
|
+
/** Resolves the dataset id from `options` and returns the charging-aware client when one applies, otherwise the plain dataset client. */
createDatasetClient(options?: CreateDatasetClientOptions): Promise<DatasetClient>;
|
|
42
|
+
/** Resolves the key-value store id from `options` and returns the client for it. */
createKeyValueStoreClient(options?: CreateKeyValueStoreClientOptions): Promise<KeyValueStoreClient>;
|
|
43
|
+
/** Resolves the request queue id from `options` and returns the client for it, forwarding `clientKey` when one is provided. */
createRequestQueueClient(options?: CreateRequestQueueClientOptions): Promise<RequestQueueClient>;
|
|
44
|
+
/**
|
|
45
|
+
* Returns a charging-aware dataset client when `id` is the run's default
|
|
46
|
+
* dataset and a default-dataset-item price is configured; otherwise
|
|
47
|
+
* `undefined` (caller uses the plain client).
|
|
48
|
+
*/
|
|
49
|
+
private chargingDatasetClient;
|
|
50
|
+
/** Picks the storage id: an explicit `id` first, then the id of the storage got-or-created by `name`, then the configured default id (or an empty string). */
private resolveId;
|
|
51
|
+
/** Returns the single-resource client (dataset, key-value store or request queue) for an id. */
private resourceClient;
|
|
52
|
+
/** Returns the collection client (datasets, key-value stores or request queues) for a storage type. */
private collectionClient;
|
|
53
|
+
}
|
|
54
|
+
export {};
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/* eslint-disable max-classes-per-file */
|
|
2
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
3
|
+
import { DatasetClient as ApifyDatasetClient } from 'apify-client';
|
|
4
|
+
import { DEFAULT_DATASET_ITEM_EVENT, mergeChargeResults, pushDataAndCharge, } from './charging.js';
|
|
5
|
+
/**
 * Maps each storage type to the name of the configuration property that holds
 * the default storage id for that type. `resolveId()` uses it when neither an
 * `id` nor a `name` is supplied.
 */
const DEFAULT_ID_CONFIG_KEY = {
|
|
6
|
+
Dataset: 'defaultDatasetId',
|
|
7
|
+
KeyValueStore: 'defaultKeyValueStoreId',
|
|
8
|
+
RequestQueue: 'defaultRequestQueueId',
|
|
9
|
+
};
|
|
10
|
+
/** Marks a dataset client whose `pushItems` charges for pay-per-event. @internal */
|
|
11
|
+
export const USES_PUSH_DATA_INTERCEPTION = Symbol('apify:uses-push-data-interception');
|
|
12
|
+
/**
 * Async-local store holding the context of the current push-data call.
 * `PpeAwareDatasetClient.pushItems()` reads it with `getStore()` to learn the
 * event name to charge and to accumulate the charge result; when no store is
 * active, `pushItems()` still pushes and charges but records nothing.
 */
export const pushDataChargingContext = new AsyncLocalStorage();
|
|
13
|
+
/**
|
|
14
|
+
* Default `DatasetClient` that charges for pushed items (pay-per-event). Used
|
|
15
|
+
* only for the run's default dataset when an `apify-default-dataset-item` price
|
|
16
|
+
* is configured; for everything else the plain `apify-client` dataset client is
|
|
17
|
+
* used.
|
|
18
|
+
*/
|
|
19
|
+
class PpeAwareDatasetClient extends ApifyDatasetClient {
    getChargingManager;

    /**
     * @param options Options forwarded unchanged to the `apify-client` dataset client.
     * @param getChargingManager Getter invoked on every push to obtain the charging manager.
     */
    constructor(options, getChargingManager) {
        super(options);
        this.getChargingManager = getChargingManager;
    }

    /**
     * Turns whatever `pushItems()` received into a flat array of items.
     *
     * - an array: every string element is JSON-parsed, then the result is
     *   flattened by one level (so a string encoding an array contributes
     *   each of its elements);
     * - a string: JSON-parsed; an array result is used as-is, anything else
     *   is wrapped in a one-element array;
     * - anything else: wrapped in a one-element array.
     */
    normalizeItems(items) {
        if (Array.isArray(items)) {
            return items.flatMap((entry) => (typeof entry === 'string' ? JSON.parse(entry) : entry));
        }
        if (typeof items !== 'string') {
            return [items];
        }
        const decoded = JSON.parse(items);
        return Array.isArray(decoded) ? decoded : [decoded];
    }

    /**
     * Pushes items through `pushDataAndCharge()` so they are limited and
     * charged, then records the charge result on the active push-data
     * context, if there is one.
     */
    async pushItems(items) {
        // Read the store before the first await.
        const pushContext = pushDataChargingContext.getStore();
        // Strings may encode several items (e.g. '[{...},{...}]'); expand them
        // first so that every logical item is counted on its own.
        const itemList = this.normalizeItems(items);
        const latestResult = await pushDataAndCharge({
            chargingManager: this.getChargingManager(),
            items: itemList,
            eventName: pushContext?.eventName,
            isDefaultDataset: true,
            // The parent client gets a JSON string (faster validation in the Apify client).
            pushFn: async (batch) => {
                return super.pushItems(JSON.stringify(batch));
            },
        });
        if (pushContext) {
            // Several `pushItems()` calls can belong to one `Actor.pushData()` call,
            // so fold the new result into whatever has been collected already.
            const collected = pushContext.chargeResult;
            pushContext.chargeResult = collected === undefined ? latestResult : mergeChargeResults(collected, latestResult);
        }
    }
}
|
|
56
|
+
/**
|
|
57
|
+
* Bridges `apify-client`'s synchronous resource accessors (`dataset(id)`,
|
|
58
|
+
* `keyValueStore(id)`, `requestQueue(id, options?)`) to crawlee v4's
|
|
59
|
+
* `StorageClient` interface (async factory methods accepting either an `id`
|
|
60
|
+
* or a `name`).
|
|
61
|
+
*
|
|
62
|
+
* For the run's default dataset it transparently swaps in a charging-aware
|
|
63
|
+
* dataset client (pay-per-event on `Actor.pushData()`), provided a charging
|
|
64
|
+
* manager is supplied and a default-dataset-item price is configured.
|
|
65
|
+
*
|
|
66
|
+
* `storageExists()` lets `Dataset.open(idOrName)` resolve a string to an id
|
|
67
|
+
* first (when one exists on the platform) and fall back to a name otherwise —
|
|
68
|
+
* otherwise crawlee's `resolveStorageIdentifier` treats every string as a name
|
|
69
|
+
* and the SDK would silently create a new storage named like the passed id.
|
|
70
|
+
*/
|
|
71
|
+
export class ApifyStorageClient {
    client;
    config;
    getChargingManager;

    /**
     * @param client The `apify-client` instance used to obtain all storage clients.
     * @param config Optional configuration the default storage ids are read from.
     * @param getChargingManager Optional getter for the charging manager.
     */
    constructor(client, config, getChargingManager) {
        this.client = client;
        this.config = config;
        this.getChargingManager = getChargingManager;
    }

    /**
     * Resolves `true` only when the storage fetched for `id` reports that very
     * id. The lookup endpoint matches by id or by name, so a name match must
     * not count, which lets crawlee fall through to `{ name }`.
     */
    async storageExists(id, type) {
        const record = await this.resourceClient(id, type).get();
        return record?.id === id;
    }

    /**
     * Resolves the dataset id and returns the charging-aware client when one
     * applies, otherwise the plain `apify-client` dataset client. The client
     * is returned as-is.
     */
    async createDatasetClient(options) {
        const datasetId = await this.resolveId(options, 'Dataset');
        const chargingClient = this.chargingDatasetClient(datasetId);
        return chargingClient ?? this.client.dataset(datasetId);
    }

    /** Resolves the key-value store id and returns the client for it. */
    async createKeyValueStoreClient(options) {
        const storeId = await this.resolveId(options, 'KeyValueStore');
        return this.client.keyValueStore(storeId);
    }

    /** Resolves the request queue id and returns the client for it, passing `clientKey` along when set. */
    async createRequestQueueClient(options) {
        const queueId = await this.resolveId(options, 'RequestQueue');
        const clientKey = options?.clientKey;
        const queueOptions = clientKey ? { clientKey } : undefined;
        return this.client.requestQueue(queueId, queueOptions);
    }

    /**
     * Builds the charging-aware dataset client, or returns `undefined` (the
     * caller then uses the plain client) unless all of these hold: a
     * charging-manager getter was supplied, `id` is the configured default
     * dataset id, and the pricing info lists the default-dataset-item event.
     */
    chargingDatasetClient(id) {
        // Keep the getter in a local: it is called unbound and also handed to the new client.
        const { getChargingManager } = this;
        if (!getChargingManager || id !== this.config?.defaultDatasetId) {
            return undefined;
        }
        const { perEventPrices } = getChargingManager().getPricingInfo();
        if (!(DEFAULT_DATASET_ITEM_EVENT in perEventPrices)) {
            return undefined;
        }
        const { client } = this;
        const chargingClient = new PpeAwareDatasetClient(
            {
                id,
                baseUrl: client.baseUrl,
                publicBaseUrl: client.publicBaseUrl,
                apifyClient: client,
                httpClient: client.httpClient,
            },
            getChargingManager,
        );
        // Tag the instance so it can be recognised as one whose `pushItems` charges.
        Object.assign(chargingClient, { [USES_PUSH_DATA_INTERCEPTION]: true });
        return chargingClient;
    }

    /**
     * Picks the storage id to use: an explicit `id` wins; otherwise a `name`
     * is resolved by getting or creating the named storage; otherwise the
     * default id for `type` is read from the configuration.
     */
    async resolveId(options, type) {
        const explicitId = options?.id;
        if (explicitId) {
            return explicitId;
        }
        const storageName = options?.name;
        if (!storageName) {
            // Neither id nor name (crawlee's `__default__` alias): use the default
            // storage id from the run's environment. apify-client rejects an empty id.
            return this.config?.[DEFAULT_ID_CONFIG_KEY[type]] ?? '';
        }
        const storage = await this.collectionClient(type).getOrCreate(storageName);
        return storage.id;
    }

    /** Returns the single-resource client for `id`; any type other than dataset or key-value store yields a request queue client. */
    resourceClient(id, type) {
        switch (type) {
            case 'Dataset':
                return this.client.dataset(id);
            case 'KeyValueStore':
                return this.client.keyValueStore(id);
            default:
                return this.client.requestQueue(id);
        }
    }

    /** Returns the collection client for `type`; any type other than dataset or key-value store yields the request queues collection. */
    collectionClient(type) {
        switch (type) {
            case 'Dataset':
                return this.client.datasets();
            case 'KeyValueStore':
                return this.client.keyValueStores();
            default:
                return this.client.requestQueues();
        }
    }
}
|
package/dist/charging.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { ActorRunPricingInfo, ApifyClient } from 'apify-client';
|
|
2
2
|
import type { Configuration } from './configuration.js';
|
|
3
|
+
export declare const DEFAULT_DATASET_ITEM_EVENT = "apify-default-dataset-item";
|
|
3
4
|
export interface ChargeOptions {
|
|
4
5
|
/**
|
|
5
6
|
* The name of the event type to charge for.
|
|
@@ -48,10 +49,12 @@ export interface ActorPricingInfo {
|
|
|
48
49
|
isPayPerEvent: boolean;
|
|
49
50
|
perEventPrices: Record<string, number>;
|
|
50
51
|
}
|
|
52
|
+
export declare function mergeChargeResults(a: ChargeResult, b: ChargeResult): ChargeResult;
|
|
51
53
|
/**
|
|
52
54
|
* Handles pay-per-event charging.
|
|
53
55
|
*/
|
|
54
56
|
export declare class ChargingManager {
|
|
57
|
+
private configuration;
|
|
55
58
|
private readonly LOCAL_CHARGING_LOG_DATASET_NAME;
|
|
56
59
|
private readonly PLATFORM_CHARGING_LOG_DATASET_ID_KEY;
|
|
57
60
|
private maxTotalChargeUsd;
|
|
@@ -67,6 +70,7 @@ export declare class ChargingManager {
|
|
|
67
70
|
private apifyClient;
|
|
68
71
|
constructor(configuration: Configuration, apifyClient: ApifyClient);
|
|
69
72
|
private get isPayPerEvent();
|
|
73
|
+
private fetchPricingInfo;
|
|
70
74
|
/**
|
|
71
75
|
* Initialize the ChargingManager by loading pricing information and charging state via Apify API.
|
|
72
76
|
*/
|
|
@@ -82,9 +86,17 @@ export declare class ChargingManager {
|
|
|
82
86
|
* This method attempts to charge for the specified number of events, but may charge fewer
|
|
83
87
|
* if doing so would exceed the total budget limit (`maxTotalChargeUsd`).
|
|
84
88
|
*
|
|
89
|
+
* **Important:** When using the `count` parameter to charge for multiple events at once,
|
|
90
|
+
* be aware that the charge may be partially fulfilled, i.e. `chargedCount` can be less
|
|
91
|
+
* than the requested `count`. Always check the returned `chargedCount` to know how many
|
|
92
|
+
* events were actually charged, and only perform that much work. If your work is
|
|
93
|
+
* meaningfully divisible into individual units, prefer calling `charge()` once per unit
|
|
94
|
+
* rather than batching via `count` — this gives finer control over budget consumption
|
|
95
|
+
* and avoids situations where more work is requested than the budget allows.
|
|
96
|
+
*
|
|
85
97
|
* @param options The name of the event to charge for and the number of events to be charged.
|
|
86
98
|
*/
|
|
87
|
-
charge({ eventName, count
|
|
99
|
+
charge({ eventName, count }: ChargeOptions): Promise<ChargeResult>;
|
|
88
100
|
/**
|
|
89
101
|
* Get the number of events with given name that the Actor has charged for so far.
|
|
90
102
|
*/
|
|
@@ -99,5 +111,34 @@ export declare class ChargingManager {
|
|
|
99
111
|
* If the event is not registered, returns Infinity (free of charge)
|
|
100
112
|
*/
|
|
101
113
|
calculateMaxEventChargeCountWithinLimit(eventName: string): number;
|
|
114
|
+
private calculateEventPrice;
|
|
115
|
+
private calculateMaxChargesByPrice;
|
|
116
|
+
/**
|
|
117
|
+
* Helper to calculate how many items can be pushed within charging limits.
|
|
118
|
+
* Returns the limited items and count to charge.
|
|
119
|
+
*/
|
|
120
|
+
calculatePushDataLimits<T>({ items, eventName, isDefaultDataset, }: {
|
|
121
|
+
items: T | T[];
|
|
122
|
+
eventName: string | undefined;
|
|
123
|
+
isDefaultDataset: boolean;
|
|
124
|
+
}): {
|
|
125
|
+
limitedItems: T[];
|
|
126
|
+
eventsToCharge: Record<string, number>;
|
|
127
|
+
};
|
|
102
128
|
}
|
|
103
|
-
|
|
129
|
+
/**
|
|
130
|
+
* Helper for PPE-aware pushing of data to the dataset.
|
|
131
|
+
*
|
|
132
|
+
* 1. Calculate limits based on budget
|
|
133
|
+
* 2. Push limited items via the provided callback
|
|
134
|
+
* 3. Charge for the events
|
|
135
|
+
*
|
|
136
|
+
* @internal
|
|
137
|
+
*/
|
|
138
|
+
export declare function pushDataAndCharge<T>({ chargingManager, items, eventName, isDefaultDataset, pushFn, }: {
|
|
139
|
+
chargingManager: ChargingManager;
|
|
140
|
+
items: T | T[];
|
|
141
|
+
eventName: string | undefined;
|
|
142
|
+
isDefaultDataset: boolean;
|
|
143
|
+
pushFn: (limitedItems: T | T[]) => Promise<void>;
|
|
144
|
+
}): Promise<ChargeResult>;
|
package/dist/charging.js
CHANGED
|
@@ -1,9 +1,21 @@
|
|
|
1
1
|
import { Dataset, KeyValueStore } from '@crawlee/core';
|
|
2
2
|
import log from '@apify/log';
|
|
3
|
+
export const DEFAULT_DATASET_ITEM_EVENT = 'apify-default-dataset-item';
|
|
4
|
+
/**
 * Combines two charge results into a single aggregate result.
 *
 * - `eventChargeLimitReached` is true when either input reached its limit.
 * - `chargedCount` is the sum of both counts.
 * - `chargeableWithinLimit` contains every event name found in either input,
 *   mapped to the smaller of the two remaining counts.
 *
 * An event missing from one input is treated as unconstrained by that input
 * (`Infinity`), so the other side's value is kept. Without this, merging with
 * a result whose `chargeableWithinLimit` is empty would produce `NaN` values
 * (`Math.min(x, undefined)`) or silently drop keys.
 *
 * @param a The result accumulated so far.
 * @param b The result to merge in.
 * @returns A new result object; neither input is modified.
 */
export function mergeChargeResults(a, b) {
    // Keys of `a` first (original order), followed by keys only `b` has.
    const eventNames = [
        ...new Set([...Object.keys(a.chargeableWithinLimit), ...Object.keys(b.chargeableWithinLimit)]),
    ];
    return {
        eventChargeLimitReached: a.eventChargeLimitReached || b.eventChargeLimitReached,
        chargedCount: a.chargedCount + b.chargedCount,
        chargeableWithinLimit: Object.fromEntries(
            eventNames.map((name) => [
                name,
                Math.min(a.chargeableWithinLimit[name] ?? Infinity, b.chargeableWithinLimit[name] ?? Infinity),
            ]),
        ),
    };
}
|
|
3
14
|
/**
|
|
4
15
|
* Handles pay-per-event charging.
|
|
5
16
|
*/
|
|
6
17
|
export class ChargingManager {
|
|
18
|
+
configuration;
|
|
7
19
|
LOCAL_CHARGING_LOG_DATASET_NAME = 'charging_log';
|
|
8
20
|
PLATFORM_CHARGING_LOG_DATASET_ID_KEY = 'CHARGING_LOG_DATASET_ID';
|
|
9
21
|
maxTotalChargeUsd;
|
|
@@ -18,32 +30,25 @@ export class ChargingManager {
|
|
|
18
30
|
chargingLogDataset;
|
|
19
31
|
apifyClient;
|
|
20
32
|
constructor(configuration, apifyClient) {
|
|
21
|
-
this.
|
|
22
|
-
|
|
23
|
-
this.isAtHome = configuration.
|
|
24
|
-
this.actorRunId = configuration.
|
|
25
|
-
this.purgeChargingLogDataset = configuration.
|
|
26
|
-
this.useChargingLogDataset = configuration.
|
|
27
|
-
if (this.useChargingLogDataset && this.isAtHome) {
|
|
28
|
-
throw new Error('Using the ACTOR_USE_CHARGING_LOG_DATASET environment variable is only supported in a local development environment');
|
|
29
|
-
}
|
|
30
|
-
if (configuration.get('testPayPerEvent')) {
|
|
31
|
-
if (this.isAtHome) {
|
|
32
|
-
throw new Error('Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported in a local development environment');
|
|
33
|
-
}
|
|
34
|
-
this.pricingModel = 'PAY_PER_EVENT';
|
|
35
|
-
}
|
|
33
|
+
this.configuration = configuration;
|
|
34
|
+
this.maxTotalChargeUsd = configuration.maxTotalChargeUsd || Infinity; // convert `0` to `Infinity` in case the value is an empty string
|
|
35
|
+
this.isAtHome = configuration.isAtHome;
|
|
36
|
+
this.actorRunId = configuration.actorRunId;
|
|
37
|
+
this.purgeChargingLogDataset = configuration.purgeOnStart;
|
|
38
|
+
this.useChargingLogDataset = configuration.useChargingLogDataset;
|
|
36
39
|
this.apifyClient = apifyClient;
|
|
37
40
|
}
|
|
38
41
|
get isPayPerEvent() {
|
|
39
42
|
return this.pricingModel === 'PAY_PER_EVENT';
|
|
40
43
|
}
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
44
|
+
async fetchPricingInfo() {
|
|
45
|
+
if (this.configuration.actorPricingInfo && this.configuration.chargedEventCounts) {
|
|
46
|
+
return {
|
|
47
|
+
pricingInfo: JSON.parse(this.configuration.actorPricingInfo),
|
|
48
|
+
chargedEventCounts: JSON.parse(this.configuration.chargedEventCounts),
|
|
49
|
+
maxTotalChargeUsd: this.configuration.maxTotalChargeUsd || Infinity,
|
|
50
|
+
};
|
|
51
|
+
}
|
|
47
52
|
if (this.isAtHome) {
|
|
48
53
|
if (this.actorRunId === undefined) {
|
|
49
54
|
throw new Error('Actor run ID not found even though the Actor is running on Apify');
|
|
@@ -52,25 +57,55 @@ export class ChargingManager {
|
|
|
52
57
|
if (run === undefined) {
|
|
53
58
|
throw new Error('Actor run not found');
|
|
54
59
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
60
|
+
return {
|
|
61
|
+
pricingInfo: run.pricingInfo,
|
|
62
|
+
chargedEventCounts: run.chargedEventCounts,
|
|
63
|
+
maxTotalChargeUsd: run.options.maxTotalChargeUsd || Infinity,
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
return {
|
|
67
|
+
pricingInfo: undefined,
|
|
68
|
+
chargedEventCounts: {},
|
|
69
|
+
maxTotalChargeUsd: this.configuration.maxTotalChargeUsd || Infinity,
|
|
70
|
+
};
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Initialize the ChargingManager by loading pricing information and charging state via Apify API.
|
|
74
|
+
*/
|
|
75
|
+
async init() {
|
|
76
|
+
// Validate config - it may have changed since the instantiation
|
|
77
|
+
if (this.useChargingLogDataset && this.isAtHome) {
|
|
78
|
+
throw new Error('Using the ACTOR_USE_CHARGING_LOG_DATASET environment variable is only supported in a local development environment');
|
|
79
|
+
}
|
|
80
|
+
if (this.configuration.testPayPerEvent) {
|
|
81
|
+
if (this.isAtHome) {
|
|
82
|
+
throw new Error('Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported in a local development environment');
|
|
66
83
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
84
|
+
}
|
|
85
|
+
// Retrieve pricing information
|
|
86
|
+
const { pricingInfo, chargedEventCounts, maxTotalChargeUsd } = await this.fetchPricingInfo();
|
|
87
|
+
if (this.configuration.testPayPerEvent) {
|
|
88
|
+
this.pricingModel = 'PAY_PER_EVENT';
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
this.pricingModel ??= pricingInfo?.pricingModel;
|
|
92
|
+
}
|
|
93
|
+
// Load per-event pricing information
|
|
94
|
+
if (pricingInfo?.pricingModel === 'PAY_PER_EVENT') {
|
|
95
|
+
for (const [eventName, eventPricing] of Object.entries(pricingInfo.pricingPerEvent.actorChargeEvents)) {
|
|
96
|
+
this.pricingInfo[eventName] = {
|
|
97
|
+
price: eventPricing.eventPriceUsd,
|
|
98
|
+
title: eventPricing.eventTitle,
|
|
72
99
|
};
|
|
73
100
|
}
|
|
101
|
+
this.maxTotalChargeUsd = maxTotalChargeUsd;
|
|
102
|
+
}
|
|
103
|
+
this.chargingState = {};
|
|
104
|
+
for (const [eventName, chargeCount] of Object.entries(chargedEventCounts ?? {})) {
|
|
105
|
+
this.chargingState[eventName] = {
|
|
106
|
+
chargeCount,
|
|
107
|
+
totalChargedAmount: chargeCount * (this.pricingInfo[eventName]?.price ?? 0),
|
|
108
|
+
};
|
|
74
109
|
}
|
|
75
110
|
if (!this.isPayPerEvent || !this.useChargingLogDataset) {
|
|
76
111
|
return;
|
|
@@ -118,13 +153,18 @@ export class ChargingManager {
|
|
|
118
153
|
* This method attempts to charge for the specified number of events, but may charge fewer
|
|
119
154
|
* if doing so would exceed the total budget limit (`maxTotalChargeUsd`).
|
|
120
155
|
*
|
|
156
|
+
* **Important:** When using the `count` parameter to charge for multiple events at once,
|
|
157
|
+
* be aware that the charge may be partially fulfilled, i.e. `chargedCount` can be less
|
|
158
|
+
* than the requested `count`. Always check the returned `chargedCount` to know how many
|
|
159
|
+
* events were actually charged, and only perform that much work. If your work is
|
|
160
|
+
* meaningfully divisible into individual units, prefer calling `charge()` once per unit
|
|
161
|
+
* rather than batching via `count` — this gives finer control over budget consumption
|
|
162
|
+
* and avoids situations where more work is requested than the budget allows.
|
|
163
|
+
*
|
|
121
164
|
* @param options The name of the event to charge for and the number of events to be charged.
|
|
122
165
|
*/
|
|
123
|
-
async charge({ eventName, count = 1
|
|
124
|
-
const calculateChargeableWithinLimit = () => Object.fromEntries(Object.keys(this.pricingInfo).map((name) => [
|
|
125
|
-
name,
|
|
126
|
-
this.calculateMaxEventChargeCountWithinLimit(name),
|
|
127
|
-
]));
|
|
166
|
+
async charge({ eventName, count = 1 }) {
|
|
167
|
+
const calculateChargeableWithinLimit = () => Object.fromEntries(Object.keys(this.pricingInfo).map((name) => [name, this.calculateMaxEventChargeCountWithinLimit(name)]));
|
|
128
168
|
if (!this.isPayPerEvent) {
|
|
129
169
|
if (!this.notPpeWarningPrinted) {
|
|
130
170
|
log.warning('Ignored attempt to charge for an event - the Actor does not use the pay-per-event pricing');
|
|
@@ -140,7 +180,19 @@ export class ChargingManager {
|
|
|
140
180
|
throw new Error('ChargingManager is not initialized');
|
|
141
181
|
}
|
|
142
182
|
/* START OF CRITICAL SECTION - no awaits here */
|
|
143
|
-
const
|
|
183
|
+
const maxEventChargeCount = this.calculateMaxEventChargeCountWithinLimit(eventName);
|
|
184
|
+
const chargedCount = (() => {
|
|
185
|
+
if (count <= maxEventChargeCount) {
|
|
186
|
+
return count;
|
|
187
|
+
}
|
|
188
|
+
// If the caller tries to charge more than the budget allows, overcharge by one event
|
|
189
|
+
// so that the Actor is detected by the platform and terminated.
|
|
190
|
+
// But don't do this if already strictly over the budget - no point piling on charges.
|
|
191
|
+
if (this.calculateTotalChargedAmount() <= this.maxTotalChargeUsd) {
|
|
192
|
+
return maxEventChargeCount + 1;
|
|
193
|
+
}
|
|
194
|
+
return 0;
|
|
195
|
+
})();
|
|
144
196
|
if (chargedCount === 0) {
|
|
145
197
|
return {
|
|
146
198
|
eventChargeLimitReached: count > 0, // Only true if user wanted to charge but couldn't
|
|
@@ -157,14 +209,15 @@ export class ChargingManager {
|
|
|
157
209
|
totalChargedAmount: 0,
|
|
158
210
|
};
|
|
159
211
|
this.chargingState[eventName].chargeCount += chargedCount;
|
|
160
|
-
this.chargingState[eventName].totalChargedAmount +=
|
|
161
|
-
chargedCount * pricingInfo.price;
|
|
212
|
+
this.chargingState[eventName].totalChargedAmount += chargedCount * pricingInfo.price;
|
|
162
213
|
/* END OF CRITICAL SECTION */
|
|
163
214
|
if (this.isAtHome) {
|
|
164
|
-
if (
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
215
|
+
if (eventName.startsWith('apify-')) {
|
|
216
|
+
// Synthetic events (e.g. apify-default-dataset-item) are tracked locally only,
|
|
217
|
+
// the platform handles them automatically based on dataset writes.
|
|
218
|
+
}
|
|
219
|
+
else if (this.pricingInfo[eventName] !== undefined) {
|
|
220
|
+
await this.apifyClient.run(this.actorRunId).charge({ eventName, count: chargedCount });
|
|
168
221
|
}
|
|
169
222
|
else {
|
|
170
223
|
log.warning(`Attempting to charge for an unknown event '${eventName}'`);
|
|
@@ -226,14 +279,103 @@ export class ChargingManager {
|
|
|
226
279
|
if (this.chargingState === undefined) {
|
|
227
280
|
throw new Error('ChargingManager is not initialized');
|
|
228
281
|
}
|
|
229
|
-
const price = this.
|
|
282
|
+
const price = this.calculateEventPrice(eventName);
|
|
230
283
|
if (!price) {
|
|
231
284
|
return Infinity;
|
|
232
285
|
}
|
|
286
|
+
return this.calculateMaxChargesByPrice(price);
|
|
287
|
+
}
|
|
288
|
+
calculateEventPrice(eventName) {
|
|
289
|
+
return this.isAtHome ? this.pricingInfo[eventName]?.price : 1; // Use a nonzero price for local development so that the maximum budget can be reached
|
|
290
|
+
}
|
|
291
|
+
calculateMaxChargesByPrice(price) {
|
|
292
|
+
// The raw number of events allowed by the budget
|
|
293
|
+
const unroundedResult = (this.maxTotalChargeUsd - this.calculateTotalChargedAmount()) / price;
|
|
233
294
|
// First round as Math.floor(4.9999999999999999) will incorrectly return 5
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
295
|
+
const roundedResult = Math.floor(Number(unroundedResult.toFixed(4)));
|
|
296
|
+
return Math.max(0, roundedResult);
|
|
297
|
+
}
|
|
298
|
+
/**
|
|
299
|
+
* Helper to calculate how many items can be pushed within charging limits.
|
|
300
|
+
* Returns the limited items and count to charge.
|
|
301
|
+
*/
|
|
302
|
+
calculatePushDataLimits({ items, eventName, isDefaultDataset, }) {
|
|
303
|
+
if (this.chargingState === undefined) {
|
|
304
|
+
throw new Error('ChargingManager is not initialized');
|
|
305
|
+
}
|
|
306
|
+
const itemsArray = Array.isArray(items) ? items : [items];
|
|
307
|
+
if (!this.isPayPerEvent) {
|
|
308
|
+
return {
|
|
309
|
+
limitedItems: itemsArray,
|
|
310
|
+
eventsToCharge: {},
|
|
311
|
+
};
|
|
312
|
+
}
|
|
313
|
+
const itemPrice = ((eventName !== undefined ? this.calculateEventPrice(eventName) : undefined) ?? 0) +
|
|
314
|
+
((isDefaultDataset ? this.calculateEventPrice(DEFAULT_DATASET_ITEM_EVENT) : undefined) ?? 0);
|
|
315
|
+
const maxChargedCount = itemPrice > 0 ? this.calculateMaxChargesByPrice(itemPrice) : Infinity;
|
|
316
|
+
const itemsToKeep = (() => {
|
|
317
|
+
if (maxChargedCount >= itemsArray.length) {
|
|
318
|
+
return itemsArray.length;
|
|
319
|
+
}
|
|
320
|
+
// If the caller tries to push items even though the limit is depleted, overcharge by one
|
|
321
|
+
// so that the Platform terminates the run.
|
|
322
|
+
// But don't do this if already strictly over the budget - no point piling on charges.
|
|
323
|
+
if (itemsArray.length > 0 &&
|
|
324
|
+
maxChargedCount === 0 &&
|
|
325
|
+
this.calculateTotalChargedAmount() <= this.maxTotalChargeUsd) {
|
|
326
|
+
return 1;
|
|
327
|
+
}
|
|
328
|
+
return maxChargedCount;
|
|
329
|
+
})();
|
|
330
|
+
const eventsToCharge = {};
|
|
331
|
+
if (eventName !== undefined && itemsToKeep > 0) {
|
|
332
|
+
eventsToCharge[eventName] = itemsToKeep;
|
|
333
|
+
}
|
|
334
|
+
if (isDefaultDataset && itemsToKeep > 0) {
|
|
335
|
+
eventsToCharge[DEFAULT_DATASET_ITEM_EVENT] = itemsToKeep;
|
|
336
|
+
}
|
|
337
|
+
return {
|
|
338
|
+
limitedItems: itemsToKeep >= itemsArray.length ? itemsArray : itemsArray.slice(0, itemsToKeep),
|
|
339
|
+
eventsToCharge,
|
|
340
|
+
};
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
/**
|
|
344
|
+
* Helper for PPE-aware pushing of data to the dataset.
|
|
345
|
+
*
|
|
346
|
+
* 1. Calculate limits based on budget
|
|
347
|
+
* 2. Push limited items via the provided callback
|
|
348
|
+
* 3. Charge for the events
|
|
349
|
+
*
|
|
350
|
+
* @internal
|
|
351
|
+
*/
|
|
352
|
+
export async function pushDataAndCharge({ chargingManager, items, eventName, isDefaultDataset, pushFn }) {
    const limits = chargingManager.calculatePushDataLimits({ items, eventName, isDefaultDataset });
    const keptItems = limits.limitedItems;
    const calledWithArray = Array.isArray(items);

    if (keptItems.length !== 0) {
        // A single (non-array) item is handed to the callback as a single item again.
        await pushFn(calledWithArray ? keptItems : keptItems[0]);
    }

    const pendingCharges = Object.entries(limits.eventsToCharge);
    if (pendingCharges.length === 0) {
        // Nothing to charge. Report the limit as reached only when items were
        // requested and every one of them was trimmed away.
        const requestedCount = calledWithArray ? items.length : 1;
        return {
            eventChargeLimitReached: requestedCount > 0 && keptItems.length === 0,
            chargedCount: 0,
            chargeableWithinLimit: {},
        };
    }

    // Charge all events concurrently, collecting each result under its event name.
    const resultsByEvent = {};
    const chargeOne = async ([name, count]) => {
        resultsByEvent[name] = await chargingManager.charge({ eventName: name, count });
    };
    await Promise.all(pendingCharges.map(chargeOne));
    // Fold the per-event results into one, so that `eventChargeLimitReached`
    // is set when ANY of the charged events hit its limit.
    return Object.values(resultsByEvent).reduce(mergeChargeResults);
}
|
|
239
|
-
//# sourceMappingURL=charging.js.map
|