apify 4.0.0-beta.15 → 4.0.0-beta.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/apify_storage_client.d.ts +12 -0
- package/dist/apify_storage_client.js +55 -7
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/package.json +1 -1
|
@@ -31,6 +31,18 @@ export declare const pushDataChargingContext: AsyncLocalStorage<PpeAwarePushData
|
|
|
31
31
|
* first (when one exists on the platform) and fall back to a name otherwise —
|
|
32
32
|
* otherwise crawlee's `resolveStorageIdentifier` treats every string as a name
|
|
33
33
|
* and the SDK would silently create a new storage named like the passed id.
|
|
34
|
+
*
|
|
35
|
+
* `Actor` wires this up automatically; construct it directly only to use Apify
|
|
36
|
+
* platform storage with crawlee's storage classes outside of `Actor` — e.g. to
|
|
37
|
+
* read another run's output with an explicit token:
|
|
38
|
+
*
|
|
39
|
+
* ```ts
|
|
40
|
+
* import { ApifyClient, ApifyStorageClient, Dataset } from 'apify';
|
|
41
|
+
*
|
|
42
|
+
* const client = new ApifyClient({ token });
|
|
43
|
+
* const dataset = await Dataset.open(datasetId, { storageClient: new ApifyStorageClient(client) });
|
|
44
|
+
* const { items } = await dataset.getData();
|
|
45
|
+
* ```
|
|
34
46
|
*/
|
|
35
47
|
export declare class ApifyStorageClient implements StorageClient {
|
|
36
48
|
private readonly client;
|
|
@@ -53,6 +53,27 @@ class PpeAwareDatasetClient extends ApifyDatasetClient {
|
|
|
53
53
|
context.chargeResult === undefined ? result : mergeChargeResults(context.chargeResult, result);
|
|
54
54
|
}
|
|
55
55
|
}
|
|
56
|
+
// crawlee v4's `StorageClient` sub-client interfaces use different method names
|
|
57
|
+
// than `apify-client`'s resource clients (`getValue`/`getRecord`,
|
|
58
|
+
// `pushData`/`pushItems`, `getData`/`listItems`, `getMetadata`/`get`,
|
|
59
|
+
// `drop`/`delete`). `adapt` wraps a client in a name-remapping proxy: `renames`
|
|
60
|
+
// aliases the differing methods and `overrides` replaces the few whose return
|
|
61
|
+
// shape differs; everything else — identically-named methods and the
|
|
62
|
+
// pay-per-event marker symbol — passes straight through.
|
|
63
|
+
//
|
|
64
|
+
// `purge()` has no apify-client equivalent and isn't needed on the platform
|
|
65
|
+
// (a run's storages are already fresh), so it's a no-op.
|
|
66
|
+
const noPurge = { purge: async () => { } };
|
|
67
|
+
function adapt(client, renames, overrides = {}) {
|
|
68
|
+
return new Proxy(client, {
|
|
69
|
+
get(target, prop) {
|
|
70
|
+
if (typeof prop === 'string' && prop in overrides)
|
|
71
|
+
return overrides[prop];
|
|
72
|
+
const value = Reflect.get(target, (typeof prop === 'string' && renames[prop]) || prop, target);
|
|
73
|
+
return typeof value === 'function' ? value.bind(target) : value;
|
|
74
|
+
},
|
|
75
|
+
});
|
|
76
|
+
}
|
|
56
77
|
/**
|
|
57
78
|
* Bridges `apify-client`'s synchronous resource accessors (`dataset(id)`,
|
|
58
79
|
* `keyValueStore(id)`, `requestQueue(id, options?)`) to crawlee v4's
|
|
@@ -67,6 +88,18 @@ class PpeAwareDatasetClient extends ApifyDatasetClient {
|
|
|
67
88
|
* first (when one exists on the platform) and fall back to a name otherwise —
|
|
68
89
|
* otherwise crawlee's `resolveStorageIdentifier` treats every string as a name
|
|
69
90
|
* and the SDK would silently create a new storage named like the passed id.
|
|
91
|
+
*
|
|
92
|
+
* `Actor` wires this up automatically; construct it directly only to use Apify
|
|
93
|
+
* platform storage with crawlee's storage classes outside of `Actor` — e.g. to
|
|
94
|
+
* read another run's output with an explicit token:
|
|
95
|
+
*
|
|
96
|
+
* ```ts
|
|
97
|
+
* import { ApifyClient, ApifyStorageClient, Dataset } from 'apify';
|
|
98
|
+
*
|
|
99
|
+
* const client = new ApifyClient({ token });
|
|
100
|
+
* const dataset = await Dataset.open(datasetId, { storageClient: new ApifyStorageClient(client) });
|
|
101
|
+
* const { items } = await dataset.getData();
|
|
102
|
+
* ```
|
|
70
103
|
*/
|
|
71
104
|
export class ApifyStorageClient {
|
|
72
105
|
client;
|
|
@@ -85,19 +118,34 @@ export class ApifyStorageClient {
|
|
|
85
118
|
}
|
|
86
119
|
async createDatasetClient(options) {
|
|
87
120
|
const id = await this.resolveId(options, 'Dataset');
|
|
88
|
-
const
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
121
|
+
const client = this.chargingDatasetClient(id) ?? this.client.dataset(id);
|
|
122
|
+
return adapt(client, {
|
|
123
|
+
getMetadata: 'get',
|
|
124
|
+
drop: 'delete',
|
|
125
|
+
pushData: 'pushItems',
|
|
126
|
+
getData: 'listItems',
|
|
127
|
+
}, noPurge);
|
|
93
128
|
}
|
|
94
129
|
async createKeyValueStoreClient(options) {
|
|
95
130
|
const id = await this.resolveId(options, 'KeyValueStore');
|
|
96
|
-
|
|
131
|
+
const client = this.client.keyValueStore(id);
|
|
132
|
+
return adapt(client, {
|
|
133
|
+
getMetadata: 'get',
|
|
134
|
+
getValue: 'getRecord',
|
|
135
|
+
setValue: 'setRecord',
|
|
136
|
+
deleteValue: 'deleteRecord',
|
|
137
|
+
drop: 'delete',
|
|
138
|
+
getPublicUrl: 'getRecordPublicUrl',
|
|
139
|
+
}, {
|
|
140
|
+
...noPurge,
|
|
141
|
+
// crawlee expects an array; apify-client returns `{ items }`.
|
|
142
|
+
listKeys: async (opts) => (await client.listKeys(opts)).items,
|
|
143
|
+
});
|
|
97
144
|
}
|
|
98
145
|
async createRequestQueueClient(options) {
|
|
99
146
|
const id = await this.resolveId(options, 'RequestQueue');
|
|
100
|
-
|
|
147
|
+
const client = this.client.requestQueue(id, options?.clientKey ? { clientKey: options.clientKey } : undefined);
|
|
148
|
+
return adapt(client, { getMetadata: 'get', drop: 'delete' }, noPurge);
|
|
101
149
|
}
|
|
102
150
|
/**
|
|
103
151
|
* Returns a charging-aware dataset client when `id` is the run's default
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export * from './actor.js';
|
|
2
|
+
export { ApifyStorageClient } from './apify_storage_client.js';
|
|
2
3
|
export type { OpenStorageOptions, StorageAlias, StorageId, StorageName, StorageIdentifier, StorageIdentifierWithoutAlias, } from './storage.js';
|
|
3
4
|
export { ChargeOptions, ChargeResult, ActorPricingInfo, ChargingManager } from './charging.js';
|
|
4
5
|
export * from './configuration.js';
|
package/dist/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "apify",
|
|
3
|
-
"version": "4.0.0-beta.15",
|
|
3
|
+
"version": "4.0.0-beta.17",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|