apify-cli 0.18.2-beta.9 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -18
- package/oclif.manifest.json +1 -1
- package/package.json +114 -111
- package/src/commands/create.js +2 -10
- package/src/commands/init-wrap-scrapy.js +34 -0
- package/src/commands/init.js +27 -3
- package/src/commands/run.js +16 -4
- package/src/lib/consts.js +5 -0
- package/src/lib/create-utils.js +2 -22
- package/src/lib/project_analyzer.js +24 -0
- package/src/lib/scrapy-wrapper/ScrapyProjectAnalyzer.js +90 -0
- package/src/lib/scrapy-wrapper/Spider.js +10 -0
- package/src/lib/scrapy-wrapper/SpiderFileAnalyzer.js +26 -0
- package/src/lib/scrapy-wrapper/index.js +139 -0
- package/src/lib/utils.js +37 -2
package/README.md
CHANGED
|
@@ -299,7 +299,7 @@ USAGE
|
|
|
299
299
|
$ apify actor
|
|
300
300
|
```
|
|
301
301
|
|
|
302
|
-
_See code: [src/commands/actor/index.js](https://github.com/apify/apify-cli/blob/v0.
|
|
302
|
+
_See code: [src/commands/actor/index.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/actor/index.js)_
|
|
303
303
|
|
|
304
304
|
## `apify actor:get-input`
|
|
305
305
|
|
|
@@ -310,7 +310,7 @@ USAGE
|
|
|
310
310
|
$ apify actor:get-input
|
|
311
311
|
```
|
|
312
312
|
|
|
313
|
-
_See code: [src/commands/actor/get-input.js](https://github.com/apify/apify-cli/blob/v0.
|
|
313
|
+
_See code: [src/commands/actor/get-input.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/actor/get-input.js)_
|
|
314
314
|
|
|
315
315
|
## `apify actor:get-value KEY`
|
|
316
316
|
|
|
@@ -324,7 +324,7 @@ ARGUMENTS
|
|
|
324
324
|
KEY Key of the record in key-value store
|
|
325
325
|
```
|
|
326
326
|
|
|
327
|
-
_See code: [src/commands/actor/get-value.js](https://github.com/apify/apify-cli/blob/v0.
|
|
327
|
+
_See code: [src/commands/actor/get-value.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/actor/get-value.js)_
|
|
328
328
|
|
|
329
329
|
## `apify actor:push-data [ITEM]`
|
|
330
330
|
|
|
@@ -345,7 +345,7 @@ DESCRIPTION
|
|
|
345
345
|
$ cat ./test.json | apify actor:push-data
|
|
346
346
|
```
|
|
347
347
|
|
|
348
|
-
_See code: [src/commands/actor/push-data.js](https://github.com/apify/apify-cli/blob/v0.
|
|
348
|
+
_See code: [src/commands/actor/push-data.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/actor/push-data.js)_
|
|
349
349
|
|
|
350
350
|
## `apify actor:set-value KEY [VALUE]`
|
|
351
351
|
|
|
@@ -375,7 +375,7 @@ DESCRIPTION
|
|
|
375
375
|
$ cat ./my-text-file.txt | apify actor:set-value KEY --contentType text/plain
|
|
376
376
|
```
|
|
377
377
|
|
|
378
|
-
_See code: [src/commands/actor/set-value.js](https://github.com/apify/apify-cli/blob/v0.
|
|
378
|
+
_See code: [src/commands/actor/set-value.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/actor/set-value.js)_
|
|
379
379
|
|
|
380
380
|
## `apify call [ACTID]`
|
|
381
381
|
|
|
@@ -400,7 +400,7 @@ DESCRIPTION
|
|
|
400
400
|
takes input for the Actor from the default local key-value store by default.
|
|
401
401
|
```
|
|
402
402
|
|
|
403
|
-
_See code: [src/commands/call.js](https://github.com/apify/apify-cli/blob/v0.
|
|
403
|
+
_See code: [src/commands/call.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/call.js)_
|
|
404
404
|
|
|
405
405
|
## `apify create [ACTORNAME]`
|
|
406
406
|
|
|
@@ -422,7 +422,7 @@ OPTIONS
|
|
|
422
422
|
--skip-dependency-install Skip installing actor dependencies.
|
|
423
423
|
```
|
|
424
424
|
|
|
425
|
-
_See code: [src/commands/create.js](https://github.com/apify/apify-cli/blob/v0.
|
|
425
|
+
_See code: [src/commands/create.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/create.js)_
|
|
426
426
|
|
|
427
427
|
## `apify info`
|
|
428
428
|
|
|
@@ -436,7 +436,7 @@ DESCRIPTION
|
|
|
436
436
|
The information is printed to the console.
|
|
437
437
|
```
|
|
438
438
|
|
|
439
|
-
_See code: [src/commands/info.js](https://github.com/apify/apify-cli/blob/v0.
|
|
439
|
+
_See code: [src/commands/info.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/info.js)_
|
|
440
440
|
|
|
441
441
|
## `apify init [ACTORNAME]`
|
|
442
442
|
|
|
@@ -449,6 +449,10 @@ USAGE
|
|
|
449
449
|
ARGUMENTS
|
|
450
450
|
ACTORNAME Name of the actor. If not provided, you will be prompted for it.
|
|
451
451
|
|
|
452
|
+
OPTIONS
|
|
453
|
+
-y, --yes Automatic yes to prompts; assume "yes" as answer to all prompts. Note that in some cases, the command may
|
|
454
|
+
still ask for confirmation.
|
|
455
|
+
|
|
452
456
|
DESCRIPTION
|
|
453
457
|
The command only creates the ".actor/actor.json" file and the "storage" directory in the current directory, but will
|
|
454
458
|
not touch anything else.
|
|
@@ -456,7 +460,7 @@ DESCRIPTION
|
|
|
456
460
|
WARNING: The directory at "storage" will be overwritten if it already exists.
|
|
457
461
|
```
|
|
458
462
|
|
|
459
|
-
_See code: [src/commands/init.js](https://github.com/apify/apify-cli/blob/v0.
|
|
463
|
+
_See code: [src/commands/init.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/init.js)_
|
|
460
464
|
|
|
461
465
|
## `apify login`
|
|
462
466
|
|
|
@@ -474,7 +478,7 @@ DESCRIPTION
|
|
|
474
478
|
"apify" commands. To log out, call "apify logout".
|
|
475
479
|
```
|
|
476
480
|
|
|
477
|
-
_See code: [src/commands/login.js](https://github.com/apify/apify-cli/blob/v0.
|
|
481
|
+
_See code: [src/commands/login.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/login.js)_
|
|
478
482
|
|
|
479
483
|
## `apify logout`
|
|
480
484
|
|
|
@@ -489,7 +493,7 @@ DESCRIPTION
|
|
|
489
493
|
call "apify login".
|
|
490
494
|
```
|
|
491
495
|
|
|
492
|
-
_See code: [src/commands/logout.js](https://github.com/apify/apify-cli/blob/v0.
|
|
496
|
+
_See code: [src/commands/logout.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/logout.js)_
|
|
493
497
|
|
|
494
498
|
## `apify pull [ACTORID]`
|
|
495
499
|
|
|
@@ -507,7 +511,7 @@ OPTIONS
|
|
|
507
511
|
-v, --version=version Actor version number which will be pulled, e.g. 1.2. Default: the highest version
|
|
508
512
|
```
|
|
509
513
|
|
|
510
|
-
_See code: [src/commands/pull.js](https://github.com/apify/apify-cli/blob/v0.
|
|
514
|
+
_See code: [src/commands/pull.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/pull.js)_
|
|
511
515
|
|
|
512
516
|
## `apify push [ACTORID]`
|
|
513
517
|
|
|
@@ -545,7 +549,7 @@ DESCRIPTION
|
|
|
545
549
|
WARNING: If the target Actor already exists in your Apify account, it will be overwritten!
|
|
546
550
|
```
|
|
547
551
|
|
|
548
|
-
_See code: [src/commands/push.js](https://github.com/apify/apify-cli/blob/v0.
|
|
552
|
+
_See code: [src/commands/push.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/push.js)_
|
|
549
553
|
|
|
550
554
|
## `apify run`
|
|
551
555
|
|
|
@@ -575,7 +579,7 @@ DESCRIPTION
|
|
|
575
579
|
package.json file. You can set up your own main file or environment variables by changing it.
|
|
576
580
|
```
|
|
577
581
|
|
|
578
|
-
_See code: [src/commands/run.js](https://github.com/apify/apify-cli/blob/v0.
|
|
582
|
+
_See code: [src/commands/run.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/run.js)_
|
|
579
583
|
|
|
580
584
|
## `apify secrets`
|
|
581
585
|
|
|
@@ -603,7 +607,7 @@ DESCRIPTION
|
|
|
603
607
|
of the actor.
|
|
604
608
|
```
|
|
605
609
|
|
|
606
|
-
_See code: [src/commands/secrets/index.js](https://github.com/apify/apify-cli/blob/v0.
|
|
610
|
+
_See code: [src/commands/secrets/index.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/secrets/index.js)_
|
|
607
611
|
|
|
608
612
|
## `apify secrets:add NAME VALUE`
|
|
609
613
|
|
|
@@ -621,7 +625,7 @@ DESCRIPTION
|
|
|
621
625
|
The secrets are stored to a file at ~/.apify
|
|
622
626
|
```
|
|
623
627
|
|
|
624
|
-
_See code: [src/commands/secrets/add.js](https://github.com/apify/apify-cli/blob/v0.
|
|
628
|
+
_See code: [src/commands/secrets/add.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/secrets/add.js)_
|
|
625
629
|
|
|
626
630
|
## `apify secrets:rm NAME`
|
|
627
631
|
|
|
@@ -635,7 +639,7 @@ ARGUMENTS
|
|
|
635
639
|
NAME Name of the secret
|
|
636
640
|
```
|
|
637
641
|
|
|
638
|
-
_See code: [src/commands/secrets/rm.js](https://github.com/apify/apify-cli/blob/v0.
|
|
642
|
+
_See code: [src/commands/secrets/rm.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/secrets/rm.js)_
|
|
639
643
|
|
|
640
644
|
## `apify vis [PATH]`
|
|
641
645
|
|
|
@@ -659,5 +663,5 @@ DESCRIPTION
|
|
|
659
663
|
You can also pass any custom path to your input schema to have it validated instead.
|
|
660
664
|
```
|
|
661
665
|
|
|
662
|
-
_See code: [src/commands/vis.js](https://github.com/apify/apify-cli/blob/v0.
|
|
666
|
+
_See code: [src/commands/vis.js](https://github.com/apify/apify-cli/blob/v0.19.0/src/commands/vis.js)_
|
|
663
667
|
<!-- commandsstop -->
|
package/oclif.manifest.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":"0.18.2","commands":{"call":{"id":"call","description":"Runs a specific actor remotely on the Apify cloud platform.\nThe Actor is run under your current Apify account. Therefore you need to be logged in by calling \"apify login\". It takes input for the Actor from the default local key-value store by default.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"build":{"name":"build","type":"option","char":"b","description":"Tag or number of the build to run (e.g. \"latest\" or \"1.2.34\").","required":false},"timeout":{"name":"timeout","type":"option","char":"t","description":"Timeout for the actor run in seconds. Zero value means there is no timeout.","required":false},"memory":{"name":"memory","type":"option","char":"m","description":"Amount of memory allocated for the actor run, in megabytes.","required":false},"wait-for-finish":{"name":"wait-for-finish","type":"option","char":"w","description":"Seconds for waiting to run to finish, if no value passed, it waits forever.","required":false}},"args":[{"name":"actId","description":"Name or ID of the actor to run (e.g. \"apify/hello-world\" or \"E2jjCZBezvAZnX8Rb\"). If not provided, the command runs the remote actor specified in the \".actor/actor.json\" file.","required":false}]},"check-version":{"id":"check-version","description":"Checks that installed Apify CLI version is up to date.","pluginName":"apify-cli","pluginType":"core","hidden":true,"aliases":["cv"],"flags":{"enforce-update":{"name":"enforce-update","type":"boolean","char":"e","description":"[Optional] Enforce version update from NPM","required":false,"allowNo":false}},"args":[]},"create":{"id":"create","description":"Creates a new actor project directory from a selected boilerplate template.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"template":{"name":"template","type":"option","char":"t","description":"Template for the actor. 
If not provided, the command will prompt for it.\nVisit https://raw.githubusercontent.com/apify/actor-templates/master/templates/manifest.json to find available template names.","required":false},"skip-dependency-install":{"name":"skip-dependency-install","type":"boolean","description":"Skip installing actor dependencies.","required":false,"allowNo":false},"template-archive-url":{"name":"template-archive-url","type":"option","description":"Actor template archive url. Useful for developing new templates.","hidden":true,"required":false}},"args":[{"name":"actorName","description":"Name of the actor and its directory","required":false}]},"edit-input-schema":{"id":"edit-input-schema","description":"Lets you edit your input schema that would be used on the platform in a visual input schema editor.","pluginName":"apify-cli","pluginType":"core","hidden":true,"aliases":["eis"],"flags":{},"args":[{"name":"path","description":"Optional path to your INPUT_SCHEMA.json file. If not provided default platform location for input schema is used.","required":false}]},"info":{"id":"info","description":"Displays information about the currently active Apify account.\nThe information is printed to the console.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"init":{"id":"init","description":"Initializes a new actor project in an existing directory.\nThe command only creates the \".actor/actor.json\" file and the \"storage\" directory in the current directory, but will not touch anything else.\n\nWARNING: The directory at \"storage\" will be overwritten if it already exists.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"actorName","description":"Name of the actor. 
If not provided, you will be prompted for it.","required":false}]},"login-new":{"id":"login-new","description":"Logs in to your Apify account using your API token.\nThe API token and other account information is stored in the ~/.apify directory, from where it is read by all other \"apify\" commands. To log out, call \"apify logout\".","pluginName":"apify-cli","pluginType":"core","hidden":true,"aliases":[],"flags":{"token":{"name":"token","type":"option","char":"t","description":"[Optional] Apify API token","required":false}},"args":[]},"login":{"id":"login","description":"Logs in to your Apify account using a provided API token.\nThe API token and other account information is stored in the ~/.apify directory, from where it is read by all other \"apify\" commands. To log out, call \"apify logout\".","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"token":{"name":"token","type":"option","char":"t","description":"[Optional] Apify API token","required":false}},"args":[]},"logout":{"id":"logout","description":"Logs out of your Apify account.\nThe command deletes the API token and all other account information stored in the ~/.apify directory. To log in again, call \"apify login\".","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"pull":{"id":"pull","description":"Pulls an Actor from the Apify platform to the current directory. If it is defined as Git repository, it will be cloned. If it is defined as Web IDE, it will fetch the files.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"version":{"name":"version","type":"option","char":"v","description":"Actor version number which will be pulled, e.g. 1.2. Default: the highest version","required":false}},"args":[{"name":"actorId","description":"Name or ID of the actor to run (e.g. \"apify/hello-world\" or \"E2jjCZBezvAZnX8Rb\"). 
If not provided, the command will update the Actor in the current directory based on its name in \".actor/actor.json\" file.","required":false}]},"push":{"id":"push","description":"Uploads the actor to the Apify platform and builds it there.\nThe Actor settings are read from the \".actor/actor.json\" file in the current directory, but they can be overridden using command-line options.\nNOTE: If the source files are smaller than 3 MB then they are uploaded as \n\"Multiple source files\", otherwise they are uploaded as \"Zip file\".\n\nWARNING: If the target Actor already exists in your Apify account, it will be overwritten!","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"version-number":{"name":"version-number","type":"option","description":"DEPRECATED: Use flag version instead. Actor version number to which the files should be pushed. By default, it is taken from the \".actor/actor.json\" file.","required":false},"version":{"name":"version","type":"option","char":"v","description":"Actor version number to which the files should be pushed. By default, it is taken from the \".actor/actor.json\" file.","required":false},"build-tag":{"name":"build-tag","type":"option","char":"b","description":"Build tag to be applied to the successful Actor build. By default, it is taken from the \".actor/actor.json\" file","required":false},"wait-for-finish":{"name":"wait-for-finish","type":"option","char":"w","description":"Seconds for waiting to build to finish, if no value passed, it waits forever.","required":false},"no-prompt":{"name":"no-prompt","type":"boolean","description":"Do not prompt for opening the actor details in a browser. This will also not open the browser automatically.","required":false,"allowNo":false}},"args":[{"name":"actorId","description":"Name or ID of the Actor to push (e.g. \"apify/hello-world\" or \"E2jjCZBezvAZnX8Rb\"). 
If not provided, the command will create or modify the actor with the name specified in \".actor/actor.json\" file.","required":false}]},"run":{"id":"run","description":"Runs the actor locally in the current directory.\nIt sets various APIFY_XYZ environment variables in order to provide a working execution environment for the actor. For example, this causes the actor input, as well as all other data in key-value stores, datasets or request queues to be stored in the \"storage\" directory, rather than on the Apify platform.\n\nNOTE: You can override the command's default behavior for Node.js actors by overriding the \"start\" script in the package.json file. You can set up your own main file or environment variables by changing it.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"purge":{"name":"purge","type":"boolean","char":"p","description":"Shortcut that combines the --purge-queue, --purge-dataset and --purge-key-value-store options.","required":false,"allowNo":false},"purge-queue":{"name":"purge-queue","type":"boolean","description":"Deletes the local directory containing the default request queue before the run starts.","required":false,"allowNo":false},"purge-dataset":{"name":"purge-dataset","type":"boolean","description":"Deletes the local directory containing the default dataset before the run starts.","required":false,"allowNo":false},"purge-key-value-store":{"name":"purge-key-value-store","type":"boolean","description":"Deletes all records from the default key-value store in the local directory before the run starts, except for the \"INPUT\" key.","required":false,"allowNo":false}},"args":[]},"vis":{"id":"vis","description":"Validates input schema and prints errors found.\nThe input schema for the actor is used from these locations in order of preference.\nThe first one found is validated as it would be the one used on the Apify platform.\n1. Directly embedded object in \".actor/actor.json\" under 'input' key\n2. 
Path to JSON file referenced in \".actor/actor.json\" under 'input' key\n3. JSON file at .actor/INPUT_SCHEMA.json\n4. JSON file at INPUT_SCHEMA.json\n\nYou can also pass any custom path to your input schema to have it validated instead.\n","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"path","description":"Optional path to your INPUT_SCHEMA.json file. If not provided ./INPUT_SCHEMA.json is used.","required":false}]},"actor:get-input":{"id":"actor:get-input","description":"Gets the actor input value from the default key-value store associated with the actor run.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"actor:get-value":{"id":"actor:get-value","description":"Gets a value from the default key-value store associated with the actor run.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"key","description":"Key of the record in key-value store","required":true}]},"actor":{"id":"actor","description":"Commands are designed to be used in actor runs. 
All commands are in PoC state, do not use in production environments.\n","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"actor:push-data":{"id":"actor:push-data","description":"Stores an object or an array of objects to the default dataset of the actor run.\nIt is possible to pass data using item argument or stdin.\nPassing data using argument:\n$ apify actor:push-data {\"foo\": \"bar\"}\nPassing data using stdin with pipe:\n$ cat ./test.json | apify actor:push-data\n","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"item","description":"JSON string with one object or array of objects containing data to be stored in the default dataset.","required":false}]},"actor:set-value":{"id":"actor:set-value","description":"Sets or removes record into the default KeyValueStore associated with the actor run.\nIt is possible to pass data using argument or stdin.\nPassing data using argument:\n$ apify actor:set-value KEY my-value\nPassing data using stdin with pipe:\n$ cat ./my-text-file.txt | apify actor:set-value KEY --contentType text/plain\n","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"contentType":{"name":"contentType","type":"option","char":"c","description":"Specifies a custom MIME content type of the record. 
By default \"application/json\" is used.","required":false}},"args":[{"name":"key","description":"Key of the record in key-value store.","required":true},{"name":"value","description":"Record data, which can be one of the following values:\n- If empty, the record in the key-value store is deleted.\n- If no `contentType` flag is specified, value is expected to be any JSON string value.\n- If options.contentType is set, value is taken as is.","required":false}]},"secrets:add":{"id":"secrets:add","description":"Adds a new secret value.\nThe secrets are stored to a file at ~/.apify","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"name","description":"Name of the secret","required":true},{"name":"value","description":"Value of the secret","required":true}]},"secrets":{"id":"secrets","description":"Manages secret values for actor environment variables.\n\nExample:\n$ apify secrets:add mySecret TopSecretValue123\n\nNow the \"mySecret\" value can be used in an environment variable defined in \".actor/actor.json\" file by adding the \"@\" prefix:\n\n{\n \"actorSpecification\": 1,\n \"name\": \"my_actor\",\n \"environmentVariables\": { \"SECRET_ENV_VAR\": \"@mySecret\" },\n \"version\": \"0.1\n}\n\nWhen the actor is pushed to Apify cloud, the \"SECRET_ENV_VAR\" and its value is stored as a secret environment variable of the actor.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"secrets:rm":{"id":"secrets:rm","description":"Removes the secret.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"name","description":"Name of the secret","required":true}]}}}
|
|
1
|
+
{"version":"0.19.0","commands":{"call":{"id":"call","description":"Runs a specific actor remotely on the Apify cloud platform.\nThe Actor is run under your current Apify account. Therefore you need to be logged in by calling \"apify login\". It takes input for the Actor from the default local key-value store by default.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"build":{"name":"build","type":"option","char":"b","description":"Tag or number of the build to run (e.g. \"latest\" or \"1.2.34\").","required":false},"timeout":{"name":"timeout","type":"option","char":"t","description":"Timeout for the actor run in seconds. Zero value means there is no timeout.","required":false},"memory":{"name":"memory","type":"option","char":"m","description":"Amount of memory allocated for the actor run, in megabytes.","required":false},"wait-for-finish":{"name":"wait-for-finish","type":"option","char":"w","description":"Seconds for waiting to run to finish, if no value passed, it waits forever.","required":false}},"args":[{"name":"actId","description":"Name or ID of the actor to run (e.g. \"apify/hello-world\" or \"E2jjCZBezvAZnX8Rb\"). If not provided, the command runs the remote actor specified in the \".actor/actor.json\" file.","required":false}]},"check-version":{"id":"check-version","description":"Checks that installed Apify CLI version is up to date.","pluginName":"apify-cli","pluginType":"core","hidden":true,"aliases":["cv"],"flags":{"enforce-update":{"name":"enforce-update","type":"boolean","char":"e","description":"[Optional] Enforce version update from NPM","required":false,"allowNo":false}},"args":[]},"create":{"id":"create","description":"Creates a new actor project directory from a selected boilerplate template.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"template":{"name":"template","type":"option","char":"t","description":"Template for the actor. 
If not provided, the command will prompt for it.\nVisit https://raw.githubusercontent.com/apify/actor-templates/master/templates/manifest.json to find available template names.","required":false},"skip-dependency-install":{"name":"skip-dependency-install","type":"boolean","description":"Skip installing actor dependencies.","required":false,"allowNo":false},"template-archive-url":{"name":"template-archive-url","type":"option","description":"Actor template archive url. Useful for developing new templates.","hidden":true,"required":false}},"args":[{"name":"actorName","description":"Name of the actor and its directory","required":false}]},"edit-input-schema":{"id":"edit-input-schema","description":"Lets you edit your input schema that would be used on the platform in a visual input schema editor.","pluginName":"apify-cli","pluginType":"core","hidden":true,"aliases":["eis"],"flags":{},"args":[{"name":"path","description":"Optional path to your INPUT_SCHEMA.json file. If not provided default platform location for input schema is used.","required":false}]},"info":{"id":"info","description":"Displays information about the currently active Apify account.\nThe information is printed to the console.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"init-wrap-scrapy":{"id":"init-wrap-scrapy","description":"Wraps your existing Scrapy project to work like an Apify Actor.\n\nIt adds the following features:\n- Automatic retry of failed requests\n- Automatic proxy rotation\n- Automatic user agent rotation\n...\n","pluginName":"apify-cli","pluginType":"core","hidden":true,"aliases":[],"flags":{},"args":[{"name":"path","description":"Optional path to your scrapy project. 
If not provided, the current directory is used.","required":false}]},"init":{"id":"init","description":"Initializes a new actor project in an existing directory.\nThe command only creates the \".actor/actor.json\" file and the \"storage\" directory in the current directory, but will not touch anything else.\n\nWARNING: The directory at \"storage\" will be overwritten if it already exists.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"yes":{"name":"yes","type":"boolean","char":"y","description":"Automatic yes to prompts; assume \"yes\" as answer to all prompts. Note that in some cases, the command may still ask for confirmation.","required":false,"allowNo":false}},"args":[{"name":"actorName","description":"Name of the actor. If not provided, you will be prompted for it.","required":false}]},"login-new":{"id":"login-new","description":"Logs in to your Apify account using your API token.\nThe API token and other account information is stored in the ~/.apify directory, from where it is read by all other \"apify\" commands. To log out, call \"apify logout\".","pluginName":"apify-cli","pluginType":"core","hidden":true,"aliases":[],"flags":{"token":{"name":"token","type":"option","char":"t","description":"[Optional] Apify API token","required":false}},"args":[]},"login":{"id":"login","description":"Logs in to your Apify account using a provided API token.\nThe API token and other account information is stored in the ~/.apify directory, from where it is read by all other \"apify\" commands. To log out, call \"apify logout\".","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"token":{"name":"token","type":"option","char":"t","description":"[Optional] Apify API token","required":false}},"args":[]},"logout":{"id":"logout","description":"Logs out of your Apify account.\nThe command deletes the API token and all other account information stored in the ~/.apify directory. 
To log in again, call \"apify login\".","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"pull":{"id":"pull","description":"Pulls an Actor from the Apify platform to the current directory. If it is defined as Git repository, it will be cloned. If it is defined as Web IDE, it will fetch the files.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"version":{"name":"version","type":"option","char":"v","description":"Actor version number which will be pulled, e.g. 1.2. Default: the highest version","required":false}},"args":[{"name":"actorId","description":"Name or ID of the actor to run (e.g. \"apify/hello-world\" or \"E2jjCZBezvAZnX8Rb\"). If not provided, the command will update the Actor in the current directory based on its name in \".actor/actor.json\" file.","required":false}]},"push":{"id":"push","description":"Uploads the actor to the Apify platform and builds it there.\nThe Actor settings are read from the \".actor/actor.json\" file in the current directory, but they can be overridden using command-line options.\nNOTE: If the source files are smaller than 3 MB then they are uploaded as \n\"Multiple source files\", otherwise they are uploaded as \"Zip file\".\n\nWARNING: If the target Actor already exists in your Apify account, it will be overwritten!","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"version-number":{"name":"version-number","type":"option","description":"DEPRECATED: Use flag version instead. Actor version number to which the files should be pushed. By default, it is taken from the \".actor/actor.json\" file.","required":false},"version":{"name":"version","type":"option","char":"v","description":"Actor version number to which the files should be pushed. By default, it is taken from the \".actor/actor.json\" file.","required":false},"build-tag":{"name":"build-tag","type":"option","char":"b","description":"Build tag to be applied to the successful Actor build. 
By default, it is taken from the \".actor/actor.json\" file","required":false},"wait-for-finish":{"name":"wait-for-finish","type":"option","char":"w","description":"Seconds for waiting to build to finish, if no value passed, it waits forever.","required":false},"no-prompt":{"name":"no-prompt","type":"boolean","description":"Do not prompt for opening the actor details in a browser. This will also not open the browser automatically.","required":false,"allowNo":false}},"args":[{"name":"actorId","description":"Name or ID of the Actor to push (e.g. \"apify/hello-world\" or \"E2jjCZBezvAZnX8Rb\"). If not provided, the command will create or modify the actor with the name specified in \".actor/actor.json\" file.","required":false}]},"run":{"id":"run","description":"Runs the actor locally in the current directory.\nIt sets various APIFY_XYZ environment variables in order to provide a working execution environment for the actor. For example, this causes the actor input, as well as all other data in key-value stores, datasets or request queues to be stored in the \"storage\" directory, rather than on the Apify platform.\n\nNOTE: You can override the command's default behavior for Node.js actors by overriding the \"start\" script in the package.json file. 
You can set up your own main file or environment variables by changing it.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"purge":{"name":"purge","type":"boolean","char":"p","description":"Shortcut that combines the --purge-queue, --purge-dataset and --purge-key-value-store options.","required":false,"allowNo":false},"purge-queue":{"name":"purge-queue","type":"boolean","description":"Deletes the local directory containing the default request queue before the run starts.","required":false,"allowNo":false},"purge-dataset":{"name":"purge-dataset","type":"boolean","description":"Deletes the local directory containing the default dataset before the run starts.","required":false,"allowNo":false},"purge-key-value-store":{"name":"purge-key-value-store","type":"boolean","description":"Deletes all records from the default key-value store in the local directory before the run starts, except for the \"INPUT\" key.","required":false,"allowNo":false}},"args":[]},"vis":{"id":"vis","description":"Validates input schema and prints errors found.\nThe input schema for the actor is used from these locations in order of preference.\nThe first one found is validated as it would be the one used on the Apify platform.\n1. Directly embedded object in \".actor/actor.json\" under 'input' key\n2. Path to JSON file referenced in \".actor/actor.json\" under 'input' key\n3. JSON file at .actor/INPUT_SCHEMA.json\n4. JSON file at INPUT_SCHEMA.json\n\nYou can also pass any custom path to your input schema to have it validated instead.\n","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"path","description":"Optional path to your INPUT_SCHEMA.json file. 
If not provided ./INPUT_SCHEMA.json is used.","required":false}]},"actor:get-input":{"id":"actor:get-input","description":"Gets the actor input value from the default key-value store associated with the actor run.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"actor:get-value":{"id":"actor:get-value","description":"Gets a value from the default key-value store associated with the actor run.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"key","description":"Key of the record in key-value store","required":true}]},"actor":{"id":"actor","description":"Commands are designed to be used in actor runs. All commands are in PoC state, do not use in production environments.\n","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"actor:push-data":{"id":"actor:push-data","description":"Stores an object or an array of objects to the default dataset of the actor run.\nIt is possible to pass data using item argument or stdin.\nPassing data using argument:\n$ apify actor:push-data {\"foo\": \"bar\"}\nPassing data using stdin with pipe:\n$ cat ./test.json | apify actor:push-data\n","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"item","description":"JSON string with one object or array of objects containing data to be stored in the default dataset.","required":false}]},"actor:set-value":{"id":"actor:set-value","description":"Sets or removes record into the default KeyValueStore associated with the actor run.\nIt is possible to pass data using argument or stdin.\nPassing data using argument:\n$ apify actor:set-value KEY my-value\nPassing data using stdin with pipe:\n$ cat ./my-text-file.txt | apify actor:set-value KEY --contentType text/plain\n","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{"contentType":{"name":"contentType","type":"option","char":"c","description":"Specifies a custom MIME content type of the record. 
By default \"application/json\" is used.","required":false}},"args":[{"name":"key","description":"Key of the record in key-value store.","required":true},{"name":"value","description":"Record data, which can be one of the following values:\n- If empty, the record in the key-value store is deleted.\n- If no `contentType` flag is specified, value is expected to be any JSON string value.\n- If options.contentType is set, value is taken as is.","required":false}]},"secrets:add":{"id":"secrets:add","description":"Adds a new secret value.\nThe secrets are stored to a file at ~/.apify","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"name","description":"Name of the secret","required":true},{"name":"value","description":"Value of the secret","required":true}]},"secrets":{"id":"secrets","description":"Manages secret values for actor environment variables.\n\nExample:\n$ apify secrets:add mySecret TopSecretValue123\n\nNow the \"mySecret\" value can be used in an environment variable defined in \".actor/actor.json\" file by adding the \"@\" prefix:\n\n{\n \"actorSpecification\": 1,\n \"name\": \"my_actor\",\n \"environmentVariables\": { \"SECRET_ENV_VAR\": \"@mySecret\" },\n \"version\": \"0.1\"\n}\n\nWhen the actor is pushed to Apify cloud, the \"SECRET_ENV_VAR\" and its value is stored as a secret environment variable of the actor.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[]},"secrets:rm":{"id":"secrets:rm","description":"Removes the secret.","pluginName":"apify-cli","pluginType":"core","aliases":[],"flags":{},"args":[{"name":"name","description":"Name of the secret","required":true}]}}}
|
package/package.json
CHANGED
|
@@ -1,114 +1,117 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
"
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
"
|
|
109
|
-
|
|
110
|
-
"./src/
|
|
111
|
-
|
|
2
|
+
"name": "apify-cli",
|
|
3
|
+
"version": "0.19.0",
|
|
4
|
+
"description": "Apify command-line interface helps you create, develop, build and run Apify actors, and manage the Apify cloud platform.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"test": "cross-env APIFY_CLI_SKIP_UPDATE_CHECK=1 mocha --timeout 180000 --recursive",
|
|
8
|
+
"test-python": "npm run test -- --grep '\\[python\\]'",
|
|
9
|
+
"lint": "eslint src test",
|
|
10
|
+
"lint:fix": "eslint src test --fix",
|
|
11
|
+
"commands-md": "npm run manifest && oclif-dev readme",
|
|
12
|
+
"prepare-release": "npm run commands-md && npm run prod-shrinkwrap",
|
|
13
|
+
"prod-shrinkwrap": "rm -rf node_modules && npm install --production && npm shrinkwrap",
|
|
14
|
+
"manifest": "rm -f oclif.manifest.json && oclif-dev manifest",
|
|
15
|
+
"postinstall": "node \"./src/bin/run\" check-version && node ./src/lib/community"
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"src",
|
|
19
|
+
"oclif.manifest.json",
|
|
20
|
+
"index.js",
|
|
21
|
+
"package.json"
|
|
22
|
+
],
|
|
23
|
+
"bin": {
|
|
24
|
+
"apify": "./src/bin/run"
|
|
25
|
+
},
|
|
26
|
+
"contributors": [
|
|
27
|
+
"Jakub Drobník <jakub.drobnik@apify.com>",
|
|
28
|
+
"Jan Curn <jan@apify.com>"
|
|
29
|
+
],
|
|
30
|
+
"repository": {
|
|
31
|
+
"type": "git",
|
|
32
|
+
"url": "git+https://github.com/apify/apify-cli.git"
|
|
33
|
+
},
|
|
34
|
+
"keywords": [
|
|
35
|
+
"apify",
|
|
36
|
+
"client",
|
|
37
|
+
"node",
|
|
38
|
+
"command",
|
|
39
|
+
"line",
|
|
40
|
+
"bash"
|
|
41
|
+
],
|
|
42
|
+
"author": {
|
|
43
|
+
"name": "Apify",
|
|
44
|
+
"email": "support@apify.com",
|
|
45
|
+
"url": "https://www.apify.com"
|
|
46
|
+
},
|
|
47
|
+
"license": "Apache-2.0",
|
|
48
|
+
"bugs": {
|
|
49
|
+
"url": "https://github.com/apify/apify-cli/issues"
|
|
50
|
+
},
|
|
51
|
+
"homepage": "https://github.com/apify/apify-cli#readme",
|
|
52
|
+
"engines": {
|
|
53
|
+
"node": ">=16.0.0"
|
|
54
|
+
},
|
|
55
|
+
"dependencies": {
|
|
56
|
+
"@apify/actor-templates": "^0.1.4",
|
|
57
|
+
"@apify/consts": "^2.23.0",
|
|
58
|
+
"@apify/input_schema": "^3.5.8",
|
|
59
|
+
"@apify/utilities": "^2.9.3",
|
|
60
|
+
"@crawlee/memory-storage": "^3.5.8",
|
|
61
|
+
"@oclif/command": "^1.8.36",
|
|
62
|
+
"@oclif/config": "^1.18.17",
|
|
63
|
+
"@oclif/errors": "^1.3.6",
|
|
64
|
+
"@oclif/plugin-commands": "^2.1.0",
|
|
65
|
+
"@oclif/plugin-help": "^5.1.12",
|
|
66
|
+
"@root/walk": "^1.1.0",
|
|
67
|
+
"adm-zip": "^0.5.10",
|
|
68
|
+
"ajv": "^8.12.0",
|
|
69
|
+
"apify-client": "^2.8.2",
|
|
70
|
+
"archiver-promise": "^1.0.0",
|
|
71
|
+
"axios": "^1.6.1",
|
|
72
|
+
"chalk": "^4.1.2",
|
|
73
|
+
"computer-name": "^0.1.0",
|
|
74
|
+
"configparser": "^0.3.10",
|
|
75
|
+
"cors": "^2.8.5",
|
|
76
|
+
"detect-indent": "^6.1.0",
|
|
77
|
+
"escape-string-regexp": "^4.0.0",
|
|
78
|
+
"express": "^4.18.2",
|
|
79
|
+
"globby": "^11.1.0",
|
|
80
|
+
"handlebars": "^4.7.8",
|
|
81
|
+
"inquirer": "^7.3.3",
|
|
82
|
+
"is-ci": "^3.0.1",
|
|
83
|
+
"is-online": "^10.0.0",
|
|
84
|
+
"istextorbinary": "^8.0.0",
|
|
85
|
+
"jju": "^1.4.0",
|
|
86
|
+
"load-json-file": "^6.2.0",
|
|
87
|
+
"mime": "^3.0.0",
|
|
88
|
+
"mixpanel": "^0.18.0",
|
|
89
|
+
"open": "^8.4.0",
|
|
90
|
+
"ow": "^0.28.2",
|
|
91
|
+
"rimraf": "^3.0.2",
|
|
92
|
+
"semver": "^7.5.4",
|
|
93
|
+
"tiged": "^2.12.5",
|
|
94
|
+
"underscore": "^1.13.6",
|
|
95
|
+
"write-json-file": "^4.3.0"
|
|
96
|
+
},
|
|
97
|
+
"devDependencies": {
|
|
98
|
+
"@apify/eslint-config": "^0.4.0",
|
|
99
|
+
"@oclif/dev-cli": "^1.26.0",
|
|
100
|
+
"@oclif/test": "^2.1.0",
|
|
101
|
+
"chai": "^4.3.4",
|
|
102
|
+
"chai-match": "^1.1.1",
|
|
103
|
+
"cross-env": "^7.0.3",
|
|
104
|
+
"eslint": "^8.53.0",
|
|
105
|
+
"mocha": "^10.0.0",
|
|
106
|
+
"sinon": "^17.0.0"
|
|
107
|
+
},
|
|
108
|
+
"oclif": {
|
|
109
|
+
"bin": "apify",
|
|
110
|
+
"commands": "./src/commands",
|
|
111
|
+
"hooks": {
|
|
112
|
+
"init": [
|
|
113
|
+
"./src/hooks/init"
|
|
114
|
+
]
|
|
115
|
+
}
|
|
112
116
|
}
|
|
113
|
-
}
|
|
114
117
|
}
|
package/src/commands/create.js
CHANGED
|
@@ -1,17 +1,13 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
2
|
const path = require('path');
|
|
3
|
-
const { finished } = require('stream');
|
|
4
|
-
const { promisify } = require('util');
|
|
5
3
|
|
|
6
4
|
const actorTemplates = require('@apify/actor-templates');
|
|
7
5
|
const { flags: flagsHelper } = require('@oclif/command');
|
|
8
|
-
const AdmZip = require('adm-zip');
|
|
9
6
|
const semver = require('semver');
|
|
10
7
|
|
|
11
8
|
const { ApifyCommand } = require('../lib/apify_command');
|
|
12
9
|
const { EMPTY_LOCAL_CONFIG, LOCAL_CONFIG_PATH, PYTHON_VENV_PATH, SUPPORTED_NODEJS_VERSION } = require('../lib/consts');
|
|
13
10
|
const {
|
|
14
|
-
httpsGet,
|
|
15
11
|
ensureValidActorName,
|
|
16
12
|
getTemplateDefinition,
|
|
17
13
|
enhanceReadmeWithLocalSuffix,
|
|
@@ -31,6 +27,7 @@ const {
|
|
|
31
27
|
detectNodeVersion,
|
|
32
28
|
isNodeVersionSupported,
|
|
33
29
|
detectNpmVersion,
|
|
30
|
+
downloadAndUnzip,
|
|
34
31
|
} = require('../lib/utils');
|
|
35
32
|
|
|
36
33
|
class CreateCommand extends ApifyCommand {
|
|
@@ -81,12 +78,7 @@ class CreateCommand extends ApifyCommand {
|
|
|
81
78
|
throw err;
|
|
82
79
|
}
|
|
83
80
|
|
|
84
|
-
|
|
85
|
-
const chunks = [];
|
|
86
|
-
zipStream.on('data', (chunk) => chunks.push(chunk));
|
|
87
|
-
await promisify(finished)(zipStream);
|
|
88
|
-
const zip = new AdmZip(Buffer.concat(chunks));
|
|
89
|
-
zip.extractAllTo(actFolderDir, true);
|
|
81
|
+
await downloadAndUnzip({ url: templateArchiveUrl, pathTo: actFolderDir });
|
|
90
82
|
|
|
91
83
|
// There may be .actor/actor.json file in used template - let's try to load it and change the name prop value to actorName
|
|
92
84
|
const localConfig = await getJsonFileContent(path.join(actFolderDir, LOCAL_CONFIG_PATH));
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
const { ApifyCommand } = require('../lib/apify_command');
|
|
2
|
+
const { info } = require('../lib/outputs');
|
|
3
|
+
const { wrapScrapyProject } = require('../lib/scrapy-wrapper/index');
|
|
4
|
+
|
|
5
|
+
class WrapScrapyCommand extends ApifyCommand {
|
|
6
|
+
async run() {
|
|
7
|
+
const { args } = this.parse(WrapScrapyCommand);
|
|
8
|
+
|
|
9
|
+
await wrapScrapyProject({ projectPath: args.path });
|
|
10
|
+
|
|
11
|
+
info('Scrapy project wrapped successfully.');
|
|
12
|
+
}
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
WrapScrapyCommand.hidden = true;
|
|
16
|
+
|
|
17
|
+
WrapScrapyCommand.description = `Wraps your existing Scrapy project to work like an Apify Actor.
|
|
18
|
+
|
|
19
|
+
It adds the following features:
|
|
20
|
+
- Automatic retry of failed requests
|
|
21
|
+
- Automatic proxy rotation
|
|
22
|
+
- Automatic user agent rotation
|
|
23
|
+
...
|
|
24
|
+
`;
|
|
25
|
+
|
|
26
|
+
WrapScrapyCommand.args = [
|
|
27
|
+
{
|
|
28
|
+
name: 'path',
|
|
29
|
+
required: false,
|
|
30
|
+
description: 'Optional path to your scrapy project. If not provided, the current directory is used.',
|
|
31
|
+
},
|
|
32
|
+
];
|
|
33
|
+
|
|
34
|
+
module.exports = WrapScrapyCommand;
|
package/src/commands/init.js
CHANGED
|
@@ -1,19 +1,35 @@
|
|
|
1
1
|
const path = require('path');
|
|
2
2
|
|
|
3
|
+
const { flags: flagsHelper } = require('@oclif/command');
|
|
3
4
|
const inquirer = require('inquirer');
|
|
4
5
|
|
|
5
6
|
const { ApifyCommand } = require('../lib/apify_command');
|
|
6
|
-
const { EMPTY_LOCAL_CONFIG, DEFAULT_LOCAL_STORAGE_DIR, LOCAL_CONFIG_PATH } = require('../lib/consts');
|
|
7
|
+
const { EMPTY_LOCAL_CONFIG, DEFAULT_LOCAL_STORAGE_DIR, LOCAL_CONFIG_PATH, LANGUAGE, PROJECT_TYPES } = require('../lib/consts');
|
|
7
8
|
const { createPrefilledInputFileFromInputSchema } = require('../lib/input_schema');
|
|
8
9
|
const outputs = require('../lib/outputs');
|
|
9
|
-
const {
|
|
10
|
+
const { ProjectAnalyzer } = require('../lib/project_analyzer');
|
|
11
|
+
const { wrapScrapyProject } = require('../lib/scrapy-wrapper');
|
|
12
|
+
const { setLocalConfig, setLocalEnv, getLocalConfig, getLocalConfigOrThrow, detectLocalActorLanguage } = require('../lib/utils');
|
|
10
13
|
|
|
11
14
|
class InitCommand extends ApifyCommand {
|
|
12
15
|
async run() {
|
|
13
|
-
const { args } = this.parse(InitCommand);
|
|
16
|
+
const { args, flags } = this.parse(InitCommand);
|
|
14
17
|
let { actorName } = args;
|
|
15
18
|
const cwd = process.cwd();
|
|
16
19
|
|
|
20
|
+
if (ProjectAnalyzer.getProjectType(cwd) === PROJECT_TYPES.SCRAPY) {
|
|
21
|
+
outputs.info('The current directory looks like a Scrapy project. Using automatic project wrapping.');
|
|
22
|
+
this.telemetryData.actorWrapper = PROJECT_TYPES.SCRAPY;
|
|
23
|
+
|
|
24
|
+
return wrapScrapyProject({ projectPath: cwd });
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
if (!flags.yes && detectLocalActorLanguage(cwd).language === LANGUAGE.UNKNOWN) {
|
|
28
|
+
outputs.warning('The current directory does not look like a Node.js or Python project.');
|
|
29
|
+
const { c } = await inquirer.prompt([{ name: 'c', message: 'Do you want to continue?', type: 'confirm' }]);
|
|
30
|
+
if (!c) return;
|
|
31
|
+
}
|
|
32
|
+
|
|
17
33
|
if (getLocalConfig()) {
|
|
18
34
|
outputs.warning(`Skipping creation of "${LOCAL_CONFIG_PATH}", the file already exists in the current directory.`);
|
|
19
35
|
} else {
|
|
@@ -45,4 +61,12 @@ InitCommand.args = [
|
|
|
45
61
|
},
|
|
46
62
|
];
|
|
47
63
|
|
|
64
|
+
InitCommand.flags = {
|
|
65
|
+
yes: flagsHelper.boolean({
|
|
66
|
+
char: 'y',
|
|
67
|
+
description: 'Automatic yes to prompts; assume "yes" as answer to all prompts. Note that in some cases, the command may still ask for confirmation.',
|
|
68
|
+
required: false,
|
|
69
|
+
}),
|
|
70
|
+
};
|
|
71
|
+
|
|
48
72
|
module.exports = InitCommand;
|
package/src/commands/run.js
CHANGED
|
@@ -7,9 +7,11 @@ const loadJson = require('load-json-file');
|
|
|
7
7
|
const semver = require('semver');
|
|
8
8
|
|
|
9
9
|
const { ApifyCommand } = require('../lib/apify_command');
|
|
10
|
-
const { LEGACY_LOCAL_STORAGE_DIR, DEFAULT_LOCAL_STORAGE_DIR, SUPPORTED_NODEJS_VERSION, LANGUAGE } = require('../lib/consts');
|
|
10
|
+
const { LEGACY_LOCAL_STORAGE_DIR, DEFAULT_LOCAL_STORAGE_DIR, SUPPORTED_NODEJS_VERSION, LANGUAGE, PROJECT_TYPES } = require('../lib/consts');
|
|
11
11
|
const execWithLog = require('../lib/exec');
|
|
12
12
|
const { error, info, warning } = require('../lib/outputs');
|
|
13
|
+
const { ProjectAnalyzer } = require('../lib/project_analyzer');
|
|
14
|
+
const { ScrapyProjectAnalyzer } = require('../lib/scrapy-wrapper/ScrapyProjectAnalyzer');
|
|
13
15
|
const { replaceSecretsValue } = require('../lib/secrets');
|
|
14
16
|
const {
|
|
15
17
|
getLocalUserInfo, purgeDefaultQueue, purgeDefaultKeyValueStore,
|
|
@@ -29,11 +31,12 @@ class RunCommand extends ApifyCommand {
|
|
|
29
31
|
|
|
30
32
|
const packageJsonExists = fs.existsSync(packageJsonPath);
|
|
31
33
|
const mainPyExists = fs.existsSync(mainPyPath);
|
|
34
|
+
const isScrapyProject = ProjectAnalyzer.getProjectType(cwd) === PROJECT_TYPES.SCRAPY;
|
|
32
35
|
|
|
33
|
-
if (!packageJsonExists && !mainPyExists) {
|
|
36
|
+
if (!packageJsonExists && !mainPyExists && !isScrapyProject) {
|
|
34
37
|
throw new Error(
|
|
35
38
|
'Actor is of an unknown format.'
|
|
36
|
-
+ ` Make sure either the 'package.json' file or 'src/__main__.py' file exists.`,
|
|
39
|
+
+ ` Make sure either the 'package.json' file or 'src/__main__.py' file exists or you are in a migrated Scrapy project.`,
|
|
37
40
|
);
|
|
38
41
|
}
|
|
39
42
|
|
|
@@ -122,7 +125,16 @@ class RunCommand extends ApifyCommand {
|
|
|
122
125
|
if (pythonVersion) {
|
|
123
126
|
if (isPythonVersionSupported(pythonVersion)) {
|
|
124
127
|
const pythonCommand = getPythonCommand(cwd);
|
|
125
|
-
|
|
128
|
+
if (isScrapyProject) {
|
|
129
|
+
const project = new ScrapyProjectAnalyzer(cwd);
|
|
130
|
+
project.loadScrapyCfg();
|
|
131
|
+
if (!project.configuration.hasKey('apify', 'mainpy_location')) {
|
|
132
|
+
throw new Error(`This Scrapy project's configuration does not contain Apify settings. Did you forget to run "apify init"?`);
|
|
133
|
+
}
|
|
134
|
+
await execWithLog(pythonCommand, ['-m', project.configuration.get('apify', 'mainpy_location')], { env });
|
|
135
|
+
} else {
|
|
136
|
+
await execWithLog(pythonCommand, ['-m', 'src'], { env });
|
|
137
|
+
}
|
|
126
138
|
} else {
|
|
127
139
|
error(`Python actors require Python 3.8 or higher, but you have Python ${pythonVersion}!`);
|
|
128
140
|
error('Please install Python 3.8 or higher to be able to run Python actors locally.');
|
package/src/lib/consts.js
CHANGED
|
@@ -23,6 +23,11 @@ exports.LANGUAGE = {
|
|
|
23
23
|
UNKNOWN: 'n/a',
|
|
24
24
|
};
|
|
25
25
|
|
|
26
|
+
exports.PROJECT_TYPES = {
|
|
27
|
+
SCRAPY: 'scrapy',
|
|
28
|
+
UNKNOWN: 'unknown',
|
|
29
|
+
};
|
|
30
|
+
|
|
26
31
|
exports.COMMANDS_WITHIN_ACTOR = ['init', 'run', 'push', 'pull', 'call'];
|
|
27
32
|
|
|
28
33
|
exports.CHECK_VERSION_EVERY_MILLIS = 24 * 60 * 60 * 1000; // Once a day
|
package/src/lib/create-utils.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
|
-
const https = require('https');
|
|
3
2
|
const { pipeline } = require('stream');
|
|
4
3
|
const { promisify } = require('util');
|
|
5
4
|
|
|
@@ -9,29 +8,10 @@ const inquirer = require('inquirer');
|
|
|
9
8
|
const {
|
|
10
9
|
warning,
|
|
11
10
|
} = require('./outputs');
|
|
12
|
-
const { validateActorName } = require('./utils');
|
|
11
|
+
const { validateActorName, httpsGet } = require('./utils');
|
|
13
12
|
|
|
14
13
|
const PROGRAMMING_LANGUAGES = ['JavaScript', 'TypeScript', 'Python'];
|
|
15
14
|
|
|
16
|
-
/**
|
|
17
|
-
* @param {string} url
|
|
18
|
-
* @returns {Promise<unknown>}
|
|
19
|
-
*/
|
|
20
|
-
exports.httpsGet = async (url) => {
|
|
21
|
-
return new Promise((resolve, reject) => {
|
|
22
|
-
https.get(url, (response) => {
|
|
23
|
-
// Handle redirects
|
|
24
|
-
if (response.statusCode === 301 || response.statusCode === 302) {
|
|
25
|
-
resolve(exports.httpsGet(response.headers.location));
|
|
26
|
-
// Destroy the response to close the HTTP connection, otherwise this hangs for a long time with Node 19+ (due to HTTP keep-alive).
|
|
27
|
-
response.destroy();
|
|
28
|
-
} else {
|
|
29
|
-
resolve(response);
|
|
30
|
-
}
|
|
31
|
-
}).on('error', reject);
|
|
32
|
-
});
|
|
33
|
-
};
|
|
34
|
-
|
|
35
15
|
/**
|
|
36
16
|
* @param {string} maybeActorName
|
|
37
17
|
* @returns {Promise<string>}
|
|
@@ -78,7 +58,7 @@ exports.enhanceReadmeWithLocalSuffix = async (readmePath, manifestPromise) => {
|
|
|
78
58
|
if (manifest instanceof Error) throw manifest;
|
|
79
59
|
|
|
80
60
|
try {
|
|
81
|
-
const suffixStream = await
|
|
61
|
+
const suffixStream = await httpsGet(manifest.localReadmeSuffixUrl);
|
|
82
62
|
const readmeStream = fs.createWriteStream(readmePath, { flags: 'a' });
|
|
83
63
|
readmeStream.write('\n\n');
|
|
84
64
|
await promisify(pipeline)(suffixStream, readmeStream);
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
const { PROJECT_TYPES } = require('./consts');
|
|
2
|
+
const { ScrapyProjectAnalyzer } = require('./scrapy-wrapper/ScrapyProjectAnalyzer');
|
|
3
|
+
|
|
4
|
+
const analyzers = [
|
|
5
|
+
{
|
|
6
|
+
type: PROJECT_TYPES.SCRAPY,
|
|
7
|
+
analyzer: ScrapyProjectAnalyzer,
|
|
8
|
+
},
|
|
9
|
+
];
|
|
10
|
+
|
|
11
|
+
class ProjectAnalyzer {
|
|
12
|
+
static getProjectType(pathname) {
|
|
13
|
+
const analyzer = analyzers.find((a) => {
|
|
14
|
+
if (!a.analyzer.isApplicable) {
|
|
15
|
+
throw new Error(`Analyzer ${a.analyzer} does not have isApplicable method.`);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
return a.analyzer.isApplicable(pathname);
|
|
19
|
+
});
|
|
20
|
+
return analyzer?.type || PROJECT_TYPES.UNKNOWN;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
module.exports = { ProjectAnalyzer };
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
const { readdirSync } = require('fs');
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
|
|
5
|
+
const ConfigParser = require('configparser');
|
|
6
|
+
const inquirer = require('inquirer');
|
|
7
|
+
|
|
8
|
+
const { SpiderFileAnalyzer } = require('./SpiderFileAnalyzer');
|
|
9
|
+
|
|
10
|
+
class ScrapyProjectAnalyzer {
|
|
11
|
+
constructor(pathname) {
|
|
12
|
+
this.pathname = pathname;
|
|
13
|
+
this.configuration = null;
|
|
14
|
+
this.settings = null;
|
|
15
|
+
this.loadScrapyCfg();
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
static isApplicable(pathname) {
|
|
19
|
+
return fs.existsSync(path.join(pathname, 'scrapy.cfg'));
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
async init() {
|
|
23
|
+
await this.loadSettings();
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
loadScrapyCfg() {
|
|
27
|
+
const config = new ConfigParser();
|
|
28
|
+
const scrapyCfgPath = path.resolve(path.join(this.pathname, 'scrapy.cfg'));
|
|
29
|
+
|
|
30
|
+
if (!fs.existsSync(scrapyCfgPath)) {
|
|
31
|
+
throw new Error(`scrapy.cfg not found in "${scrapyCfgPath}".
|
|
32
|
+
Are you sure there is a Scrapy project there?`);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
config.read(scrapyCfgPath);
|
|
36
|
+
this.configuration = config;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
async loadSettings() {
|
|
40
|
+
const assumedBotName = this.configuration.get('settings', 'default').split('.')[0];
|
|
41
|
+
|
|
42
|
+
const settings = await inquirer.prompt([
|
|
43
|
+
{
|
|
44
|
+
type: 'input',
|
|
45
|
+
name: 'BOT_NAME',
|
|
46
|
+
message: 'Enter the Scrapy BOT_NAME (see settings.py):',
|
|
47
|
+
default: assumedBotName,
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
type: 'input',
|
|
51
|
+
name: 'SPIDER_MODULES',
|
|
52
|
+
message: 'What folder are the Scrapy spider modules stored in? (see SPIDER_MODULES in settings.py):',
|
|
53
|
+
default: [`${assumedBotName}.spiders`],
|
|
54
|
+
},
|
|
55
|
+
]);
|
|
56
|
+
|
|
57
|
+
if (typeof settings.SPIDER_MODULES === 'string') settings.SPIDER_MODULES = [settings.SPIDER_MODULES];
|
|
58
|
+
|
|
59
|
+
this.settings = settings;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
getName() {
|
|
63
|
+
return this.settings?.BOT_NAME;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
getAvailableSpiders() {
|
|
67
|
+
const spiderPaths = this.settings?.SPIDER_MODULES;
|
|
68
|
+
|
|
69
|
+
if (!spiderPaths) {
|
|
70
|
+
throw new Error('SPIDER_MODULES path not found in settings.');
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
const spiders = [];
|
|
74
|
+
|
|
75
|
+
for (const spiderPath of spiderPaths) {
|
|
76
|
+
const spidersDir = path.join(this.pathname, spiderPath.replaceAll('.', '/'));
|
|
77
|
+
|
|
78
|
+
const files = readdirSync(spidersDir, { withFileTypes: true });
|
|
79
|
+
for (const file of files) {
|
|
80
|
+
if (file.isFile() && file.name.endsWith('.py') && file.name !== '__init__.py') {
|
|
81
|
+
spiders.push(...(new SpiderFileAnalyzer(path.join(spidersDir, file.name)).getSpiders()));
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return spiders;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
module.exports = { ScrapyProjectAnalyzer };
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
|
|
3
|
+
const { Spider } = require('./Spider');
|
|
4
|
+
|
|
5
|
+
class SpiderFileAnalyzer {
|
|
6
|
+
constructor(pathname) {
|
|
7
|
+
this.pathname = pathname;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
getSpiders() {
|
|
11
|
+
const file = fs.readFileSync(this.pathname, 'utf8');
|
|
12
|
+
|
|
13
|
+
const regex = /class\s+(\w+)/g;
|
|
14
|
+
const spiders = [];
|
|
15
|
+
|
|
16
|
+
let match = regex.exec(file);
|
|
17
|
+
while (match) {
|
|
18
|
+
spiders.push(new Spider({ class_name: match[1], pathname: this.pathname }));
|
|
19
|
+
match = regex.exec(file);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
return spiders;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
module.exports = { SpiderFileAnalyzer };
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
|
|
4
|
+
const { fetchManifest, wrapperManifestUrl } = require('@apify/actor-templates');
|
|
5
|
+
const { walk } = require('@root/walk');
|
|
6
|
+
const ConfigParser = require('configparser');
|
|
7
|
+
const handlebars = require('handlebars');
|
|
8
|
+
const inquirer = require('inquirer');
|
|
9
|
+
|
|
10
|
+
const { ScrapyProjectAnalyzer } = require('./ScrapyProjectAnalyzer');
|
|
11
|
+
const outputs = require('../outputs');
|
|
12
|
+
const { downloadAndUnzip } = require('../utils');
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Files that should be concatenated instead of copied (and overwritten).
|
|
16
|
+
*/
|
|
17
|
+
const concatenableFiles = ['.dockerignore', '.gitignore'];
|
|
18
|
+
|
|
19
|
+
async function merge(fromPath, toPath, options = { bindings: {} }) {
|
|
20
|
+
await walk(fromPath, async (err, pathname, dirent) => {
|
|
21
|
+
if (pathname === fromPath) return;
|
|
22
|
+
const relPath = path.relative(fromPath, pathname);
|
|
23
|
+
const toRelPath = relPath.split(path.sep).map((part) => {
|
|
24
|
+
if (part.startsWith('{') && part.endsWith('}')) {
|
|
25
|
+
part = part.replace('{', '').replace('}', '');
|
|
26
|
+
const binding = options.bindings[part];
|
|
27
|
+
if (!binding) {
|
|
28
|
+
throw new Error(`Binding for ${part} not found.`);
|
|
29
|
+
}
|
|
30
|
+
return binding;
|
|
31
|
+
}
|
|
32
|
+
return part;
|
|
33
|
+
}).join(path.sep);
|
|
34
|
+
|
|
35
|
+
const targetPath = path.join(toPath, toRelPath);
|
|
36
|
+
|
|
37
|
+
if (dirent.isDirectory()) {
|
|
38
|
+
if (!fs.existsSync(targetPath)) {
|
|
39
|
+
fs.mkdirSync(targetPath);
|
|
40
|
+
}
|
|
41
|
+
return merge(pathname, targetPath);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
if (relPath.includes('.template')) {
|
|
45
|
+
fs.writeFileSync(
|
|
46
|
+
path.join(
|
|
47
|
+
toPath,
|
|
48
|
+
toRelPath.replace('.template', ''),
|
|
49
|
+
),
|
|
50
|
+
handlebars.compile(fs.readFileSync(pathname, 'utf8'))(options.bindings));
|
|
51
|
+
} else if (fs.existsSync(targetPath) && concatenableFiles.includes(path.basename(toRelPath))) {
|
|
52
|
+
fs.appendFileSync(targetPath, fs.readFileSync(pathname));
|
|
53
|
+
} else {
|
|
54
|
+
fs.copyFileSync(pathname, targetPath);
|
|
55
|
+
}
|
|
56
|
+
});
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
async function wrapScrapyProject({ projectPath }) {
|
|
60
|
+
if (!projectPath) projectPath = '.';
|
|
61
|
+
|
|
62
|
+
const analyzer = new ScrapyProjectAnalyzer(projectPath);
|
|
63
|
+
|
|
64
|
+
if (analyzer.configuration.hasSection('apify')) {
|
|
65
|
+
throw new Error(`The Scrapy project configuration already contains Apify settings. Are you sure you didn't already wrap this project?`);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
await analyzer.init();
|
|
69
|
+
|
|
70
|
+
const { spiderIndex } = await inquirer.prompt([
|
|
71
|
+
{
|
|
72
|
+
type: 'list',
|
|
73
|
+
name: 'spiderIndex',
|
|
74
|
+
message: 'Pick the Scrapy spider you want to wrap:',
|
|
75
|
+
choices: analyzer.getAvailableSpiders().map((spider, i) => ({
|
|
76
|
+
name: `${spider.class_name} (${spider.pathname})`,
|
|
77
|
+
value: i,
|
|
78
|
+
})),
|
|
79
|
+
},
|
|
80
|
+
]);
|
|
81
|
+
|
|
82
|
+
function translatePathToRelativeModuleName(pathname) {
|
|
83
|
+
const relPath = path.relative(projectPath, pathname);
|
|
84
|
+
|
|
85
|
+
return `.${relPath.split(path.sep).slice(1).join('.').replace('.py', '')}`;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
const templateBindings = {
|
|
89
|
+
botName: analyzer.settings.BOT_NAME,
|
|
90
|
+
scrapy_settings_module: analyzer.configuration.get('settings', 'default'),
|
|
91
|
+
apify_module_path: `${analyzer.settings.BOT_NAME}.apify`,
|
|
92
|
+
spider_class_name: analyzer.getAvailableSpiders()[spiderIndex].class_name,
|
|
93
|
+
spider_module_name: `${translatePathToRelativeModuleName(analyzer.getAvailableSpiders()[spiderIndex].pathname)}`,
|
|
94
|
+
projectFolder: analyzer.settings.BOT_NAME,
|
|
95
|
+
};
|
|
96
|
+
|
|
97
|
+
const manifest = await fetchManifest(wrapperManifestUrl);
|
|
98
|
+
|
|
99
|
+
outputs.info('Downloading the latest Scrapy wrapper template...');
|
|
100
|
+
|
|
101
|
+
const { archiveUrl } = manifest.templates.find(({ id }) => id === 'python-scrapy');
|
|
102
|
+
const templatePath = path.join(__dirname, 'templates', 'python-scrapy');
|
|
103
|
+
|
|
104
|
+
if (fs.existsSync(templatePath)) fs.rmSync(templatePath, { recursive: true });
|
|
105
|
+
|
|
106
|
+
await downloadAndUnzip({
|
|
107
|
+
url: archiveUrl,
|
|
108
|
+
pathTo: templatePath,
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
outputs.info('Wrapping the Scrapy project...');
|
|
112
|
+
|
|
113
|
+
merge(
|
|
114
|
+
path.join(__dirname, 'templates', 'python-scrapy'),
|
|
115
|
+
projectPath,
|
|
116
|
+
{
|
|
117
|
+
bindings: templateBindings,
|
|
118
|
+
},
|
|
119
|
+
);
|
|
120
|
+
|
|
121
|
+
const apifyConf = new ConfigParser();
|
|
122
|
+
apifyConf.addSection('apify');
|
|
123
|
+
apifyConf.set('apify', 'mainpy_location', analyzer.settings.BOT_NAME);
|
|
124
|
+
|
|
125
|
+
const s = fs.createWriteStream(path.join(projectPath, 'scrapy.cfg'), { flags: 'a' });
|
|
126
|
+
|
|
127
|
+
await new Promise((r) => {
|
|
128
|
+
s.on('open', (fd) => {
|
|
129
|
+
s.write('\n', () => {
|
|
130
|
+
apifyConf.write(fd);
|
|
131
|
+
r();
|
|
132
|
+
});
|
|
133
|
+
});
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
outputs.success('The Scrapy project has been wrapped successfully.');
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
module.exports = { wrapScrapyProject };
|
package/src/lib/utils.js
CHANGED
|
@@ -5,6 +5,8 @@ const {
|
|
|
5
5
|
const fs = require('fs');
|
|
6
6
|
const https = require('https');
|
|
7
7
|
const path = require('path');
|
|
8
|
+
const { finished } = require('stream');
|
|
9
|
+
const { promisify } = require('util');
|
|
8
10
|
|
|
9
11
|
const {
|
|
10
12
|
ACT_JOB_TERMINAL_STATUSES,
|
|
@@ -16,6 +18,7 @@ const {
|
|
|
16
18
|
LOCAL_STORAGE_SUBDIRS,
|
|
17
19
|
SOURCE_FILE_FORMATS,
|
|
18
20
|
} = require('@apify/consts');
|
|
21
|
+
const AdmZip = require('adm-zip');
|
|
19
22
|
const { ApifyClient } = require('apify-client');
|
|
20
23
|
const archiver = require('archiver-promise');
|
|
21
24
|
const escapeStringRegexp = require('escape-string-regexp');
|
|
@@ -40,6 +43,7 @@ const {
|
|
|
40
43
|
SUPPORTED_NODEJS_VERSION,
|
|
41
44
|
MINIMUM_SUPPORTED_PYTHON_VERSION,
|
|
42
45
|
LANGUAGE,
|
|
46
|
+
PROJECT_TYPES,
|
|
43
47
|
} = require('./consts');
|
|
44
48
|
const {
|
|
45
49
|
ensureFolderExistsSync,
|
|
@@ -49,6 +53,26 @@ const {
|
|
|
49
53
|
const {
|
|
50
54
|
info,
|
|
51
55
|
} = require('./outputs');
|
|
56
|
+
const { ProjectAnalyzer } = require('./project_analyzer');
|
|
57
|
+
|
|
58
|
+
/**
 * Performs an HTTPS GET request and resolves with the response stream,
 * transparently following HTTP redirects.
 *
 * All standard redirect status codes (301, 302, 303, 307, 308) are followed,
 * and a relative `Location` header is resolved against the URL that was requested.
 *
 * @param {string} url Absolute URL to request.
 * @returns {Promise<unknown>} Resolves with the response of the final (non-redirect) request.
 *  Rejects when the request emits an error, or when a redirect response carries
 *  a missing or invalid `Location` header.
 */
const httpsGet = async (url) => {
    const redirectStatusCodes = [301, 302, 303, 307, 308];

    return new Promise((resolve, reject) => {
        https.get(url, (response) => {
            const { statusCode, headers } = response;

            if (!redirectStatusCodes.includes(statusCode)) {
                resolve(response);
                return;
            }

            // Destroy the response to close the HTTP connection, otherwise this hangs for a long time with Node 19+ (due to HTTP keep-alive).
            response.destroy();

            if (!headers.location) {
                reject(new Error(`Received redirect status ${statusCode} from ${url} without a Location header.`));
                return;
            }

            try {
                // The Location header may be relative, so resolve it against the requested URL.
                // This runs inside the response callback, so a thrown error must be turned
                // into a rejection explicitly - it would otherwise be an uncaught exception.
                const redirectUrl = new URL(headers.location, url).toString();
                resolve(httpsGet(redirectUrl));
            } catch (err) {
                reject(err);
            }
        }).on('error', reject);
    });
};
|
|
52
76
|
|
|
53
77
|
// Properties from apify.json file that will be migrated to actor specs in .actor/actor.json
|
|
54
78
|
const MIGRATED_APIFY_JSON_PROPERTIES = ['name', 'version', 'buildTag'];
|
|
@@ -581,8 +605,8 @@ const detectNpmVersion = () => {
|
|
|
581
605
|
|
|
582
606
|
const detectLocalActorLanguage = () => {
|
|
583
607
|
const cwd = process.cwd();
|
|
584
|
-
const isActorInNode = fs.existsSync(path.join(
|
|
585
|
-
const isActorInPython = fs.existsSync(path.join(
|
|
608
|
+
const isActorInNode = fs.existsSync(path.join(cwd, 'package.json'));
|
|
609
|
+
const isActorInPython = fs.existsSync(path.join(cwd, 'src/__main__.py')) || ProjectAnalyzer.getProjectType(cwd) === PROJECT_TYPES.SCRAPY;
|
|
586
610
|
const result = {};
|
|
587
611
|
if (isActorInNode) {
|
|
588
612
|
result.language = LANGUAGE.NODEJS;
|
|
@@ -596,7 +620,17 @@ const detectLocalActorLanguage = () => {
|
|
|
596
620
|
return result;
|
|
597
621
|
};
|
|
598
622
|
|
|
623
|
+
/**
 * Downloads a ZIP archive over HTTPS and extracts all of its entries into a directory.
 *
 * The archive is buffered fully in memory before extraction.
 *
 * @param {object} options
 * @param {string} options.url URL of the ZIP archive to download.
 * @param {string} options.pathTo Directory the archive is extracted into; existing files are overwritten.
 * @returns {Promise<void>}
 * @throws {Error} When the server responds with a non-2xx status code.
 */
const downloadAndUnzip = async ({ url, pathTo }) => {
    const zipStream = await httpsGet(url);

    // Fail early on HTTP errors. Otherwise the error page body would be handed
    // to AdmZip, which fails with an unrelated "invalid zip" error.
    const { statusCode } = zipStream;
    if (statusCode < 200 || statusCode >= 300) {
        // Release the connection so the process does not hang on keep-alive.
        zipStream.destroy();
        throw new Error(`Cannot download archive from ${url}: server responded with status code ${statusCode}.`);
    }

    const chunks = [];
    zipStream.on('data', (chunk) => chunks.push(chunk));
    // Resolves once the response has been fully read, rejects on a stream error.
    await promisify(finished)(zipStream);

    const zip = new AdmZip(Buffer.concat(chunks));
    // Second argument `true` overwrites files that already exist in the target directory.
    zip.extractAllTo(pathTo, true);
};
|
|
631
|
+
|
|
599
632
|
module.exports = {
|
|
633
|
+
httpsGet,
|
|
600
634
|
getLoggedClientOrThrow,
|
|
601
635
|
getLocalConfig,
|
|
602
636
|
setLocalConfig,
|
|
@@ -630,4 +664,5 @@ module.exports = {
|
|
|
630
664
|
isNodeVersionSupported,
|
|
631
665
|
detectNpmVersion,
|
|
632
666
|
detectLocalActorLanguage,
|
|
667
|
+
downloadAndUnzip,
|
|
633
668
|
};
|