pagerts 0.2.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +220 -16
- package/bin/main.js +9 -25
- package/bin/main.js.map +4 -4
- package/package.json +37 -13
- package/bin/package.json +0 -40
- package/bin/src/extractors/AbstractExtractor.js +0 -11
- package/bin/src/extractors/AbstractExtractor.js.map +0 -1
- package/bin/src/extractors/PageExtractor.js +0 -13
- package/bin/src/extractors/PageExtractor.js.map +0 -1
- package/bin/src/extractors/ResourceExtractor.js +0 -32
- package/bin/src/extractors/ResourceExtractor.js.map +0 -1
- package/bin/src/main.js +0 -36
- package/bin/src/main.js.map +0 -1
- package/bin/src/page/Page.js +0 -8
- package/bin/src/page/Page.js.map +0 -1
- package/bin/src/page/PageFetcher.js +0 -26
- package/bin/src/page/PageFetcher.js.map +0 -1
- package/bin/src/printers/AbstractResourcePrinter.js +0 -8
- package/bin/src/printers/AbstractResourcePrinter.js.map +0 -1
- package/bin/src/printers/JSONStylePrinter.js +0 -12
- package/bin/src/printers/JSONStylePrinter.js.map +0 -1
- package/bin/src/printers/LogStylePrinter.js +0 -27
- package/bin/src/printers/LogStylePrinter.js.map +0 -1
- package/bin/src/resource.js +0 -56
- package/bin/src/resource.js.map +0 -1
- package/jest.config.js +0 -198
- package/src/extractors/AbstractExtractor.ts +0 -5
- package/src/extractors/PageExtractor.ts +0 -12
- package/src/extractors/ResourceExtractor.ts +0 -25
- package/src/extractors/TagExtractor.ts +0 -14
- package/src/main.ts +0 -43
- package/src/page/Page.ts +0 -19
- package/src/page/PageFetcher.ts +0 -30
- package/src/printers/AbstractResourcePrinter.ts +0 -6
- package/src/printers/JSONStylePrinter.ts +0 -12
- package/src/printers/LogStylePrinter.ts +0 -28
- package/src/resource.ts +0 -96
- package/tsconfig.json +0 -12
package/README.md
CHANGED
|
@@ -1,39 +1,243 @@
|
|
|
1
|
-
#
|
|
1
|
+
# PagerTS
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[CI status](https://github.com/akinevz0/pagerts/actions)
|
|
4
|
+
[Security policy](./SECURITY.md)
|
|
5
|
+
[Node.js](https://nodejs.org)
|
|
6
|
+
[License](./LICENSE)
|
|
4
7
|
|
|
5
|
-
|
|
8
|
+
PagerTS is a secure, modern command-line utility that transforms URLs into structured JSON objects, extracting all navigable items and resources from webpages.
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- 🔒 **Security-First**: Built-in URL validation, rate limiting, and XSS protection
|
|
13
|
+
- 🚀 **Modern TypeScript**: Strict type checking and modern ES2022 syntax
|
|
14
|
+
- ⚡ **Fast**: Efficient parsing with JSDOM and concurrent request handling
|
|
15
|
+
- 🧪 **Well-Tested**: Comprehensive test coverage with Jest
|
|
16
|
+
- 📦 **Easy to Use**: Simple CLI interface with sensible defaults
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
### Global Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npm install -g pagerts
|
|
24
|
+
pagerts <url>
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Using npx (No Installation Required)
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npx pagerts <url>
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### From Source
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
git clone https://github.com/akinevz0/pagerts.git
|
|
37
|
+
cd pagerts
|
|
38
|
+
npm install
|
|
39
|
+
npm run build
|
|
40
|
+
npm link
|
|
41
|
+
```
|
|
6
42
|
|
|
7
43
|
## Usage
|
|
8
44
|
|
|
9
|
-
|
|
45
|
+
### Basic Usage
|
|
46
|
+
|
|
47
|
+
Extract resources from a remote URL:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pagerts https://example.com
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Extract from multiple URLs:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pagerts https://example.com https://example.org
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Extract from a local HTML file:
|
|
10
60
|
|
|
11
61
|
```bash
|
|
12
|
-
pagerts
|
|
62
|
+
pagerts file:///path/to/file.html
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Output Format
|
|
66
|
+
|
|
67
|
+
The output is a JSON object containing:
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{
|
|
71
|
+
"title": "Page Title",
|
|
72
|
+
"url": "https://example.com",
|
|
73
|
+
"resources": [
|
|
74
|
+
{
|
|
75
|
+
"name": "Link Text",
|
|
76
|
+
"url": "https://example.com/page"
|
|
77
|
+
}
|
|
78
|
+
]
|
|
79
|
+
}
|
|
13
80
|
```
|
|
14
81
|
|
|
15
|
-
|
|
82
|
+
Fields:
|
|
83
|
+
|
|
84
|
+
- `title`: The page's title extracted from the `<title>` tag
|
|
85
|
+
- `url`: The URL of the page
|
|
86
|
+
- `resources`: Array of resources found on the page (links, meta tags, embeds)
|
|
87
|
+
  - `name`: Readable text or description
|
|
88
|
+
  - `url`: Target URL of the resource
|
|
89
|
+
|
|
90
|
+
## Security
|
|
91
|
+
|
|
92
|
+
PagerTS takes security seriously. See [SECURITY.md](./SECURITY.md) for:
|
|
93
|
+
|
|
94
|
+
- Security features and protections
|
|
95
|
+
- How to report vulnerabilities
|
|
96
|
+
- Best practices for users
|
|
97
|
+
- Security checklist for contributors
|
|
16
98
|
|
|
17
|
-
|
|
99
|
+
### Built-in Security Features
|
|
18
100
|
|
|
19
|
-
|
|
101
|
+
- ✅ URL validation (only allows `http://`, `https://`, `file://`)
|
|
102
|
+
- ✅ Input sanitization to prevent XSS attacks
|
|
103
|
+
- ✅ Rate limiting (50 requests/minute by default)
|
|
104
|
+
- ✅ Request timeouts to prevent hanging
|
|
105
|
+
- ✅ Maximum URL length enforcement
|
|
106
|
+
- ✅ Suspicious pattern detection
|
|
107
|
+
- ✅ Safe HTML parsing (no script execution)
|
|
20
108
|
|
|
21
|
-
|
|
109
|
+
## Development
|
|
22
110
|
|
|
23
|
-
|
|
111
|
+
### Prerequisites
|
|
24
112
|
|
|
25
|
-
|
|
113
|
+
- Node.js >= 18.0.0
|
|
114
|
+
- npm >= 9.0.0
|
|
26
115
|
|
|
27
|
-
|
|
116
|
+
### Setup
|
|
28
117
|
|
|
29
118
|
```bash
|
|
30
|
-
|
|
31
|
-
pagerts
|
|
119
|
+
# Clone the repository
|
|
120
|
+
git clone https://github.com/akinevz0/pagerts.git
|
|
121
|
+
cd pagerts
|
|
122
|
+
|
|
123
|
+
# Install dependencies
|
|
124
|
+
npm install
|
|
125
|
+
|
|
126
|
+
# Run in development mode
|
|
127
|
+
npm run dev <url>
|
|
32
128
|
```
|
|
33
129
|
|
|
34
|
-
|
|
130
|
+
### Available Scripts
|
|
35
131
|
|
|
36
132
|
```bash
|
|
37
|
-
|
|
133
|
+
# Run tests
|
|
134
|
+
npm test
|
|
135
|
+
|
|
136
|
+
# Run tests in watch mode
|
|
137
|
+
npm run test:watch
|
|
138
|
+
|
|
139
|
+
# Build the project
|
|
140
|
+
npm run build
|
|
141
|
+
|
|
142
|
+
# Lint code
|
|
143
|
+
npm run lint
|
|
144
|
+
|
|
145
|
+
# Fix linting issues
|
|
146
|
+
npm run lint:fix
|
|
147
|
+
|
|
148
|
+
# Type check
|
|
149
|
+
npm run type-check
|
|
150
|
+
|
|
151
|
+
# Format code
|
|
152
|
+
npm run format
|
|
153
|
+
|
|
154
|
+
# Check formatting
|
|
155
|
+
npm run format:check
|
|
156
|
+
|
|
157
|
+
# Security audit
|
|
158
|
+
npm run security:audit
|
|
159
|
+
|
|
160
|
+
# Complete security check (audit + lint)
|
|
161
|
+
npm run security:check
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Project Structure
|
|
165
|
+
|
|
38
166
|
```
|
|
167
|
+
pagerts/
|
|
168
|
+
├── src/
|
|
169
|
+
│   ├── main.ts  # CLI entry point
|
|
170
|
+
│   ├── security.ts  # Security utilities
|
|
171
|
+
│   ├── resource.ts  # Resource types
|
|
172
|
+
│   ├── extractors/  # Content extractors
|
|
173
|
+
│   │   ├── AbstractExtractor.ts
|
|
174
|
+
│   │   ├── PageExtractor.ts
|
|
175
|
+
│   │   ├── ResourceExtractor.ts
|
|
176
|
+
│   │   └── TagExtractor.ts
|
|
177
|
+
│   ├── page/  # Page fetching
|
|
178
|
+
│   │   ├── Page.ts
|
|
179
|
+
│   │   └── PageFetcher.ts
|
|
180
|
+
│   ├── printers/  # Output formatters
|
|
181
|
+
│   │   ├── AbstractResourcePrinter.ts
|
|
182
|
+
│   │   ├── JSONStylePrinter.ts
|
|
183
|
+
│   │   └── LogStylePrinter.ts
|
|
184
|
+
│   └── __tests__/  # Test files
|
|
185
|
+
├── bin/  # Built files
|
|
186
|
+
├── .github/workflows/  # CI/CD pipelines
|
|
187
|
+
├── package.json
|
|
188
|
+
├── tsconfig.json
|
|
189
|
+
├── jest.config.js
|
|
190
|
+
├── eslint.config.js
|
|
191
|
+
└── SECURITY.md
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Contributing
|
|
195
|
+
|
|
196
|
+
Contributions are welcome! Please:
|
|
197
|
+
|
|
198
|
+
1. Fork the repository
|
|
199
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
200
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
201
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
202
|
+
5. Open a Pull Request
|
|
203
|
+
|
|
204
|
+
### Contribution Guidelines
|
|
205
|
+
|
|
206
|
+
- Write tests for new features
|
|
207
|
+
- Follow the existing code style (enforced by ESLint and Prettier)
|
|
208
|
+
- Update documentation as needed
|
|
209
|
+
- Ensure all tests pass (`npm test`)
|
|
210
|
+
- Run security checks (`npm run security:check`)
|
|
211
|
+
- Follow security best practices (see [SECURITY.md](./SECURITY.md))
|
|
212
|
+
|
|
213
|
+
## License
|
|
214
|
+
|
|
215
|
+
This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details.
|
|
216
|
+
|
|
217
|
+
## Author
|
|
218
|
+
|
|
219
|
+
**Kirill kn253 Nevzorov**
|
|
220
|
+
|
|
221
|
+
## Support
|
|
222
|
+
|
|
223
|
+
- 🐛 [Report bugs](https://github.com/akinevz0/pagerts/issues)
|
|
224
|
+
- 💡 [Request features](https://github.com/akinevz0/pagerts/issues)
|
|
225
|
+
- 🔒 [Report security issues](./SECURITY.md)
|
|
226
|
+
|
|
227
|
+
## Changelog
|
|
228
|
+
|
|
229
|
+
### v0.3.0 (Latest)
|
|
230
|
+
|
|
231
|
+
- ✨ Added comprehensive security features
|
|
232
|
+
- ✨ Implemented URL validation and sanitization
|
|
233
|
+
- ✨ Added rate limiting
|
|
234
|
+
- ✨ Modernized codebase with TypeScript strict mode
|
|
235
|
+
- ✨ Added ESLint with security plugin
|
|
236
|
+
- ✨ Added comprehensive test suite
|
|
237
|
+
- ✨ Added CI/CD with GitHub Actions
|
|
238
|
+
- ✨ Improved error handling and retry logic
|
|
239
|
+
- 📝 Added security documentation
|
|
240
|
+
|
|
241
|
+
### v0.2.0
|
|
39
242
|
|
|
243
|
+
- Initial public release
|
package/bin/main.js
CHANGED
|
@@ -1,32 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
`).
|
|
4
|
-
`)
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
(Did you mean one of ${i.join(", ")}?)`:i.length===1?`
|
|
11
|
-
(Did you mean ${i[0]}?)`:""}Y.suggestSimilar=Se});var te=b(ee=>{var ke=require("node:events").EventEmitter,j=require("node:child_process"),g=require("node:path"),L=require("node:fs"),m=require("node:process"),{Argument:He,humanReadableArgName:Ve}=w(),{CommanderError:W}=A(),{Help:Pe}=M(),{Option:Q,DualOptions:Te}=I(),{suggestSimilar:X}=z(),K=class a extends ke{constructor(e){super(),this.commands=[],this.options=[],this.parent=null,this._allowUnknownOption=!1,this._allowExcessArguments=!0,this.registeredArguments=[],this._args=this.registeredArguments,this.args=[],this.rawArgs=[],this.processedArgs=[],this._scriptPath=null,this._name=e||"",this._optionValues={},this._optionValueSources={},this._storeOptionsAsProperties=!1,this._actionHandler=null,this._executableHandler=!1,this._executableFile=null,this._executableDir=null,this._defaultCommandName=null,this._exitCallback=null,this._aliases=[],this._combineFlagAndOptionalValue=!0,this._description="",this._summary="",this._argsDescription=void 0,this._enablePositionalOptions=!1,this._passThroughOptions=!1,this._lifeCycleHooks={},this._showHelpAfterError=!1,this._showSuggestionAfterError=!0,this._outputConfiguration={writeOut:t=>m.stdout.write(t),writeErr:t=>m.stderr.write(t),getOutHelpWidth:()=>m.stdout.isTTY?m.stdout.columns:void 0,getErrHelpWidth:()=>m.stderr.isTTY?m.stderr.columns:void 0,outputError:(t,i)=>i(t)},this._hidden=!1,this._helpOption=void 0,this._addImplicitHelpCommand=void 0,this._helpCommand=void 0,this._helpConfiguration={}}copyInheritedSettings(e){return 
this._outputConfiguration=e._outputConfiguration,this._helpOption=e._helpOption,this._helpCommand=e._helpCommand,this._helpConfiguration=e._helpConfiguration,this._exitCallback=e._exitCallback,this._storeOptionsAsProperties=e._storeOptionsAsProperties,this._combineFlagAndOptionalValue=e._combineFlagAndOptionalValue,this._allowExcessArguments=e._allowExcessArguments,this._enablePositionalOptions=e._enablePositionalOptions,this._showHelpAfterError=e._showHelpAfterError,this._showSuggestionAfterError=e._showSuggestionAfterError,this}_getCommandAndAncestors(){let e=[];for(let t=this;t;t=t.parent)e.push(t);return e}command(e,t,i){let n=t,s=i;typeof n=="object"&&n!==null&&(s=n,n=null),s=s||{};let[,r,u]=e.match(/([^ ]+) *(.*)/),o=this.createCommand(r);return n&&(o.description(n),o._executableHandler=!0),s.isDefault&&(this._defaultCommandName=o._name),o._hidden=!!(s.noHelp||s.hidden),o._executableFile=s.executableFile||null,u&&o.arguments(u),this._registerCommand(o),o.parent=this,o.copyInheritedSettings(this),n?this:o}createCommand(e){return new a(e)}createHelp(){return Object.assign(new Pe,this.configureHelp())}configureHelp(e){return e===void 0?this._helpConfiguration:(this._helpConfiguration=e,this)}configureOutput(e){return e===void 0?this._outputConfiguration:(Object.assign(this._outputConfiguration,e),this)}showHelpAfterError(e=!0){return typeof e!="string"&&(e=!!e),this._showHelpAfterError=e,this}showSuggestionAfterError(e=!0){return this._showSuggestionAfterError=!!e,this}addCommand(e,t){if(!e._name)throw new Error(`Command passed to .addCommand() must have a name
|
|
12
|
-
- specify the name in Command constructor or using .name()`);return t=t||{},t.isDefault&&(this._defaultCommandName=e._name),(t.noHelp||t.hidden)&&(e._hidden=!0),this._registerCommand(e),e.parent=this,e._checkForBrokenPassThrough(),this}createArgument(e,t){return new He(e,t)}argument(e,t,i,n){let s=this.createArgument(e,t);return typeof i=="function"?s.default(n).argParser(i):s.default(i),this.addArgument(s),this}arguments(e){return e.trim().split(/ +/).forEach(t=>{this.argument(t)}),this}addArgument(e){let t=this.registeredArguments.slice(-1)[0];if(t&&t.variadic)throw new Error(`only the last argument can be variadic '${t.name()}'`);if(e.required&&e.defaultValue!==void 0&&e.parseArg===void 0)throw new Error(`a default value for a required argument is never used: '${e.name()}'`);return this.registeredArguments.push(e),this}helpCommand(e,t){if(typeof e=="boolean")return this._addImplicitHelpCommand=e,this;e=e??"help [command]";let[,i,n]=e.match(/([^ ]+) *(.*)/),s=t??"display help for command",r=this.createCommand(i);return r.helpOption(!1),n&&r.arguments(n),s&&r.description(s),this._addImplicitHelpCommand=!0,this._helpCommand=r,this}addHelpCommand(e,t){return typeof e!="object"?(this.helpCommand(e,t),this):(this._addImplicitHelpCommand=!0,this._helpCommand=e,this)}_getHelpCommand(){return this._addImplicitHelpCommand??(this.commands.length&&!this._actionHandler&&!this._findCommand("help"))?(this._helpCommand===void 0&&this.helpCommand(void 0,void 0),this._helpCommand):null}hook(e,t){let i=["preSubcommand","preAction","postAction"];if(!i.includes(e))throw new Error(`Unexpected value for event passed to hook : '${e}'.
|
|
13
|
-
Expecting one of '${i.join("', '")}'`);return this._lifeCycleHooks[e]?this._lifeCycleHooks[e].push(t):this._lifeCycleHooks[e]=[t],this}exitOverride(e){return e?this._exitCallback=e:this._exitCallback=t=>{if(t.code!=="commander.executeSubCommandAsync")throw t},this}_exit(e,t,i){this._exitCallback&&this._exitCallback(new W(e,t,i)),m.exit(e)}action(e){let t=i=>{let n=this.registeredArguments.length,s=i.slice(0,n);return this._storeOptionsAsProperties?s[n]=this:s[n]=this.opts(),s.push(this),e.apply(this,s)};return this._actionHandler=t,this}createOption(e,t){return new Q(e,t)}_callParseArg(e,t,i,n){try{return e.parseArg(t,i)}catch(s){if(s.code==="commander.invalidArgument"){let r=`${n} ${s.message}`;this.error(r,{exitCode:s.exitCode,code:s.code})}throw s}}_registerOption(e){let t=e.short&&this._findOption(e.short)||e.long&&this._findOption(e.long);if(t){let i=e.long&&this._findOption(e.long)?e.long:e.short;throw new Error(`Cannot add option '${e.flags}'${this._name&&` to command '${this._name}'`} due to conflicting flag '${i}'
|
|
14
|
-
- already used by option '${t.flags}'`)}this.options.push(e)}_registerCommand(e){let t=n=>[n.name()].concat(n.aliases()),i=t(e).find(n=>this._findCommand(n));if(i){let n=t(this._findCommand(i)).join("|"),s=t(e).join("|");throw new Error(`cannot add command '${s}' as already have command '${n}'`)}this.commands.push(e)}addOption(e){this._registerOption(e);let t=e.name(),i=e.attributeName();if(e.negate){let s=e.long.replace(/^--no-/,"--");this._findOption(s)||this.setOptionValueWithSource(i,e.defaultValue===void 0?!0:e.defaultValue,"default")}else e.defaultValue!==void 0&&this.setOptionValueWithSource(i,e.defaultValue,"default");let n=(s,r,u)=>{s==null&&e.presetArg!==void 0&&(s=e.presetArg);let o=this.getOptionValue(i);s!==null&&e.parseArg?s=this._callParseArg(e,s,o,r):s!==null&&e.variadic&&(s=e._concatValue(s,o)),s==null&&(e.negate?s=!1:e.isBoolean()||e.optional?s=!0:s=""),this.setOptionValueWithSource(i,s,u)};return this.on("option:"+t,s=>{let r=`error: option '${e.flags}' argument '${s}' is invalid.`;n(s,r,"cli")}),e.envVar&&this.on("optionEnv:"+t,s=>{let r=`error: option '${e.flags}' value '${s}' from env '${e.envVar}' is invalid.`;n(s,r,"env")}),this}_optionEx(e,t,i,n,s){if(typeof t=="object"&&t instanceof Q)throw new Error("To add an Option object use addOption() instead of option() or requiredOption()");let r=this.createOption(t,i);if(r.makeOptionMandatory(!!e.mandatory),typeof n=="function")r.default(s).argParser(n);else if(n instanceof RegExp){let u=n;n=(o,l)=>{let h=u.exec(o);return h?h[0]:l},r.default(s).argParser(n)}else r.default(n);return this.addOption(r)}option(e,t,i,n){return this._optionEx({},e,t,i,n)}requiredOption(e,t,i,n){return this._optionEx({mandatory:!0},e,t,i,n)}combineFlagAndOptionalValue(e=!0){return this._combineFlagAndOptionalValue=!!e,this}allowUnknownOption(e=!0){return this._allowUnknownOption=!!e,this}allowExcessArguments(e=!0){return this._allowExcessArguments=!!e,this}enablePositionalOptions(e=!0){return 
this._enablePositionalOptions=!!e,this}passThroughOptions(e=!0){return this._passThroughOptions=!!e,this._checkForBrokenPassThrough(),this}_checkForBrokenPassThrough(){if(this.parent&&this._passThroughOptions&&!this.parent._enablePositionalOptions)throw new Error(`passThroughOptions cannot be used for '${this._name}' without turning on enablePositionalOptions for parent command(s)`)}storeOptionsAsProperties(e=!0){if(this.options.length)throw new Error("call .storeOptionsAsProperties() before adding options");if(Object.keys(this._optionValues).length)throw new Error("call .storeOptionsAsProperties() before setting option values");return this._storeOptionsAsProperties=!!e,this}getOptionValue(e){return this._storeOptionsAsProperties?this[e]:this._optionValues[e]}setOptionValue(e,t){return this.setOptionValueWithSource(e,t,void 0)}setOptionValueWithSource(e,t,i){return this._storeOptionsAsProperties?this[e]=t:this._optionValues[e]=t,this._optionValueSources[e]=i,this}getOptionValueSource(e){return this._optionValueSources[e]}getOptionValueSourceWithGlobals(e){let t;return this._getCommandAndAncestors().forEach(i=>{i.getOptionValueSource(e)!==void 0&&(t=i.getOptionValueSource(e))}),t}_prepareUserArgs(e,t){if(e!==void 0&&!Array.isArray(e))throw new Error("first parameter to parse must be array or undefined");if(t=t||{},e===void 0&&t.from===void 0){m.versions?.electron&&(t.from="electron");let n=m.execArgv??[];(n.includes("-e")||n.includes("--eval")||n.includes("-p")||n.includes("--print"))&&(t.from="eval")}e===void 0&&(e=m.argv),this.rawArgs=e.slice();let i;switch(t.from){case void 0:case"node":this._scriptPath=e[1],i=e.slice(2);break;case"electron":m.defaultApp?(this._scriptPath=e[1],i=e.slice(2)):i=e.slice(1);break;case"user":i=e.slice(0);break;case"eval":i=e.slice(1);break;default:throw new Error(`unexpected parse option { from: '${t.from}' 
}`)}return!this._name&&this._scriptPath&&this.nameFromFilename(this._scriptPath),this._name=this._name||"program",i}parse(e,t){let i=this._prepareUserArgs(e,t);return this._parseCommand([],i),this}async parseAsync(e,t){let i=this._prepareUserArgs(e,t);return await this._parseCommand([],i),this}_executeSubCommand(e,t){t=t.slice();let i=!1,n=[".js",".ts",".tsx",".mjs",".cjs"];function s(h,c){let d=g.resolve(h,c);if(L.existsSync(d))return d;if(n.includes(g.extname(c)))return;let O=n.find(p=>L.existsSync(`${d}${p}`));if(O)return`${d}${O}`}this._checkForMissingMandatoryOptions(),this._checkForConflictingOptions();let r=e._executableFile||`${this._name}-${e._name}`,u=this._executableDir||"";if(this._scriptPath){let h;try{h=L.realpathSync(this._scriptPath)}catch{h=this._scriptPath}u=g.resolve(g.dirname(h),u)}if(u){let h=s(u,r);if(!h&&!e._executableFile&&this._scriptPath){let c=g.basename(this._scriptPath,g.extname(this._scriptPath));c!==this._name&&(h=s(u,`${c}-${e._name}`))}r=h||r}i=n.includes(g.extname(r));let o;m.platform!=="win32"?i?(t.unshift(r),t=Z(m.execArgv).concat(t),o=j.spawn(m.argv[0],t,{stdio:"inherit"})):o=j.spawn(r,t,{stdio:"inherit"}):(t.unshift(r),t=Z(m.execArgv).concat(t),o=j.spawn(m.execPath,t,{stdio:"inherit"})),o.killed||["SIGUSR1","SIGUSR2","SIGTERM","SIGINT","SIGHUP"].forEach(c=>{m.on(c,()=>{o.killed===!1&&o.exitCode===null&&o.kill(c)})});let l=this._exitCallback;o.on("close",h=>{h=h??1,l?l(new W(h,"commander.executeSubCommandAsync","(close)")):m.exit(h)}),o.on("error",h=>{if(h.code==="ENOENT"){let c=u?`searched for local subcommand relative to directory '${u}'`:"no directory for search for local subcommand, use .executableDir() to supply a custom directory",d=`'${r}' does not exist
|
|
15
|
-
- if '${e._name}' is not meant to be an executable command, remove description parameter from '.command()' and use '.description()' instead
|
|
16
|
-
- if the default executable name is not suitable, use the executableFile option to supply a custom name or path
|
|
17
|
-
- ${c}`;throw new Error(d)}else if(h.code==="EACCES")throw new Error(`'${r}' not executable`);if(!l)m.exit(1);else{let c=new W(1,"commander.executeSubCommandAsync","(error)");c.nestedError=h,l(c)}}),this.runningCommand=o}_dispatchSubcommand(e,t,i){let n=this._findCommand(e);n||this.help({error:!0});let s;return s=this._chainOrCallSubCommandHook(s,n,"preSubcommand"),s=this._chainOrCall(s,()=>{if(n._executableHandler)this._executeSubCommand(n,t.concat(i));else return n._parseCommand(t,i)}),s}_dispatchHelpCommand(e){e||this.help();let t=this._findCommand(e);return t&&!t._executableHandler&&t.help(),this._dispatchSubcommand(e,[],[this._getHelpOption()?.long??this._getHelpOption()?.short??"--help"])}_checkNumberOfArguments(){this.registeredArguments.forEach((e,t)=>{e.required&&this.args[t]==null&&this.missingArgument(e.name())}),!(this.registeredArguments.length>0&&this.registeredArguments[this.registeredArguments.length-1].variadic)&&this.args.length>this.registeredArguments.length&&this._excessArguments(this.args)}_processArguments(){let e=(i,n,s)=>{let r=n;if(n!==null&&i.parseArg){let u=`error: command-argument value '${n}' is invalid for argument '${i.name()}'.`;r=this._callParseArg(i,n,s,u)}return r};this._checkNumberOfArguments();let t=[];this.registeredArguments.forEach((i,n)=>{let s=i.defaultValue;i.variadic?n<this.args.length?(s=this.args.slice(n),i.parseArg&&(s=s.reduce((r,u)=>e(i,u,r),i.defaultValue))):s===void 0&&(s=[]):n<this.args.length&&(s=this.args[n],i.parseArg&&(s=e(i,s,i.defaultValue))),t[n]=s}),this.processedArgs=t}_chainOrCall(e,t){return e&&e.then&&typeof e.then=="function"?e.then(()=>t()):t()}_chainOrCallHooks(e,t){let i=e,n=[];return this._getCommandAndAncestors().reverse().filter(s=>s._lifeCycleHooks[t]!==void 0).forEach(s=>{s._lifeCycleHooks[t].forEach(r=>{n.push({hookedCommand:s,callback:r})})}),t==="postAction"&&n.reverse(),n.forEach(s=>{i=this._chainOrCall(i,()=>s.callback(s.hookedCommand,this))}),i}_chainOrCallSubCommandHook(e,t,i){let 
n=e;return this._lifeCycleHooks[i]!==void 0&&this._lifeCycleHooks[i].forEach(s=>{n=this._chainOrCall(n,()=>s(this,t))}),n}_parseCommand(e,t){let i=this.parseOptions(t);if(this._parseOptionsEnv(),this._parseOptionsImplied(),e=e.concat(i.operands),t=i.unknown,this.args=e.concat(t),e&&this._findCommand(e[0]))return this._dispatchSubcommand(e[0],e.slice(1),t);if(this._getHelpCommand()&&e[0]===this._getHelpCommand().name())return this._dispatchHelpCommand(e[1]);if(this._defaultCommandName)return this._outputHelpIfRequested(t),this._dispatchSubcommand(this._defaultCommandName,e,t);this.commands.length&&this.args.length===0&&!this._actionHandler&&!this._defaultCommandName&&this.help({error:!0}),this._outputHelpIfRequested(i.unknown),this._checkForMissingMandatoryOptions(),this._checkForConflictingOptions();let n=()=>{i.unknown.length>0&&this.unknownOption(i.unknown[0])},s=`command:${this.name()}`;if(this._actionHandler){n(),this._processArguments();let r;return r=this._chainOrCallHooks(r,"preAction"),r=this._chainOrCall(r,()=>this._actionHandler(this.processedArgs)),this.parent&&(r=this._chainOrCall(r,()=>{this.parent.emit(s,e,t)})),r=this._chainOrCallHooks(r,"postAction"),r}if(this.parent&&this.parent.listenerCount(s))n(),this._processArguments(),this.parent.emit(s,e,t);else if(e.length){if(this._findCommand("*"))return this._dispatchSubcommand("*",e,t);this.listenerCount("command:*")?this.emit("command:*",e,t):this.commands.length?this.unknownCommand():(n(),this._processArguments())}else this.commands.length?(n(),this.help({error:!0})):(n(),this._processArguments())}_findCommand(e){if(e)return this.commands.find(t=>t._name===e||t._aliases.includes(e))}_findOption(e){return this.options.find(t=>t.is(e))}_checkForMissingMandatoryOptions(){this._getCommandAndAncestors().forEach(e=>{e.options.forEach(t=>{t.mandatory&&e.getOptionValue(t.attributeName())===void 0&&e.missingMandatoryOptionValue(t)})})}_checkForConflictingLocalOptions(){let e=this.options.filter(i=>{let 
n=i.attributeName();return this.getOptionValue(n)===void 0?!1:this.getOptionValueSource(n)!=="default"});e.filter(i=>i.conflictsWith.length>0).forEach(i=>{let n=e.find(s=>i.conflictsWith.includes(s.attributeName()));n&&this._conflictingOption(i,n)})}_checkForConflictingOptions(){this._getCommandAndAncestors().forEach(e=>{e._checkForConflictingLocalOptions()})}parseOptions(e){let t=[],i=[],n=t,s=e.slice();function r(o){return o.length>1&&o[0]==="-"}let u=null;for(;s.length;){let o=s.shift();if(o==="--"){n===i&&n.push(o),n.push(...s);break}if(u&&!r(o)){this.emit(`option:${u.name()}`,o);continue}if(u=null,r(o)){let l=this._findOption(o);if(l){if(l.required){let h=s.shift();h===void 0&&this.optionMissingArgument(l),this.emit(`option:${l.name()}`,h)}else if(l.optional){let h=null;s.length>0&&!r(s[0])&&(h=s.shift()),this.emit(`option:${l.name()}`,h)}else this.emit(`option:${l.name()}`);u=l.variadic?l:null;continue}}if(o.length>2&&o[0]==="-"&&o[1]!=="-"){let l=this._findOption(`-${o[1]}`);if(l){l.required||l.optional&&this._combineFlagAndOptionalValue?this.emit(`option:${l.name()}`,o.slice(2)):(this.emit(`option:${l.name()}`),s.unshift(`-${o.slice(2)}`));continue}}if(/^--[^=]+=/.test(o)){let l=o.indexOf("="),h=this._findOption(o.slice(0,l));if(h&&(h.required||h.optional)){this.emit(`option:${h.name()}`,o.slice(l+1));continue}}if(r(o)&&(n=i),(this._enablePositionalOptions||this._passThroughOptions)&&t.length===0&&i.length===0){if(this._findCommand(o)){t.push(o),s.length>0&&i.push(...s);break}else if(this._getHelpCommand()&&o===this._getHelpCommand().name()){t.push(o),s.length>0&&t.push(...s);break}else if(this._defaultCommandName){i.push(o),s.length>0&&i.push(...s);break}}if(this._passThroughOptions){n.push(o),s.length>0&&n.push(...s);break}n.push(o)}return{operands:t,unknown:i}}opts(){if(this._storeOptionsAsProperties){let e={},t=this.options.length;for(let i=0;i<t;i++){let n=this.options[i].attributeName();e[n]=n===this._versionOptionName?this._version:this[n]}return 
e}return this._optionValues}optsWithGlobals(){return this._getCommandAndAncestors().reduce((e,t)=>Object.assign(e,t.opts()),{})}error(e,t){this._outputConfiguration.outputError(`${e}
|
|
18
|
-
`,this._outputConfiguration.writeErr),typeof this._showHelpAfterError=="string"?this._outputConfiguration.writeErr(`${this._showHelpAfterError}
|
|
19
|
-
`):this._showHelpAfterError&&(this._outputConfiguration.writeErr(`
|
|
20
|
-
`),this.outputHelp({error:!0}));let i=t||{},n=i.exitCode||1,s=i.code||"commander.error";this._exit(n,s,e)}_parseOptionsEnv(){this.options.forEach(e=>{if(e.envVar&&e.envVar in m.env){let t=e.attributeName();(this.getOptionValue(t)===void 0||["default","config","env"].includes(this.getOptionValueSource(t)))&&(e.required||e.optional?this.emit(`optionEnv:${e.name()}`,m.env[e.envVar]):this.emit(`optionEnv:${e.name()}`))}})}_parseOptionsImplied(){let e=new Te(this.options),t=i=>this.getOptionValue(i)!==void 0&&!["default","implied"].includes(this.getOptionValueSource(i));this.options.filter(i=>i.implied!==void 0&&t(i.attributeName())&&e.valueFromOption(this.getOptionValue(i.attributeName()),i)).forEach(i=>{Object.keys(i.implied).filter(n=>!t(n)).forEach(n=>{this.setOptionValueWithSource(n,i.implied[n],"implied")})})}missingArgument(e){let t=`error: missing required argument '${e}'`;this.error(t,{code:"commander.missingArgument"})}optionMissingArgument(e){let t=`error: option '${e.flags}' argument missing`;this.error(t,{code:"commander.optionMissingArgument"})}missingMandatoryOptionValue(e){let t=`error: required option '${e.flags}' not specified`;this.error(t,{code:"commander.missingMandatoryOptionValue"})}_conflictingOption(e,t){let i=r=>{let u=r.attributeName(),o=this.getOptionValue(u),l=this.options.find(c=>c.negate&&u===c.attributeName()),h=this.options.find(c=>!c.negate&&u===c.attributeName());return l&&(l.presetArg===void 0&&o===!1||l.presetArg!==void 0&&o===l.presetArg)?l:h||r},n=r=>{let u=i(r),o=u.attributeName();return this.getOptionValueSource(o)==="env"?`environment variable '${u.envVar}'`:`option '${u.flags}'`},s=`error: ${n(e)} cannot be used with ${n(t)}`;this.error(s,{code:"commander.conflictingOption"})}unknownOption(e){if(this._allowUnknownOption)return;let t="";if(e.startsWith("--")&&this._showSuggestionAfterError){let n=[],s=this;do{let 
r=s.createHelp().visibleOptions(s).filter(u=>u.long).map(u=>u.long);n=n.concat(r),s=s.parent}while(s&&!s._enablePositionalOptions);t=X(e,n)}let i=`error: unknown option '${e}'${t}`;this.error(i,{code:"commander.unknownOption"})}_excessArguments(e){if(this._allowExcessArguments)return;let t=this.registeredArguments.length,i=t===1?"":"s",s=`error: too many arguments${this.parent?` for '${this.name()}'`:""}. Expected ${t} argument${i} but got ${e.length}.`;this.error(s,{code:"commander.excessArguments"})}unknownCommand(){let e=this.args[0],t="";if(this._showSuggestionAfterError){let n=[];this.createHelp().visibleCommands(this).forEach(s=>{n.push(s.name()),s.alias()&&n.push(s.alias())}),t=X(e,n)}let i=`error: unknown command '${e}'${t}`;this.error(i,{code:"commander.unknownCommand"})}version(e,t,i){if(e===void 0)return this._version;this._version=e,t=t||"-V, --version",i=i||"output the version number";let n=this.createOption(t,i);return this._versionOptionName=n.attributeName(),this._registerOption(n),this.on("option:"+n.name(),()=>{this._outputConfiguration.writeOut(`${e}
|
|
21
|
-
`),this._exit(0,"commander.version",e)}),this}description(e,t){return e===void 0&&t===void 0?this._description:(this._description=e,t&&(this._argsDescription=t),this)}summary(e){return e===void 0?this._summary:(this._summary=e,this)}alias(e){if(e===void 0)return this._aliases[0];let t=this;if(this.commands.length!==0&&this.commands[this.commands.length-1]._executableHandler&&(t=this.commands[this.commands.length-1]),e===t._name)throw new Error("Command alias can't be the same as its name");let i=this.parent?._findCommand(e);if(i){let n=[i.name()].concat(i.aliases()).join("|");throw new Error(`cannot add alias '${e}' to command '${this.name()}' as already have command '${n}'`)}return t._aliases.push(e),this}aliases(e){return e===void 0?this._aliases:(e.forEach(t=>this.alias(t)),this)}usage(e){if(e===void 0){if(this._usage)return this._usage;let t=this.registeredArguments.map(i=>Ve(i));return[].concat(this.options.length||this._helpOption!==null?"[options]":[],this.commands.length?"[command]":[],this.registeredArguments.length?t:[]).join(" ")}return this._usage=e,this}name(e){return e===void 0?this._name:(this._name=e,this)}nameFromFilename(e){return this._name=g.basename(e,g.extname(e)),this}executableDir(e){return e===void 0?this._executableDir:(this._executableDir=e,this)}helpInformation(e){let t=this.createHelp();return t.helpWidth===void 0&&(t.helpWidth=e&&e.error?this._outputConfiguration.getErrHelpWidth():this._outputConfiguration.getOutHelpWidth()),t.formatHelp(this,t)}_getHelpContext(e){e=e||{};let t={error:!!e.error},i;return t.error?i=n=>this._outputConfiguration.writeErr(n):i=n=>this._outputConfiguration.writeOut(n),t.write=e.write||i,t.command=this,t}outputHelp(e){let t;typeof e=="function"&&(t=e,e=void 0);let i=this._getHelpContext(e);this._getCommandAndAncestors().reverse().forEach(s=>s.emit("beforeAllHelp",i)),this.emit("beforeHelp",i);let n=this.helpInformation(i);if(t&&(n=t(n),typeof n!="string"&&!Buffer.isBuffer(n)))throw new Error("outputHelp 
callback must return a string or a Buffer");i.write(n),this._getHelpOption()?.long&&this.emit(this._getHelpOption().long),this.emit("afterHelp",i),this._getCommandAndAncestors().forEach(s=>s.emit("afterAllHelp",i))}helpOption(e,t){return typeof e=="boolean"?(e?this._helpOption=this._helpOption??void 0:this._helpOption=null,this):(e=e??"-h, --help",t=t??"display help for command",this._helpOption=this.createOption(e,t),this)}_getHelpOption(){return this._helpOption===void 0&&this.helpOption(void 0,void 0),this._helpOption}addHelpOption(e){return this._helpOption=e,this}help(e){this.outputHelp(e);let t=m.exitCode||0;t===0&&e&&typeof e!="function"&&e.error&&(t=1),this._exit(t,"commander.help","(outputHelp)")}addHelpText(e,t){let i=["beforeAll","before","after","afterAll"];if(!i.includes(e))throw new Error(`Unexpected value for position to addHelpText.
|
|
22
|
-
Expecting one of '${i.join("', '")}'`);let n=`${e}Help`;return this.on(n,s=>{let r;typeof t=="function"?r=t({error:s.error,command:s.command}):r=t,r&&s.write(`${r}
|
|
23
|
-
`)}),this}_outputHelpIfRequested(e){let t=this._getHelpOption();t&&e.find(n=>t.is(n))&&(this.outputHelp(),this._exit(0,"commander.helpDisplayed","(outputHelp)"))}};function Z(a){return a.map(e=>{if(!e.startsWith("--inspect"))return e;let t,i="127.0.0.1",n="9229",s;return(s=e.match(/^(--inspect(-brk)?)$/))!==null?t=s[1]:(s=e.match(/^(--inspect(-brk|-port)?)=([^:]+)$/))!==null?(t=s[1],/^\d+$/.test(s[3])?n=s[3]:i=s[3]):(s=e.match(/^(--inspect(-brk|-port)?)=([^:]+):(\d+)$/))!==null&&(t=s[1],i=s[3],n=s[4]),t&&n!=="0"?`${t}=${i}:${parseInt(n)+1}`:e})}ee.Command=K});var re=b(f=>{var{Argument:ie}=w(),{Command:U}=te(),{CommanderError:Re,InvalidArgumentError:ne}=A(),{Help:De}=M(),{Option:se}=I();f.program=new U;f.createCommand=a=>new U(a);f.createOption=(a,e)=>new se(a,e);f.createArgument=(a,e)=>new ie(a,e);f.Command=U;f.Option=se;f.Argument=ie;f.Help=De;f.CommanderError=Re;f.InvalidArgumentError=ne;f.InvalidOptionArgumentError=ne});var oe=Ce(re(),1),{program:ze,createCommand:Qe,createArgument:ae,createOption:Xe,CommanderError:Ze,InvalidArgumentError:et,InvalidOptionArgumentError:tt,Command:ue,Argument:it,Option:nt,Help:st}=oe.default;var le="pagerts",he="A tool for viewing external relations in a webpage",ce="0.2.0";var C=class{constructor(e){this.name=e}};var E=class extends C{constructor(){super("page-extractor")}async extract(e){let{window:{document:{title:t,location:{href:i}}}}=e;return{title:t,url:i}}};function Me(a,e){for(let t of e)if(qe(t,a))return t}var Ne=["id","innerText","textContent","class","ariaLabel","ariaDescription","alt","rel"],Fe=["href","data-src","target","action","src","url"];function me(a){for(let e of Ne){let t=a[e];if(t&&typeof t=="string"&&t.trim()!=="")return{key:e,value:t}}}function pe(a){let e=Me(a,[...Fe]),t=a[e];if(t&&typeof t=="string"&&t.trim()!=="")return{key:e,url:t}}var qe=(a,e)=>a in e&&e[a]!==void 0;var v=class extends C{constructor(t){super("page-extractor");this.tags=t}async extract(t){let{document:i}=t.window,n=[];for(let s of 
this.tags){let r=i.querySelectorAll(s),u=Array.from(r);for(let o of u){let l=me(o),h=pe(o);!l||!h||h.url.startsWith("http")&&n.push({text:l,link:h})}}return n}};var y=require("jsdom"),$=class{async fetchPage(e){let t,i=new y.VirtualConsole().on("jsdomError",n=>{process.stderr.write(`Error parsing ${e}:${n.message}
|
|
24
|
-
`)});return e.startsWith("file://")?t=y.JSDOM.fromFile(e,{virtualConsole:i}):t=y.JSDOM.fromURL(e,{virtualConsole:i}),t.then(n=>({url:e,content:n})).catch(({message:n})=>({url:e,error:`JSDOM failed to parse: ${n}`}))}async fetchAll(e){return(await Promise.all(e.map(i=>this.fetchPage(i)))).filter(i=>i.content!==void 0)}};var S=class{constructor(){}};var k=class extends S{print(...e){let t=JSON.stringify(e);process.stdout.write(t+`
|
|
25
|
-
`)}};var Ie=new ue,je=ae("<url | file...>","remote https://URL or local file://resource.html to extract from");(async()=>await Ie.name(le).version(ce,"-v, --version").description(he).addArgument(je).action(async a=>{let e=new k,t=new $,i=new E,n=new v(["a","meta","link","embed"]),s=await t.fetchAll(a),r=[];for(let{content:u,url:o,error:l}of s){let h=l in u?[]:await n.extract(u),c=l in u?{url:o,error:l}:await i.extract(u);r.push({...c,resources:h})}await e.print(...r)}).parseAsync(process.argv))();
|
|
2
|
+
import { Command as j, createArgument as N } from "commander";
import { JSDOM as b, VirtualConsole as A } from "jsdom";
import { fileURLToPath } from "node:url";

// Inlined copy of package.json. Only `name`, `description` and `version` are
// read further down (to configure the CLI); the rest is carried along by the bundler.
const R = {
  name: "pagerts",
  description: "A tool for viewing external relations in a webpage",
  version: "1.0.1",
  main: "main.js",
  bin: { pagerts: "bin/main.js" },
  files: ["bin"],
  engines: { node: ">=18.0.0" },
  scripts: {
    test: "jest --coverage",
    "test:watch": "jest --watch",
    build: "esbuild src/main.ts --bundle --packages=external --outdir=bin --minify --sourcemap --platform=node --format=esm",
    lint: "eslint src/**/*.ts",
    "lint:fix": "eslint src/**/*.ts --fix",
    "type-check": "tsc --noEmit",
    format: 'prettier --write "src/**/*.ts"',
    "format:check": 'prettier --check "src/**/*.ts"',
    "security:audit": "npm audit --audit-level=moderate",
    "security:check": "npm run security:audit && npm run lint",
    start: "node ./bin/main.js",
    dev: "tsx src/main.ts",
    prepare: "npm run build",
  },
  keywords: ["webpage", "hierarchy", "management", "web-scraping", "cli", "url-extraction"],
  author: "Kirill kn253 Nevzorov",
  license: "MIT",
  bugs: { url: "https://github.com/akinevz0/pagerts/issues" },
  homepage: "https://github.com/akinevz0/pagerts",
  dependencies: { commander: "^12.1.0", jsdom: "^25.0.1" },
  devDependencies: {
    "@types/jest": "^29.5.14",
    "@types/jsdom": "^21.1.7",
    "@types/node": "^22.10.5",
    "@typescript-eslint/eslint-plugin": "^8.20.0",
    "@typescript-eslint/parser": "^8.20.0",
    esbuild: "^0.25.1",
    eslint: "^9.18.0",
    "eslint-config-prettier": "^9.1.0",
    "eslint-plugin-security": "^3.0.1",
    jest: "^29.7.0",
    prettier: "^3.4.2",
    "ts-jest": "^29.2.5",
    tsx: "^4.19.2",
    typescript: "^5.7.2",
  },
};

/** Base class for extractors; it only stores an identifying `name`. */
const a = class {
  constructor(e) {
    this.name = e;
  }
};

/** Extractor that reads the document title and location of a parsed page. */
const d = class extends a {
  constructor() {
    super("page-extractor");
  }

  /**
   * @param {object} e - parsed page exposing `window.document`.
   * @returns {Promise<{title: string, url: string}>} title and `location.href` of the document.
   */
  async extract(e) {
    const {
      title,
      location: { href },
    } = e.window.document;
    return { title, url: href };
  }
};

/** True when `key` exists on `target` (own or inherited) and its value is not `undefined`. */
const S = (s, e) => s in e && e[s] !== void 0;

/** Returns the first key from `e` that is present on `s` according to `S`, or `undefined`. */
function T(s, e) {
  return e.find((key) => S(key, s));
}

// Properties probed, in order, to find a human-readable label for an element.
// NOTE(review): "class" is looked up as a property; DOM elements normally expose
// `className` instead, so this entry presumably never matches. Confirm before
// changing it, since doing so would alter the emitted `key` values.
const k = ["id", "innerText", "textContent", "class", "ariaLabel", "ariaDescription", "alt"];

// Properties probed, in order, to find the link target of an element.
const M = ["href", "data-src", "target", "action", "src", "url"];

/**
 * Finds a label for an element.
 * @returns {{key: string, value: string}|undefined} the first property from `k`
 *   that holds a non-blank string, or `undefined` when there is none.
 */
function w(s) {
  for (const key of k) {
    const value = s[key];
    if (value && typeof value == "string" && value.trim() !== "") {
      return { key, value };
    }
  }
}

/**
 * Finds the link carried by an element.
 * Only the first key of `M` present on the element is considered; if its value
 * is not a non-blank string the result is `undefined` (later keys are not tried).
 * @returns {{key: string, url: string}|undefined}
 */
function P(s) {
  const key = T(s, [...M]);
  if (!key) return;
  const url = s[key];
  if (url && typeof url == "string" && url.trim() !== "") {
    return { key, url };
  }
}

/**
 * Extractor that collects labelled links from every element matching the
 * configured tag selectors.
 * NOTE(review): it registers itself under the same name as the page extractor
 * ("page-extractor"); this looks like a copy-paste leftover. The name is not
 * read anywhere in this file, so it is left unchanged.
 */
const f = class extends a {
  constructor(r) {
    super("page-extractor");
    this.tags = r;
  }

  /**
   * @param {object} r - parsed page exposing `window.document`.
   * @returns {Promise<Array<{text: object, link: object}>>} one entry per element
   *   that has both a label and a link whose URL starts with "http".
   */
  async extract(r) {
    const { document } = r.window;
    const found = [];
    for (const selector of this.tags) {
      for (const element of Array.from(document.querySelectorAll(selector))) {
        const text = w(element);
        const link = P(element);
        if (text && link && link.url.startsWith("http")) {
          found.push({ text, link });
        }
      }
    }
    return found;
  }
};

/**
 * Converts a `file://` URL into a filesystem path.
 * `fileURLToPath` decodes percent-escapes (the validator re-serialises URLs, so
 * a space arrives here as `%20`). Inputs it rejects fall back to the previous
 * behaviour of dropping the 7-character "file://" prefix.
 */
function toLocalPath(fileUrl) {
  try {
    return fileURLToPath(fileUrl);
  } catch {
    return fileUrl.substring(7);
  }
}

/** Loads pages through jsdom with a per-request timeout and a bounded number of retries. */
const g = class {
  timeout;
  maxRetries;

  /**
   * @param {number} e - timeout per attempt in milliseconds (default 10000).
   * @param {number} r - maximum number of retries after the first attempt (default 2).
   */
  constructor(e = 1e4, r = 2) {
    this.timeout = e;
    this.maxRetries = r;
  }

  /**
   * Loads one page.
   * @param {string} e - `file://` URL (loaded from disk) or any other URL (loaded with `JSDOM.fromURL`).
   * @param {number} r - zero-based attempt counter used for retries.
   * @returns {Promise<{url: string, content: object}|{url: string, error: string}>}
   *   never rejects; failures are reported through the `error` field.
   */
  async fetchPage(e, r = 0) {
    const virtualConsole = new A().on("jsdomError", (err) => {
      process.stderr.write(`Error parsing ${e}: ${err.message}\n`);
    });

    let timer;
    try {
      const loading = e.startsWith("file://")
        ? b.fromFile(toLocalPath(e), { virtualConsole })
        : b.fromURL(e, {
            virtualConsole,
            resources: "usable",
            runScripts: "outside-only",
            beforeParse(c) {
              c.setTimeout = () => {
                throw new Error("setTimeout disabled for security");
              };
              c.setInterval = () => {
                throw new Error("setInterval disabled for security");
              };
            },
          });

      const deadline = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error("Request timeout")), this.timeout);
      });

      const content = await Promise.race([loading, deadline]);
      return { url: e, content };
    } catch (err) {
      const message = err instanceof Error ? err.message : "Unknown error";
      if (r < this.maxRetries && this.isRetryableError(message)) {
        process.stderr.write(`Retrying ${e} (attempt ${r + 1}/${this.maxRetries})...\n`);
        // Linear back-off: 1 s, 2 s, ...
        await this.delay(1e3 * (r + 1));
        return this.fetchPage(e, r + 1);
      }
      return { url: e, error: `Failed to fetch: ${message}` };
    } finally {
      // Without this the pending timer keeps the event loop (and therefore the
      // CLI process) alive for the full timeout after the page has loaded.
      clearTimeout(timer);
    }
  }

  /** True when the error message matches one of the transient-failure patterns. */
  isRetryableError(e) {
    return [/timeout/i, /ECONNRESET/i, /ETIMEDOUT/i, /ENOTFOUND/i, /network/i].some((t) => t.test(e));
  }

  /** Resolves after `e` milliseconds. */
  delay(e) {
    return new Promise((resolve) => setTimeout(resolve, e));
  }

  /**
   * Loads all URLs concurrently.
   * @param {string[]} e
   * @returns {Promise<object[]>} results that carry either `content` or a truthy `error`.
   */
  async fetchAll(e) {
    const results = await Promise.all(e.map((url) => this.fetchPage(url)));
    return results.filter((t) => t.content !== void 0 || t.error);
  }
};

/** Base class for printers. */
const p = class {
  constructor() {}
};

/** Printer that writes all of its arguments to stdout as one JSON array followed by a newline. */
const y = class extends p {
  print(...e) {
    process.stdout.write(JSON.stringify(e) + "\n");
  }
};

// Protocols accepted by the validator.
const E = ["http:", "https:", "file:"];

// Substrings that cause a URL to be rejected outright.
// NOTE(review): these are unanchored, so legitimate URLs can be rejected, e.g.
// a query parameter such as `confirmation_token=` matches /on\w+=/i. Left as is
// because loosening a security filter needs an explicit decision.
const D = [/javascript:/i, /data:/i, /vbscript:/i, /<script/i, /on\w+=/i];

/**
 * Validates and normalises a single URL argument.
 * @param {string} s - raw command-line argument.
 * @returns {{isValid: true, sanitizedUrl: string}|{isValid: false, error: string}}
 */
function O(s) {
  if (!s || !s.trim()) {
    return { isValid: !1, error: "URL cannot be empty" };
  }

  const trimmed = s.trim();
  if (trimmed.length > 2048) {
    return { isValid: !1, error: "URL exceeds maximum length of 2048 characters" };
  }
  if (D.some((pattern) => pattern.test(trimmed))) {
    return { isValid: !1, error: "URL contains suspicious patterns" };
  }

  let parsed;
  try {
    parsed = new URL(trimmed);
  } catch {
    // Unparseable input is still accepted verbatim when it looks like a file URL.
    return trimmed.startsWith("file://")
      ? { isValid: !0, sanitizedUrl: trimmed }
      : { isValid: !1, error: "Invalid URL format" };
  }

  if (!E.includes(parsed.protocol)) {
    return {
      isValid: !1,
      error: `Protocol ${parsed.protocol} is not allowed. Allowed protocols: ${E.join(", ")}`,
    };
  }

  const host = parsed.hostname.toLowerCase();
  const isLocal =
    host === "localhost" ||
    host === "127.0.0.1" ||
    host === "::1" ||
    // URL.hostname keeps the brackets around IPv6 literals, so the bare "::1"
    // comparison above cannot match a parsed URL.
    host === "[::1]" ||
    host.startsWith("192.168.") ||
    host.startsWith("10.") ||
    /^172\.(1[6-9]|2\d|3[01])\./.test(host);
  if (isLocal && parsed.protocol !== "file:") {
    console.warn(`Warning: Accessing local network resource: ${trimmed}`);
  }

  return { isValid: !0, sanitizedUrl: parsed.toString() };
}

/**
 * Validates a list of URL arguments.
 * @param {string[]} s
 * @returns {{validUrls: string[], errors: Array<{url: string, error: string}>}}
 */
function v(s) {
  const validUrls = [];
  const errors = [];
  for (const url of s) {
    const verdict = O(url);
    if (verdict.isValid && verdict.sanitizedUrl) {
      validUrls.push(verdict.sanitizedUrl);
    } else {
      errors.push({ url, error: verdict.error || "Unknown validation error" });
    }
  }
  return { validUrls, errors };
}

const { description: $, name: q, version: _ } = R;
const V = new j();
const J = N("<url | file...>", "remote https://URL or local file://resource.html to extract from");

// CLI entry point: validate the arguments, load every valid URL, extract the
// page summary plus its outgoing resources, and print everything as JSON on
// stdout. Progress and diagnostics go to stderr so stdout stays machine-readable.
(async () =>
  await V.name(q)
    .version(_, "-v, --version")
    .description($)
    .addArgument(J)
    .action(async (s) => {
      try {
        const { validUrls, errors } = v(s);

        if (errors.length > 0) {
          console.error(`\n\u274C URL Validation Errors:`);
          errors.forEach(({ url, error }) => {
            console.error(` - ${url}: ${error}`);
          });
        }

        if (validUrls.length === 0) {
          console.error(`\n\u274C No valid URLs to process. Exiting.`);
          process.exit(1);
        }

        console.error(`\n\u2705 Processing ${validUrls.length} valid URL(s)...`);

        const printer = new y();
        const fetcher = new g();
        const pageExtractor = new d();
        const resourceExtractor = new f(["a", "meta", "link", "embed"]);

        const pages = await fetcher.fetchAll(validUrls);
        const report = [];
        for (const { content, url, error } of pages) {
          const failed = error !== void 0 || !content;
          const resources = failed ? [] : await resourceExtractor.extract(content);
          const summary = failed
            ? { url, error: error ?? "Unknown error", resources }
            : await pageExtractor.extract(content);
          report.push({ ...summary, resources });
        }

        await printer.print(...report);
      } catch (e) {
        console.error(`\n\u274C An error occurred:`, e instanceof Error ? e.message : e);
        process.exit(1);
      }
    })
    .parseAsync(process.argv))();
|
|
26
10
|
/**
|
|
27
11
|
* @license MIT
|
|
28
12
|
* We are interested in visualising a page as a collection of tags.
|
|
29
|
-
*
|
|
13
|
+
*
|
|
30
14
|
* We wish to work with tags that can be compactly previewed on a webpage.
|
|
31
15
|
* Here we must declare all of the element types that can be used to represent
|
|
32
16
|
* a resource that can be hyperlinked off a webpage.
|