defuddle-cli 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,7 +5,7 @@ Command line interface for [Defuddle](https://github.com/kepano/defuddle). Extra
5
5
  ## Installation
6
6
 
7
7
  ```bash
8
- npm install -g @defuddle/cli
8
+ npm install -g defuddle-cli
9
9
  ```
10
10
 
11
11
  ## Usage
package/dist/cli.d.ts ADDED
@@ -0,0 +1 @@
1
+ export {};
package/dist/cli.js ADDED
@@ -0,0 +1,77 @@
1
+ import { Command } from 'commander';
2
+ import pkg from 'defuddle';
3
+ const { Defuddle } = pkg;
4
+ import { readFile } from 'fs/promises';
5
+ import { resolve } from 'path';
6
+ import { createVirtualDOM, setupDocumentProperties } from './dom/setup.js';
7
// Command-line entry point: registers the `parse` sub-command, which loads
// HTML from a URL or a local file, runs it through Defuddle and prints the
// result to stdout.
const program = new Command();
program
    .name('defuddle-cli')
    .description('CLI tool for parsing HTML content')
    // NOTE(review): this version string is hard-coded and does not match the
    // published package version; consider reading it from package.json.
    .version('1.0.0');
program
    .command('parse')
    .description('Parse HTML content from a file or URL')
    .argument('<source>', 'File path or URL to parse')
    .option('-f, --format <format>', 'Output format (json, html, markdown)', 'json')
    .option('-m, --markdown', 'Convert output to markdown', false)
    .option('--md', 'Alias for --markdown', false)
    .option('-d, --debug', 'Enable debug mode', false)
    .action(async (source, options) => {
        try {
            // `--md` is an alias for `--markdown`.
            if (options.md) {
                options.markdown = true;
            }
            let content;
            let url;
            // Anything starting with an http(s) scheme is fetched; everything
            // else is treated as a path relative to the working directory.
            if (source.startsWith('http://') || source.startsWith('https://')) {
                url = source;
                const response = await fetch(source);
                // Fail loudly on 4xx/5xx instead of parsing an error page.
                if (!response.ok) {
                    throw new Error(`Failed to fetch URL: ${response.statusText}`);
                }
                content = await response.text();
            }
            else {
                const filePath = resolve(process.cwd(), source);
                content = await readFile(filePath, 'utf-8');
            }
            // Build the virtual DOM and patch in the properties Defuddle expects.
            const dom = createVirtualDOM(content, url);
            const doc = dom.window.document;
            setupDocumentProperties(doc);
            // `await` is harmless if parse() is synchronous and required if it
            // returns a promise (the sibling parse command awaits it).
            const defuddle = new Defuddle(doc);
            const result = await defuddle.parse();
            // Commander exposes `--format <format>` as `options.format`.
            // The previous code read `options.outputFormat`, which is always
            // undefined, so `-f html` / `-f markdown` silently produced JSON.
            const wantsContent = options.markdown
                || options.format === 'html'
                || options.format === 'markdown';
            const output = wantsContent
                ? result.content
                : JSON.stringify(result, null, 2);
            console.log(output);
            // Diagnostics go to stderr so stdout stays pipeable.
            if (options.debug) {
                console.error('\nDebug information:');
                console.error('Source:', source);
                console.error('Content length:', content.length);
                console.error('Document structure:', doc.documentElement?.outerHTML);
            }
        }
        catch (error) {
            console.error('Error:', error instanceof Error ? error.message : String(error));
            process.exit(1);
        }
    });
program.parse();
@@ -0,0 +1,8 @@
1
// Options accepted by handleParseCommand. Every field is optional.
+ export interface ParseOptions {
2
// File path to write the result to; when absent the result is printed.
+ output?: string;
3
// Emit the content converted to markdown.
+ markdown?: boolean;
4
// Emit the whole parse result as pretty-printed JSON (checked before `markdown`).
+ json?: boolean;
5
// Forwarded to Defuddle as its `debug` option; the command exits before printing content.
+ debug?: boolean;
6
// Name of a single result property to print instead of the full output.
+ property?: string;
7
+ }
8
// Loads HTML from `source` (URL or file path), parses it and prints or writes
// the result according to `options`. Resolves with no value.
+ export declare function handleParseCommand(source: string, options: ParseOptions): Promise<void>;
@@ -0,0 +1,66 @@
1
+ import pkg from 'defuddle';
2
+ const { Defuddle } = pkg;
3
+ import chalk from 'chalk';
4
+ import { readFile, writeFile } from 'fs/promises';
5
+ import { setupContentDOM, setupDocumentProperties } from '../dom/setup.js';
6
+ import { createMarkdownContent } from '../markdown.js';
7
/**
 * Parse a page with Defuddle and emit the result.
 *
 * @param source  An http(s) URL to fetch, or a path to a local HTML file.
 * @param options See ParseOptions: `property` prints one result field,
 *                `json` / `markdown` select the output format, `output`
 *                writes to a file instead of stdout, `debug` is forwarded
 *                to Defuddle and suppresses content output.
 *
 * Any failure is reported on stderr and terminates the process with
 * exit code 1.
 */
export async function handleParseCommand(source, options) {
    try {
        // Load content from file or URL. Require a real http(s) scheme so a
        // local file such as "http-notes.html" is not mistaken for a URL.
        let html;
        if (/^https?:\/\//i.test(source)) {
            const response = await fetch(source);
            if (!response.ok) {
                throw new Error(`Failed to fetch URL: ${response.statusText}`);
            }
            html = await response.text();
        }
        else {
            html = await readFile(source, 'utf-8');
        }
        // Set up DOM environment and the document properties Defuddle reads.
        const dom = setupContentDOM(html, source);
        const doc = dom.window.document;
        setupDocumentProperties(doc);
        // Parse content
        const defuddle = new Defuddle(doc, { debug: options.debug });
        const result = await defuddle.parse();
        // Handle property extraction. Match case-insensitively against the
        // result's own keys: the previous code lower-cased the requested name
        // and looked that up directly, so camelCase keys could never be
        // selected.
        if (options.property) {
            const wanted = options.property.toLowerCase();
            let key = Object.keys(result).find((name) => name.toLowerCase() === wanted);
            // Keep the old behavior for anything only reachable via `in`.
            if (key === undefined && wanted in result) {
                key = wanted;
            }
            if (key === undefined) {
                throw new Error(`Property "${options.property}" not found in result`);
            }
            console.log(result[key]);
            return;
        }
        // If in debug mode, don't show content output
        if (options.debug) {
            process.exit(0);
        }
        // Handle output format: JSON wins over markdown; the default is the
        // raw `content` field of the result.
        let output;
        if (options.json) {
            output = JSON.stringify(result, null, 2);
        }
        else if (options.markdown) {
            output = createMarkdownContent(result.content, source);
        }
        else {
            output = result.content;
        }
        // Write output to file or stdout
        if (options.output) {
            await writeFile(options.output, output);
            console.log(chalk.green(`Output written to ${options.output}`));
        }
        else {
            console.log(output);
        }
    }
    catch (error) {
        console.error(chalk.red('Error:'), error instanceof Error ? error.message : String(error));
        process.exit(1);
    }
}
@@ -0,0 +1,38 @@
1
// Fields common to every CSS rule object described by these declarations.
+ export interface CSSRuleBase {
2
+ cssText: string;
3
+ parentRule: CSSRule | null;
4
+ parentStyleSheet: CSSStyleSheet | null;
5
// Numeric rule kind. NOTE(review): presumably one of the CSSRuleConstants values — confirm.
+ type: number;
6
+ }
7
// A rule that additionally carries a media list and its own nested rule list.
+ export interface CSSMediaRuleBase extends CSSRuleBase {
8
+ media: MediaList;
9
+ cssRules: CSSRuleList;
10
+ deleteRule(index: number): void;
11
// Returns a number. NOTE(review): presumably the index of the inserted rule — confirm.
+ insertRule(rule: string, index?: number): number;
12
+ }
13
// Minimal style sheet shape: a rule list, an optional owning rule, and
// rule insertion/deletion methods.
+ export interface CSSStyleSheetBase {
14
+ cssRules: CSSRuleList;
15
+ ownerRule: CSSRule | null;
16
+ deleteRule(index: number): void;
17
+ insertRule(rule: string, index?: number): number;
18
+ }
19
// Public aliases: each DOM-style name maps directly onto the corresponding
// *Base interface declared in this file.
+ export type CSSRule = CSSRuleBase;
20
+ export type CSSMediaRule = CSSMediaRuleBase;
21
+ export type CSSStyleSheet = CSSStyleSheetBase;
22
// Read-only table of numeric CSS rule-type codes, keyed by constant name.
// Note that the value 9 is not assigned to any name.
+ export declare const CSSRuleConstants: {
23
+ readonly STYLE_RULE: 1;
24
+ readonly CHARSET_RULE: 2;
25
+ readonly IMPORT_RULE: 3;
26
+ readonly MEDIA_RULE: 4;
27
+ readonly FONT_FACE_RULE: 5;
28
+ readonly PAGE_RULE: 6;
29
+ readonly KEYFRAMES_RULE: 7;
30
+ readonly KEYFRAME_RULE: 8;
31
+ readonly NAMESPACE_RULE: 10;
32
+ readonly COUNTER_STYLE_RULE: 11;
33
+ readonly SUPPORTS_RULE: 12;
34
+ readonly DOCUMENT_RULE: 13;
35
+ readonly FONT_FEATURE_VALUES_RULE: 14;
36
+ readonly VIEWPORT_RULE: 15;
37
+ readonly REGION_STYLE_RULE: 16;
38
+ };
@@ -0,0 +1,20 @@
1
+ // This file provides type definitions for CSS interfaces.
2
+ // The actual implementations are provided by JSDOM.
3
+ // Define static values
4
// Numeric CSS rule-type codes keyed by constant name, built from an ordered
// list of [name, code] pairs. The code 9 is intentionally absent.
export const CSSRuleConstants = Object.fromEntries([
    ['STYLE_RULE', 1],
    ['CHARSET_RULE', 2],
    ['IMPORT_RULE', 3],
    ['MEDIA_RULE', 4],
    ['FONT_FACE_RULE', 5],
    ['PAGE_RULE', 6],
    ['KEYFRAMES_RULE', 7],
    ['KEYFRAME_RULE', 8],
    ['NAMESPACE_RULE', 10],
    ['COUNTER_STYLE_RULE', 11],
    ['SUPPORTS_RULE', 12],
    ['DOCUMENT_RULE', 13],
    ['FONT_FEATURE_VALUES_RULE', 14],
    ['VIEWPORT_RULE', 15],
    ['REGION_STYLE_RULE', 16]
]);
@@ -0,0 +1,3 @@
1
+ import { SetupFunction } from './setup.js';
2
// Both helpers take a jsdom window and patch methods onto its prototypes;
// they return nothing.
+ export declare const setupDocumentMethods: SetupFunction;
3
+ export declare const setupWindowMethods: SetupFunction;
@@ -0,0 +1,49 @@
1
+ export const setupDocumentMethods = (window) => {
2
+ if (!window.Document.prototype.getSelection) {
3
+ window.Document.prototype.getSelection = function () {
4
+ const selection = {
5
+ anchorNode: null,
6
+ anchorOffset: 0,
7
+ direction: 'forward',
8
+ focusNode: null,
9
+ focusOffset: 0,
10
+ isCollapsed: true,
11
+ rangeCount: 0,
12
+ type: 'None',
13
+ getRangeAt: function () { return new window.Range(); },
14
+ removeAllRanges: function () { },
15
+ addRange: function () { },
16
+ collapse: function () { },
17
+ collapseToEnd: function () { },
18
+ collapseToStart: function () { },
19
+ deleteFromDocument: function () { },
20
+ empty: function () { },
21
+ extend: function () { },
22
+ modify: function () { },
23
+ selectAllChildren: function () { },
24
+ setBaseAndExtent: function () { },
25
+ setPosition: function () { },
26
+ toString: function () { return ''; },
27
+ containsNode: function (node, allowPartialContainment = false) {
28
+ return false;
29
+ },
30
+ removeRange: function (range) { }
31
+ };
32
+ return selection;
33
+ };
34
+ }
35
+ };
36
// Polyfills `Window.prototype.getComputedStyle` on the given window when it
// is missing. The stub ignores its arguments and reports an empty string for
// every property it knows about.
export const setupWindowMethods = (window) => {
    if (window.Window.prototype.getComputedStyle) {
        return;
    }
    window.Window.prototype.getComputedStyle = function (elt, pseudoElt) {
        return {
            accentColor: '',
            alignContent: '',
            alignItems: '',
            alignSelf: '',
            getPropertyValue(prop) {
                return '';
            }
        };
    };
};
@@ -0,0 +1,21 @@
1
// Stand-alone token-list shape: a string `value`, a `length`, and
// add/remove/toggle style methods, iterable over strings.
+ export interface DOMSettableTokenList {
2
+ length: number;
3
+ value: string;
4
+ add(token: string): void;
5
+ contains(token: string): boolean;
6
+ item(index: number): string | null;
7
+ remove(token: string): void;
8
+ replace(oldToken: string, newToken: string): boolean;
9
+ supports(token: string): boolean;
10
+ toggle(token: string, force?: boolean): boolean;
11
+ [Symbol.iterator](): Iterator<string>;
12
+ }
13
// Installers: each assigns shim classes onto the object passed as `global`.
+ export declare const setupCSSInterfaces: (global: any) => void;
14
+ export declare const setupSVGElement: (global: any) => void;
// Factories for empty placeholder collections; each call returns a new object.
15
+ export declare function createMediaList(): MediaList;
16
+ export declare function createCSSRuleList(): CSSRuleList;
17
+ export declare function createCSSStyleDeclaration(): CSSStyleDeclaration;
18
+ export declare function createNamedNodeMap(): NamedNodeMap;
19
+ export declare function createNodeList(): NodeListOf<ChildNode>;
20
+ export declare function createHTMLCollection(): HTMLCollection;
21
+ export declare function createDOMSettableTokenList(): DOMSettableTokenList;
@@ -0,0 +1,182 @@
1
+ // Define CSS interfaces
2
// Installs minimal CSSRule, CSSMediaRule and CSSStyleSheet shim classes on
// the supplied global object. The shims hold placeholder state only; rule
// insertion and deletion are no-ops.
export const setupCSSInterfaces = (global) => {
    // Base rule: `type` defaults to 1 unless the constructor is given one.
    global.CSSRule = class {
        constructor(type) {
            this.type = type !== undefined ? type : 1;
            this.cssText = '';
            this.parentRule = null;
            this.parentStyleSheet = null;
        }
    };
    // Static rule-type codes, defined one by one as read-only properties.
    const ruleTypeCodes = [
        ['STYLE_RULE', 1],
        ['CHARSET_RULE', 2],
        ['IMPORT_RULE', 3],
        ['MEDIA_RULE', 4],
        ['FONT_FACE_RULE', 5],
        ['PAGE_RULE', 6],
        ['KEYFRAMES_RULE', 7],
        ['KEYFRAME_RULE', 8],
        ['NAMESPACE_RULE', 10],
        ['COUNTER_STYLE_RULE', 11],
        ['SUPPORTS_RULE', 12],
        ['DOCUMENT_RULE', 13],
        ['FONT_FEATURE_VALUES_RULE', 14],
        ['VIEWPORT_RULE', 15],
        ['REGION_STYLE_RULE', 16]
    ];
    for (const [constantName, code] of ruleTypeCodes) {
        Object.defineProperty(global.CSSRule, constantName, { value: code, writable: false });
    }
    // Media rule: a CSSRule whose type is fixed at 4, with empty media and
    // rule lists.
    global.CSSMediaRule = class extends global.CSSRule {
        constructor() {
            super();
            this.conditionText = '';
            this.deleteRule = () => { };
            this.insertRule = () => 0;
            this.type = 4;
            this.media = createMediaList();
            this.cssRules = createCSSRuleList();
        }
    };
    // Style sheet: inert placeholder; `rules` aliases `cssRules`.
    global.CSSStyleSheet = class {
        constructor() {
            Object.assign(this, {
                type: 'text/css',
                href: null,
                ownerNode: null,
                parentStyleSheet: null,
                title: null,
                disabled: false,
                ownerRule: null,
                addRule: () => 0,
                removeRule: () => { },
                replace: async () => this,
                replaceSync: () => { },
                media: createMediaList(),
                cssRules: createCSSRuleList()
            });
            this.rules = this.cssRules;
        }
        insertRule(rule, index) {
            return 0;
        }
        deleteRule(index) { }
    };
};
66
+ // Define SVGElement
67
// Installs a placeholder SVGElement class on the supplied global object.
// Instances are detached, empty nodes: string fields are '', links to other
// nodes are null, and collections come from the create* helpers.
export const setupSVGElement = (global) => {
    global.SVGElement = class {
        constructor() {
            // Key order matches the order the fields are created in.
            Object.assign(this, {
                id: '',
                className: '',
                style: createCSSStyleDeclaration(),
                ownerSVGElement: null,
                viewportElement: null,
                tagName: '',
                namespaceURI: null,
                prefix: null,
                localName: '',
                baseURI: '',
                textContent: '',
                innerHTML: '',
                outerHTML: '',
                hidden: false,
                slot: '',
                attributes: createNamedNodeMap(),
                childNodes: createNodeList(),
                firstChild: null,
                lastChild: null,
                nextSibling: null,
                previousSibling: null,
                parentNode: null,
                parentElement: null,
                childElementCount: 0,
                firstElementChild: null,
                lastElementChild: null,
                nextElementSibling: null,
                previousElementSibling: null,
                children: createHTMLCollection()
            });
        }
    };
};
102
+ // Helper functions
103
/**
 * Creates an empty MediaList placeholder.
 *
 * @returns A new object with `length` 0, an empty `mediaText`, no-op
 *          mutators, and an iterator that yields nothing.
 */
export function createMediaList() {
    return {
        length: 0,
        mediaText: '',
        item: () => null,
        appendMedium: () => { },
        deleteMedium: () => { },
        toString: () => '',
        // An empty list must iterate zero times; the previous generator
        // yielded one '' entry, which contradicted `length: 0`.
        [Symbol.iterator]: function* () { }
    };
}
114
/**
 * Creates an empty CSSRuleList placeholder.
 *
 * @returns A new object with `length` 0, an `item` that always returns
 *          null, and an iterator that yields nothing.
 */
export function createCSSRuleList() {
    return {
        length: 0,
        item: () => null,
        // The previous generator yielded `new globalThis.CSSRule()`, which
        // contradicted `length: 0` and threw when no global CSSRule existed.
        [Symbol.iterator]: function* () { }
    };
}
124
/**
 * Creates an empty CSSStyleDeclaration placeholder.
 *
 * @returns A new object with `length` 0 whose accessors all return '' and
 *          whose iterator yields nothing.
 */
export function createCSSStyleDeclaration() {
    return {
        cssText: '',
        length: 0,
        parentRule: null,
        getPropertyPriority: () => '',
        getPropertyValue: () => '',
        item: () => '',
        removeProperty: () => '',
        setProperty: () => '',
        // No declared properties, so iteration produces no entries (the
        // previous generator yielded a single '' despite `length: 0`).
        [Symbol.iterator]: function* () { }
    };
}
137
/**
 * Creates an empty NamedNodeMap placeholder.
 *
 * @returns A new object with `length` 0 whose lookups and mutators all
 *          return null and whose iterator yields nothing.
 */
export function createNamedNodeMap() {
    return {
        length: 0,
        getNamedItem: () => null,
        getNamedItemNS: () => null,
        item: () => null,
        removeNamedItem: () => null,
        removeNamedItemNS: () => null,
        setNamedItem: () => null,
        setNamedItemNS: () => null,
        // Empty map: iterate zero times rather than yielding a null
        // "attribute" that callers would dereference.
        [Symbol.iterator]: function* () { }
    };
}
150
/**
 * Creates an empty NodeList placeholder.
 *
 * @returns A new object with `length` 0, a no-op `forEach`, and
 *          `entries` / `keys` / `values` / default iterators that all
 *          yield nothing.
 */
export function createNodeList() {
    return {
        length: 0,
        item: () => null,
        forEach: () => { },
        // All four iteration entry points agree with `length: 0` and with
        // the no-op `forEach`; previously each yielded one null-based entry.
        entries: function* () { },
        keys: function* () { },
        values: function* () { },
        [Symbol.iterator]: function* () { }
    };
}
161
/**
 * Creates an empty HTMLCollection placeholder.
 *
 * @returns A new object with `length` 0, lookups that return null, and an
 *          iterator that yields nothing.
 */
export function createHTMLCollection() {
    return {
        length: 0,
        item: () => null,
        namedItem: () => null,
        // Empty collection: no elements to iterate (previously yielded null).
        [Symbol.iterator]: function* () { }
    };
}
169
/**
 * Creates an empty settable token list placeholder.
 *
 * @returns A new object with `length` 0 and an empty `value`; mutators are
 *          no-ops, predicates return false, and the iterator yields nothing.
 */
export function createDOMSettableTokenList() {
    return {
        length: 0,
        value: '',
        add: () => { },
        contains: () => false,
        item: () => null,
        remove: () => { },
        replace: () => false,
        supports: () => false,
        toggle: () => false,
        // No tokens: iteration is empty (previously yielded one '' token
        // even though `contains` always reports false).
        [Symbol.iterator]: function* () { }
    };
}
@@ -0,0 +1,57 @@
1
// A DOMTokenList with an explicitly declared string `value` field.
+ export interface DOMSettableTokenList extends DOMTokenList {
2
+ value: string;
3
+ }
4
// Element plus the small set of HTML-element fields declared below; the
// other element interfaces and aliases in this file build on it.
+ export interface HTMLElementBase extends Element {
5
+ style: CSSStyleDeclaration;
6
+ dataset: DOMStringMap;
7
+ title: string;
8
+ lang: string;
9
+ dir: string;
10
+ hidden: boolean;
11
+ tabIndex: number;
12
+ }
13
// Image element shape: source/selection attributes as strings, dimensions
// as numbers, and literal unions for `decoding` and `loading`.
+ export interface HTMLImageElementBase extends HTMLElementBase {
14
+ alt: string;
15
+ src: string;
16
+ srcset: string;
17
+ sizes: string;
18
+ crossOrigin: string | null;
19
+ useMap: string;
20
+ isMap: boolean;
21
+ width: number;
22
+ height: number;
23
+ naturalWidth: number;
24
+ naturalHeight: number;
25
+ complete: boolean;
26
+ currentSrc: string;
27
+ decoding: 'sync' | 'async' | 'auto';
28
+ loading: 'eager' | 'lazy';
29
+ referrerPolicy: string;
30
+ }
31
// Iframe element shape. Unlike the image interface, `width` and `height`
// are strings here; `sandbox` is a settable token list.
+ export interface HTMLIFrameElementBase extends HTMLElementBase {
32
+ src: string;
33
+ srcdoc: string;
34
+ name: string;
35
+ sandbox: DOMSettableTokenList;
36
+ allow: string;
37
+ allowFullscreen: boolean;
38
+ width: string;
39
+ height: string;
40
+ contentDocument: Document | null;
41
+ contentWindow: Window | null;
42
+ referrerPolicy: string;
43
+ }
44
// Public aliases. Only the image and iframe aliases have dedicated
// interfaces; every other element name resolves to plain HTMLElementBase.
+ export type HTMLElement = HTMLElementBase;
45
+ export type HTMLImageElement = HTMLImageElementBase;
46
+ export type HTMLIFrameElement = HTMLIFrameElementBase;
47
+ export type HTMLOListElement = HTMLElementBase;
48
+ export type HTMLUListElement = HTMLElementBase;
49
+ export type HTMLTableElement = HTMLElementBase;
50
+ export type HTMLTableRowElement = HTMLElementBase;
51
+ export type HTMLTableCellElement = HTMLElementBase;
52
+ export type HTMLTableSectionElement = HTMLElementBase;
53
+ export type HTMLTableCaptionElement = HTMLElementBase;
54
+ export type HTMLButtonElement = HTMLElementBase;
55
+ export type HTMLSpanElement = HTMLElementBase;
56
+ export type HTMLDivElement = HTMLElementBase;
57
+ export type HTMLAnchorElement = HTMLElementBase;
@@ -0,0 +1,3 @@
1
+ // This file provides type definitions for DOM interfaces.
2
+ // The actual implementations are provided by JSDOM.
3
+ export {};
@@ -0,0 +1,2 @@
1
+ import { SetupFunction } from './setup.js';
2
// Takes a jsdom window and returns nothing; installs a Range shim on it.
+ export declare const setupRange: SetupFunction;
@@ -0,0 +1,87 @@
1
/**
 * Installs a minimal `Range` class on `window` when it has none.
 *
 * The shim tracks only the boundary fields; geometry is always zero, the
 * fragment-producing methods return empty fragments, and the boundary
 * setters other than `selectNodeContents` are no-ops.
 *
 * @param window Window-like object; its `document` is used for default
 *               containers and for creating fragments.
 */
export const setupRange = (window) => {
    if (window.Range) {
        return;
    }
    // Resolve the document from the window being patched. The previous code
    // read the bare global `document`, which does not exist in plain Node and
    // made `new window.Range()` throw. The global is kept as a fallback for
    // callers that already rely on it.
    const ownerDocument = () => window.document ?? globalThis.document;
    class Range {
        constructor() {
            const root = ownerDocument().documentElement;
            Object.assign(this, {
                START_TO_START: 0,
                START_TO_END: 1,
                END_TO_END: 2,
                END_TO_START: 3,
                startContainer: root,
                startOffset: 0,
                endContainer: root,
                endOffset: 0,
                collapsed: true,
                commonAncestorContainer: root
            });
        }
        createContextualFragment(fragment) {
            return ownerDocument().createDocumentFragment();
        }
        detach() { }
        cloneContents() {
            return ownerDocument().createDocumentFragment();
        }
        cloneRange() {
            return new Range();
        }
        collapse(toStart = false) { }
        compareBoundaryPoints(how, sourceRange) {
            return 0;
        }
        comparePoint(node, offset) {
            return 0;
        }
        deleteContents() { }
        extractContents() {
            return ownerDocument().createDocumentFragment();
        }
        getBoundingClientRect() {
            return {
                top: 0,
                left: 0,
                bottom: 0,
                right: 0,
                width: 0,
                height: 0,
                x: 0,
                y: 0,
                toJSON: function () { return this; }
            };
        }
        getClientRects() {
            return {
                length: 0,
                item: function () { return null; },
                [Symbol.iterator]: function* () { }
            };
        }
        insertNode(node) { }
        intersectsNode(node) {
            return false;
        }
        isPointInRange(node, offset) {
            return false;
        }
        selectNode(node) { }
        selectNodeContents(node) {
            const childCount = node.childNodes.length;
            Object.assign(this, {
                startContainer: node,
                startOffset: 0,
                endContainer: node,
                endOffset: childCount,
                // Start and end coincide when the node has no children, so
                // the range is collapsed in that case (it was always
                // reported as non-collapsed before).
                collapsed: childCount === 0,
                // Both boundaries are in `node`, so it is the common ancestor
                // (this field was left pointing at the old container before).
                commonAncestorContainer: node
            });
        }
        setEnd(node, offset) { }
        setEndAfter(node) { }
        setEndBefore(node) { }
        setStart(node, offset) { }
        setStartAfter(node) { }
        setStartBefore(node) { }
        surroundContents(newParent) { }
    }
    // Comparison-mode constants are also exposed on the class itself.
    Object.assign(Range, {
        START_TO_START: 0,
        START_TO_END: 1,
        END_TO_END: 2,
        END_TO_START: 3
    });
    window.Range = Range;
};
@@ -0,0 +1,12 @@
1
+ import { DOMWindow } from 'jsdom';
2
// Signature shared by the setup helpers: receives a jsdom window and
// returns nothing.
+ export type SetupFunction = (window: DOMWindow) => void;
// Individual setup steps. NOTE(review): their implementations are not
// visible in this view; behavior should be confirmed against setup.js.
3
+ export declare const setupBasicWindow: SetupFunction;
4
+ export declare const setupCSSInterfaces: SetupFunction;
5
+ export declare const setupHTMLAndSVG: SetupFunction;
6
+ export declare const setupScreen: SetupFunction;
7
+ export declare const setupStorage: SetupFunction;
8
+ export declare const setupAnimationFrame: SetupFunction;
9
+ export declare const setupDOMMethods: SetupFunction;
10
+ export declare const setupNodeMethods: SetupFunction;
11
+ export declare const setupElementMethods: SetupFunction;
// Same call shape as SetupFunction, written out inline.
12
+ export declare const setupDOMInterfaces: (window: DOMWindow) => void;