site-to-md-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Alex Scott
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1 @@
1
+ # site-to-md
package/bin/cli.js ADDED
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { Command } from 'commander';
4
+ import fs from 'fs/promises';
5
+ import path from 'path';
6
+ import { fetchAndConvert } from '../src/index.js';
7
+
8
+ const program = new Command();
9
+
10
+ program
11
+ .name('site-to-md')
12
+ .description('A CLI to convert websites to clean, formatted Markdown')
13
+ .version('1.0.0')
14
+ .argument('<url>', 'The URL of the website to convert')
15
+ .option('-o, --output <file>', 'Save the output to a specific file path')
16
+ .option('--no-title', 'Do not prepend the article title to the markdown output')
17
+ .action(async (url, options) => {
18
+ try {
19
+ // Basic URL validation
20
+ new URL(url);
21
+ } catch (e) {
22
+ console.error('❌ Error: Invalid URL provided.');
23
+ process.exit(1);
24
+ }
25
+
26
+ console.log(`⏳ Fetching and processing: ${url}...`);
27
+
28
+ try {
29
+ const result = await fetchAndConvert(url);
30
+
31
+ let finalOutput = '';
32
+ if (options.title && result.title) {
33
+ finalOutput += `# ${result.title}\n\n`;
34
+ }
35
+ finalOutput += result.markdown;
36
+
37
+ if (options.output) {
38
+ const outputPath = path.resolve(process.cwd(), options.output);
39
+ await fs.writeFile(outputPath, finalOutput, 'utf-8');
40
+ console.log(`✅ Success! Markdown saved to: ${outputPath}`);
41
+ } else {
42
+ console.log('\n--- Markdown Output ---\n');
43
+ console.log(finalOutput);
44
+ console.log('\n-----------------------\n');
45
+ console.log(`✅ Success! Extracted "${result.title}"`);
46
+ console.log('💡 Tip: Use -o <filename.md> to save this output to a file.');
47
+ }
48
+ } catch (error) {
49
+ console.error(`❌ Error during conversion: ${error.message}`);
50
+ process.exit(1);
51
+ }
52
+ });
53
+
54
+ program.parse(process.argv);
@@ -0,0 +1,376 @@
1
+ # Markdown
2
+
3
+ From Wikipedia, the free encyclopedia
4
+
5
+ | Markdown |
6
+ | --- |
7
+ | [![](https://upload.wikimedia.org/wikipedia/commons/thumb/4/48/Markdown-mark.svg/120px-Markdown-mark.svg.png)](https://en.wikipedia.org/wiki/File:Markdown-mark.svg) |
8
+ | [Filename extensions](https://en.wikipedia.org/wiki/Filename_extension "Filename extension") | `.md`, `.markdown`[\[1\]](#cite_note-df-2022-1)[\[2\]](#cite_note-rfc7763-2) |
9
+ | [Internet media type](https://en.wikipedia.org/wiki/Media_type "Media type") | `text/markdown`[\[2\]](#cite_note-rfc7763-2) |
10
+ | [Uniform Type Identifier (UTI)](https://en.wikipedia.org/wiki/Uniform_Type_Identifier "Uniform Type Identifier") | `net.daringfireball.markdown` |
11
+ | UTI conformation | `public.plain-text` |
12
+ | [Magic number](https://en.wikipedia.org/wiki/File_format#Magic_number "File format") | None |
13
+ | Developed by |
14
+ * [John Gruber](https://en.wikipedia.org/wiki/John_Gruber "John Gruber")
15
+
16
+
17
+
18
+ |
19
+ | Initial release | March 9, 2004 (21 years ago)[\[3\]](#cite_note-markdown-swartz-3)[\[4\]](#cite_note-gruber-2004-release-4) |
20
+ | [Latest release](https://en.wikipedia.org/wiki/Software_release_life_cycle "Software release life cycle") |
21
+
22
+ 1.0.1
23
+ December 17, 2004 (21 years ago)[\[5\]](#cite_note-md-5)
24
+
25
+ |
26
+ | Type of format | [Open file format](https://en.wikipedia.org/wiki/Open_file_format "Open file format")[\[6\]](#cite_note-license-6) |
27
+ | Extended to | [pandoc](https://en.wikipedia.org/wiki/Pandoc "Pandoc"), [MultiMarkdown](https://en.wikipedia.org/wiki/MultiMarkdown "MultiMarkdown"), [Markdown Extra](https://en.wikipedia.org/wiki/Markdown_Extra "Markdown Extra"), [CommonMark](#Standardization),[\[7\]](#cite_note-rfc7764-7) [RMarkdown](https://en.wikipedia.org/wiki/RMarkdown "RMarkdown")[\[8\]](#cite_note-RMarkdown-8) |
28
+ | Website | [daringfireball.net/projects/markdown/](https://daringfireball.net/projects/markdown/) |
29
+
30
+ **Markdown**[\[9\]](#cite_note-philosophy-9) is a [lightweight markup language](https://en.wikipedia.org/wiki/Lightweight_markup_language "Lightweight markup language") for creating [formatted text](https://en.wikipedia.org/wiki/Formatted_text "Formatted text") using a [plain-text editor](https://en.wikipedia.org/wiki/Text_editor "Text editor"). [John Gruber](https://en.wikipedia.org/wiki/John_Gruber "John Gruber") created Markdown in 2004 as an easy-to-read [markup language](https://en.wikipedia.org/wiki/Markup_language "Markup language").[\[9\]](#cite_note-philosophy-9) Markdown is widely used for [blogging](https://en.wikipedia.org/wiki/Blog "Blog"), [instant messaging](https://en.wikipedia.org/wiki/Instant_messaging "Instant messaging"), and [large language models](https://en.wikipedia.org/wiki/Large_language_models "Large language models"),[\[10\]](#cite_note-llm-markdown-10) and also used elsewhere in [online forums](https://en.wikipedia.org/wiki/Online_forums "Online forums"), [collaborative software](https://en.wikipedia.org/wiki/Collaborative_software "Collaborative software"), [documentation](https://en.wikipedia.org/wiki/Documentation "Documentation") pages, and [readme files](https://en.wikipedia.org/wiki/README "README").
31
+
32
+ The initial description of Markdown[\[11\]](#cite_note-11) contained ambiguities and raised unanswered questions, causing implementations to both intentionally and accidentally diverge from the original version. This was addressed in 2014 when long-standing Markdown contributors released [CommonMark](#Standardization), an unambiguous specification and test suite for Markdown.[\[12\]](#cite_note-FutureOfMarkdown-12)\[*[better source needed](https://en.wikipedia.org/wiki/Wikipedia:Verifiability#Questionable_sources "Wikipedia:Verifiability")*\]
33
+
34
+ Markdown was inspired by pre-existing [conventions](https://en.wikipedia.org/wiki/Convention_\(norm\) "Convention (norm)") for marking up [plain text](https://en.wikipedia.org/wiki/Plain_text "Plain text") in [email](https://en.wikipedia.org/wiki/Email "Email") and [usenet](https://en.wikipedia.org/wiki/Usenet "Usenet") posts,[\[13\]](#cite_note-ArsTechnica2014-13) such as the earlier markup languages [setext](https://en.wikipedia.org/wiki/Setext "Setext") (c. 1992), [Textile](https://en.wikipedia.org/wiki/Textile_\(markup_language\) "Textile (markup language)") (c. 2002), and [reStructuredText](https://en.wikipedia.org/wiki/ReStructuredText "ReStructuredText") (c. 2002).[\[9\]](#cite_note-philosophy-9)
35
+
36
+ In 2002 [Aaron Swartz](https://en.wikipedia.org/wiki/Aaron_Swartz "Aaron Swartz") created [atx](https://en.wikipedia.org/wiki/Atx_\(markup_language\) "Atx (markup language)") and referred to it as "the true structured text format". Gruber created the Markdown language in 2004 with Swartz as his "sounding board".[\[14\]](#cite_note-Gruber-14) The goal of the language was to enable people "to write using an easy-to-read and easy-to-write plain text format, optionally convert it to structurally valid [XHTML](https://en.wikipedia.org/wiki/XHTML "XHTML") (or [HTML](https://en.wikipedia.org/wiki/HTML "HTML"))".[\[5\]](#cite_note-md-5)
37
+
38
+ Another key design goal was *readability*, that the language be readable as-is, without looking like it has been marked up with tags or formatting instructions,[\[9\]](#cite_note-philosophy-9) unlike text formatted with "heavier" [markup languages](https://en.wikipedia.org/wiki/Markup_language "Markup language"), such as [Rich Text Format](https://en.wikipedia.org/wiki/Rich_Text_Format "Rich Text Format") (RTF), HTML, or even [wikitext](https://en.wikipedia.org/wiki/Wikitext "Wikitext") (each of which has obvious in-line tags and formatting instructions that can make the text more difficult for humans to read).\[*[citation needed](https://en.wikipedia.org/wiki/Wikipedia:Citation_needed "Wikipedia:Citation needed")*\]
39
+
40
+ Gruber wrote a [Perl](https://en.wikipedia.org/wiki/Perl "Perl") script, `Markdown.pl`, which converts marked-up text input to valid, [well-formed](https://en.wikipedia.org/wiki/Well-formed_document "Well-formed document") XHTML or HTML, encoding angle brackets (`<`, `>`) and [ampersands](https://en.wikipedia.org/wiki/Ampersand "Ampersand") (`&`), which would be misinterpreted as special characters in those languages. It can take the role of a standalone script, of a plugin for [Blosxom](https://en.wikipedia.org/wiki/Blosxom "Blosxom") or [Movable Type](https://en.wikipedia.org/wiki/Movable_Type "Movable Type"), or of a text filter for [BBEdit](https://en.wikipedia.org/wiki/BBEdit "BBEdit").[\[5\]](#cite_note-md-5)
41
+
42
+ ## Rise and divergence
43
+
44
+ \[[edit](https://en.wikipedia.org/w/index.php?title=Markdown&action=edit&section=2 "Edit section: Rise and divergence")\]
45
+
46
+ As Markdown's popularity grew rapidly, many Markdown [implementations](https://en.wikipedia.org/wiki/Implementation "Implementation") appeared, driven mostly by the need for additional features such as [tables](https://en.wikipedia.org/wiki/Table_\(information\) "Table (information)"), [footnotes](https://en.wikipedia.org/wiki/Note_\(typography\) "Note (typography)"), definition lists,[\[note 1\]](#cite_note-17) and Markdown inside HTML blocks.\[*[citation needed](https://en.wikipedia.org/wiki/Wikipedia:Citation_needed "Wikipedia:Citation needed")*\]
47
+
48
+ The behavior of some of these diverged from the reference implementation, as Markdown was only characterised by an informal [specification](https://en.wikipedia.org/wiki/Specification_\(technical_standard\) "Specification (technical standard)")[\[17\]](#cite_note-18) and a [Perl](https://en.wikipedia.org/wiki/Perl "Perl") implementation for conversion to HTML.\[*[citation needed](https://en.wikipedia.org/wiki/Wikipedia:Citation_needed "Wikipedia:Citation needed")*\]
49
+
50
+ At the same time, a number of ambiguities in the informal specification had attracted attention.[\[18\]](#cite_note-gfm_on_github-why_spec-19) These issues spurred the creation of tools such as Babelmark[\[19\]](#cite_note-babelmark-2-20)[\[20\]](#cite_note-babelmark-3-21) to compare the output of various implementations,[\[21\]](#cite_note-22) and an effort by some developers of Markdown [parsers](https://en.wikipedia.org/wiki/Parsing "Parsing") for standardization. However, Gruber has argued that complete standardization would be a mistake: "Different sites (and people) have different needs. No one syntax would make all happy."[\[22\]](#cite_note-23)
51
+
52
+ Gruber avoided using curly braces in Markdown to unofficially reserve them for implementation-specific extensions.[\[23\]](#cite_note-curlyBraces-24)
53
+
54
+ | CommonMark |
55
+ | --- |
56
+ | [Filename extensions](https://en.wikipedia.org/wiki/Filename_extension "Filename extension") | `.md`, `.markdown`[\[2\]](#cite_note-rfc7763-2) |
57
+ | [Internet media type](https://en.wikipedia.org/wiki/Media_type "Media type") | `text/markdown; variant=CommonMark`[\[7\]](#cite_note-rfc7764-7) |
58
+ | [Uniform Type Identifier (UTI)](https://en.wikipedia.org/wiki/Uniform_Type_Identifier "Uniform Type Identifier") | *uncertain*[\[24\]](#cite_note-cm-uti-25) |
59
+ | UTI conformation | public.plain-text |
60
+ | Developed by | [John MacFarlane](https://en.wikipedia.org/wiki/John_MacFarlane_\(philosopher\) "John MacFarlane (philosopher)"), open source |
61
+ | Initial release | October 25, 2014 (11 years ago) |
62
+ | [Latest release](https://en.wikipedia.org/wiki/Software_release_life_cycle "Software release life cycle") |
63
+ 0.31.2
64
+ January 28, 2024 (2 years ago)[\[25\]](#cite_note-cm-spec-26)
65
+
66
+ |
67
+ | Type of format | [Open file format](https://en.wikipedia.org/wiki/Open_file_format "Open file format") |
68
+ | Extended from | Markdown |
69
+ | Extended to | [GitHub Flavored Markdown](#GFM) |
70
+ | Website | [commonmark.org](https://commonmark.org/) [spec.commonmark.org](http://spec.commonmark.org/) |
71
+
72
+ In 2012, a group of people, including [Jeff Atwood](https://en.wikipedia.org/wiki/Jeff_Atwood "Jeff Atwood") and [John MacFarlane](https://en.wikipedia.org/wiki/John_MacFarlane_\(philosopher\) "John MacFarlane (philosopher)"), launched what Atwood characterised as a standardization effort.[\[12\]](#cite_note-FutureOfMarkdown-12)
73
+
74
+ A community website now aims to "document various tools and resources available to document authors and developers, as well as implementors of the various Markdown implementations".[\[26\]](#cite_note-27)
75
+
76
+ In September 2014, Gruber objected to the usage of "Markdown" in the name of this effort and it was rebranded as "CommonMark".[\[13\]](#cite_note-ArsTechnica2014-13)[\[27\]](#cite_note-28)[\[28\]](#cite_note-29) CommonMark.org published several versions of a specification, reference implementation, test suite, and "\[plans\] to announce a finalized 1.0 spec and test suite in 2019".[\[29\]](#cite_note-commonmark.org-30)
77
+
78
+ The finalized 1.0 spec has not been released, as major issues still remain unsolved.[\[30\]](#cite_note-31)
79
+
80
+ Nonetheless, the following websites and projects have adopted CommonMark: [Codeberg](https://en.wikipedia.org/wiki/Codeberg "Codeberg"), [Discourse](https://en.wikipedia.org/wiki/Discourse_\(software\) "Discourse (software)"), [GitHub](https://en.wikipedia.org/wiki/GitHub "GitHub"), [GitLab](https://en.wikipedia.org/wiki/GitLab "GitLab"), [Reddit](https://en.wikipedia.org/wiki/Reddit "Reddit"), [Qt](https://en.wikipedia.org/wiki/Qt_\(software\) "Qt (software)"), [Stack Exchange](https://en.wikipedia.org/wiki/Stack_Exchange "Stack Exchange") ([Stack Overflow](https://en.wikipedia.org/wiki/Stack_Overflow "Stack Overflow")), and [Swift](https://en.wikipedia.org/wiki/Swift_\(programming_language\) "Swift (programming language)").
81
+
82
+ In March 2016, two relevant informational Internet [RFCs](https://en.wikipedia.org/wiki/Request_for_Comments "Request for Comments") were published:
83
+
84
+ * RFC [7763](https://www.rfc-editor.org/rfc/rfc7763) – "The text/markdown Media Type,"[\[2\]](#cite_note-rfc7763-2) *Informational.*
85
+
86
+ Introduces [MIME](https://en.wikipedia.org/wiki/MIME "MIME") type `text/markdown`.
87
+
88
+ * RFC [7764](https://www.rfc-editor.org/rfc/rfc7764) – "Guidance on Markdown: Design Philosophies, Stability Strategies, and Select Registrations,"[\[7\]](#cite_note-rfc7764-7) *Informational.*
89
+
90
+ Discusses and registers the variants [MultiMarkdown](https://en.wikipedia.org/wiki/MultiMarkdown "MultiMarkdown"), [GitHub Flavored Markdown](#GFM) (GFM), [Pandoc](https://en.wikipedia.org/wiki/Pandoc "Pandoc"), and Markdown Extra (among others).[\[31\]](#cite_note-IANA-32)
91
+
92
+
93
+ Websites like [Bitbucket](https://en.wikipedia.org/wiki/Bitbucket "Bitbucket"), [Diaspora](https://en.wikipedia.org/wiki/Diaspora_\(social_network\) "Diaspora (social network)"), [Discord](https://en.wikipedia.org/wiki/Discord "Discord"),[\[32\]](#cite_note-33) [GitHub](#GFM),[\[33\]](#cite_note-gfm_on_github-34) [OpenStreetMap](https://en.wikipedia.org/wiki/OpenStreetMap "OpenStreetMap"), [Reddit](https://en.wikipedia.org/wiki/Reddit "Reddit"),[\[34\]](#cite_note-35) [SourceForge](https://en.wikipedia.org/wiki/SourceForge "SourceForge")[\[35\]](#cite_note-36) and [Stack Exchange](https://en.wikipedia.org/wiki/Stack_Exchange "Stack Exchange")[\[36\]](#cite_note-37) use variants of Markdown to make discussions between users easier.
94
+
95
+ Depending on implementation, basic inline [HTML tags](https://en.wikipedia.org/wiki/HTML_tag "HTML tag") may be supported.[\[37\]](#cite_note-38)
96
+
97
+ Italic text may be implemented by `_underscores_` or `*single-asterisks*`.[\[38\]](#cite_note-italic-39)
98
+
99
+ ### GitHub Flavored Markdown
100
+
101
+ \[[edit](https://en.wikipedia.org/w/index.php?title=Markdown&action=edit&section=5 "Edit section: GitHub Flavored Markdown")\]
102
+
103
+ [GitHub](https://en.wikipedia.org/wiki/GitHub "GitHub") had been using its own variant of Markdown since as early as 2009,[\[39\]](#cite_note-40) which added support for additional formatting such as tables and nesting [block content](https://en.wikipedia.org/wiki/HTML_element#Block_elements "HTML element") inside list elements, as well as GitHub-specific features such as auto-linking references to commits, issues, usernames, etc.
104
+
105
+ In 2017, GitHub released a formal specification of its [GitHub Flavored Markdown](https://github.github.com/gfm/) (GFM) that is based on [CommonMark](https://en.wikipedia.org/wiki/CommonMark "CommonMark").[\[33\]](#cite_note-gfm_on_github-34) It is a [strict superset](https://en.wikipedia.org/wiki/Superset "Superset") of CommonMark, following its specification exactly except for tables, [strikethrough](https://en.wikipedia.org/wiki/Strikethrough "Strikethrough"), [autolinks](https://en.wikipedia.org/wiki/Automatic_hyperlinking "Automatic hyperlinking") and task lists, which GFM adds as extensions.[\[40\]](#cite_note-41)
106
+
107
+ Accordingly, GitHub also changed the parser used on their sites, which required that some documents be changed. For instance, GFM now requires that the [hash symbol](https://en.wikipedia.org/wiki/Number_sign "Number sign") that creates a heading be separated from the heading text by a space character.
108
+
109
+ Markdown Extra is a [lightweight markup language](https://en.wikipedia.org/wiki/Lightweight_markup_language "Lightweight markup language") based on Markdown implemented in [PHP](https://en.wikipedia.org/wiki/PHP "PHP") (originally), [Python](https://en.wikipedia.org/wiki/Python_\(programming_language\) "Python (programming language)") and [Ruby](https://en.wikipedia.org/wiki/Ruby_\(programming_language\) "Ruby (programming language)").[\[41\]](#cite_note-fortin-2018-42) It adds the following features that are not available with regular Markdown:
110
+
111
+ * Markdown markup inside [HTML](https://en.wikipedia.org/wiki/HTML "HTML") blocks
112
+ * Elements with id/class attribute
113
+ * "Fenced code blocks" that span multiple lines of code
114
+ * Tables[\[42\]](#cite_note-43)
115
+ * Definition lists
116
+ * Footnotes
117
+ * Abbreviations
118
+
119
+ Markdown Extra is supported in some [content management systems](https://en.wikipedia.org/wiki/Content_management_system "Content management system") such as [Drupal](https://en.wikipedia.org/wiki/Drupal "Drupal"),[\[43\]](#cite_note-44) [Grav (CMS)](https://en.wikipedia.org/wiki/Grav_\(CMS\) "Grav (CMS)"), [Textpattern CMS](https://en.wikipedia.org/wiki/Textpattern "Textpattern")[\[44\]](#cite_note-45) and [TYPO3](https://en.wikipedia.org/wiki/TYPO3 "TYPO3").[\[45\]](#cite_note-46)
120
+
121
+ | Text using Markdown syntax | Corresponding HTML produced by a Markdown processor | Text viewed in a browser |
122
+ | --- | --- | --- |
123
+ |
124
+ Heading
125
+ \=======
126
+
127
+ Sub-heading
128
+ \-----------
129
+
130
+ \# Alternative heading
131
+
132
+ \## Alternative sub-heading
133
+
134
+ Paragraphs are separated
135
+ by a blank line.
136
+
137
+ Two spaces at the end of a line
138
+ produce a line break.
139
+
140
+
141
+
142
+
143
+ |
144
+
145
+ <h1\>Heading</h1\>
146
+
147
+ <h2\>Sub-heading</h2\>
148
+
149
+ <h1\>Alternative heading</h1\>
150
+
151
+ <h2\>Alternative sub-heading</h2\>
152
+
153
+ <p\>Paragraphs are separated
154
+ by a blank line.</p\>
155
+
156
+ <p\>Two spaces at the end of a line<br />
157
+ produce a line break.</p\>
158
+
159
+
160
+
161
+
162
+ |
163
+
164
+ Heading
165
+
166
+ Sub-heading
167
+
168
+ Alternative heading
169
+
170
+ Alternative sub-heading
171
+
172
+ Paragraphs are separated by a blank line.
173
+
174
+ Two spaces at the end of a line
175
+ produce a line break.
176
+
177
+ |
178
+ |
179
+
180
+ Text attributes \_italic\_, \*\*bold\*\*, \`monospace\`.
181
+
182
+ Horizontal rule:
183
+
184
+ ---
185
+
186
+
187
+
188
+
189
+ |
190
+
191
+ <p\>Text attributes <em\>italic</em\>, <strong\>bold</strong\>, <code\>monospace</code\>.</p\>
192
+
193
+ <p\>Horizontal rule:</p\>
194
+
195
+ <hr />
196
+
197
+
198
+
199
+
200
+ | Text attributes *italic*, **bold**, `monospace`.
201
+
202
+ Horizontal rule:
203
+
204
+ * * *
205
+
206
+ |
207
+ |
208
+
209
+ Bullet lists nested within numbered list:
210
+
211
+ 1. fruits
212
+ \* apple
213
+ \* banana
214
+ 2. vegetables
215
+ \- carrot
216
+ \- broccoli
217
+
218
+
219
+
220
+
221
+ |
222
+
223
+ <p\>Bullet lists nested within numbered list:</p\>
224
+
225
+ <ol\>
226
+ <li\>fruits <ul\>
227
+ <li\>apple</li\>
228
+ <li\>banana</li\>
229
+ </ul\></li\>
230
+ <li\>vegetables <ul\>
231
+ <li\>carrot</li\>
232
+ <li\>broccoli</li\>
233
+ </ul\></li\>
234
+ </ol\>
235
+
236
+
237
+
238
+
239
+ | Bullet lists nested within numbered list:
240
+
241
+ 1. fruits
242
+ * apple
243
+ * banana
244
+ 2. vegetables
245
+ * carrot
246
+ * broccoli
247
+
248
+ |
249
+ |
250
+
251
+ A \[link\](http://example.com).
252
+
253
+ !\[Image\](Icon-pictures.png "icon")
254
+
255
+ \> Markdown uses email-style
256
+ characters for blockquoting.
257
+ \>
258
+ \> Multiple paragraphs need to be prepended individually.
259
+
260
+ Most inline <abbr title="Hypertext Markup Language">HTML</abbr> tags are supported.
261
+
262
+
263
+
264
+
265
+ |
266
+
267
+ <p\>A <a href\="http://example.com"\>link</a\>.</p\>
268
+
269
+ <p\><img alt\="Image" title\="icon" src\="Icon-pictures.png" /></p\>
270
+
271
+ <blockquote\>
272
+ <p\>Markdown uses email-style characters for blockquoting.</p\>
273
+ <p\>Multiple paragraphs need to be prepended individually.</p\>
274
+ </blockquote\>
275
+
276
+ <p\>Most inline <abbr title\="Hypertext Markup Language"\>HTML</abbr\> tags are supported.</p\>
277
+
278
+
279
+
280
+
281
+ | A [link](http://example.com/).
282
+
283
+ ![Image](https://upload.wikimedia.org/wikipedia/commons/5/5c/Icon-pictures.png)
284
+
285
+ > Markdown uses email-style characters for blockquoting.
286
+ >
287
+ > Multiple paragraphs need to be prepended individually.
288
+
289
+ Most inline HTML tags are supported.
290
+
291
+ |
292
+
293
+ Implementations of Markdown are available for over a dozen [programming languages](https://en.wikipedia.org/wiki/Programming_language "Programming language"); in addition, many [applications](https://en.wikipedia.org/wiki/Application_software "Application software"), platforms and [frameworks](https://en.wikipedia.org/wiki/Software_framework "Software framework") support Markdown.[\[46\]](#cite_note-47) For example, Markdown [plugins](https://en.wikipedia.org/wiki/Plug-in_\(computing\) "Plug-in (computing)") exist for every major [blogging](https://en.wikipedia.org/wiki/Blog "Blog") platform.[\[13\]](#cite_note-ArsTechnica2014-13)
294
+
295
+ While Markdown is a minimal markup language and is read and edited with a normal [text editor](https://en.wikipedia.org/wiki/Text_editor "Text editor"), there are specially designed editors that preview the files with styles, which are available for all major platforms. Many general-purpose text and [code editors](https://en.wikipedia.org/wiki/Source-code_editor "Source-code editor") have [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting "Syntax highlighting") plugins for Markdown built into them or available as optional download. Editors may feature a side-by-side preview window or render the code directly in a [WYSIWYG](https://en.wikipedia.org/wiki/WYSIWYG "WYSIWYG") fashion.
296
+
297
+ ### Comparison of Markdown editors
298
+
299
+ \[[edit](https://en.wikipedia.org/w/index.php?title=Markdown&action=edit&section=9 "Edit section: Comparison of Markdown editors")\]
300
+
301
+ | Program | [Platform(s)](https://en.wikipedia.org/wiki/Computing_platform "Computing platform") | License | Source code editor | Shows images | View(s) | Additional features |
302
+ | --- | --- | --- | --- | --- | --- | --- |
303
+ | [Markdown.UWP](https://apps.microsoft.com/detail/9nblggh4q9rs?hl=en-US&gl=BB) | Windows 10 ≥ version 10240.0[\[47\]](#cite_note-MarkdownUWP_MSappStoreEn-US-48) | ? | [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting "Syntax highlighting"); Search & Replace[\[47\]](#cite_note-MarkdownUWP_MSappStoreEn-US-48)(also with wildcards and regular expressions[\[48\]](#cite_note-49); no column number shown) | loaded from [HTTP](https://en.wikipedia.org/wiki/HTTP "HTTP") | source code editor, live HTML preview optionally concurrently [\[47\]](#cite_note-MarkdownUWP_MSappStoreEn-US-48) | auto-detect file encoding (Including GB1213 & BIG5) [\[47\]](#cite_note-MarkdownUWP_MSappStoreEn-US-48) |
304
+ | [MarkText](https://github.com/marktext/marktext) (version 0.17.1) | [Windows](https://en.wikipedia.org/wiki/Microsoft_Windows "Microsoft Windows"): [IA-32](https://en.wikipedia.org/wiki/IA-32 "IA-32"), [x86-64](https://en.wikipedia.org/wiki/X86-64 "X86-64"), [Linux](https://en.wikipedia.org/wiki/Linux "Linux"):x86-64, [macOS](https://en.wikipedia.org/wiki/MacOS "MacOS"):[ARM64](https://en.wikipedia.org/wiki/AArch64 "AArch64"),x86-64 [\[49\]](#cite_note-50)[\[50\]](#cite_note-51) | [open source](https://en.wikipedia.org/wiki/Open_source "Open source")[\[51\]](#cite_note-52), MIT[\[52\]](#cite_note-53) | syntax highlighting; changeable font size [\[53\]](#cite_note-54), line number only every 10th line, no line column | from HTTP | table of contents[\[54\]](#cite_note-55)(generated from headings), either(not concurrently) [WYSIWYG](https://en.wikipedia.org/wiki/WYSIWYG "WYSIWYG") realtime preview[\[55\]](#cite_note-MarkTextFeatures-56) or source code[\[56\]](#cite_note-57) | supports CommonMark spec, GitHub Flavored Markdown spec [\[55\]](#cite_note-MarkTextFeatures-56) |
305
+ | [Scratch](https://www.ericli.io/scratch) (version 0.7.1) | macOS ≥ 10.15, Windows 10/11:x64, Linux(Ubuntu 22.04+):AMD64 .deb, .AppImage, .rpm[\[57\]](#cite_note-58) | open source | no syntax highlighting | can't load images originally from HTTP, adds local file system images with prefix "[http://asset.localhost/](http://asset.localhost/)" into source code | WYSIWYG [\[58\]](#cite_note-Scratch_homepage-59) or(not concurrently) source code editor | Edit with AI, Git integration [\[58\]](#cite_note-Scratch_homepage-59); change/add:text style, heading(type), list, blockquote, code, horizontal rule link, image, table in "formatted view"(preview) |
306
+
307
+ * [Comparison of document markup languages](https://en.wikipedia.org/wiki/Comparison_of_document_markup_languages "Comparison of document markup languages")
308
+ * [Comparison of documentation generators](https://en.wikipedia.org/wiki/Comparison_of_documentation_generators "Comparison of documentation generators")
309
+ * [Comparison of wiki software](https://en.wikipedia.org/wiki/Comparison_of_wiki_software "Comparison of wiki software")
310
+ * [Lightweight markup language](https://en.wikipedia.org/wiki/Lightweight_markup_language "Lightweight markup language")
311
+ * [List of markup languages](https://en.wikipedia.org/wiki/List_of_markup_languages "List of markup languages")
312
+ * [List of text editors](https://en.wikipedia.org/wiki/List_of_text_editors "List of text editors")
313
+ * [Wiki markup](https://en.wikipedia.org/wiki/Wiki_markup "Wiki markup")
314
+
315
+ 1. **[^](#cite_ref-17)** Technically HTML description lists
316
+
317
+ 1. **[^](#cite_ref-df-2022_1-0)** Gruber, John (8 January 2014). ["The Markdown File Extension"](https://daringfireball.net/linked/2014/01/08/markdown-extension). The Daring Fireball Company, LLC. [Archived](https://web.archive.org/web/20200712120733/https://daringfireball.net/linked/2014/01/08/markdown-extension) from the original on 12 July 2020. Retrieved 27 March 2022. Too late now, I suppose, but the only file extension I would endorse is ".markdown", for the same reason offered by Hilton Lipschitz: *We no longer live in a 8.3 world, so we should be using the most descriptive file extensions. It's sad that all our operating systems rely on this stupid convention instead of the better creator code or a metadata model, but great that they now support longer file extensions.*
318
+ 2. ^ [***a***](#cite_ref-rfc7763_2-0) [***b***](#cite_ref-rfc7763_2-1) [***c***](#cite_ref-rfc7763_2-2) [***d***](#cite_ref-rfc7763_2-3) S. Leonard (March 2016). [*The text/markdown Media Type*](https://www.rfc-editor.org/rfc/rfc7763). [Internet Engineering Task Force](https://en.wikipedia.org/wiki/Internet_Engineering_Task_Force "Internet Engineering Task Force"). [doi](https://en.wikipedia.org/wiki/Doi_\(identifier\) "Doi (identifier)"):[10.17487/RFC7763](https://doi.org/10.17487%2FRFC7763). [ISSN](https://en.wikipedia.org/wiki/ISSN_\(identifier\) "ISSN (identifier)") [2070-1721](https://search.worldcat.org/issn/2070-1721). [RFC](https://en.wikipedia.org/wiki/Request_for_Comments "Request for Comments") [7763](https://datatracker.ietf.org/doc/html/rfc7763). *Informational.*
319
+ 3. **[^](#cite_ref-markdown-swartz_3-0)** [Swartz, Aaron](https://en.wikipedia.org/wiki/Aaron_Swartz "Aaron Swartz") (2004-03-19). ["Markdown"](http://www.aaronsw.com/weblog/001189). *Aaron Swartz: The Weblog*. [Archived](https://web.archive.org/web/20171224200232/http://www.aaronsw.com/weblog/001189) from the original on 2017-12-24. Retrieved 2013-09-01.
320
+ 4. **[^](#cite_ref-gruber-2004-release_4-0)** [Gruber, John](https://en.wikipedia.org/wiki/John_Gruber "John Gruber"). ["Markdown"](https://web.archive.org/web/20040311230924/https://daringfireball.net/projects/markdown/index.text). *[Daring Fireball](https://en.wikipedia.org/wiki/Daring_Fireball "Daring Fireball")*. Archived from [the original](http://daringfireball.net/projects/markdown/index.text) on 2004-03-11. Retrieved 2022-08-20.
321
+ 5. ^ [***a***](#cite_ref-md_5-0) [***b***](#cite_ref-md_5-1) [***c***](#cite_ref-md_5-2) Markdown 1.0.1 readme source code ["Daring Fireball – Markdown"](https://web.archive.org/web/20040402182332/http://daringfireball.net/projects/markdown/). 2004-12-17. Archived from [the original](http://daringfireball.net/projects/markdown/) on 2004-04-02.
322
+ 6. **[^](#cite_ref-license_6-0)** ["Markdown: License"](http://daringfireball.net/projects/markdown/license). Daring Fireball. [Archived](https://web.archive.org/web/20200218183533/https://daringfireball.net/projects/markdown/license) from the original on 2020-02-18. Retrieved 2014-04-25.
323
+ 7. ^ [***a***](#cite_ref-rfc7764_7-0) [***b***](#cite_ref-rfc7764_7-1) [***c***](#cite_ref-rfc7764_7-2) S. Leonard (March 2016). [*Guidance on Markdown: Design Philosophies, Stability Strategies, and Select Registrations*](https://www.rfc-editor.org/rfc/rfc7764). [Internet Engineering Task Force](https://en.wikipedia.org/wiki/Internet_Engineering_Task_Force "Internet Engineering Task Force"). [doi](https://en.wikipedia.org/wiki/Doi_\(identifier\) "Doi (identifier)"):[10.17487/RFC7764](https://doi.org/10.17487%2FRFC7764). [ISSN](https://en.wikipedia.org/wiki/ISSN_\(identifier\) "ISSN (identifier)") [2070-1721](https://search.worldcat.org/issn/2070-1721). [RFC](https://en.wikipedia.org/wiki/Request_for_Comments "Request for Comments") [7764](https://datatracker.ietf.org/doc/html/rfc7764). *Informational.*
324
+ 8. **[^](#cite_ref-RMarkdown_8-0)** ["RMarkdown Reference site"](https://rmarkdown.rstudio.com/). [Archived](https://web.archive.org/web/20200303054734/https://rmarkdown.rstudio.com/) from the original on 2020-03-03. Retrieved 2019-11-21.
325
+ 9. ^ [***a***](#cite_ref-philosophy_9-0) [***b***](#cite_ref-philosophy_9-1) [***c***](#cite_ref-philosophy_9-2) [***d***](#cite_ref-philosophy_9-3) Markdown Syntax ["Daring Fireball – Markdown – Syntax"](http://daringfireball.net/projects/markdown/syntax#philosophy). 2013-06-13. "Readability, however, is emphasized above all else. A Markdown-formatted document should be publishable as-is, as plain text, without looking like it's been marked up with tags or formatting instructions. While Markdown's syntax has been influenced by several existing text-to-HTML filters — including Setext, atx, Textile, reStructuredText, Grutatext[\[15\]](#cite_note-grutatext-15), and EtText[\[16\]](#cite_note-ettext-16) — the single biggest source of inspiration for Markdown's syntax is the format of plain text email."
326
+ 10. **[^](#cite_ref-llm-markdown_10-0)** Dillet, Romain (6 March 2025). ["Mistral adds a new API that turns any PDF document into an AI-ready Markdown file"](https://techcrunch.com/2025/03/06/mistrals-new-ocr-api-turns-any-pdf-document-into-an-ai-ready-markdown-file/). *TechCrunch*. Retrieved 7 September 2025.
327
+ 11. **[^](#cite_ref-11)** ["Daring Fireball: Introducing Markdown"](https://daringfireball.net/2004/03/introducing_markdown). *daringfireball.net*. [Archived](https://web.archive.org/web/20200920182442/https://daringfireball.net/2004/03/introducing_markdown) from the original on 2020-09-20. Retrieved 2020-09-23.
328
+ 12. ^ [***a***](#cite_ref-FutureOfMarkdown_12-0) [***b***](#cite_ref-FutureOfMarkdown_12-1) Atwood, Jeff (2012-10-25). ["The Future of Markdown"](https://web.archive.org/web/20140211233513/http://www.codinghorror.com/blog/2012/10/the-future-of-markdown.html). CodingHorror.com. Archived from [the original](http://www.codinghorror.com/blog/2012/10/the-future-of-markdown.html) on 2014-02-11. Retrieved 2014-04-25.
329
+ 13. ^ [***a***](#cite_ref-ArsTechnica2014_13-0) [***b***](#cite_ref-ArsTechnica2014_13-1) [***c***](#cite_ref-ArsTechnica2014_13-2) Gilbertson, Scott (October 5, 2014). ["Markdown throwdown: What happens when FOSS software gets corporate backing?"](https://arstechnica.com/information-technology/2014/10/markdown-throwdown-what-happens-when-foss-software-gets-corporate-backing/). *[Ars Technica](https://en.wikipedia.org/wiki/Ars_Technica "Ars Technica")*. [Archived](https://web.archive.org/web/20201114231130/https://arstechnica.com/information-technology/2014/10/markdown-throwdown-what-happens-when-foss-software-gets-corporate-backing/) from the original on November 14, 2020. Retrieved June 14, 2017. [CommonMark](https://en.wikipedia.org/wiki/CommonMark "CommonMark") fork could end up better for users... but original creators seem to disagree.
330
+ 14. **[^](#cite_ref-Gruber_14-0)** @gruber (June 12, 2016). ["I should write about it, but it's painful. More or less: Aaron was my sounding board, my muse"](https://x.com/gruber/status/741989829173510145) ([Tweet](https://en.wikipedia.org/wiki/Tweet_\(social_media\) "Tweet (social media)")) – via [Twitter](https://en.wikipedia.org/wiki/Twitter "Twitter").
331
+ 15. **[^](#cite_ref-grutatext_15-0)** ["Un naufragio personal: The Grutatxt markup"](https://web.archive.org/web/20220630230546/https://triptico.com/docs/grutatxt_markup.html). *triptico.com*. Archived from [the original](https://triptico.com/docs/grutatxt_markup.html) on 2022-06-30. Retrieved 2022-06-30.
332
+ 16. **[^](#cite_ref-ettext_16-0)** ["EtText: Documentation: Using EtText"](http://ettext.taint.org/doc/ettext.html). *ettext.taint.org*. Retrieved 2022-06-30.
333
+ 17. **[^](#cite_ref-18)** ["Markdown Syntax Documentation"](https://daringfireball.net/projects/markdown/syntax). Daring Fireball. [Archived](https://web.archive.org/web/20190909051956/https://daringfireball.net/projects/markdown/syntax) from the original on 2019-09-09. Retrieved 2018-03-09.
334
+ 18. **[^](#cite_ref-gfm_on_github-why_spec_19-0)** ["GitHub Flavored Markdown Spec – Why is a spec needed?"](https://github.github.com/gfm/#why-is-a-spec-needed-). *github.github.com*. [Archived](https://web.archive.org/web/20200203204734/https://github.github.com/gfm/#why-is-a-spec-needed-) from the original on 2020-02-03. Retrieved 2018-05-17.
335
+ 19. **[^](#cite_ref-babelmark-2_20-0)** ["Babelmark 2 – Compare markdown implementations"](http://johnmacfarlane.net/babelmark2/). Johnmacfarlane.net. [Archived](https://web.archive.org/web/20170718113552/http://johnmacfarlane.net/babelmark2/) from the original on 2017-07-18. Retrieved 2014-04-25.
336
+ 20. **[^](#cite_ref-babelmark-3_21-0)** ["Babelmark 3 – Compare Markdown Implementations"](https://babelmark.github.io/). github.io. [Archived](https://web.archive.org/web/20201112043521/https://babelmark.github.io/) from the original on 2020-11-12. Retrieved 2017-12-10.
337
+ 21. **[^](#cite_ref-22)** ["Babelmark 2 – FAQ"](http://johnmacfarlane.net/babelmark2/faq.html). Johnmacfarlane.net. [Archived](https://web.archive.org/web/20170728115918/http://johnmacfarlane.net/babelmark2/faq.html) from the original on 2017-07-28. Retrieved 2014-04-25.
338
+ 22. **[^](#cite_ref-23)** [Gruber, John \[@gruber\]](https://en.wikipedia.org/wiki/John_Gruber "John Gruber") (4 September 2014). ["@tobie @espadrine @comex @wycats Because different sites (and people) have different needs. No one syntax would make all happy"](https://x.com/gruber/status/507670720886091776) ([Tweet](https://en.wikipedia.org/wiki/Tweet_\(social_media\) "Tweet (social media)")) – via [Twitter](https://en.wikipedia.org/wiki/Twitter "Twitter").
339
+ 23. **[^](#cite_ref-curlyBraces_24-0)** Gruber, John (19 May 2022). ["Markdoc"](https://daringfireball.net/linked/2022/05/19/markdoc). *Daring Fireball*. [Archived](https://web.archive.org/web/20220519202920/https://daringfireball.net/linked/2022/05/19/markdoc) from the original on 19 May 2022. Retrieved May 19, 2022. I love their syntax extensions — very true to the spirit of Markdown. They use curly braces for their extensions; I'm not sure I ever made this clear, publicly, but I avoided using curly braces in Markdown itself — even though they are very tempting characters — to unofficially reserve them for implementation-specific extensions. Markdoc's extensive use of curly braces for its syntax is exactly the sort of thing I was thinking about.
340
+ 24. **[^](#cite_ref-cm-uti_25-0)** ["UTI of a CommonMark document"](https://talk.commonmark.org/t/uti-of-a-commonmark-document/2406). 12 April 2017. [Archived](https://web.archive.org/web/20181122140119/https://talk.commonmark.org/t/uti-of-a-commonmark-document/2406) from the original on 22 November 2018. Retrieved 29 September 2017.
341
+ 25. **[^](#cite_ref-cm-spec_26-0)** ["CommonMark specification"](http://spec.commonmark.org/). [Archived](https://web.archive.org/web/20170807052756/http://spec.commonmark.org/) from the original on 2017-08-07. Retrieved 2017-07-26.
342
+ 26. **[^](#cite_ref-27)** ["Markdown Community Page"](https://markdown.github.io/). GitHub. [Archived](https://web.archive.org/web/20201026161924/http://markdown.github.io/) from the original on 2020-10-26. Retrieved 2014-04-25.
343
+ 27. **[^](#cite_ref-28)** ["Standard Markdown is now Common Markdown"](http://blog.codinghorror.com/standard-markdown-is-now-common-markdown/). Jeff Atwood. 4 September 2014. [Archived](https://web.archive.org/web/20141009181014/http://blog.codinghorror.com/standard-markdown-is-now-common-markdown/) from the original on 2014-10-09. Retrieved 2014-10-07.
344
+ 28. **[^](#cite_ref-29)** ["Standard Markdown Becomes Common Markdown then CommonMark"](http://www.infoq.com/news/2014/09/markdown-commonmark). *InfoQ*. [Archived](https://web.archive.org/web/20200930150521/https://www.infoq.com/news/2014/09/markdown-commonmark/) from the original on 2020-09-30. Retrieved 2014-10-07.
345
+ 29. **[^](#cite_ref-commonmark.org_30-0)** ["CommonMark"](http://commonmark.org/). [Archived](https://web.archive.org/web/20160412211434/http://commonmark.org/) from the original on 12 April 2016. Retrieved 20 Jun 2018. The current version of the CommonMark spec is complete, and quite robust after a year of public feedback … but not quite final. With your help, we plan to announce a finalized 1.0 spec and test suite in 2019.
346
+ 30. **[^](#cite_ref-31)** ["Issues we MUST resolve before 1.0 release \[6 remaining\]"](https://talk.commonmark.org/t/issues-we-must-resolve-before-1-0-release-6-remaining/1287). *CommonMark Discussion*. 2015-07-26. [Archived](https://web.archive.org/web/20210414032229/https://talk.commonmark.org/t/issues-we-must-resolve-before-1-0-release-6-remaining/1287) from the original on 2021-04-14. Retrieved 2020-10-02.
347
+ 31. **[^](#cite_ref-IANA_32-0)** ["Markdown Variants"](https://www.iana.org/assignments/markdown-variants/markdown-variants.xhtml). [IANA](https://en.wikipedia.org/wiki/Internet_Assigned_Numbers_Authority "Internet Assigned Numbers Authority"). 2016-03-28. [Archived](https://web.archive.org/web/20201027005128/https://www.iana.org/assignments/markdown-variants/markdown-variants.xhtml) from the original on 2020-10-27. Retrieved 2016-07-06.
348
+ 32. **[^](#cite_ref-33)** ["Markdown Text 101 (Chat Formatting: Bold, Italic, Underline)"](https://support.discord.com/hc/en-us/articles/210298617-Markdown-Text-101-Chat-Formatting-Bold-Italic-Underline). *Discord*. 2024-10-03. Retrieved 2025-02-07.
349
+ 33. ^ [***a***](#cite_ref-gfm_on_github_34-0) [***b***](#cite_ref-gfm_on_github_34-1) ["GitHub Flavored Markdown Spec"](https://github.github.com/gfm/). GitHub. [Archived](https://web.archive.org/web/20200203204734/https://github.github.com/gfm/) from the original on 2020-02-03. Retrieved 2020-06-11.
350
+ 34. **[^](#cite_ref-35)** ["Reddit markdown primer. Or, how do you do all that fancy formatting in your comments, anyway?"](https://www.reddit.com/r/reddit.com/comments/6ewgt/reddit_markdown_primer_or_how_do_you_do_all_that/). Reddit. [Archived](https://web.archive.org/web/20190611185827/https://www.reddit.com/r/reddit.com/comments/6ewgt/reddit_markdown_primer_or_how_do_you_do_all_that/) from the original on 2019-06-11. Retrieved 2013-03-29.
351
+ 35. **[^](#cite_ref-36)** ["SourceForge: Markdown Syntax Guide"](https://sourceforge.net/p/forge/documentation/markdown_syntax/). [SourceForge](https://en.wikipedia.org/wiki/SourceForge "SourceForge"). [Archived](https://web.archive.org/web/20190613130356/https://sourceforge.net/p/forge/documentation/markdown_syntax/) from the original on 2019-06-13. Retrieved 2013-05-10.
352
+ 36. **[^](#cite_ref-37)** ["Markdown Editing Help"](https://stackoverflow.com/editing-help). StackOverflow.com. [Archived](https://web.archive.org/web/20140328061854/http://stackoverflow.com/editing-help) from the original on 2014-03-28. Retrieved 2014-04-11.
353
+ 37. **[^](#cite_ref-38)** ["Markdown Syntax Documentation"](https://daringfireball.net/projects/markdown/syntax#html). *daringfireball.net*. [Archived](https://web.archive.org/web/20190909051956/https://daringfireball.net/projects/markdown/syntax#html) from the original on 2019-09-09. Retrieved 2021-03-01.
354
+ 38. **[^](#cite_ref-italic_39-0)** ["Basic Syntax: Italic"](https://www.markdownguide.org/basic-syntax/#italic). *The Markdown Guide*. Matt Cone. [Archived](https://web.archive.org/web/20220326234942/https://www.markdownguide.org/basic-syntax/#italic) from the original on 26 March 2022. Retrieved 27 March 2022. To italicize text, add one asterisk or underscore before and after a word or phrase. To italicize the middle of a word for emphasis, add one asterisk without spaces around the letters.
355
+ 39. **[^](#cite_ref-40)** [Tom Preston-Werner](https://en.wikipedia.org/wiki/Tom_Preston-Werner "Tom Preston-Werner"). ["GitHub Flavored Markdown Examples"](https://github.com/mojombo/github-flavored-markdown/issues/1). *GitHub*. [Archived](https://web.archive.org/web/20210513154115/https://github.com/mojombo/github-flavored-markdown/issues/1) from the original on 2021-05-13. Retrieved 2021-04-02.
356
+ 40. **[^](#cite_ref-41)** ["A formal spec for GitHub Flavored Markdown"](https://githubengineering.com/a-formal-spec-for-github-markdown/). *GitHub Engineering*. 14 March 2017. [Archived](https://web.archive.org/web/20200203205138/https://githubengineering.com/a-formal-spec-for-github-markdown/) from the original on 3 February 2020. Retrieved 16 Mar 2017.
357
+ 41. **[^](#cite_ref-fortin-2018_42-0)** Fortin, Michel (2018). ["PHP Markdown Extra"](https://michelf.ca/projects/php-markdown/extra). *Michel Fortin website*. [Archived](https://web.archive.org/web/20210117015819/https://michelf.ca/projects/php-markdown/extra/) from the original on 2021-01-17. Retrieved 2018-12-26.
358
+ 42. **[^](#cite_ref-43)** ["PHP Markdown Extra"](https://michelf.ca/projects/php-markdown/extra). *Michel Fortin*. [Archived](https://web.archive.org/web/20210117015819/https://michelf.ca/projects/php-markdown/extra/) from the original on 2021-01-17. Retrieved 2018-12-26.
359
+ 43. **[^](#cite_ref-44)** ["Markdown editor for BUEditor"](https://drupal.org/project/markdowneditor). 4 December 2008. [Archived](https://web.archive.org/web/20200917172201/https://www.drupal.org/project/markdowneditor) from the original on 17 September 2020. Retrieved 15 January 2017.
360
+ 44. **[^](#cite_ref-45)** ["Plugin: wet\_textfilter\_markdown"](https://plugins.textpattern.com/plugins/wet_textfilter_markdown). *Textpattern CMS plugins*. 2025-04-27.
361
+ 45. **[^](#cite_ref-46)** ["Markdown for TYPO3 (markdown\_content)"](https://extensions.typo3.org/extension/markdown_content/). *extensions.typo3.org*. [Archived](https://web.archive.org/web/20210201205749/https://extensions.typo3.org/extension/markdown_content/) from the original on 2021-02-01. Retrieved 2019-02-06.
362
+ 46. **[^](#cite_ref-47)** ["W3C Community Page of Markdown Implementations"](https://www.w3.org/community/markdown/wiki/MarkdownImplementations). *W3C Markdown Wiki*. [Archived](https://web.archive.org/web/20200917231621/https://www.w3.org/community/markdown/wiki/MarkdownImplementations) from the original on 17 September 2020. Retrieved 24 March 2016.
363
+ 47. ^ [***a***](#cite_ref-MarkdownUWP_MSappStoreEn-US_48-0) [***b***](#cite_ref-MarkdownUWP_MSappStoreEn-US_48-1) [***c***](#cite_ref-MarkdownUWP_MSappStoreEn-US_48-2) [***d***](#cite_ref-MarkdownUWP_MSappStoreEn-US_48-3) [https://apps.microsoft.com/detail/9nblggh4q9rs?hl=en-US&gl=BB](https://apps.microsoft.com/detail/9nblggh4q9rs?hl=en-US&gl=BB)
364
+ 48. **[^](#cite_ref-49)** see "..." menu in source code editor pane
365
+ 49. **[^](#cite_ref-50)** [https://github.com/marktext/marktext#download-and-installation](https://github.com/marktext/marktext#download-and-installation)
366
+ 50. **[^](#cite_ref-51)** [https://github.com/marktext/marktext/releases/tag/v0.17.1](https://github.com/marktext/marktext/releases/tag/v0.17.1)
367
+ 51. **[^](#cite_ref-52)** [https://github.com/marktext/marktext?tab=readme-ov-file#marktext](https://github.com/marktext/marktext?tab=readme-ov-file#marktext)
368
+ 52. **[^](#cite_ref-53)** [https://github.com/marktext/marktext?tab=MIT-1-ov-file#readme](https://github.com/marktext/marktext?tab=MIT-1-ov-file#readme)
369
+ 53. **[^](#cite_ref-54)** in "Preferences" dialog window: "General" tab->"Window:" section, "Zoom" and/or "Editor" tab, "Code block settings:" section
370
+ 54. **[^](#cite_ref-55)** "View" menu
371
+ 55. ^ [***a***](#cite_ref-MarkTextFeatures_56-0) [***b***](#cite_ref-MarkTextFeatures_56-1) [https://github.com/marktext/marktext?tab=readme-ov-file#features](https://github.com/marktext/marktext?tab=readme-ov-file#features)
372
+ 56. **[^](#cite_ref-57)** "View" menu
373
+ 57. **[^](#cite_ref-58)** [https://github.com/erictli/scratch/releases](https://github.com/erictli/scratch/releases)
374
+ 58. ^ [***a***](#cite_ref-Scratch_homepage_59-0) [***b***](#cite_ref-Scratch_homepage_59-1) [https://www.ericli.io/scratch](https://www.ericli.io/scratch)
375
+
376
+ * [Official website](https://daringfireball.net/projects/markdown/) for original John Gruber markup
package/package.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "name": "site-to-md-cli",
3
+ "version": "1.0.0",
4
+ "description": "Convert websites to clean, formatted Markdown directly from your terminal.",
5
+ "main": "src/index.js",
6
+ "type": "module",
7
+ "bin": {
8
+ "site-to-md": "./bin/cli.js"
9
+ },
10
+ "scripts": {
11
+ "start": "node ./bin/cli.js"
12
+ },
13
+ "keywords": [
14
+ "cli",
15
+ "markdown",
16
+ "scraper",
17
+ "readability",
18
+ "html-to-markdown"
19
+ ],
20
+ "author": "Alex Scott",
21
+ "license": "MIT",
22
+ "dependencies": {
23
+ "@mozilla/readability": "^0.5.0",
24
+ "commander": "^12.0.0",
25
+ "jsdom": "^24.0.0",
26
+ "turndown": "^7.1.3",
27
+ "turndown-plugin-gfm": "^1.0.2"
28
+ }
29
+ }
package/src/index.js ADDED
@@ -0,0 +1,62 @@
1
+ import { JSDOM } from 'jsdom';
2
+ import { Readability } from '@mozilla/readability';
3
+ import TurndownService from 'turndown';
4
+ import { gfm } from 'turndown-plugin-gfm';
5
+
6
/**
 * Fetches a URL and converts its main content to Markdown.
 *
 * The request is sent with a Chrome-on-Windows User-Agent string
 * (presumably so that sites which reject unknown clients still respond —
 * confirm with the author). The response body is read as text and handed to
 * `convertHtmlToMarkdown`, with `url` used as the base for relative links.
 *
 * @param {string} url - The URL to fetch.
 * @returns {Promise<{title: string, markdown: string, excerpt: string}>}
 * @throws {Error} If the server answers with a non-OK HTTP status. Errors
 *   raised by `fetch` itself or by the conversion step propagate unchanged.
 */
export async function fetchAndConvert(url) {
  const response = await fetch(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    },
  });

  if (!response.ok) {
    // The error body is never read, so cancel it explicitly: an unconsumed
    // body can keep the underlying connection busy until garbage collection.
    try {
      await response.body?.cancel();
    } catch {
      // Best-effort cleanup only; the HTTP status error below is the failure
      // worth reporting to the caller.
    }
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }

  const html = await response.text();
  return convertHtmlToMarkdown(html, url);
}
25
+
26
/**
 * Converts raw HTML to clean Markdown using Readability and Turndown.
 *
 * The HTML is parsed with jsdom, the main article is extracted with
 * Readability, and the extracted HTML is converted to Markdown with Turndown
 * (ATX headings, fenced code blocks, `*` emphasis) plus the GFM plugin.
 *
 * @param {string} html - The raw HTML string.
 * @param {string} [url='http://localhost'] - Base URL for resolving relative links.
 * @returns {{title: string, markdown: string, excerpt: string}}
 * @throws {Error} If Readability cannot extract an article from the HTML.
 */
export function convertHtmlToMarkdown(html, url = 'http://localhost') {
  // 1. Parse HTML into a DOM
  const dom = new JSDOM(html, { url });

  // 2. Extract clean article content
  let article;
  try {
    article = new Readability(dom.window.document).parse();
  } finally {
    // Always release the jsdom window, even if parsing throws, so repeated
    // calls do not accumulate live windows. The fields used below (title,
    // excerpt, content) are read from the parse result, not from the DOM.
    dom.window.close();
  }

  if (!article) {
    throw new Error('Could not parse meaningful content from the provided HTML.');
  }

  // 3. Configure Turndown for Markdown conversion
  const turndownService = new TurndownService({
    headingStyle: 'atx', // Use '#' for headings
    codeBlockStyle: 'fenced', // Use '```' for code blocks
    emDelimiter: '*', // Use '*' for italics
  });

  // Add GitHub Flavored Markdown support (tables, strikethrough, task lists)
  turndownService.use(gfm);

  // Convert the cleaned HTML to Markdown
  const markdown = turndownService.turndown(article.content);

  return {
    title: article.title,
    excerpt: article.excerpt,
    markdown,
  };
}