pygments.rb 0.2.4 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/pygments/version.rb +1 -1
- data/vendor/pygments-main/AUTHORS +14 -0
- data/vendor/pygments-main/CHANGES +34 -3
- data/vendor/pygments-main/Makefile +1 -1
- data/vendor/pygments-main/docs/generate.py +1 -1
- data/vendor/pygments-main/external/markdown-processor.py +1 -1
- data/vendor/pygments-main/external/moin-parser.py +1 -1
- data/vendor/pygments-main/external/rst-directive-old.py +1 -1
- data/vendor/pygments-main/external/rst-directive.py +1 -1
- data/vendor/pygments-main/pygments/__init__.py +1 -1
- data/vendor/pygments-main/pygments/cmdline.py +4 -1
- data/vendor/pygments-main/pygments/console.py +1 -1
- data/vendor/pygments-main/pygments/filter.py +1 -1
- data/vendor/pygments-main/pygments/filters/__init__.py +1 -1
- data/vendor/pygments-main/pygments/formatter.py +1 -1
- data/vendor/pygments-main/pygments/formatters/__init__.py +1 -1
- data/vendor/pygments-main/pygments/formatters/_mapping.py +1 -1
- data/vendor/pygments-main/pygments/formatters/bbcode.py +1 -1
- data/vendor/pygments-main/pygments/formatters/html.py +2 -2
- data/vendor/pygments-main/pygments/formatters/img.py +1 -1
- data/vendor/pygments-main/pygments/formatters/latex.py +3 -2
- data/vendor/pygments-main/pygments/formatters/other.py +1 -1
- data/vendor/pygments-main/pygments/formatters/rtf.py +1 -1
- data/vendor/pygments-main/pygments/formatters/svg.py +1 -1
- data/vendor/pygments-main/pygments/formatters/terminal.py +5 -2
- data/vendor/pygments-main/pygments/formatters/terminal256.py +5 -2
- data/vendor/pygments-main/pygments/lexer.py +29 -10
- data/vendor/pygments-main/pygments/lexers/__init__.py +14 -11
- data/vendor/pygments-main/pygments/lexers/_asybuiltins.py +1 -1
- data/vendor/pygments-main/pygments/lexers/_clbuiltins.py +1 -1
- data/vendor/pygments-main/pygments/lexers/_luabuiltins.py +1 -1
- data/vendor/pygments-main/pygments/lexers/_mapping.py +41 -23
- data/vendor/pygments-main/pygments/lexers/_phpbuiltins.py +1 -1
- data/vendor/pygments-main/pygments/lexers/_postgres_builtins.py +1 -1
- data/vendor/pygments-main/pygments/lexers/_scilab_builtins.py +29 -0
- data/vendor/pygments-main/pygments/lexers/_vimbuiltins.py +3 -3
- data/vendor/pygments-main/pygments/lexers/agile.py +148 -443
- data/vendor/pygments-main/pygments/lexers/asm.py +5 -3
- data/vendor/pygments-main/pygments/lexers/compiled.py +298 -294
- data/vendor/pygments-main/pygments/lexers/dotnet.py +40 -34
- data/vendor/pygments-main/pygments/lexers/functional.py +723 -4
- data/vendor/pygments-main/pygments/lexers/hdl.py +228 -6
- data/vendor/pygments-main/pygments/lexers/jvm.py +678 -0
- data/vendor/pygments-main/pygments/lexers/math.py +65 -2
- data/vendor/pygments-main/pygments/lexers/other.py +875 -481
- data/vendor/pygments-main/pygments/lexers/parsers.py +1 -1
- data/vendor/pygments-main/pygments/lexers/shell.py +360 -0
- data/vendor/pygments-main/pygments/lexers/special.py +1 -1
- data/vendor/pygments-main/pygments/lexers/sql.py +565 -0
- data/vendor/pygments-main/pygments/lexers/templates.py +1 -1
- data/vendor/pygments-main/pygments/lexers/text.py +237 -100
- data/vendor/pygments-main/pygments/lexers/web.py +146 -10
- data/vendor/pygments-main/pygments/plugin.py +1 -1
- data/vendor/pygments-main/pygments/scanner.py +1 -1
- data/vendor/pygments-main/pygments/style.py +1 -1
- data/vendor/pygments-main/pygments/styles/__init__.py +2 -1
- data/vendor/pygments-main/pygments/styles/autumn.py +1 -1
- data/vendor/pygments-main/pygments/styles/borland.py +1 -1
- data/vendor/pygments-main/pygments/styles/bw.py +1 -1
- data/vendor/pygments-main/pygments/styles/colorful.py +1 -1
- data/vendor/pygments-main/pygments/styles/default.py +1 -1
- data/vendor/pygments-main/pygments/styles/emacs.py +1 -1
- data/vendor/pygments-main/pygments/styles/friendly.py +1 -1
- data/vendor/pygments-main/pygments/styles/fruity.py +1 -2
- data/vendor/pygments-main/pygments/styles/manni.py +1 -1
- data/vendor/pygments-main/pygments/styles/monokai.py +1 -1
- data/vendor/pygments-main/pygments/styles/murphy.py +1 -1
- data/vendor/pygments-main/pygments/styles/native.py +1 -1
- data/vendor/pygments-main/pygments/styles/pastie.py +1 -1
- data/vendor/pygments-main/pygments/styles/perldoc.py +1 -1
- data/vendor/pygments-main/pygments/styles/rrt.py +33 -0
- data/vendor/pygments-main/pygments/styles/tango.py +1 -1
- data/vendor/pygments-main/pygments/styles/trac.py +1 -1
- data/vendor/pygments-main/pygments/styles/vim.py +1 -1
- data/vendor/pygments-main/pygments/styles/vs.py +1 -1
- data/vendor/pygments-main/pygments/token.py +1 -1
- data/vendor/pygments-main/pygments/unistring.py +1 -1
- data/vendor/pygments-main/pygments/util.py +2 -2
- data/vendor/pygments-main/scripts/check_sources.py +2 -2
- data/vendor/pygments-main/scripts/find_codetags.py +1 -1
- data/vendor/pygments-main/scripts/find_error.py +5 -2
- data/vendor/pygments-main/scripts/get_vimkw.py +9 -4
- data/vendor/pygments-main/setup.py +1 -1
- data/vendor/pygments-main/tests/examplefiles/classes.dylan +16 -0
- data/vendor/pygments-main/tests/examplefiles/coq_RelationClasses +447 -0
- data/vendor/pygments-main/tests/examplefiles/example.cls +15 -0
- data/vendor/pygments-main/tests/examplefiles/example.moon +629 -0
- data/vendor/pygments-main/tests/examplefiles/example.p +34 -0
- data/vendor/pygments-main/tests/examplefiles/example.snobol +15 -0
- data/vendor/pygments-main/tests/examplefiles/example.u +548 -0
- data/vendor/pygments-main/tests/examplefiles/example_elixir.ex +363 -0
- data/vendor/pygments-main/tests/examplefiles/foo.sce +6 -0
- data/vendor/pygments-main/tests/examplefiles/http_request_example +14 -0
- data/vendor/pygments-main/tests/examplefiles/http_response_example +27 -0
- data/vendor/pygments-main/tests/examplefiles/irc.lsp +214 -0
- data/vendor/pygments-main/tests/examplefiles/markdown.lsp +679 -0
- data/vendor/pygments-main/tests/examplefiles/nemerle_sample.n +4 -2
- data/vendor/pygments-main/tests/examplefiles/reversi.lsp +427 -0
- data/vendor/pygments-main/tests/examplefiles/scilab.sci +30 -0
- data/vendor/pygments-main/tests/examplefiles/test.bro +250 -0
- data/vendor/pygments-main/tests/examplefiles/test.cs +23 -0
- data/vendor/pygments-main/tests/examplefiles/test.dart +23 -0
- data/vendor/pygments-main/tests/examplefiles/test.ecl +58 -0
- data/vendor/pygments-main/tests/examplefiles/test.fan +818 -0
- data/vendor/pygments-main/tests/examplefiles/test.ps1 +108 -0
- data/vendor/pygments-main/tests/examplefiles/test.vhdl +161 -0
- data/vendor/pygments-main/tests/old_run.py +1 -1
- data/vendor/pygments-main/tests/run.py +1 -1
- data/vendor/pygments-main/tests/test_basic_api.py +4 -3
- data/vendor/pygments-main/tests/test_clexer.py +1 -1
- data/vendor/pygments-main/tests/test_cmdline.py +1 -1
- data/vendor/pygments-main/tests/test_examplefiles.py +4 -3
- data/vendor/pygments-main/tests/test_html_formatter.py +33 -1
- data/vendor/pygments-main/tests/test_latex_formatter.py +1 -1
- data/vendor/pygments-main/tests/test_perllexer.py +137 -0
- data/vendor/pygments-main/tests/test_regexlexer.py +1 -1
- data/vendor/pygments-main/tests/test_token.py +1 -1
- data/vendor/pygments-main/tests/test_using_api.py +1 -1
- data/vendor/pygments-main/tests/test_util.py +35 -5
- metadata +30 -4
@@ -0,0 +1,679 @@
|
|
1
|
+
#!/usr/bin/env newlisp
|
2
|
+
|
3
|
+
;; @module markdown
|
4
|
+
;; @author cormullion
|
5
|
+
;; @description a port of John Gruber's Markdown to newLISP
|
6
|
+
;; @location http://unbalanced-parentheses.nfshost.com/
|
7
|
+
;; @version of date 2011-10-02 22:36:02
|
8
|
+
;; version history: at the end
|
9
|
+
;; a port of John Gruber's Markdown.pl (http://daringfireball.net/markdown) script to newLISP...
|
10
|
+
;; see his original Perl script for explanations of the fearsome regexen and
|
11
|
+
;; byzantine logic, etc...
|
12
|
+
;; TODO:
|
13
|
+
;; the following Markdown tests fail:
|
14
|
+
;; Inline HTML (Advanced) ... FAILED
|
15
|
+
;; Links, reference style ... FAILED -- nested brackets
|
16
|
+
;; Links, shortcut references ... FAILED
|
17
|
+
;; Markdown Documentation - Syntax ... FAILED
|
18
|
+
;; Ordered and unordered lists ... FAILED -- a nested ordered list error
|
19
|
+
;; parens in url : ![this is a stupid URL](http://example.com/(parens).jpg) see (Images.text)
|
20
|
+
;; Add: email address scrambling
|
21
|
+
|
22
|
+
(context 'Hash)
|
23
|
+
(define HashTable:HashTable)
|
24
|
+
|
25
|
+
(define (build-escape-table)
|
26
|
+
(set '*escape-chars* [text]\`*_{}[]()>#+-.![/text])
|
27
|
+
(dolist (c (explode *escape-chars*))
|
28
|
+
(HashTable c (hash c))))
|
29
|
+
|
30
|
+
(define (init-hash txt)
|
31
|
+
; finds a hash identifier that doesn't occur anywhere in the text
|
32
|
+
(set 'counter 0)
|
33
|
+
(set 'hash-prefix "HASH")
|
34
|
+
(set 'hash-id (string hash-prefix counter))
|
35
|
+
(do-while (find hash-id txt)
|
36
|
+
(set 'hash-id (string hash-prefix (inc counter))))
|
37
|
+
(Hash:build-escape-table))
|
38
|
+
|
39
|
+
(define (hash s)
|
40
|
+
(HashTable s (string hash-id (inc counter))))
|
41
|
+
|
42
|
+
(context 'markdown)
|
43
|
+
|
44
|
+
(define (markdown:markdown txt)
|
45
|
+
(initialize)
|
46
|
+
(Hash:init-hash txt)
|
47
|
+
(unescape-special-chars
|
48
|
+
(block-transforms
|
49
|
+
(strip-link-definitions
|
50
|
+
(protect
|
51
|
+
(cleanup txt))))))
|
52
|
+
|
53
|
+
(define (initialize)
|
54
|
+
(set '*escape-pairs* '(
|
55
|
+
({\\\\} {\})
|
56
|
+
({\\`} {`})
|
57
|
+
({\\\*} {*})
|
58
|
+
({\\_} {_})
|
59
|
+
([text]\\\{[/text] [text]{[/text])
|
60
|
+
([text]\\\}[/text] [text]}[/text])
|
61
|
+
({\\\[} {[})
|
62
|
+
({\\\]} {]})
|
63
|
+
({\\\(} {(})
|
64
|
+
({\\\)} {)})
|
65
|
+
({\\>} {>})
|
66
|
+
({\\\#} {#})
|
67
|
+
({\\\+} {+})
|
68
|
+
({\\\-} {-})
|
69
|
+
({\\\.} {.})
|
70
|
+
({\\!} {!})))
|
71
|
+
(set '*hashed-html-blocks* '())
|
72
|
+
(set '*list-level* 0))
|
73
|
+
|
74
|
+
(define (block-transforms txt)
|
75
|
+
(form-paragraphs
|
76
|
+
(protect
|
77
|
+
(block-quotes
|
78
|
+
(code-blocks
|
79
|
+
(lists
|
80
|
+
(horizontal-rules
|
81
|
+
(headers txt))))))))
|
82
|
+
|
83
|
+
(define (span-transforms txt)
|
84
|
+
(line-breaks
|
85
|
+
(emphasis
|
86
|
+
(amps-and-angles
|
87
|
+
(auto-links
|
88
|
+
(anchors
|
89
|
+
(images
|
90
|
+
(escape-special-chars
|
91
|
+
(escape-special-chars (code-spans txt) 'inside-attributes)))))))))
|
92
|
+
|
93
|
+
(define (tokenize-html xhtml)
|
94
|
+
; return list of tag/text portions of xhtml text
|
95
|
+
(letn (
|
96
|
+
(tag-match [text]((?s:<!(-- .*? -- \s*)+>)|
|
97
|
+
(?s:<\?.*?\?>)|
|
98
|
+
(?:<[a-z/!$](?:[^<>]|
|
99
|
+
(?:<[a-z/!$](?:[^<>]|
|
100
|
+
(?:<[a-z/!$](?:[^<>]|
|
101
|
+
(?:<[a-z/!$](?:[^<>]|
|
102
|
+
(?:<[a-z/!$](?:[^<>]|
|
103
|
+
(?:<[a-z/!$](?:[^<>])*>))*>))*>))*>))*>))*>))[/text]) ; yeah, well...
|
104
|
+
(str xhtml)
|
105
|
+
(len (length str))
|
106
|
+
(pos 0)
|
107
|
+
(tokens '()))
|
108
|
+
(while (set 'tag-start (find tag-match str 8))
|
109
|
+
(if (< pos tag-start)
|
110
|
+
(push (list 'text (slice str pos (- tag-start pos))) tokens -1))
|
111
|
+
(push (list 'tag $0) tokens -1)
|
112
|
+
(set 'str (slice str (+ tag-start (length $0))))
|
113
|
+
(set 'pos 0))
|
114
|
+
; leftovers
|
115
|
+
(if (< pos len)
|
116
|
+
(push (list 'text (slice str pos (- len pos))) tokens -1))
|
117
|
+
tokens))
|
118
|
+
|
119
|
+
(define (escape-special-chars txt (within-tag-attributes nil))
|
120
|
+
(let ((temp (tokenize-html txt))
|
121
|
+
(new-text {}))
|
122
|
+
(dolist (pair temp)
|
123
|
+
(if (= (first pair) 'tag)
|
124
|
+
; 'tag
|
125
|
+
(begin
|
126
|
+
(set 'new-text (replace {\\} (last pair) (HashTable {\\}) 0))
|
127
|
+
(replace [text](?<=.)</?code>(?=.)[/text] new-text (HashTable {`}) 0)
|
128
|
+
(replace {\*} new-text (HashTable {*}) 0)
|
129
|
+
(replace {_} new-text (HashTable {_} ) 0))
|
130
|
+
; 'text
|
131
|
+
(if within-tag-attributes
|
132
|
+
(set 'new-text (last pair))
|
133
|
+
(set 'new-text (encode-backslash-escapes (last pair)))))
|
134
|
+
(setf (temp $idx) (list (first pair) new-text)))
|
135
|
+
; return as text
|
136
|
+
(join (map last temp))))
|
137
|
+
|
138
|
+
(define (encode-backslash-escapes t)
|
139
|
+
(dolist (pair *escape-pairs*)
|
140
|
+
(replace (first pair) t (HashTable (last pair)) 14)))
|
141
|
+
|
142
|
+
(define (encode-code s)
  ; Encode/escape certain characters inside Markdown code runs.
  ;   s - text taken from a code span or code block
  ; Returns s with HTML-significant characters converted to entities and
  ; Markdown-significant characters replaced by their HashTable placeholders,
  ; so later span transforms do not treat them as markup.
  ; '&' goes first so the entities produced below are not escaped again.
  (replace {&}  s "&amp;" 0)
  (replace {<}  s "&lt;" 0)
  (replace {>}  s "&gt;" 0)
  ; each Markdown character is swapped for the placeholder stored under
  ; that same character ('*' must use the {*} entry)
  (replace {\*} s (HashTable {*}) 0)
  (replace {_}  s (HashTable {_}) 0)
  (replace "{"  s (HashTable "{") 0)
  (replace {\[} s (HashTable {[}) 0)
  (replace {\]} s (HashTable {]}) 0)
  (replace {\\} s (HashTable "\\") 0))
|
153
|
+
|
154
|
+
(define (code-spans s)
|
155
|
+
(replace
|
156
|
+
{(?<!\\)(`+)(.+?)(?<!`)\1(?!`)}
|
157
|
+
s
|
158
|
+
(string {<code>} (encode-code (trim $2)) {</code>})
|
159
|
+
2))
|
160
|
+
|
161
|
+
(define (encode-alt s)
  ; Make text safe for use inside a double-quoted HTML attribute (e.g. alt="...").
  ;   s - raw attribute text
  ; Returns s with '&' and '"' replaced by the corresponding HTML entities.
  ; '&' is replaced first so the '&' of "&quot;" is not escaped a second time.
  (replace {&} s "&amp;" 0)
  (replace {"} s "&quot;" 0))
|
164
|
+
|
165
|
+
(define (images txt)
|
166
|
+
(let ((alt-text {})
|
167
|
+
(url {})
|
168
|
+
(title {})
|
169
|
+
(ref-regex {(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])})
|
170
|
+
(inline-regex {(!\[(.*?)\]\([ \t]*<?(\S+?)>?[ \t]*((['"])(.*?)\5[ \t]*)?\))})
|
171
|
+
(whole-match {})
|
172
|
+
(result {})
|
173
|
+
(id-ref {})
|
174
|
+
(url {}))
|
175
|
+
; reference links ![alt text][id]
|
176
|
+
(replace
|
177
|
+
ref-regex
|
178
|
+
txt
|
179
|
+
(begin
|
180
|
+
(set 'whole-match $1 'alt-text $2 'id-ref $3)
|
181
|
+
(if alt-text
|
182
|
+
(replace {"} alt-text {&quot;} 0))
|
183
|
+
(if (empty? id-ref)
|
184
|
+
(set 'id-ref (lower-case alt-text)))
|
185
|
+
(if (lookup id-ref *link-database*)
|
186
|
+
(set 'url (first (lookup id-ref *link-database*)))
|
187
|
+
(set 'url nil))
|
188
|
+
(if url
|
189
|
+
(begin
|
190
|
+
(replace {\*} url (HashTable {*}) 0)
|
191
|
+
(replace {_} url (HashTable {_}) 0)
|
192
|
+
))
|
193
|
+
(if (last (lookup id-ref *link-database*))
|
194
|
+
; title
|
195
|
+
(begin
|
196
|
+
(set 'title (last (lookup id-ref *link-database*)))
|
197
|
+
(replace {"} title {&quot;} 0)
|
198
|
+
(replace {\*} title (HashTable {*}) 0)
|
199
|
+
(replace {_} title (HashTable {_}) 0))
|
200
|
+
; no title
|
201
|
+
(set 'title {})
|
202
|
+
)
|
203
|
+
(if url
|
204
|
+
(set 'result (string
|
205
|
+
{<img src="}
|
206
|
+
(trim url)
|
207
|
+
{" alt="}
|
208
|
+
alt-text {" }
|
209
|
+
(if (not (empty? title))
|
210
|
+
(string { title="} title {"}) {})
|
211
|
+
{ />}))
|
212
|
+
(set 'result whole-match))
|
213
|
+
)
|
214
|
+
0
|
215
|
+
)
|
216
|
+
; inline image refs: ![alt text](url "optional title")
|
217
|
+
(replace
|
218
|
+
inline-regex
|
219
|
+
txt
|
220
|
+
(begin
|
221
|
+
(set 'whole-match $1)
|
222
|
+
(set 'alt-text $2)
|
223
|
+
(set 'url $3)
|
224
|
+
(set 'title $6)
|
225
|
+
(if alt-text
|
226
|
+
(replace {"} alt-text {&quot;} 0)
|
227
|
+
(set 'alt-text {}))
|
228
|
+
(if title
|
229
|
+
(begin
|
230
|
+
(replace {"} title {&quot;} 0)
|
231
|
+
(replace {\*} title (HashTable {*}) 0)
|
232
|
+
(replace {_} title (HashTable {_}) 0))
|
233
|
+
(set 'title {}))
|
234
|
+
(replace {\*} url (HashTable {*}) 0)
|
235
|
+
(replace {_} url (HashTable {_}) 0)
|
236
|
+
(string
|
237
|
+
{<img src="}
|
238
|
+
(trim url)
|
239
|
+
{" alt="}
|
240
|
+
alt-text {" }
|
241
|
+
(if title (string {title="} title {"}) {}) { />})
|
242
|
+
)
|
243
|
+
0
|
244
|
+
)
|
245
|
+
; empty ones are possible
|
246
|
+
(set '$1 {})
|
247
|
+
(replace {!\[(.*?)\]\([ \t]*\)}
|
248
|
+
txt
|
249
|
+
(string {<img src="" alt="} $1 {" title="" />})
|
250
|
+
0)))
|
251
|
+
|
252
|
+
(define (make-anchor link-text id-ref )
|
253
|
+
; Link defs are in the form: ^[id]: url "optional title"
|
254
|
+
; stored in link db list as (id (url title))
|
255
|
+
; params are text to be linked and the id of the link in the db
|
256
|
+
; eg bar 1 for [bar][1]
|
257
|
+
|
258
|
+
(let ((title {})
|
259
|
+
(id id-ref)
|
260
|
+
(url nil))
|
261
|
+
(if link-text
|
262
|
+
(begin
|
263
|
+
(replace {"} link-text {&quot;} 0)
|
264
|
+
(replace {\n} link-text { } 0)
|
265
|
+
(replace {[ ]?\n} link-text { } 0)))
|
266
|
+
(if (null? id ) (set 'id (lower-case link-text)))
|
267
|
+
(if (not (nil? (lookup id *link-database*)))
|
268
|
+
(begin
|
269
|
+
(set 'url (first (lookup id *link-database*)))
|
270
|
+
(replace {\*} url (HashTable {*}) 0)
|
271
|
+
(replace {_} url (HashTable {_}) 0)
|
272
|
+
(if (set 'title (last (lookup id *link-database*)))
|
273
|
+
(begin
|
274
|
+
(replace {"} title {&quot;} 0)
|
275
|
+
(replace {\*} title (HashTable {*}) 0)
|
276
|
+
(replace {_} title (HashTable {_}) 0))
|
277
|
+
(set 'title {})))
|
278
|
+
(set 'url nil))
|
279
|
+
(if url
|
280
|
+
(string {<a href="} (trim url)
|
281
|
+
{"}
|
282
|
+
(if (not (= title {})) (string { title="} (trim title) {"}) {})
|
283
|
+
{>} link-text {</a>})
|
284
|
+
(string {[} link-text {][} id-ref {]}))))
|
285
|
+
|
286
|
+
(define (anchors txt)
|
287
|
+
(letn ((nested-brackets {(?>[^\[\]]+)*})
|
288
|
+
(ref-link-regex (string {(\[(} nested-brackets {)\][ ]?(?:\n[ ]*)?\[(.*?)\])}))
|
289
|
+
(inline-regex {(\[(.*?)\]\([ ]*<?(.*?\)?)>?[ ]*((['"])(.*?)\5[ \t]*)?\))})
|
290
|
+
(link-text {})
|
291
|
+
(url {})
|
292
|
+
(title {}))
|
293
|
+
; reference-style links: [link text] [id]
|
294
|
+
(set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {} '$6 {}) ; i still don't think I should have to do this...
|
295
|
+
|
296
|
+
; what about this regex instead?
|
297
|
+
(set 'ref-link-regex {(\[(.*?)\][ ]?\[(.*?)\])})
|
298
|
+
|
299
|
+
(replace ref-link-regex txt (make-anchor $2 $3) 8) ; $2 is link text, $3 is id
|
300
|
+
; inline links: [link text](url "optional title")
|
301
|
+
(set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {} '$6 {})
|
302
|
+
(replace
|
303
|
+
inline-regex
|
304
|
+
txt
|
305
|
+
(begin
|
306
|
+
(set 'link-text $2)
|
307
|
+
(set 'url $3)
|
308
|
+
(set 'title $6)
|
309
|
+
(if link-text (replace {"} link-text {&quot;} 0))
|
310
|
+
(if title
|
311
|
+
(begin
|
312
|
+
(replace {"} title {&quot;} 0)
|
313
|
+
(replace {\*} title (HashTable {*}) 0)
|
314
|
+
(replace {_} title (HashTable {_}) 0))
|
315
|
+
(set 'title {}))
|
316
|
+
(replace {\*} url (HashTable {*}) 0)
|
317
|
+
(replace {_} url (HashTable {_}) 0)
|
318
|
+
(replace {^<(.*)>$} url $1 0)
|
319
|
+
(string
|
320
|
+
{<a href="}
|
321
|
+
(trim url)
|
322
|
+
{"}
|
323
|
+
(if (not (= title {}))
|
324
|
+
(string { title="} (trim title) {"})
|
325
|
+
{})
|
326
|
+
{>} link-text {</a>}
|
327
|
+
))
|
328
|
+
8
|
329
|
+
) ; replace
|
330
|
+
) txt)
|
331
|
+
|
332
|
+
(define (auto-links txt)
|
333
|
+
(replace
|
334
|
+
[text]<((https?|ftp):[^'">\s]+)>[/text]
|
335
|
+
txt
|
336
|
+
(string {<a href="} $1 {">} $1 {</a>})
|
337
|
+
0
|
338
|
+
)
|
339
|
+
; to-do: email ...
|
340
|
+
)
|
341
|
+
|
342
|
+
(define (amps-and-angles txt)
  ; Smart processing for ampersands and angle brackets.
  ;   txt - span-level text
  ; Returns txt with bare '&' and '<' turned into HTML entities, leaving
  ; existing entities and anything that looks like the start of a tag alone.
  ; Both patterns run with option 10 (multiline + extended).

  ; an '&' that does not begin an entity such as &copy; or &#x41; becomes &amp;
  (replace
    [text]&(?!\#?[xX]?(?:[0-9a-fA-F]+|\w+);)[/text]
    txt
    {&amp;}
    10)
  ; a '<' not followed by a letter, '/', '?', '$' or '!' cannot open a tag: make it &lt;
  (replace
    [text]<(?![a-z/?\$!])[/text]
    txt
    {&lt;}
    10))
|
355
|
+
|
356
|
+
(define (emphasis txt)
|
357
|
+
; italics/bold: strong first
|
358
|
+
(replace
|
359
|
+
[text] (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 [/text]
|
360
|
+
txt
|
361
|
+
(string {<strong>} $2 {</strong>})
|
362
|
+
8
|
363
|
+
)
|
364
|
+
(replace
|
365
|
+
[text] (\*|_) (?=\S) (.+?) (?<=\S) \1 [/text]
|
366
|
+
txt
|
367
|
+
(string {<em>} $2 {</em>})
|
368
|
+
8
|
369
|
+
))
|
370
|
+
|
371
|
+
(define (line-breaks txt)
|
372
|
+
; handles line break markers
|
373
|
+
(replace " {2,}\n" txt " <br/>\n" 0))
|
374
|
+
|
375
|
+
(define (hex-str-to-unicode-char strng)
|
376
|
+
; given a five character string, assume it's "U" + 4 hex chars and convert
|
377
|
+
; return the character...
|
378
|
+
(char (int (string "0x" (1 strng)) 0 16)))
|
379
|
+
|
380
|
+
(define (ustring s)
|
381
|
+
; any four digit string preceded by U
|
382
|
+
(replace "U[0-9a-f]{4,}" s (hex-str-to-unicode-char $0) 0))
|
383
|
+
|
384
|
+
(define (cleanup txt)
|
385
|
+
; cleanup the text by normalizing some possible variations
|
386
|
+
(replace "\r\n|\r" txt "\n" 0) ; standardize line ends
|
387
|
+
(push "\n\n" txt -1) ; end with two returns
|
388
|
+
(set 'txt (detab txt)) ; convert tabs to spaces
|
389
|
+
|
390
|
+
; convert inline Unicode:
|
391
|
+
(set 'txt (ustring txt))
|
392
|
+
(replace "\n[ \t]+\n" txt "\n\n" 0) ; lines with only spaces and tabs
|
393
|
+
)
|
394
|
+
|
395
|
+
(define (protect txt)
  ; Protect ("hash") raw HTML blocks so later Markdown transforms leave them alone.
  ;   txt - document text
  ; Every match of the block-level HTML patterns below is replaced by a key
  ; obtained from Hash:hash, and the (key original-html) pair is appended to
  ; *hashed-html-blocks*. Returns the text with those keys substituted.
  (letn ((nested-block-regex [text](^<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b(.*\n)*?</\2>[ \t]*(?=\n+|\Z))[/text])
         (liberal-tag-regex [text](^<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b(.*\n)*?.*</\2>[ \t]*(?=\n+|\Z))[/text])
         (hr-regex [text](?:(?<=\n\n)|\A\n?)([ ]{0,3}<(hr)\b([^<>])*?/?>[ \t]*(?=\n{2,}|\Z))[/text])
         (html-comment-regex [text](?:(?<=\n\n)|\A\n?)([ ]{0,3}(?s:<!(--.*?--\s*)+>)[ \t]*(?=\n{2,}|\Z))[/text])
         (results '())
         (chunk-count (length (set 'chunks (parse txt "\n\n"))))
         (chunk-size 500))

    ; due to a limitation in PCRE, long sections have to be divided up otherwise we'll crash
    ; so divide up long texts into chunks, then do the regex on each chunk
    ; not an ideal solution, but it works ok :(

    (for (i 0 chunk-count chunk-size)
      ; take up to chunk-size paragraphs starting at index i; the slice length
      ; must equal the loop step, otherwise the last paragraph of each chunk
      ; would be skipped
      (set 'text-chunk (join (i (min chunk-size (- chunk-count i)) chunks) "\n\n"))
      (dolist (rgx (list nested-block-regex liberal-tag-regex hr-regex html-comment-regex))
        (replace
          rgx
          text-chunk
          (begin
            ; remember the original HTML under its key and leave the key,
            ; set off by blank lines, in its place
            (set 'key (Hash:hash $1))
            (push (list key $1) *hashed-html-blocks* -1)
            (string "\n\n" key "\n\n"))
          2))
      ; save this partial result
      (push text-chunk results -1))
    ; return string result
    (join results "\n\n")))
|
426
|
+
|
427
|
+
(define (unescape-special-chars t)
|
428
|
+
; Swap back in all the special characters we've hidden.
|
429
|
+
(dolist (pair (HashTable))
|
430
|
+
(replace (last pair) t (first pair) 10)) t)
|
431
|
+
|
432
|
+
(define (strip-link-definitions txt)
|
433
|
+
; strip link definitions from the text and store them
|
434
|
+
; Link defs are in the form: ^[id]: url "optional title"
|
435
|
+
; stored in link db list as (id (url title))
|
436
|
+
(let ((link-db '())
|
437
|
+
(url {})
|
438
|
+
(id {})
|
439
|
+
(title {}))
|
440
|
+
(replace
|
441
|
+
[text]^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?[ \t]*\n?[ \t]*(?:(?<=\s)["(](.+?)[")][ \t]*)?(?:\n+|\Z)[/text]
|
442
|
+
txt
|
443
|
+
(begin
|
444
|
+
(set 'id (lower-case $1) 'url (amps-and-angles $2) 'title $3)
|
445
|
+
(if title (replace {"} title {&quot;} 0))
|
446
|
+
(push (list id (list url title)) link-db)
|
447
|
+
(set '$3 {}) ; necessary?
|
448
|
+
(string {}) ; remove from text
|
449
|
+
)
|
450
|
+
10)
|
451
|
+
(set '*link-database* link-db)
|
452
|
+
txt))
|
453
|
+
|
454
|
+
(define (horizontal-rules txt)
|
455
|
+
(replace
|
456
|
+
[text]^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$[/text]
|
457
|
+
txt
|
458
|
+
"\n<hr />"
|
459
|
+
14)
|
460
|
+
(replace
|
461
|
+
[text]^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$[/text]
|
462
|
+
txt
|
463
|
+
"\n<hr />"
|
464
|
+
14)
|
465
|
+
(replace
|
466
|
+
[text]^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$[/text]
|
467
|
+
txt
|
468
|
+
"\n<hr />"
|
469
|
+
14))
|
470
|
+
|
471
|
+
(define (headers txt)
|
472
|
+
; setext headers
|
473
|
+
(let ((level 1))
|
474
|
+
(replace
|
475
|
+
[text]^(.+)[ \t]*\n=+[ \t]*\n+[/text]
|
476
|
+
txt
|
477
|
+
(string "<h1>" (span-transforms $1) "</h1>\n\n")
|
478
|
+
2)
|
479
|
+
|
480
|
+
(replace
|
481
|
+
[text]^(.+)[ \t]*\n-+[ \t]*\n+[/text]
|
482
|
+
txt
|
483
|
+
(string "<h2>" (span-transforms $1) "</h2>\n\n")
|
484
|
+
2)
|
485
|
+
; atx headers
|
486
|
+
(replace
|
487
|
+
[text]^(\#{1,6})\s*(.+?)[ ]*\#*(\n+)[/text]
|
488
|
+
txt
|
489
|
+
(begin
|
490
|
+
(set 'level (length $1))
|
491
|
+
(string "<h" level ">" (span-transforms $2) "</h" level ">\n\n")
|
492
|
+
)
|
493
|
+
2)))
|
494
|
+
|
495
|
+
(define (lists txt)
|
496
|
+
(letn ((marker-ul {[*+-]})
|
497
|
+
(marker-ol {\d+[.]})
|
498
|
+
(marker-any (string {(?:} marker-ul {|} marker-ol {)}))
|
499
|
+
(whole-list-regex (string [text](([ ]{0,3}([/text] marker-any [text])[ \t]+)(?s:.+?)(\z|\n{2,}(?=\S)(?![ \t]*[/text] marker-any [text][ \t]+)))[/text]))
|
500
|
+
(my-list {})
|
501
|
+
(list-type {})
|
502
|
+
(my-result {}))
|
503
|
+
(replace
|
504
|
+
(if (> *list-level* 0)
|
505
|
+
(string {^} whole-list-regex)
|
506
|
+
(string {(?:(?<=\n\n)|\A\n?)} whole-list-regex))
|
507
|
+
txt
|
508
|
+
(begin
|
509
|
+
(set 'my-list $1)
|
510
|
+
(if (find $3 marker-ul)
|
511
|
+
(set 'list-type "ul" 'marker-type marker-ul)
|
512
|
+
(set 'list-type "ol" 'marker-type marker-ol))
|
513
|
+
(replace [text]\n{2,}[/text] my-list "\n\n\n" 0)
|
514
|
+
(set 'my-result (process-list-items my-list marker-any))
|
515
|
+
(replace {\s+$} my-result {} 0)
|
516
|
+
(string {<} list-type {>} "\n" my-result "\n" {</} list-type {>} "\n"))
|
517
|
+
10 ; must be multiline
|
518
|
+
)))
|
519
|
+
|
520
|
+
(define (process-list-items list-text marker-any)
|
521
|
+
(let ((list-regex (string [text](\n)?(^[ \t]*)([/text] marker-any [text])[ \t]+((?s:.+?)(\n{1,2}))(?=\n*(\z|\2([/text] marker-any [text])[ \t]+))[/text]))
|
522
|
+
(item {})
|
523
|
+
(leading-line {})
|
524
|
+
(leading-space {})
|
525
|
+
(result {}))
|
526
|
+
(inc *list-level*)
|
527
|
+
(replace [text]\n{2,}\z[/text] list-text "\n" 0)
|
528
|
+
(set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {})
|
529
|
+
(replace
|
530
|
+
list-regex
|
531
|
+
list-text
|
532
|
+
(begin
|
533
|
+
(set 'item $4)
|
534
|
+
(set 'leading-line $1)
|
535
|
+
(set 'leading-space $2)
|
536
|
+
(if (or (not (empty? leading-line)) (ends-with item "\n{2,}" 0))
|
537
|
+
(set 'item (block-transforms (outdent item)))
|
538
|
+
; recurse for sub lists
|
539
|
+
(begin
|
540
|
+
(set 'item (lists (outdent item)))
|
541
|
+
(set 'item (span-transforms (trim item "\n")))
|
542
|
+
))
|
543
|
+
(string {<li>} item {</li>} "\n"))
|
544
|
+
10)
|
545
|
+
(dec *list-level*)
|
546
|
+
list-text))
|
547
|
+
|
548
|
+
(define (code-blocks txt)
|
549
|
+
(let ((code-block {})
|
550
|
+
(token-list '()))
|
551
|
+
(replace
|
552
|
+
[text](?:\n\n|\A)((?:(?:[ ]{4}|\t).*\n+)+)((?=^[ ]{0,3}\S)|\Z)[/text]
|
553
|
+
txt
|
554
|
+
(begin
|
555
|
+
(set 'code-block $1)
|
556
|
+
; format if Nestor module is loaded and it's not marked as plain
|
557
|
+
(if (and (not (starts-with code-block " ;plain\n")) (context? Nestor))
|
558
|
+
; format newlisp
|
559
|
+
(begin
|
560
|
+
; remove flag if present
|
561
|
+
(replace "[ ]{4};newlisp\n" code-block {} 0)
|
562
|
+
(set 'code-block (protect (Nestor:nlx-to-html (Nestor:my-read (trim (detab (outdent code-block)) "\n")))))
|
563
|
+
code-block)
|
564
|
+
; don't format
|
565
|
+
(begin
|
566
|
+
; trim leading and trailing newlines
|
567
|
+
(replace "[ ]{4};plain\n" code-block {} 0)
|
568
|
+
(set 'code-block (trim (detab (encode-code (outdent code-block))) "\n"))
|
569
|
+
(set '$1 {})
|
570
|
+
(set 'code-block (string "\n\n<pre><code>" code-block "\n</code></pre>\n\n")))))
|
571
|
+
10)))
|
572
|
+
|
573
|
+
(define (block-quotes txt)
|
574
|
+
(let ((block-quote {}))
|
575
|
+
(replace
|
576
|
+
[text]((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)[/text]
|
577
|
+
txt
|
578
|
+
(begin
|
579
|
+
(set 'block-quote $1)
|
580
|
+
(replace {^[ ]*>[ ]?} block-quote {} 2)
|
581
|
+
(replace {^[ ]+$} block-quote {} 2)
|
582
|
+
(set 'block-quote (block-transforms block-quote)) ; recurse
|
583
|
+
; remove leading spaces
|
584
|
+
(replace
|
585
|
+
{(\s*<pre>.+?</pre>)}
|
586
|
+
block-quote
|
587
|
+
(trim $1)
|
588
|
+
2)
|
589
|
+
(string "<blockquote>\n" block-quote "\n</blockquote>\n\n"))
|
590
|
+
2)))
|
591
|
+
|
592
|
+
(define (outdent s)
|
593
|
+
(replace [text]^(\t|[ ]{1,4})[/text] s {} 2))
|
594
|
+
|
595
|
+
(define (detab s)
|
596
|
+
(replace [text](.*?)\t[/text]
|
597
|
+
s
|
598
|
+
(string $1 (dup { } (- 4 (% (length $1) 4))))
|
599
|
+
2))
|
600
|
+
|
601
|
+
(define (form-paragraphs txt)
|
602
|
+
(let ((grafs '())
|
603
|
+
(original nil))
|
604
|
+
(set 'txt (trim txt "\n")) ; strip blank lines before and after
|
605
|
+
(set 'grafs (parse txt "\n{2,}" 0)) ; split
|
606
|
+
(dolist (p grafs)
|
607
|
+
(if (set 'original (lookup p *hashed-html-blocks*))
|
608
|
+
; html blocks
|
609
|
+
(setf (grafs $idx) original)
|
610
|
+
; wrap <p> tags round everything else
|
611
|
+
(setf (grafs $idx) (string {<p>} (replace {^[ ]*} (span-transforms p) {} (+ 4 8 16)) {</p>}))))
|
612
|
+
(join grafs "\n\n")))
|
613
|
+
|
614
|
+
[text]
|
615
|
+
; three command line arguments: let's hope last one is a file
|
616
|
+
(when (= 3 (length (main-args)))
|
617
|
+
(println (markdown (read-file (main-args 2))))
|
618
|
+
(exit))
|
619
|
+
|
620
|
+
; hack for command-line and module loading
|
621
|
+
(set 'level (sys-info 3))
|
622
|
+
|
623
|
+
; if level is 2, then we're probably invoking markdown.lsp directly
|
624
|
+
; if level is > 3, then we're probably loading it into another script...
|
625
|
+
|
626
|
+
(when (= level 2)
|
627
|
+
; running on command line, read STDIN and execute:
|
628
|
+
(while (read-line)
|
629
|
+
(push (current-line) *stdin* -1))
|
630
|
+
(println (markdown (join *stdin* "\n")))
|
631
|
+
(exit))
|
632
|
+
[/text]
|
633
|
+
|
634
|
+
;; version 2011-09-16 16:31:29
|
635
|
+
;; Changed to different hash routine. Profiling shows that hashing takes 40% of the execution time.
|
636
|
+
;; Unfortunately this new version is only very slightly faster.
|
637
|
+
;; Command-line arguments hack in previous version doesn't work.
|
638
|
+
;;
|
639
|
+
;; version 2011-08-18 15:04:40
|
640
|
+
;; various fixes, and added hack for running this from the command-line:
|
641
|
+
;; echo "hi there" | newlisp markdown.lsp
|
642
|
+
;; echo "hello world" | markdown.lsp
|
643
|
+
;; cat file.text | newlisp markdown.lsp
|
644
|
+
;;
|
645
|
+
;; version 2010-11-14 17:34:52
|
646
|
+
;; some problems in ustring. Probably remove it one day, as it's non standard...
|
647
|
+
;;
|
648
|
+
;; version 2010-10-14 18:41:38
|
649
|
+
;; added code to work round PCRE crash in (protect ...
|
650
|
+
;;
|
651
|
+
;; version date 2010-07-10 22:20:25
|
652
|
+
;; modified call to 'read' since lutz has changed it
|
653
|
+
;;
|
654
|
+
;; version date 2009-11-16 22:10:10
|
655
|
+
;; fixed bug in tokenize.html
|
656
|
+
;;
|
657
|
+
;; version date 2008-10-08 18:44:46
|
658
|
+
;; changed nth-set to setf to be version-10 ready.
|
659
|
+
;; This means that now this script will NOT work with
|
660
|
+
;; earlier versions of newLISP!!!!!!!!!!!
|
661
|
+
;; requires Nestor if you want source code colouring...
|
662
|
+
;;
|
663
|
+
;; version date 2008-08-08 16:54:56
|
664
|
+
;; changed (unless to (if (not ... :(
|
665
|
+
;;
|
666
|
+
;; version date 2008-07-20 14:!2:29
|
667
|
+
;; added hex-str-to-unicode-char ustring
|
668
|
+
;;
|
669
|
+
;; version date 2008-03-07 15:36:09
|
670
|
+
;; fixed load error
|
671
|
+
;;
|
672
|
+
;; version date 2007-11-17 16:20:57
|
673
|
+
;; added syntax colouring module
|
674
|
+
;;
|
675
|
+
;; version date 2007-11-14 09:19:42
|
676
|
+
;; removed reliance on dostring for compatibility with 9.1
|
677
|
+
|
678
|
+
|
679
|
+
; eof
|