pygments.rb 0.2.4 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. data/lib/pygments/version.rb +1 -1
  2. data/vendor/pygments-main/AUTHORS +14 -0
  3. data/vendor/pygments-main/CHANGES +34 -3
  4. data/vendor/pygments-main/Makefile +1 -1
  5. data/vendor/pygments-main/docs/generate.py +1 -1
  6. data/vendor/pygments-main/external/markdown-processor.py +1 -1
  7. data/vendor/pygments-main/external/moin-parser.py +1 -1
  8. data/vendor/pygments-main/external/rst-directive-old.py +1 -1
  9. data/vendor/pygments-main/external/rst-directive.py +1 -1
  10. data/vendor/pygments-main/pygments/__init__.py +1 -1
  11. data/vendor/pygments-main/pygments/cmdline.py +4 -1
  12. data/vendor/pygments-main/pygments/console.py +1 -1
  13. data/vendor/pygments-main/pygments/filter.py +1 -1
  14. data/vendor/pygments-main/pygments/filters/__init__.py +1 -1
  15. data/vendor/pygments-main/pygments/formatter.py +1 -1
  16. data/vendor/pygments-main/pygments/formatters/__init__.py +1 -1
  17. data/vendor/pygments-main/pygments/formatters/_mapping.py +1 -1
  18. data/vendor/pygments-main/pygments/formatters/bbcode.py +1 -1
  19. data/vendor/pygments-main/pygments/formatters/html.py +2 -2
  20. data/vendor/pygments-main/pygments/formatters/img.py +1 -1
  21. data/vendor/pygments-main/pygments/formatters/latex.py +3 -2
  22. data/vendor/pygments-main/pygments/formatters/other.py +1 -1
  23. data/vendor/pygments-main/pygments/formatters/rtf.py +1 -1
  24. data/vendor/pygments-main/pygments/formatters/svg.py +1 -1
  25. data/vendor/pygments-main/pygments/formatters/terminal.py +5 -2
  26. data/vendor/pygments-main/pygments/formatters/terminal256.py +5 -2
  27. data/vendor/pygments-main/pygments/lexer.py +29 -10
  28. data/vendor/pygments-main/pygments/lexers/__init__.py +14 -11
  29. data/vendor/pygments-main/pygments/lexers/_asybuiltins.py +1 -1
  30. data/vendor/pygments-main/pygments/lexers/_clbuiltins.py +1 -1
  31. data/vendor/pygments-main/pygments/lexers/_luabuiltins.py +1 -1
  32. data/vendor/pygments-main/pygments/lexers/_mapping.py +41 -23
  33. data/vendor/pygments-main/pygments/lexers/_phpbuiltins.py +1 -1
  34. data/vendor/pygments-main/pygments/lexers/_postgres_builtins.py +1 -1
  35. data/vendor/pygments-main/pygments/lexers/_scilab_builtins.py +29 -0
  36. data/vendor/pygments-main/pygments/lexers/_vimbuiltins.py +3 -3
  37. data/vendor/pygments-main/pygments/lexers/agile.py +148 -443
  38. data/vendor/pygments-main/pygments/lexers/asm.py +5 -3
  39. data/vendor/pygments-main/pygments/lexers/compiled.py +298 -294
  40. data/vendor/pygments-main/pygments/lexers/dotnet.py +40 -34
  41. data/vendor/pygments-main/pygments/lexers/functional.py +723 -4
  42. data/vendor/pygments-main/pygments/lexers/hdl.py +228 -6
  43. data/vendor/pygments-main/pygments/lexers/jvm.py +678 -0
  44. data/vendor/pygments-main/pygments/lexers/math.py +65 -2
  45. data/vendor/pygments-main/pygments/lexers/other.py +875 -481
  46. data/vendor/pygments-main/pygments/lexers/parsers.py +1 -1
  47. data/vendor/pygments-main/pygments/lexers/shell.py +360 -0
  48. data/vendor/pygments-main/pygments/lexers/special.py +1 -1
  49. data/vendor/pygments-main/pygments/lexers/sql.py +565 -0
  50. data/vendor/pygments-main/pygments/lexers/templates.py +1 -1
  51. data/vendor/pygments-main/pygments/lexers/text.py +237 -100
  52. data/vendor/pygments-main/pygments/lexers/web.py +146 -10
  53. data/vendor/pygments-main/pygments/plugin.py +1 -1
  54. data/vendor/pygments-main/pygments/scanner.py +1 -1
  55. data/vendor/pygments-main/pygments/style.py +1 -1
  56. data/vendor/pygments-main/pygments/styles/__init__.py +2 -1
  57. data/vendor/pygments-main/pygments/styles/autumn.py +1 -1
  58. data/vendor/pygments-main/pygments/styles/borland.py +1 -1
  59. data/vendor/pygments-main/pygments/styles/bw.py +1 -1
  60. data/vendor/pygments-main/pygments/styles/colorful.py +1 -1
  61. data/vendor/pygments-main/pygments/styles/default.py +1 -1
  62. data/vendor/pygments-main/pygments/styles/emacs.py +1 -1
  63. data/vendor/pygments-main/pygments/styles/friendly.py +1 -1
  64. data/vendor/pygments-main/pygments/styles/fruity.py +1 -2
  65. data/vendor/pygments-main/pygments/styles/manni.py +1 -1
  66. data/vendor/pygments-main/pygments/styles/monokai.py +1 -1
  67. data/vendor/pygments-main/pygments/styles/murphy.py +1 -1
  68. data/vendor/pygments-main/pygments/styles/native.py +1 -1
  69. data/vendor/pygments-main/pygments/styles/pastie.py +1 -1
  70. data/vendor/pygments-main/pygments/styles/perldoc.py +1 -1
  71. data/vendor/pygments-main/pygments/styles/rrt.py +33 -0
  72. data/vendor/pygments-main/pygments/styles/tango.py +1 -1
  73. data/vendor/pygments-main/pygments/styles/trac.py +1 -1
  74. data/vendor/pygments-main/pygments/styles/vim.py +1 -1
  75. data/vendor/pygments-main/pygments/styles/vs.py +1 -1
  76. data/vendor/pygments-main/pygments/token.py +1 -1
  77. data/vendor/pygments-main/pygments/unistring.py +1 -1
  78. data/vendor/pygments-main/pygments/util.py +2 -2
  79. data/vendor/pygments-main/scripts/check_sources.py +2 -2
  80. data/vendor/pygments-main/scripts/find_codetags.py +1 -1
  81. data/vendor/pygments-main/scripts/find_error.py +5 -2
  82. data/vendor/pygments-main/scripts/get_vimkw.py +9 -4
  83. data/vendor/pygments-main/setup.py +1 -1
  84. data/vendor/pygments-main/tests/examplefiles/classes.dylan +16 -0
  85. data/vendor/pygments-main/tests/examplefiles/coq_RelationClasses +447 -0
  86. data/vendor/pygments-main/tests/examplefiles/example.cls +15 -0
  87. data/vendor/pygments-main/tests/examplefiles/example.moon +629 -0
  88. data/vendor/pygments-main/tests/examplefiles/example.p +34 -0
  89. data/vendor/pygments-main/tests/examplefiles/example.snobol +15 -0
  90. data/vendor/pygments-main/tests/examplefiles/example.u +548 -0
  91. data/vendor/pygments-main/tests/examplefiles/example_elixir.ex +363 -0
  92. data/vendor/pygments-main/tests/examplefiles/foo.sce +6 -0
  93. data/vendor/pygments-main/tests/examplefiles/http_request_example +14 -0
  94. data/vendor/pygments-main/tests/examplefiles/http_response_example +27 -0
  95. data/vendor/pygments-main/tests/examplefiles/irc.lsp +214 -0
  96. data/vendor/pygments-main/tests/examplefiles/markdown.lsp +679 -0
  97. data/vendor/pygments-main/tests/examplefiles/nemerle_sample.n +4 -2
  98. data/vendor/pygments-main/tests/examplefiles/reversi.lsp +427 -0
  99. data/vendor/pygments-main/tests/examplefiles/scilab.sci +30 -0
  100. data/vendor/pygments-main/tests/examplefiles/test.bro +250 -0
  101. data/vendor/pygments-main/tests/examplefiles/test.cs +23 -0
  102. data/vendor/pygments-main/tests/examplefiles/test.dart +23 -0
  103. data/vendor/pygments-main/tests/examplefiles/test.ecl +58 -0
  104. data/vendor/pygments-main/tests/examplefiles/test.fan +818 -0
  105. data/vendor/pygments-main/tests/examplefiles/test.ps1 +108 -0
  106. data/vendor/pygments-main/tests/examplefiles/test.vhdl +161 -0
  107. data/vendor/pygments-main/tests/old_run.py +1 -1
  108. data/vendor/pygments-main/tests/run.py +1 -1
  109. data/vendor/pygments-main/tests/test_basic_api.py +4 -3
  110. data/vendor/pygments-main/tests/test_clexer.py +1 -1
  111. data/vendor/pygments-main/tests/test_cmdline.py +1 -1
  112. data/vendor/pygments-main/tests/test_examplefiles.py +4 -3
  113. data/vendor/pygments-main/tests/test_html_formatter.py +33 -1
  114. data/vendor/pygments-main/tests/test_latex_formatter.py +1 -1
  115. data/vendor/pygments-main/tests/test_perllexer.py +137 -0
  116. data/vendor/pygments-main/tests/test_regexlexer.py +1 -1
  117. data/vendor/pygments-main/tests/test_token.py +1 -1
  118. data/vendor/pygments-main/tests/test_using_api.py +1 -1
  119. data/vendor/pygments-main/tests/test_util.py +35 -5
  120. metadata +30 -4
@@ -0,0 +1,679 @@
1
+ #!/usr/bin/env newlisp
2
+
3
+ ;; @module markdown
4
+ ;; @author cormullion
5
+ ;; @description a port of John Gruber's Markdown to newLISP
6
+ ;; @location http://unbalanced-parentheses.nfshost.com/
7
+ ;; @version of date 2011-10-02 22:36:02
8
+ ;; version history: at the end
9
+ ;; a port of John Gruber's Markdown.pl (http://daringfireball.net/markdown) script to newLISP...
10
+ ;; see his original Perl script for explanations of the fearsome regexen and
11
+ ;; byzantine logic, etc...
12
+ ;; TODO:
13
+ ;; the following Markdown tests fail:
14
+ ;; Inline HTML (Advanced) ... FAILED
15
+ ;; Links, reference style ... FAILED -- nested brackets
16
+ ;; Links, shortcut references ... FAILED
17
+ ;; Markdown Documentation - Syntax ... FAILED
18
+ ;; Ordered and unordered lists ... FAILED -- a nested ordered list error
19
+ ;; parens in url : ![this is a stupid URL](http://example.com/(parens).jpg) see (Images.text)
20
+ ;; Add: email address scrambling
21
+
22
+ (context 'Hash)
23
+ (define HashTable:HashTable)
24
+
25
+ (define (build-escape-table) ; registers a placeholder in HashTable for each Markdown special character
26
+ (set '*escape-chars* [text]\`*_{}[]()>#+-.![/text])
27
+ (dolist (c (explode *escape-chars*)) ; iterate over the pieces produced by explode
28
+ (HashTable c (hash c)))) ; hash already stores c -> placeholder; this stores the returned value under c again
29
+
30
+ (define (init-hash txt)
31
+ ; finds a hash identifier ("HASH" + counter) that doesn't occur anywhere in txt, then builds the escape table
32
+ (set 'counter 0)
33
+ (set 'hash-prefix "HASH")
34
+ (set 'hash-id (string hash-prefix counter))
35
+ (do-while (find hash-id txt) ; keep incrementing counter while the candidate id is still found in txt
36
+ (set 'hash-id (string hash-prefix (inc counter))))
37
+ (Hash:build-escape-table)) ; counter keeps its value, so placeholders continue numbering from here
38
+
39
+ (define (hash s) ; stores s in HashTable under a new placeholder: hash-id followed by the incremented counter
40
+ (HashTable s (string hash-id (inc counter))))
41
+
42
+ (context 'markdown)
43
+
44
+ (define (markdown:markdown txt) ; entry point: resets state, then runs the conversion pipeline on txt
45
+ (initialize)
46
+ (Hash:init-hash txt)
47
+ (unescape-special-chars ; last step: swap the placeholders back for the original characters
48
+ (block-transforms
49
+ (strip-link-definitions
50
+ (protect
51
+ (cleanup txt)))))) ; nested calls evaluate innermost first: cleanup, protect, strip-link-definitions, block-transforms
52
+
53
+ (define (initialize) ; resets the globals used during one conversion
54
+ (set '*escape-pairs* '( ; each entry is (pattern literal); encode-backslash-escapes matches the pattern and looks the literal up in HashTable
55
+ ({\\\\} {\})
56
+ ({\\`} {`})
57
+ ({\\\*} {*})
58
+ ({\\_} {_})
59
+ ([text]\\\{[/text] [text]{[/text])
60
+ ([text]\\\}[/text] [text]}[/text])
61
+ ({\\\[} {[})
62
+ ({\\\]} {]})
63
+ ({\\\(} {(})
64
+ ({\\\)} {)})
65
+ ({\\>} {>})
66
+ ({\\\#} {#})
67
+ ({\\\+} {+})
68
+ ({\\\-} {-})
69
+ ({\\\.} {.})
70
+ ({\\!} {!})))
71
+ (set '*hashed-html-blocks* '()) ; filled by protect with (placeholder html-block) entries
72
+ (set '*list-level* 0)) ; incremented and decremented by process-list-items
73
+
74
+ (define (block-transforms txt) ; block-level pass; the nested calls run innermost first, starting with headers
75
+ (form-paragraphs
76
+ (protect
77
+ (block-quotes
78
+ (code-blocks
79
+ (lists
80
+ (horizontal-rules
81
+ (headers txt)))))))) ; order of evaluation: headers, horizontal-rules, lists, code-blocks, block-quotes, protect, form-paragraphs
82
+
83
+ (define (span-transforms txt) ; inline (span-level) pass; the nested calls run innermost first, starting with code-spans
84
+ (line-breaks
85
+ (emphasis
86
+ (amps-and-angles
87
+ (auto-links
88
+ (anchors
89
+ (images
90
+ (escape-special-chars
91
+ (escape-special-chars (code-spans txt) 'inside-attributes))))))))) ; first escape pass runs with within-tag-attributes set, the second without
92
+
93
+ (define (tokenize-html xhtml)
94
+ ; return list of tag/text portions of xhtml text, each element being (tag string) or (text string), in input order
95
+ (letn (
96
+ (tag-match [text]((?s:<!(-- .*? -- \s*)+>)|
97
+ (?s:<\?.*?\?>)|
98
+ (?:<[a-z/!$](?:[^<>]|
99
+ (?:<[a-z/!$](?:[^<>]|
100
+ (?:<[a-z/!$](?:[^<>]|
101
+ (?:<[a-z/!$](?:[^<>]|
102
+ (?:<[a-z/!$](?:[^<>]|
103
+ (?:<[a-z/!$](?:[^<>])*>))*>))*>))*>))*>))*>))[/text]) ; yeah, well... (the tag alternative is repeated inside itself six levels deep)
104
+ (str xhtml)
105
+ (len (length str)) ; length of the original input; str itself is shortened below
106
+ (pos 0)
107
+ (tokens '()))
108
+ (while (set 'tag-start (find tag-match str 8)) ; loop while another tag is found in the remaining text
109
+ (if (< pos tag-start)
110
+ (push (list 'text (slice str pos (- tag-start pos))) tokens -1)) ; text that precedes the tag
111
+ (push (list 'tag $0) tokens -1) ; $0 is the matched tag
112
+ (set 'str (slice str (+ tag-start (length $0)))) ; drop everything up to the end of the tag
113
+ (set 'pos 0))
114
+ ; leftovers: whatever remains in str after the last tag
115
+ (if (< pos len)
116
+ (push (list 'text (slice str pos (- len pos))) tokens -1))
117
+ tokens))
118
+
119
+ (define (escape-special-chars txt (within-tag-attributes nil)) ; hides Markdown metacharacters behind HashTable placeholders; returns the rebuilt text
120
+ (let ((temp (tokenize-html txt))
121
+ (new-text {}))
122
+ (dolist (pair temp)
123
+ (if (= (first pair) 'tag)
124
+ ; 'tag: escape backslash, code tags, * and _ inside the tag text
125
+ (begin
126
+ (set 'new-text (replace {\\} (last pair) (HashTable {\}) 0)) ; key must be the single backslash registered by build-escape-table, not the two-character {\\}
127
+ (replace [text](?<=.)</?code>(?=.)[/text] new-text (HashTable {`}) 0)
128
+ (replace {\*} new-text (HashTable {*}) 0)
129
+ (replace {_} new-text (HashTable {_} ) 0))
130
+ ; 'text: left as is when within-tag-attributes is set, otherwise backslash escapes are encoded
131
+ (if within-tag-attributes
132
+ (set 'new-text (last pair))
133
+ (set 'new-text (encode-backslash-escapes (last pair)))))
134
+ (setf (temp $idx) (list (first pair) new-text))) ; write the processed token back in place
135
+ ; return as text
136
+ (join (map last temp))))
137
+
138
+ (define (encode-backslash-escapes t) ; replaces each backslash-escaped character in t with that character's HashTable placeholder
139
+ (dolist (pair *escape-pairs*) ; pair is (pattern literal), as set up in initialize
140
+ (replace (first pair) t (HashTable (last pair)) 14)))
141
+
142
+ (define (encode-code s)
143
+ ; encode/escape certain characters inside Markdown code runs: HTML-escape & < >, then hide * _ { [ ] \ behind placeholders
144
+ (replace {&} s "&amp;" 0) ; ampersand first, before entities containing & are inserted
145
+ (replace {<} s "&lt;" 0)
146
+ (replace {>} s "&gt;" 0)
147
+ (replace {\*} s (HashTable {*}) 0) ; was (HashTable {\\}): asterisks must map to the placeholder for *
148
+ (replace {_} s (HashTable {_}) 0)
149
+ (replace "{" s (HashTable "{") 0)
150
+ (replace {\[} s (HashTable {[}) 0)
151
+ (replace {\]} s (HashTable {]}) 0)
152
+ (replace {\\} s (HashTable "\\") 0)) ; the value of this last replace is the function result
153
+
154
+ (define (code-spans s) ; wraps backtick-delimited runs in <code> tags, with the content trimmed and passed through encode-code
155
+ (replace
156
+ {(?<!\\)(`+)(.+?)(?<!`)\1(?!`)}
157
+ s
158
+ (string {<code>} (encode-code (trim $2)) {</code>}) ; $2 is the text between the backtick runs; \1 requires a closing run equal to the opening one
159
+ 2))
160
+
161
+ (define (encode-alt s) ; HTML-escapes & and " in s; no caller is visible in this file
162
+ (replace {&} s "&amp;" 0)
163
+ (replace {"} s "&quot;" 0))
164
+
165
+ (define (images txt) ; converts reference-style, inline and empty image syntax in txt to <img> tags
166
+ (let ((alt-text {})
167
+ (url {})
168
+ (title {})
169
+ (ref-regex {(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])})
170
+ (inline-regex {(!\[(.*?)\]\([ \t]*<?(\S+?)>?[ \t]*((['"])(.*?)\5[ \t]*)?\))})
171
+ (whole-match {})
172
+ (result {})
173
+ (id-ref {})
174
+ (url {})) ; NOTE(review): url is already bound above; this second binding looks redundant
175
+ ; reference links ![alt text][id]
176
+ (replace
177
+ ref-regex
178
+ txt
179
+ (begin
180
+ (set 'whole-match $1 'alt-text $2 'id-ref $3)
181
+ (if alt-text
182
+ (replace {"} alt-text {&quot;} 0))
183
+ (if (empty? id-ref) ; ![alt][] form: the lower-cased alt text serves as the id
184
+ (set 'id-ref (lower-case alt-text)))
185
+ (if (lookup id-ref *link-database*)
186
+ (set 'url (first (lookup id-ref *link-database*)))
187
+ (set 'url nil))
188
+ (if url
189
+ (begin
190
+ (replace {\*} url (HashTable {*}) 0) ; hide * and _ behind placeholders
191
+ (replace {_} url (HashTable {_}) 0)
192
+ ))
193
+ (if (last (lookup id-ref *link-database*))
194
+ ; title
195
+ (begin
196
+ (set 'title (last (lookup id-ref *link-database*)))
197
+ (replace {"} title {&quot;} 0)
198
+ (replace {\*} title (HashTable {*}) 0)
199
+ (replace {_} title (HashTable {_}) 0))
200
+ ; no title
201
+ (set 'title {})
202
+ )
203
+ (if url
204
+ (set 'result (string
205
+ {<img src="}
206
+ (trim url)
207
+ {" alt="}
208
+ alt-text {" }
209
+ (if (not (empty? title))
210
+ (string { title="} title {"}) {})
211
+ { />}))
212
+ (set 'result whole-match)) ; unknown id: the original text is put back unchanged
213
+ )
214
+ 0
215
+ )
216
+ ; inline image refs: ![alt text](url "optional title")
217
+ (replace
218
+ inline-regex
219
+ txt
220
+ (begin
221
+ (set 'whole-match $1)
222
+ (set 'alt-text $2)
223
+ (set 'url $3)
224
+ (set 'title $6)
225
+ (if alt-text
226
+ (replace {"} alt-text {&quot;} 0)
227
+ (set 'alt-text {}))
228
+ (if title
229
+ (begin
230
+ (replace {"} title {&quot;} 0)
231
+ (replace {\*} title (HashTable {*}) 0)
232
+ (replace {_} title (HashTable {_}) 0))
233
+ (set 'title {}))
234
+ (replace {\*} url (HashTable {*}) 0)
235
+ (replace {_} url (HashTable {_}) 0)
236
+ (string
237
+ {<img src="}
238
+ (trim url)
239
+ {" alt="}
240
+ alt-text {" }
241
+ (if title (string {title="} title {"}) {}) { />}) ; NOTE(review): title was set to {} above when absent, so this test presumably always succeeds - confirm
242
+ )
243
+ 0
244
+ )
245
+ ; empty ones are possible
246
+ (set '$1 {})
247
+ (replace {!\[(.*?)\]\([ \t]*\)}
248
+ txt
249
+ (string {<img src="" alt="} $1 {" title="" />})
250
+ 0)))
251
+
252
+ (define (make-anchor link-text id-ref )
253
+ ; Link defs are in the form: ^[id]: url "optional title"
254
+ ; stored in link db list as (id (url title))
255
+ ; params are text to be linked and the id of the link in the db
256
+ ; eg bar 1 for [bar][1]; returns the <a> element, or the [text][id] source when the id is unknown
257
+
258
+ (let ((title {})
259
+ (id id-ref)
260
+ (url nil))
261
+ (if link-text
262
+ (begin
263
+ (replace {"} link-text {&quot;} 0)
264
+ (replace {[ ]?\n} link-text { } 0) ; must run first: collapses an optional space plus newline into one space
265
+ (replace {\n} link-text { } 0))) ; kept for parity with the original; no newline is left after the line above
266
+ (if (null? id ) (set 'id (lower-case link-text))) ; [text][] form: the lower-cased link text serves as the id
267
+ (if (not (nil? (lookup id *link-database*)))
268
+ (begin
269
+ (set 'url (first (lookup id *link-database*)))
270
+ (replace {\*} url (HashTable {*}) 0) ; hide * and _ behind placeholders
271
+ (replace {_} url (HashTable {_}) 0)
272
+ (if (set 'title (last (lookup id *link-database*)))
273
+ (begin
274
+ (replace {"} title {&quot;} 0)
275
+ (replace {\*} title (HashTable {*}) 0)
276
+ (replace {_} title (HashTable {_}) 0))
277
+ (set 'title {})))
278
+ (set 'url nil))
279
+ (if url
280
+ (string {<a href="} (trim url)
281
+ {"}
282
+ (if (not (= title {})) (string { title="} (trim title) {"}) {})
283
+ {>} link-text {</a>})
284
+ (string {[} link-text {][} id-ref {]}))))
285
+
286
+ (define (anchors txt) ; converts reference-style and inline links in txt to <a> elements; returns txt
287
+ (letn ((nested-brackets {(?>[^\[\]]+)*})
288
+ (ref-link-regex (string {(\[(} nested-brackets {)\][ ]?(?:\n[ ]*)?\[(.*?)\])})) ; overwritten by the set further down before it is used
289
+ (inline-regex {(\[(.*?)\]\([ ]*<?(.*?\)?)>?[ ]*((['"])(.*?)\5[ \t]*)?\))})
290
+ (link-text {})
291
+ (url {})
292
+ (title {}))
293
+ ; reference-style links: [link text] [id]
294
+ (set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {} '$6 {}) ; i still don't think I should have to do this...
295
+
296
+ ; what about this regex instead?
297
+ (set 'ref-link-regex {(\[(.*?)\][ ]?\[(.*?)\])})
298
+
299
+ (replace ref-link-regex txt (make-anchor $2 $3) 8) ; $2 is link text, $3 is id
300
+ ; inline links: [link text](url "optional title")
301
+ (set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {} '$6 {})
302
+ (replace
303
+ inline-regex
304
+ txt
305
+ (begin
306
+ (set 'link-text $2)
307
+ (set 'url $3)
308
+ (set 'title $6)
309
+ (if link-text (replace {"} link-text {&quot;} 0))
310
+ (if title
311
+ (begin
312
+ (replace {"} title {&quot;} 0)
313
+ (replace {\*} title (HashTable {*}) 0)
314
+ (replace {_} title (HashTable {_}) 0))
315
+ (set 'title {}))
316
+ (replace {\*} url (HashTable {*}) 0) ; hide * and _ behind placeholders
317
+ (replace {_} url (HashTable {_}) 0)
318
+ (replace {^<(.*)>$} url $1 0) ; strip surrounding angle brackets from the url
319
+ (string
320
+ {<a href="}
321
+ (trim url)
322
+ {"}
323
+ (if (not (= title {}))
324
+ (string { title="} (trim title) {"})
325
+ {})
326
+ {>} link-text {</a>}
327
+ ))
328
+ 8
329
+ ) ; replace
330
+ ) txt)
331
+
332
+ (define (auto-links txt) ; turns <http:...>, <https:...> and <ftp:...> into links whose text is the URL itself
333
+ (replace
334
+ [text]<((https?|ftp):[^'">\s]+)>[/text]
335
+ txt
336
+ (string {<a href="} $1 {">} $1 {</a>}) ; $1 is the URL without the angle brackets
337
+ 0
338
+ )
339
+ ; to-do: email ...
340
+ )
341
+
342
+ (define (amps-and-angles txt)
343
+ ; Smart processing for ampersands and angle brackets: escapes only those that do not already start an entity or a tag
344
+ (replace
345
+ [text]&(?!\#?[xX]?(?:[0-9a-fA-F]+|\w+);)[/text]
346
+ txt
347
+ {&amp;} ; the negative lookahead skips ampersands already followed by an entity body and ;
348
+ 10
349
+ )
350
+ (replace
351
+ [text]<(?![a-z/?\$!])[/text]
352
+ txt
353
+ {&lt;} ; only a < not followed by a-z / ? $ or ! is escaped
354
+ 10))
355
+
356
+ (define (emphasis txt)
357
+ ; italics/bold: strong first, so ** and __ are consumed before the single * and _ rule runs
358
+ (replace
359
+ [text] (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 [/text]
360
+ txt
361
+ (string {<strong>} $2 {</strong>}) ; $2 is the text between the matching delimiters
362
+ 8
363
+ )
364
+ (replace
365
+ [text] (\*|_) (?=\S) (.+?) (?<=\S) \1 [/text]
366
+ txt
367
+ (string {<em>} $2 {</em>})
368
+ 8
369
+ ))
370
+
371
+ (define (line-breaks txt)
372
+ ; handles line break markers: two or more spaces before a newline become " <br/>" plus the newline
373
+ (replace " {2,}\n" txt " <br/>\n" 0))
374
+
375
+ (define (hex-str-to-unicode-char strng)
376
+ ; given a five character string, assume it's "U" + 4 hex chars and convert
377
+ ; return the character... (everything after the first character is parsed as hex, so longer input is accepted too)
378
+ (char (int (string "0x" (1 strng)) 0 16))) ; (1 strng) drops the leading "U"
379
+
380
+ (define (ustring s)
381
+ ; any string of four or more lower-case hex digits preceded by U (the pattern is {4,}, not exactly four)
382
+ (replace "U[0-9a-f]{4,}" s (hex-str-to-unicode-char $0) 0)) ; $0 is the whole match including the U
383
+
384
+ (define (cleanup txt)
385
+ ; cleanup the text by normalizing some possible variations; the value of the last replace is the result
386
+ (replace "\r\n|\r" txt "\n" 0) ; standardize line ends
387
+ (push "\n\n" txt -1) ; end with two returns
388
+ (set 'txt (detab txt)) ; convert tabs to spaces
389
+
390
+ ; convert inline Unicode:
391
+ (set 'txt (ustring txt))
392
+ (replace "\n[ \t]+\n" txt "\n\n" 0) ; lines with only spaces and tabs
393
+ )
394
+
395
+ (define (protect txt)
396
+ ; protect or "hash html blocks": each matched block is swapped for a placeholder and recorded in *hashed-html-blocks*
397
+ (letn ((nested-block-regex [text](^<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b(.*\n)*?</\2>[ \t]*(?=\n+|\Z))[/text])
398
+ (liberal-tag-regex [text](^<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b(.*\n)*?.*</\2>[ \t]*(?=\n+|\Z))[/text])
399
+ (hr-regex [text](?:(?<=\n\n)|\A\n?)([ ]{0,3}<(hr)\b([^<>])*?/?>[ \t]*(?=\n{2,}|\Z))[/text])
400
+ (html-comment-regex [text](?:(?<=\n\n)|\A\n?)([ ]{0,3}(?s:<!(--.*?--\s*)+>)[ \t]*(?=\n{2,}|\Z))[/text])
401
+ (results '())
402
+ (chunk-count (length (set 'chunks (parse txt "\n\n")))) ; chunks holds the pieces of txt split at blank lines
403
+ (chunk-size 500))
404
+
405
+ ; due to a limitation in PCRE, long sections have to be divided up otherwise we'll crash
406
+ ; so divide up long texts into chunks, then do the regex on each chunk
407
+ ; not an ideal solution, but it works ok :(
408
+
409
+ (for (i 0 chunk-count chunk-size)
410
+ ; do a chunk: take chunk-size pieces starting at i (the old length of chunk-size - 1 skipped every 500th piece)
411
+ (set 'text-chunk (join (i (- (min chunk-count (+ i chunk-size)) i) chunks) "\n\n"))
412
+ (dolist (rgx (list nested-block-regex liberal-tag-regex hr-regex html-comment-regex))
413
+ (replace
414
+ rgx
415
+ text-chunk
416
+ (begin
417
+ (set 'key (Hash:hash $1)) ; placeholder for the matched block
418
+ (push (list key $1 ) *hashed-html-blocks* -1) ; remembered so form-paragraphs can restore the block
419
+ (string "\n\n" key "\n\n"))
420
+ 2))
421
+ ; save this partial result
422
+ (push text-chunk results -1)
423
+ ) ; for
424
+ ; return string result
425
+ (join results "\n\n")))
426
+
427
+ (define (unescape-special-chars t)
428
+ ; Swap back in all the special characters we've hidden. NOTE(review): one placeholder can be a prefix of another (counter suffix) - confirm replacement order cannot corrupt longer ones
429
+ (dolist (pair (HashTable)) ; each pair is (original placeholder)
430
+ (replace (last pair) t (first pair) 10)) t)
431
+
432
+ (define (strip-link-definitions txt)
433
+ ; strip link definitions from the text and store them in *link-database*; returns the stripped text
434
+ ; Link defs are in the form: ^[id]: url "optional title"
435
+ ; stored in link db list as (id (url title))
436
+ (let ((link-db '())
437
+ (url {})
438
+ (id {})
439
+ (title {}))
440
+ (replace
441
+ [text]^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?[ \t]*\n?[ \t]*(?:(?<=\s)["(](.+?)[")][ \t]*)?(?:\n+|\Z)[/text]
442
+ txt
443
+ (begin
444
+ (set 'id (lower-case $1) 'url (amps-and-angles $2) 'title $3) ; ids are stored lower-cased
445
+ (if title (replace {"} title {&quot;} 0))
446
+ (push (list id (list url title)) link-db) ; pushed at the front, so later definitions come first in the list
447
+ (set '$3 {}) ; necessary?
448
+ (string {}) ; remove from text
449
+ )
450
+ 10)
451
+ (set '*link-database* link-db)
452
+ txt))
453
+
454
+ (define (horizontal-rules txt) ; replaces lines of three or more *, - or _ (optionally spaced) with a newline plus <hr />
455
+ (replace
456
+ [text]^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$[/text]
457
+ txt
458
+ "\n<hr />" ; asterisk rules
459
+ 14)
460
+ (replace
461
+ [text]^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$[/text]
462
+ txt
463
+ "\n<hr />" ; hyphen rules
464
+ 14)
465
+ (replace
466
+ [text]^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$[/text]
467
+ txt
468
+ "\n<hr />" ; underscore rules
469
+ 14))
470
+
471
+ (define (headers txt)
472
+ ; setext headers: a line underlined with = becomes <h1>, a line underlined with - becomes <h2>
473
+ (let ((level 1))
474
+ (replace
475
+ [text]^(.+)[ \t]*\n=+[ \t]*\n+[/text]
476
+ txt
477
+ (string "<h1>" (span-transforms $1) "</h1>\n\n") ; the header text gets the inline transforms
478
+ 2)
479
+
480
+ (replace
481
+ [text]^(.+)[ \t]*\n-+[ \t]*\n+[/text]
482
+ txt
483
+ (string "<h2>" (span-transforms $1) "</h2>\n\n")
484
+ 2)
485
+ ; atx headers: one to six leading # characters give the level
486
+ (replace
487
+ [text]^(\#{1,6})\s*(.+?)[ ]*\#*(\n+)[/text]
488
+ txt
489
+ (begin
490
+ (set 'level (length $1)) ; number of # characters
491
+ (string "<h" level ">" (span-transforms $2) "</h" level ">\n\n")
492
+ )
493
+ 2)))
494
+
495
+ (define (lists txt) ; wraps each run of list items in <ul> or <ol>; the items are built by process-list-items
496
+ (letn ((marker-ul {[*+-]})
497
+ (marker-ol {\d+[.]})
498
+ (marker-any (string {(?:} marker-ul {|} marker-ol {)}))
499
+ (whole-list-regex (string [text](([ ]{0,3}([/text] marker-any [text])[ \t]+)(?s:.+?)(\z|\n{2,}(?=\S)(?![ \t]*[/text] marker-any [text][ \t]+)))[/text]))
500
+ (my-list {})
501
+ (list-type {})
502
+ (my-result {}))
503
+ (replace
504
+ (if (> *list-level* 0) ; inside a list item the match is anchored with ^, otherwise after a blank line or at the start
505
+ (string {^} whole-list-regex)
506
+ (string {(?:(?<=\n\n)|\A\n?)} whole-list-regex))
507
+ txt
508
+ (begin
509
+ (set 'my-list $1)
510
+ (if (find $3 marker-ul) ; $3 is the first item's marker
511
+ (set 'list-type "ul" 'marker-type marker-ul) ; marker-type is set here but not read anywhere in this file
512
+ (set 'list-type "ol" 'marker-type marker-ol))
513
+ (replace [text]\n{2,}[/text] my-list "\n\n\n" 0)
514
+ (set 'my-result (process-list-items my-list marker-any))
515
+ (replace {\s+$} my-result {} 0)
516
+ (string {<} list-type {>} "\n" my-result "\n" {</} list-type {>} "\n"))
517
+ 10 ; must be multiline
518
+ )))
519
+
520
+ (define (process-list-items list-text marker-any) ; rewrites every item in list-text as an <li> element and returns list-text
521
+ (let ((list-regex (string [text](\n)?(^[ \t]*)([/text] marker-any [text])[ \t]+((?s:.+?)(\n{1,2}))(?=\n*(\z|\2([/text] marker-any [text])[ \t]+))[/text]))
522
+ (item {})
523
+ (leading-line {})
524
+ (leading-space {})
525
+ (result {}))
526
+ (inc *list-level*) ; lists checks this level to choose its anchoring
527
+ (replace [text]\n{2,}\z[/text] list-text "\n" 0)
528
+ (set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {})
529
+ (replace
530
+ list-regex
531
+ list-text
532
+ (begin
533
+ (set 'item $4)
534
+ (set 'leading-line $1)
535
+ (set 'leading-space $2)
536
+ (if (or (not (empty? leading-line)) (ends-with item "\n{2,}" 0)) ; a blank line before or after the item selects block-level processing
537
+ (set 'item (block-transforms (outdent item)))
538
+ ; recurse for sub lists
539
+ (begin
540
+ (set 'item (lists (outdent item)))
541
+ (set 'item (span-transforms (trim item "\n")))
542
+ ))
543
+ (string {<li>} item {</li>} "\n"))
544
+ 10)
545
+ (dec *list-level*)
546
+ list-text))
547
+
548
+ (define (code-blocks txt) ; converts indented blocks into <pre><code> blocks, or Nestor-formatted HTML when that context is loaded
549
+ (let ((code-block {})
550
+ (token-list '()))
551
+ (replace
552
+ [text](?:\n\n|\A)((?:(?:[ ]{4}|\t).*\n+)+)((?=^[ ]{0,3}\S)|\Z)[/text]
553
+ txt
554
+ (begin
555
+ (set 'code-block $1)
556
+ ; format if Nestor module is loaded and it's not marked as plain
557
+ (if (and (not (starts-with code-block " ;plain\n")) (context? Nestor))
558
+ ; format newlisp
559
+ (begin
560
+ ; remove flag if present
561
+ (replace "[ ]{4};newlisp\n" code-block {} 0)
562
+ (set 'code-block (protect (Nestor:nlx-to-html (Nestor:my-read (trim (detab (outdent code-block)) "\n")))))
563
+ code-block)
564
+ ; don't format
565
+ (begin
566
+ ; trim leading and trailing newlines
567
+ (replace "[ ]{4};plain\n" code-block {} 0)
568
+ (set 'code-block (trim (detab (encode-code (outdent code-block))) "\n")) ; outdent, escape, detab, then trim newlines
569
+ (set '$1 {})
570
+ (set 'code-block (string "\n\n<pre><code>" code-block "\n</code></pre>\n\n")))))
571
+ 10)))
572
+
573
+ (define (block-quotes txt) ; wraps runs of >-prefixed lines in <blockquote>, processing their content with block-transforms
574
+ (let ((block-quote {}))
575
+ (replace
576
+ [text]((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)[/text]
577
+ txt
578
+ (begin
579
+ (set 'block-quote $1)
580
+ (replace {^[ ]*>[ ]?} block-quote {} 2) ; strip one level of quote markers
581
+ (replace {^[ ]+$} block-quote {} 2) ; empty out lines holding only spaces
582
+ (set 'block-quote (block-transforms block-quote)) ; recurse
583
+ ; remove leading spaces
584
+ (replace
585
+ {(\s*<pre>.+?</pre>)}
586
+ block-quote
587
+ (trim $1)
588
+ 2)
589
+ (string "<blockquote>\n" block-quote "\n</blockquote>\n\n"))
590
+ 2)))
591
+
592
+ (define (outdent s) ; removes one tab or one to four spaces from the start of every line of s
593
+ (replace [text]^(\t|[ ]{1,4})[/text] s {} 2))
594
+
595
+ (define (detab s) ; replaces each tab with the spaces needed to reach the next multiple-of-four column
596
+ (replace [text](.*?)\t[/text]
597
+ s
598
+ (string $1 (dup { } (- 4 (% (length $1) 4)))) ; $1 is the text before the tab; pad it to the next tab stop
599
+ 2))
600
+
601
+ (define (form-paragraphs txt) ; restores hashed HTML blocks and wraps every other paragraph in <p> tags
602
+ (let ((grafs '())
603
+ (original nil))
604
+ (set 'txt (trim txt "\n")) ; strip blank lines before and after
605
+ (set 'grafs (parse txt "\n{2,}" 0)) ; split
606
+ (dolist (p grafs)
607
+ (if (set 'original (lookup p *hashed-html-blocks*)) ; is the paragraph a placeholder recorded by protect?
608
+ ; html blocks
609
+ (setf (grafs $idx) original)
610
+ ; wrap <p> tags round everything else
611
+ (setf (grafs $idx) (string {<p>} (replace {^[ ]*} (span-transforms p) {} (+ 4 8 16)) {</p>}))))
612
+ (join grafs "\n\n")))
613
+
614
+ [text]
615
+ ; three command line arguments: let's hope last one is a file
616
+ (when (= 3 (length (main-args)))
617
+ (println (markdown (read-file (main-args 2))))
618
+ (exit))
619
+
620
+ ; hack for command-line and module loading
621
+ (set 'level (sys-info 3))
622
+
623
+ ; if level is 2, then we're probably invoking markdown.lsp directly
624
+ ; if level is > 3, then we're probably loading it into another script...
625
+
626
+ (when (= level 2)
627
+ ; running on command line, read STDIN and execute:
628
+ (while (read-line)
629
+ (push (current-line) *stdin* -1))
630
+ (println (markdown (join *stdin* "\n")))
631
+ (exit))
632
+ [/text]
633
+
634
+ ;; version 2011-09-16 16:31:29
635
+ ;; Changed to different hash routine. Profiling shows that hashing takes 40% of the execution time.
636
+ ;; Unfortunately this new version is only very slightly faster.
637
+ ;; Command-line arguments hack in previous version doesn't work.
638
+ ;;
639
+ ;; version 2011-08-18 15:04:40
640
+ ;; various fixes, and added hack for running this from the command-line:
641
+ ;; echo "hi there" | newlisp markdown.lsp
642
+ ;; echo "hello world" | markdown.lsp
643
+ ;; cat file.text | newlisp markdown.lsp
644
+ ;;
645
+ ;; version 2010-11-14 17:34:52
646
+ ;; some problems in ustring. Probably remove it one day, as it's non standard...
647
+ ;;
648
+ ;; version 2010-10-14 18:41:38
649
+ ;; added code to work round PCRE crash in (protect ...
650
+ ;;
651
+ ;; version date 2010-07-10 22:20:25
652
+ ;; modified call to 'read' since lutz has changed it
653
+ ;;
654
+ ;; version date 2009-11-16 22:10:10
655
+ ;; fixed bug in tokenize-html
656
+ ;;
657
+ ;; version date 2008-10-08 18:44:46
658
+ ;; changed nth-set to setf to be version-10 ready.
659
+ ;; This means that now this script will NOT work with
660
+ ;; earlier versions of newLISP!!!!!!!!!!!
661
+ ;; requires Nestor if you want source code colouring...
662
+ ;;
663
+ ;; version date 2008-08-08 16:54:56
664
+ ;; changed (unless to (if (not ... :(
665
+ ;;
666
+ ;; version date 2008-07-20 14:12:29
667
+ ;; added hex-str-to-unicode-char ustring
668
+ ;;
669
+ ;; version date 2008-03-07 15:36:09
670
+ ;; fixed load error
671
+ ;;
672
+ ;; version date 2007-11-17 16:20:57
673
+ ;; added syntax colouring module
674
+ ;;
675
+ ;; version date 2007-11-14 09:19:42
676
+ ;; removed reliance on dostring for compatibility with 9.1
677
+
678
+
679
+ ; eof