marker 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,679 @@
1
+ #--
2
+ # Copyright 2009 Ryan Blue.
3
+ # Distributed under the terms of the GNU General Public License (GPL).
4
+ # See the LICENSE file for further information on the GPL.
5
+ #++
6
+
7
module Marker
  grammar Language
    # Treetop (PEG) grammar for a MediaWiki-like markup language.
    #
    # Conventions used throughout this grammar:
    # * alternatives are tried in the order written (PEG ordered choice), so
    #   the order of the alternatives in each rule is significant
    # * recursive "list-like" rules label their first element +h+ (head) and
    #   the remainder +r+ (rest)
    # * the name in angle brackets after a sequence is the syntax node class
    #   instantiated for a match; those classes are defined outside this file

    # the start rule: blocks separated by one or more newlines
    rule markup
      h:block rnl r:markup <Markup>
      /
      h:block "" <Markup>
    end

    # a special line, a paragraph of text, or a blank line
    rule block
      line
      /
      text
      /
      ws { # allows blank lines
        def to_html( options = {} )
          ""
        end

        def to_s( options = {} )
          ""
        end
      }
    end

    ##### special lines

    # lines that are recognized by the way they start
    rule line
      heading
      /
      list
      /
      verbatim_area
      /
      horiz_rule
    end

    # one or more heading toggles, the heading text, one or more heading toggles
    rule heading
      s:heading_toggle+ ws l:heading_enclosed_text ws e:heading_toggle+ <Heading>
    end

    # heading text; a heading toggle inside the text is only accepted when
    # more heading text follows it
    rule heading_enclosed_text
      h:heading_toggle ws r:heading_enclosed_text <Phrase>
      /
      h:heading_enclosed_word ws r:heading_enclosed_text <Phrase>
      /
      h:heading_enclosed_word "" <Phrase>
    end

    # things that can be inside headings: every word type, then every
    # delimiter except the heading toggle
    rule heading_enclosed_word
      bold
      /
      italic
      /
      link
      /
      template
      /
      url
      /
      plain_word
      /
      bold_toggle # allow unmatched delimiters after we have ruled out structures
      /
      italic_toggle
      /
      internal_link_start
      /
      internal_link_end
      /
      external_link_start
      /
      external_link_end
      /
      template_start
      /
      template_end
      /
      arg_delimiter
      /
      term_delimiter
    end

    # a series of list items, so they can be coalesced
    rule list
      h:list_item rnl r:list <List>
      /
      h:list_item "" <List>
    end

    # for matching any list item
    rule list_item
      bulleted
      /
      numbered
      /
      indented
      /
      definition
    end

    # '*' followed by a nested list item or by a phrase
    rule bulleted
      '*' ws list_item <Bulleted>
      /
      '*' ws phrase <Bulleted>
    end

    # '#' followed by a nested list item or by a phrase
    rule numbered
      '#' ws list_item <Numbered>
      /
      '#' ws phrase <Numbered>
    end

    # ':' followed by a nested list item or by a phrase
    rule indented
      ':' ws list_item <Indented>
      /
      ':' ws phrase <Indented>
    end

    # ';' term ':' definition, or ';' term with no definition
    rule definition
      ';' ws term:definition_term_text ws term_delimiter ws definition:phrase <Definition>
      /
      ';' ws term:phrase <Definition>
    end

    # the words of a definition term
    rule definition_term_text
      h:definition_term_word ws r:definition_term_text <Phrase>
      /
      h:definition_term_word "" <Phrase>
    end

    # things that can be inside a definition term: every word type, then
    # every delimiter except the term delimiter
    rule definition_term_word
      bold
      /
      italic
      /
      link
      /
      template
      /
      url
      /
      plain_word
      /
      bold_toggle
      /
      italic_toggle
      /
      heading_toggle
      /
      internal_link_start
      /
      internal_link_end
      /
      external_link_start
      /
      external_link_end
      /
      template_start
      /
      template_end
      /
      arg_delimiter
    end

    # a series of verbatim lines, so they can be coalesced
    rule verbatim_area
      h:verbatim rnl r:verbatim_area <VerbatimArea>
      /
      h:verbatim "" <VerbatimArea>
    end

    # a line that starts with a space; the rest of the line is taken as-is
    rule verbatim
      " " v:(!"\n" .)* <Verbatim>
    end

    rule horiz_rule
      # this deviates from how MediaWiki does things. consider this:
      # ---- a para-
      # graph
      # MediaWiki renders (newlines removed): <hr /><p>a para-</p><p>graph</p>
      # this renders (newlines removed): <hr /><p>a para-graph</p>
      "----" "-"* ws text? <HorizRule>
    end

    ##### combination rules

    # a block of normal text, including single newlines but not a special line
    rule text
      !line h:phrase ws rnl r:text <Paragraph>
      /
      !line h:phrase "" <Paragraph>
    end

    # a phrase of words without newlines
    rule phrase
      h:word ws r:phrase <Phrase>
      /
      h:word "" <Phrase>
    end

    # a single unit of a phrase; structures are tried before plain words
    rule word
      bold
      /
      italic
      /
      link
      /
      template
      /
      url
      /
      plain_word
      /
      delimiter # catch all remaining unmatched delimiters
    end

    ##### text markup

    # text enclosed in a pair of bold toggles
    rule bold
      bold_toggle ws bold_enclosed_text ws bold_toggle <Bold>
    end

    # the words inside a bold area
    rule bold_enclosed_text
      h:bold_enclosed_word ws r:bold_enclosed_text <Phrase>
      /
      h:bold_enclosed_word "" <Phrase>
    end

    # things that can be inside bold areas
    rule bold_enclosed_word
      italic
      /
      link
      /
      template
      /
      url
      /
      plain_word
      /
      heading_toggle
      /
      internal_link_start
      /
      internal_link_end
      /
      external_link_start
      /
      external_link_end
      /
      template_start
      /
      template_end
      /
      arg_delimiter
      /
      term_delimiter
    end

    # text enclosed in a pair of italic toggles; the closing toggle is matched
    # without the "not followed by another quote" lookahead that the opening
    # toggle uses
    rule italic
      italic_toggle ws italic_enclosed_text ws italic_toggle_no_lookahead <Italic>
    end

    # the words inside an italic area
    rule italic_enclosed_text
      h:italic_enclosed_word ws r:italic_enclosed_text <Phrase>
      /
      h:italic_enclosed_word "" <Phrase>
    end

    # things that can be inside italic areas
    rule italic_enclosed_word
      bold
      /
      link
      /
      template
      /
      url
      /
      plain_word
      /
      heading_toggle
      /
      internal_link_start
      /
      internal_link_end
      /
      external_link_start
      /
      external_link_end
      /
      template_start
      /
      template_end
      /
      arg_delimiter
      /
      term_delimiter
    end

    # any link; internal links are tried first
    rule link
      internal_link
      /
      external_link
    end

    # internal links:
    # [[ link target | link label ]]
    # * can contain white space
    # * cannot contain new lines
    #
    # the three alternatives are: target with label, target with an empty
    # label, and target only
    rule internal_link
      internal_link_start ws t:plain_text ws a:arg_delimiter ws l:internal_link_enclosed_text ws internal_link_end <InternalLink>
      /
      internal_link_start ws t:plain_text ws a:arg_delimiter ws internal_link_end <InternalLink>
      /
      internal_link_start ws t:plain_text ws internal_link_end <InternalLink>
    end

    # the words of an internal link label
    rule internal_link_enclosed_text
      h:internal_link_enclosed_word ws r:internal_link_enclosed_text <Phrase>
      /
      h:internal_link_enclosed_word "" <Phrase>
    end

    # things that can be inside an internal link label: no links, and every
    # delimiter except the internal link end
    rule internal_link_enclosed_word
      bold
      /
      italic
      /
      template
      /
      url
      /
      plain_word
      /
      bold_toggle
      /
      italic_toggle
      /
      heading_toggle
      /
      internal_link_start
      /
      external_link_start
      /
      external_link_end
      /
      template_start
      /
      template_end
      /
      arg_delimiter
      /
      term_delimiter
    end

    # external links:
    # [ url label ]
    # * can contain white space
    # * cannot contain new lines
    #
    # renders differently from MediaWiki:
    # [ external [[internal]] ]
    # MediaWiki: <a href="external"></a><a href="internal">internal</a>
    # This lib: <a href="external">[[internal]]</a>
    #
    # known-scheme URLs are tried before unknown-scheme (implicit) URLs, each
    # with and without a label
    rule external_link
      external_link_start ws t:url rws l:external_link_enclosed_text ws external_link_end <ExternalLink>
      /
      external_link_start ws t:url ws external_link_end <ExternalLink>
      /
      external_link_start ws t:implicit_url rws l:external_link_enclosed_text ws external_link_end <ExternalLink>
      /
      external_link_start ws t:implicit_url ws external_link_end <ExternalLink>
    end

    # the words of an external link label
    rule external_link_enclosed_text
      h:external_link_enclosed_word ws r:external_link_enclosed_text <Phrase>
      /
      h:external_link_enclosed_word "" <Phrase>
    end

    # things that can be inside an external link label: no links, and every
    # delimiter except the external link end
    rule external_link_enclosed_word
      bold
      /
      italic
      /
      template
      /
      url
      /
      plain_word
      /
      bold_toggle
      /
      italic_toggle
      /
      heading_toggle
      /
      internal_link_start
      /
      internal_link_end
      /
      external_link_start
      /
      template_start
      /
      template_end
      /
      arg_delimiter
      /
      term_delimiter
    end

    # templates:
    # {{ name | arg | name=arg }}
    # * white space between the parts may include new lines (aws)
    rule template
      template_start aws t:plain_text aws arg_delimiter aws args:arg_list aws template_end <Template>
      /
      template_start aws t:plain_text aws template_end <Template>
    end

    # arguments separated by argument delimiters; the last alternative
    # matches an empty argument list, whose head is nil
    rule arg_list
      h:arg aws arg_delimiter aws r:arg_list <Arguments>
      /
      h:arg "" <Arguments>
      /
      aws "" <Arguments> {
        def h
          nil
        end
      }
    end

    # a plain (positional) argument or a named (name=text) argument
    rule arg
      name:plain_text aws heading_toggle aws val:arg_list_enclosed_text <Argument>
      /
      val:arg_list_enclosed_text "" <Argument>
    end

    # the words of an argument value; may span new lines (aws)
    rule arg_list_enclosed_text
      h:arg_list_enclosed_word aws r:arg_list_enclosed_text <Phrase>
      /
      h:arg_list_enclosed_word "" <Phrase>
    end

    # things that can be inside an argument value: every word type, then every
    # delimiter except the template end and the argument delimiter
    rule arg_list_enclosed_word
      bold
      /
      italic
      /
      link
      /
      template
      /
      url
      /
      plain_word
      /
      bold_toggle # allow unmatched delimiters after we have ruled out structures
      /
      italic_toggle
      /
      heading_toggle
      /
      internal_link_start
      /
      internal_link_end
      /
      external_link_start
      /
      external_link_end
      /
      template_start
      /
      term_delimiter
    end

    # a phrase of plain words
    rule plain_text
      h:plain_word ws r:plain_text <Phrase>
      /
      h:plain_word "" <Phrase>
    end

    # avoids using a white-list so that utf-8 characters are accepted
    #
    # matches anything up to a delimiter or whitespace
    rule plain_word
      (!delimiter ![ \t\r\n] .)+ <Word>
    end

    ##### delimiters

    # for matching any delimiter, typically in lookahead assertions
    rule delimiter
      bold_toggle
      /
      italic_toggle
      /
      heading_toggle
      /
      internal_link_start
      /
      internal_link_end
      /
      external_link_start
      /
      external_link_end
      /
      template_start
      /
      template_end
      /
      arg_delimiter
      /
      term_delimiter
    end

    rule bold_toggle
      "'''" <Delimiter>
    end

    rule italic_toggle
      "''" !"'" <Delimiter> # avoid matching bold toggles
    end

    # two quotes with no check on the character that follows; used to close
    # italic areas
    rule italic_toggle_no_lookahead
      "''" <Delimiter>
    end

    # rule fixed_toggle
    #   "||" <Delimiter>
    # end

    # also used as the name/value separator in named template arguments
    rule heading_toggle
      '=' <Delimiter>
    end

    rule internal_link_start
      "[[" <Delimiter>
    end

    rule internal_link_end
      "]]" <Delimiter>
    end

    rule external_link_start
      "[" !"[" <Delimiter> # avoid matching internal link starts
    end

    rule external_link_end
      "]" !"]" <Delimiter> # avoid matching internal link ends
    end

    rule template_start
      "{{" <Delimiter>
    end

    rule template_end
      "}}" <Delimiter>
    end

    # breaks up arguments in an argument list
    # for example: {{ template | arg1 | arg2 }}
    rule arg_delimiter
      "|" !"|" <Delimiter> # avoid matching fixed-width toggles
    end

    # signals the break between a definition list term and definition
    rule term_delimiter
      ":" <Delimiter>
    end

    ##### matching URLs

    # unknown scheme URLs
    # used to match unknown-scheme URLs in URL contexts (like external links)
    rule implicit_url
      url_safe_char+ <URL>
    end

    # parses from a known scheme to the end of URL-safe characters
    # uses known schemes to parse bare URLs from text
    rule url
      scheme ":" url_safe_char+ <URL>
    end

    # pretty much straight from RFC 1738
    # additions:
    # * @, :, / are added because they are valid in some cases (not checked)
    # * ;, ?, &, = are added so that parameters and query strings are matched
    # * %XX is allowed as a character, these are escaped characters
    #
    # the hyphen is deliberately the last character of the class: written as
    # "$-_" it would be a range from "$" to "_", which also admits characters
    # such as <, >, [, \, ], ^ and a bare %
    rule url_safe_char
      [A-Za-z0-9$_.+!*'(),@:/;?&=-]
      /
      "%" xdigit xdigit
    end

    # known-good schemes that we want to recognize as URLs
    rule scheme
      "http" "s"? <Protocol>
      /
      "mailto" <Protocol>
      /
      "ftp" <Protocol>
    end

    ##### general rules

    # these really don't make sense unless there are specific run-time directives
    # that the markup user needs access to (such as __NOTOC__). Not sure if
    # there will be any. Also, perhaps a better strategy would be:
    # * if the line starts with "__" constant_name "__", then parse as magic
    # * send the magic word through an extensible module of some sort
    #
    # this means the magic words wouldn't require grammar changes
    # rule magic_word
    #   '__' constant_name '__' #<MagicWord>
    # end
    #
    # rule constant_name
    #   [A-Z_]+
    # end

    # a non-negative integer without leading zeros; to_i gives its value
    rule number
      [1-9] digit* {
        def to_i
          text_value.to_i
        end
      }
      /
      "0" {
        def to_i
          0
        end
      }
    end

    # digits
    rule digit
      [0-9]
    end

    # hex digits
    rule xdigit
      [A-Fa-f0-9]
    end

    # required new lines
    rule rnl
      "\n"+
    end

    # new lines
    rule nl
      "\n"*
    end

    # all white space
    rule aws
      [ \t\r\n]*
    end

    # required white space
    rule rws
      [ \t]+
    end

    # optional white space
    rule ws
      [ \t]*
    end

  end
end