marker 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,679 @@
1
+ #--
2
+ # Copyright 2009 Ryan Blue.
3
+ # Distributed under the terms of the GNU General Public License (GPL).
4
+ # See the LICENSE file for further information on the GPL.
5
+ #++
6
+
7
+ module Marker
8
+ grammar Language
9
+ rule markup # first rule in the grammar: a document is a chain of blocks separated by newlines
10
+ h:block rnl r:markup <Markup> # head block, one or more "\n", then the rest of the document
11
+ /
12
+ h:block "" <Markup> # final (or only) block; this alternative has no r: label
13
+ end
14
+
15
+ rule block # one block, tried in order: a special line, a paragraph of text, or a blank line
16
+ line
17
+ /
18
+ text
19
+ /
20
+ ws { # allows blank lines: only optional spaces/tabs, rendered as nothing
21
+ def to_html( options = {} ) # options is accepted but not used here
22
+ ""
23
+ end
24
+
25
+ def to_s( options = {} ) # options is accepted but not used here
26
+ ""
27
+ end
28
+ }
29
+ end
30
+
31
+ ##### special lines
32
+ rule line # any of the special line constructs; also used as a negative lookahead by rule text
33
+ heading
34
+ /
35
+ list
36
+ /
37
+ verbatim_area
38
+ /
39
+ horiz_rule
40
+ end
41
+
42
+ rule heading # one or more '=' on each side of the label; the grammar itself does not require s and e to be the same length
43
+ s:heading_toggle+ ws l:heading_enclosed_text ws e:heading_toggle+ <Heading>
44
+ end
45
+
46
+ rule heading_enclosed_text # the label of a heading, as a right-recursive Phrase chain
47
+ h:heading_toggle ws r:heading_enclosed_text <Phrase> # a '=' inside the label is accepted only when more label text follows it
48
+ /
49
+ h:heading_enclosed_word ws r:heading_enclosed_text <Phrase>
50
+ /
51
+ h:heading_enclosed_word "" <Phrase> # last word of the label
52
+ end
53
+
54
+ rule heading_enclosed_word # one token of a heading label; heading_toggle is deliberately absent (heading_enclosed_text handles it)
55
+ bold
56
+ /
57
+ italic
58
+ /
59
+ link
60
+ /
61
+ template
62
+ /
63
+ url
64
+ /
65
+ plain_word
66
+ /
67
+ bold_toggle # allow unmatched delimiters after we have ruled out structures
68
+ /
69
+ italic_toggle
70
+ /
71
+ internal_link_start
72
+ /
73
+ internal_link_end
74
+ /
75
+ external_link_start
76
+ /
77
+ external_link_end
78
+ /
79
+ template_start
80
+ /
81
+ template_end
82
+ /
83
+ arg_delimiter
84
+ /
85
+ term_delimiter
86
+ end
87
+
88
+ # a series of list items separated by newlines, chained right-recursively so they can be coalesced
89
+ rule list
90
+ h:list_item rnl r:list <List> # an item, one or more "\n", then the remaining items
91
+ /
92
+ h:list_item "" <List> # last (or only) item
93
+ end
94
+
95
+ # for matching any list item; the marker character ('*', '#', ':' or ';') selects the kind
96
+ rule list_item
97
+ bulleted
98
+ /
99
+ numbered
100
+ /
101
+ indented
102
+ /
103
+ definition
104
+ end
105
+
106
+ rule bulleted # a '*' item
107
+ '*' ws list_item <Bulleted> # nested form is tried first: another list marker directly after the '*'
108
+ /
109
+ '*' ws phrase <Bulleted> # otherwise the rest of the line is the item text
110
+ end
111
+
112
+ rule numbered # a '#' item
113
+ '#' ws list_item <Numbered> # nested form is tried first: another list marker directly after the '#'
114
+ /
115
+ '#' ws phrase <Numbered> # otherwise the rest of the line is the item text
116
+ end
117
+
118
+ rule indented # a ':' item
119
+ ':' ws list_item <Indented> # nested form is tried first: another list marker directly after the ':'
120
+ /
121
+ ':' ws phrase <Indented> # otherwise the rest of the line is the item text
122
+ end
123
+
124
+ rule definition # a ';' item: "; term : definition" or just "; term"
125
+ ';' ws term:definition_term_text ws term_delimiter ws definition:phrase <Definition> # the term stops at the first ':' (term_delimiter)
126
+ /
127
+ ';' ws term:phrase <Definition> # no usable ':' split; this alternative has no definition: label
128
+ end
129
+
130
+ rule definition_term_text # the term part of a definition, as a right-recursive Phrase chain
131
+ h:definition_term_word ws r:definition_term_text <Phrase>
132
+ /
133
+ h:definition_term_word "" <Phrase> # last word of the term
134
+ end
135
+
136
+ rule definition_term_word # one token of a definition term; term_delimiter is deliberately absent so ':' can end the term
137
+ bold
138
+ /
139
+ italic
140
+ /
141
+ link
142
+ /
143
+ template
144
+ /
145
+ url
146
+ /
147
+ plain_word
148
+ /
149
+ bold_toggle # from here on: unmatched delimiters taken as literal tokens
150
+ /
151
+ italic_toggle
152
+ /
153
+ heading_toggle
154
+ /
155
+ internal_link_start
156
+ /
157
+ internal_link_end
158
+ /
159
+ external_link_start
160
+ /
161
+ external_link_end
162
+ /
163
+ template_start
164
+ /
165
+ template_end
166
+ /
167
+ arg_delimiter
168
+ end
169
+
170
+ rule verbatim_area # consecutive verbatim lines, chained right-recursively
171
+ h:verbatim rnl r:verbatim_area <VerbatimArea>
172
+ /
173
+ h:verbatim "" <VerbatimArea> # last (or only) verbatim line
174
+ end
175
+
176
+ rule verbatim # a line that starts with one space; v is everything after that space up to (not including) the newline
177
+ " " v:(!"\n" .)* <Verbatim>
178
+ end
179
+
180
+ rule horiz_rule # four or more '-' characters, optionally followed by paragraph text
181
+ # this deviates from how MediaWiki does things. consider this:
182
+ # ---- a para-
183
+ # graph
184
+ # MediaWiki renders (newlines removed): <hr /><p>a para-</p><p>graph</p>
185
+ # this renders (newlines removed): <hr /><p>a para-graph</p>
186
+ "----" "-"* ws text? <HorizRule> # text? can continue over following lines, which is what causes the difference above
187
+ end
188
+
189
+ ##### combination rules
190
+
191
+ # a block of normal text made of phrases joined by newlines; no phrase may start where a special line matches
192
+ rule text
193
+ !line h:phrase ws rnl r:text <Paragraph> # NOTE(review): rnl is "\n"+, so runs of blank lines are consumed here too - confirm Paragraph handles that
194
+ /
195
+ !line h:phrase "" <Paragraph> # last (or only) phrase of the block
196
+ end
197
+
198
+ # a phrase of words without newlines; ws is optional, so adjacent tokens need no space between them
199
+ rule phrase
200
+ h:word ws r:phrase <Phrase>
201
+ /
202
+ h:word "" <Phrase> # last word of the phrase
203
+ end
204
+
205
+ rule word # one token of ordinary text: structures are tried first, then a plain word, then a stray delimiter
206
+ bold
207
+ /
208
+ italic
209
+ /
210
+ link
211
+ /
212
+ template
213
+ /
214
+ url
215
+ /
216
+ plain_word
217
+ /
218
+ delimiter # catch all remaining unmatched delimiters
219
+ end
220
+
221
+ ##### text markup
222
+
223
+ rule bold # text between two ''' toggles on one line (ws does not match newlines)
224
+ bold_toggle ws bold_enclosed_text ws bold_toggle <Bold>
225
+ end
226
+
227
+ rule bold_enclosed_text # the content of a bold span, as a right-recursive Phrase chain
228
+ h:bold_enclosed_word ws r:bold_enclosed_text <Phrase>
229
+ /
230
+ h:bold_enclosed_word "" <Phrase> # last word of the span
231
+ end
232
+
233
+ # things that can be inside bold areas: no nested bold, and neither bold_toggle nor italic_toggle as a stray token
234
+ rule bold_enclosed_word
235
+ italic
236
+ /
237
+ link
238
+ /
239
+ template
240
+ /
241
+ url
242
+ /
243
+ plain_word
244
+ /
245
+ heading_toggle # from here on: unmatched delimiters taken as literal tokens
246
+ /
247
+ internal_link_start
248
+ /
249
+ internal_link_end
250
+ /
251
+ external_link_start
252
+ /
253
+ external_link_end
254
+ /
255
+ template_start
256
+ /
257
+ template_end
258
+ /
259
+ arg_delimiter
260
+ /
261
+ term_delimiter
262
+ end
263
+
264
+ rule italic # text between two '' toggles; the closing toggle has no !"'" lookahead, so it may be directly followed by another apostrophe
265
+ italic_toggle ws italic_enclosed_text ws italic_toggle_no_lookahead <Italic>
266
+ end
267
+
268
+ rule italic_enclosed_text # the content of an italic span, as a right-recursive Phrase chain
269
+ h:italic_enclosed_word ws r:italic_enclosed_text <Phrase>
270
+ /
271
+ h:italic_enclosed_word "" <Phrase> # last word of the span
272
+ end
273
+
274
+ # things that can be inside italic areas: no nested italic, and neither bold_toggle nor italic_toggle as a stray token
275
+ rule italic_enclosed_word
276
+ bold
277
+ /
278
+ link
279
+ /
280
+ template
281
+ /
282
+ url
283
+ /
284
+ plain_word
285
+ /
286
+ heading_toggle # from here on: unmatched delimiters taken as literal tokens
287
+ /
288
+ internal_link_start
289
+ /
290
+ internal_link_end
291
+ /
292
+ external_link_start
293
+ /
294
+ external_link_end
295
+ /
296
+ template_start
297
+ /
298
+ template_end
299
+ /
300
+ arg_delimiter
301
+ /
302
+ term_delimiter
303
+ end
304
+
305
+ rule link # either kind of link; "[[" is tried before "[" (external_link_start itself also refuses "[[")
306
+ internal_link
307
+ /
308
+ external_link
309
+ end
310
+
311
+ # internal links:
312
+ # [[ link target | link label ]]
313
+ # * can contain white space
314
+ # * cannot contain new lines
315
+ rule internal_link
316
+ internal_link_start ws t:plain_text ws a:arg_delimiter ws l:internal_link_enclosed_text ws internal_link_end <InternalLink> # target, '|', and a label
317
+ /
318
+ internal_link_start ws t:plain_text ws a:arg_delimiter ws internal_link_end <InternalLink> # target and '|' but an empty label (no l: label)
319
+ /
320
+ internal_link_start ws t:plain_text ws internal_link_end <InternalLink> # target only (no a: or l: label)
321
+ end
322
+
323
+ rule internal_link_enclosed_text # the label of an internal link, as a right-recursive Phrase chain
324
+ h:internal_link_enclosed_word ws r:internal_link_enclosed_text <Phrase>
325
+ /
326
+ h:internal_link_enclosed_word "" <Phrase> # last word of the label
327
+ end
328
+
329
+ rule internal_link_enclosed_word # one token of an internal link label; no nested links, and no internal_link_end so "]]" can close the link
330
+ bold
331
+ /
332
+ italic
333
+ /
334
+ template
335
+ /
336
+ url
337
+ /
338
+ plain_word
339
+ /
340
+ bold_toggle # from here on: unmatched delimiters taken as literal tokens
341
+ /
342
+ italic_toggle
343
+ /
344
+ heading_toggle
345
+ /
346
+ internal_link_start
347
+ /
348
+ external_link_start
349
+ /
350
+ external_link_end
351
+ /
352
+ template_start
353
+ /
354
+ template_end
355
+ /
356
+ arg_delimiter
357
+ /
358
+ term_delimiter
359
+ end
360
+
361
+ # external links:
362
+ # [ url label ]
363
+ # * can contain white space
364
+ # * cannot contain new lines
365
+ #
366
+ # renders differently from MediaWiki:
367
+ # [ external [[internal]] ]
368
+ # MediaWiki: <a href="external"></a><a href="internal">internal</a>
369
+ # This lib: <a href="external">[[internal]]</a>
370
+ rule external_link
371
+ external_link_start ws t:url rws l:external_link_enclosed_text ws external_link_end <ExternalLink> # known-scheme url, required space/tab, then a label
372
+ /
373
+ external_link_start ws t:url ws external_link_end <ExternalLink> # known-scheme url with no label
374
+ /
375
+ external_link_start ws t:implicit_url rws l:external_link_enclosed_text ws external_link_end <ExternalLink> # fallback: any run of url-safe characters as the target
376
+ /
377
+ external_link_start ws t:implicit_url ws external_link_end <ExternalLink> # fallback target with no label
378
+ end
379
+
380
+ rule external_link_enclosed_text # the label of an external link, as a right-recursive Phrase chain
381
+ h:external_link_enclosed_word ws r:external_link_enclosed_text <Phrase>
382
+ /
383
+ h:external_link_enclosed_word "" <Phrase> # last word of the label
384
+ end
385
+
386
+ rule external_link_enclosed_word # one token of an external link label; no nested links, and no external_link_end so "]" can close the link
387
+ bold
388
+ /
389
+ italic
390
+ /
391
+ template
392
+ /
393
+ url
394
+ /
395
+ plain_word
396
+ /
397
+ bold_toggle # from here on: unmatched delimiters taken as literal tokens
398
+ /
399
+ italic_toggle
400
+ /
401
+ heading_toggle
402
+ /
403
+ internal_link_start
404
+ /
405
+ internal_link_end
406
+ /
407
+ external_link_start
408
+ /
409
+ template_start
410
+ /
411
+ template_end
412
+ /
413
+ arg_delimiter
414
+ /
415
+ term_delimiter
416
+ end
417
+
418
+ rule template # {{ name | args }}; aws is used between parts, so a template may span several lines
419
+ template_start aws t:plain_text aws arg_delimiter aws args:arg_list aws template_end <Template> # name, '|', then an argument list
420
+ /
421
+ template_start aws t:plain_text aws template_end <Template> # name only; this alternative has no args: label
422
+ end
423
+
424
+ rule arg_list # template arguments separated by '|', as a right-recursive Arguments chain
425
+ h:arg aws arg_delimiter aws r:arg_list <Arguments> # an argument, '|', then the remaining list
426
+ /
427
+ h:arg "" <Arguments> # last argument
428
+ /
429
+ aws "" <Arguments> { # empty list: matches whitespace only (possibly nothing)
430
+ def h # there is no h: label in this alternative, so h is defined here and returns nil
431
+ nil
432
+ end
433
+ }
434
+ end
435
+
436
+ # a plain (positional) argument or a named (name=text) argument; the named form is tried first
437
+ rule arg
438
+ name:plain_text aws heading_toggle aws val:arg_list_enclosed_text <Argument> # heading_toggle is the '=' between name and value
439
+ /
440
+ val:arg_list_enclosed_text "" <Argument> # positional: this alternative has no name: label
441
+ end
442
+
443
+ rule arg_list_enclosed_text # the value of one template argument; words are joined with aws, so a value may contain newlines
444
+ h:arg_list_enclosed_word aws r:arg_list_enclosed_text <Phrase>
445
+ /
446
+ h:arg_list_enclosed_word "" <Phrase> # last word of the value
447
+ end
448
+
449
+ rule arg_list_enclosed_word # one token of an argument value; arg_delimiter and template_end are absent so '|' and "}}" can end the argument
450
+ bold
451
+ /
452
+ italic
453
+ /
454
+ link
455
+ /
456
+ template
457
+ /
458
+ url
459
+ /
460
+ plain_word
461
+ /
462
+ bold_toggle # allow unmatched delimiters after we have ruled out structures
463
+ /
464
+ italic_toggle
465
+ /
466
+ heading_toggle
467
+ /
468
+ internal_link_start
469
+ /
470
+ internal_link_end
471
+ /
472
+ external_link_start
473
+ /
474
+ external_link_end
475
+ /
476
+ template_start
477
+ /
478
+ term_delimiter
479
+ end
480
+
481
+ # a phrase of plain words only (no markup structures and no delimiters), separated by optional spaces/tabs
482
+ rule plain_text
483
+ h:plain_word ws r:plain_text <Phrase>
484
+ /
485
+ h:plain_word "" <Phrase> # last word
486
+ end
487
+
488
+ # avoids using a white-list so that utf-8 characters are accepted
489
+ #
490
+ # matches one or more characters, stopping before any delimiter, space, tab, "\r" or "\n"
491
+ rule plain_word
492
+ (!delimiter ![ \t\r\n] .)+ <Word> # both lookaheads are checked before every single character
493
+ end
494
+
495
+ ##### delimiters
496
+
497
+ # for matching any delimiter: used as a lookahead in plain_word and as the catch-all alternative in word
498
+ rule delimiter
499
+ bold_toggle
500
+ /
501
+ italic_toggle
502
+ /
503
+ heading_toggle
504
+ /
505
+ internal_link_start
506
+ /
507
+ internal_link_end
508
+ /
509
+ external_link_start
510
+ /
511
+ external_link_end
512
+ /
513
+ template_start
514
+ /
515
+ template_end
516
+ /
517
+ arg_delimiter
518
+ /
519
+ term_delimiter
520
+ end
521
+
522
+ rule bold_toggle # three apostrophes; opens and closes a bold span (see rule bold)
523
+ "'''" <Delimiter>
524
+ end
525
+
526
+ rule italic_toggle # two apostrophes not followed by a third
527
+ "''" !"'" <Delimiter> # avoid matching bold toggles
528
+ end
529
+
530
+ rule italic_toggle_no_lookahead # two apostrophes regardless of what follows; used only as the closing toggle in rule italic
531
+ "''" <Delimiter>
532
+ end
533
+
534
+ # rule fixed_toggle
535
+ # "||" <Delimiter>
536
+ # end
537
+
538
+ rule heading_toggle # a single '='; rule heading repeats it, and rule arg uses it as the name=value separator
539
+ '=' <Delimiter>
540
+ end
541
+
542
+ rule internal_link_start # opening "[[" of an internal link
543
+ "[[" <Delimiter>
544
+ end
545
+
546
+ rule internal_link_end # closing "]]" of an internal link
547
+ "]]" <Delimiter>
548
+ end
549
+
550
+ rule external_link_start # a single "[" that is not the first half of "[["
551
+ "[" !"[" <Delimiter> # avoid matching internal link starts
552
+ end
553
+
554
+ rule external_link_end # a single "]" that is not the first half of "]]"
555
+ "]" !"]" <Delimiter> # avoid matching internal link ends
556
+ end
557
+
558
+ rule template_start # opening "{{" of a template
559
+ "{{" <Delimiter>
560
+ end
561
+
562
+ rule template_end # closing "}}" of a template
563
+ "}}" <Delimiter>
564
+ end
565
+
566
+ # breaks up arguments in an argument list, and separates target from label in internal links
567
+ # for example: {{ template | arg1 | arg2 }}
568
+ rule arg_delimiter
569
+ "|" !"|" <Delimiter> # avoid matching fixed-width toggles ("||"; the fixed_toggle rule itself is currently commented out)
570
+ end
571
+
572
+ # signals the break between a definition list term and definition (a single ':')
573
+ rule term_delimiter
574
+ ":" <Delimiter>
575
+ end
576
+
577
+ ##### matching URLs
578
+
579
+ # unknown scheme URLs
580
+ # used to match unknown-scheme URLs in URL contexts (like external links)
581
+ rule implicit_url
582
+ url_safe_char+ <URL> # any non-empty run of url-safe characters; no scheme is required
583
+ end
584
+
585
+ # parses from a known scheme to the end of URL-safe characters
586
+ # uses known schemes to parse bare URLs from text
587
+ rule url
588
+ scheme ":" url_safe_char+ <URL> # at least one url-safe character must follow the ':'
589
+ end
590
+
591
+ # pretty much straight from RFC 1738
592
+ # additions:
593
+ # * @, :, / are added because they are valid in some cases (not checked)
594
+ # * %XX is allowed as a character, these are escaped characters
595
+ rule url_safe_char
596
+ !"]" [A-Za-z0-9$-_.+!*'(),@:/]
597
+ /
598
+ "%" xdigit xdigit
599
+ end
600
+
601
+ # known-good schemes that we want to recognize as URLs: http, https, mailto and ftp
602
+ rule scheme
603
+ "http" "s"? <Protocol> # covers both http and https
604
+ /
605
+ "mailto" <Protocol>
606
+ /
607
+ "ftp" <Protocol>
608
+ end
609
+
610
+ ##### general rules
611
+
612
+ # magic words really don't make sense unless there are specific run-time directives
613
+ # that the markup user needs access to (such as __NOTOC__). Not sure if
614
+ # there will be any. Also, perhaps a better strategy would be:
615
+ # * if the line starts with "__" constant_name "__", then parse as magic
616
+ # * send the magic word through an extensible module of some sort
617
+ #
618
+ # this means the magic words wouldn't require grammar changes
619
+ # rule magic_word
620
+ # '__' constant_name '__' #<MagicWord>
621
+ # end
622
+ #
623
+ # rule constant_name
624
+ # [A-Z_]+
625
+ # end
626
+
627
+ rule number # a non-negative decimal integer without leading zeros; no other rule in this file references it
628
+ [1-9] digit* { # a non-zero first digit followed by any further digits
629
+ def to_i # integer value of the matched text
630
+ text_value.to_i
631
+ end
632
+ }
633
+ /
634
+ "0" { # zero on its own
635
+ def to_i # always 0 for this alternative
636
+ 0
637
+ end
638
+ }
639
+ end
640
+
641
+ # digits
642
+ rule digit
643
+ [0-9]
644
+ /
645
+ "0"
646
+ end
647
+
648
+ # a single hex digit, upper or lower case; used by url_safe_char for %XX escapes
649
+ rule xdigit
650
+ [A-Fa-f0-9]
651
+ end
652
+
653
+ # required new lines: one or more "\n" ("\r" is not accepted here)
654
+ rule rnl
655
+ "\n"+
656
+ end
657
+
658
+ # optional new lines: zero or more "\n"; no other rule in this file references it
659
+ rule nl
660
+ "\n"*
661
+ end
662
+
663
+ # all white space, optional: spaces, tabs, "\r" and "\n" (used inside templates)
664
+ rule aws
665
+ [ \t\r\n]*
666
+ end
667
+
668
+ # required white space: one or more spaces or tabs, never a newline
669
+ rule rws
670
+ [ \t]+
671
+ end
672
+
673
+ # optional white space: zero or more spaces or tabs, never a newline
674
+ rule ws
675
+ [ \t]*
676
+ end
677
+
678
+ end
679
+ end