dtext_rb 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/dtext/dtext.h ADDED
@@ -0,0 +1,4 @@
1
+ #ifndef DTEXT_H
2
+ #define DTEXT_H
3
+
4
+ #endif
@@ -0,0 +1,1294 @@
1
+ // situationally print newlines to make the generated html
2
+ // easier to read
3
+ #define PRETTY_PRINT 0
4
+
5
+ #include <ruby.h>
6
+ #include <stdio.h>
7
+ #include <stdint.h>
8
+ #include <stdbool.h>
9
+ #include <glib.h>
10
+
11
+ typedef struct StateMachine {
12
+ int top;
13
+ int cs;
14
+ int act;
15
+ const char * p;
16
+ const char * pe;
17
+ const char * eof;
18
+ const char * ts;
19
+ const char * te;
20
+
21
+ const char * a1;
22
+ const char * a2;
23
+ const char * b1;
24
+ const char * b2;
25
+ bool f_inline;
26
+ bool list_mode;
27
+ GString * output;
28
+ GArray * stack;
29
+ GQueue * dstack;
30
+ int list_nest;
31
+ int d;
32
+ int b;
33
+ } StateMachine;
34
+
35
// Limit compared against the call-stack array length in prepush.
static const int MAX_STACK_DEPTH = 512;

// Element tags. These are real objects (not enum constants or macros)
// because dstack_push() stores their addresses in the queue.
static const int
  BLOCK_P = 1,
  INLINE_SPOILER = 2,
  BLOCK_SPOILER = 3,
  BLOCK_QUOTE = 4,
  BLOCK_EXPAND = 5,
  BLOCK_NODTEXT = 6,
  BLOCK_CODE = 7,
  BLOCK_TD = 8,
  INLINE_NODTEXT = 9,
  INLINE_B = 10,
  INLINE_I = 11,
  INLINE_U = 12,
  INLINE_S = 13,
  INLINE_TN = 14,
  BLOCK_TN = 15,
  BLOCK_TABLE = 16,
  BLOCK_THEAD = 17,
  BLOCK_TBODY = 18,
  BLOCK_TR = 19,
  BLOCK_UL = 20,
  BLOCK_LI = 21,
  BLOCK_TH = 22;
58
+
59
+ %%{
60
+ machine dtext;
61
+
62
+ access sm->;
63
+ variable p sm->p;
64
+ variable pe sm->pe;
65
+ variable eof sm->eof;
66
+ variable top sm->top;
67
+ variable ts sm->ts;
68
+ variable te sm->te;
69
+ variable act sm->act;
70
+ variable stack ((int *)sm->stack->data);
71
+
72
prepush {
  /* Runs before each fcall: refuse excessive nesting, then make sure the
     call-stack array has a slot for the state about to be pushed. */
  const size_t capacity = sm->stack->len;

  if (capacity > MAX_STACK_DEPTH) {
    free_machine(sm);
    rb_raise(rb_eSyntaxError, "too many nested elements");
  }

  if (!(sm->top < capacity)) {
    sm->stack = g_array_set_size(sm->stack, capacity + 16);
  }
}
84
+
85
+ action mark_a1 {
86
+ sm->a1 = sm->p;
87
+ }
88
+
89
+ action mark_a2 {
90
+ sm->a2 = sm->p;
91
+ }
92
+
93
+ action mark_b1 {
94
+ sm->b1 = sm->p;
95
+ }
96
+
97
+ action mark_b2 {
98
+ sm->b2 = sm->p;
99
+ }
100
+
101
+ newline = '\r\n' | '\r' | '\n';
102
+
103
+ nonnewline = any - (newline | '\0');
104
+ nonquote = ^'"';
105
+ nonbracket = ^']';
106
+ nonpipe = ^'|';
107
+ nonpipebracket = nonpipe & nonbracket;
108
+ noncurly = ^'}';
109
+
110
+ mention = '@' graph+ >mark_a1 %mark_a2;
111
+
112
+ url = 'http' 's'? '://' graph+;
113
+ internal_url = '/' graph+;
114
+ basic_textile_link = '"' nonquote+ >mark_a1 '"' >mark_a2 ':' (url | internal_url) >mark_b1 %mark_b2;
115
+ bracketed_textile_link = '"' nonquote+ >mark_a1 '"' >mark_a2 ':[' (url | internal_url) >mark_b1 %mark_b2 :>> ']';
116
+
117
+ basic_wiki_link = '[[' nonpipebracket+ >mark_a1 %mark_a2 ']]';
118
+ aliased_wiki_link = '[[' nonpipebracket+ >mark_a1 %mark_a2 '|' nonbracket+ >mark_b1 %mark_b2 ']]';
119
+
120
+ post_link = '{{' noncurly+ >mark_a1 %mark_a2 '}}';
121
+
122
+ post_id = 'post #' digit+ >mark_a1 %mark_a2;
123
+ forum_post_id = 'forum #' digit+ >mark_a1 %mark_a2;
124
+ forum_topic_id = 'topic #' digit+ >mark_a1 %mark_a2;
125
+ forum_topic_paged_id = 'topic #' digit+ >mark_a1 %mark_a2 '/p' digit+ >mark_b1 %mark_b2;
126
+ comment_id = 'comment #' digit+ >mark_a1 %mark_a2;
127
+ pool_id = 'pool #' digit+ >mark_a1 %mark_a2;
128
+ user_id = 'user #' digit+ >mark_a1 %mark_a2;
129
+ artist_id = 'artist #' digit+ >mark_a1 %mark_a2;
130
+ github_issue_id = 'issue #' digit+ >mark_a1 %mark_a2;
131
+ pixiv_id = 'pixiv #' digit+ >mark_a1 %mark_a2;
132
+ pixiv_paged_id = 'pixiv #' digit+ >mark_a1 %mark_a2 '/p' digit+ >mark_b1 %mark_b2;
133
+
134
+ ws = ' ' | '\t';
135
+ header = 'h' [123456] >mark_a1 %mark_a2 '.' ws* nonnewline+ >mark_b1 %mark_b2;
136
+ aliased_expand = '[expand=' (nonbracket+ >mark_a1 %mark_a2) ']';
137
+
138
+ list_item = '*'+ >mark_a1 %mark_a2 ws+ nonnewline+ >mark_b1 %mark_b2;
139
+
140
+ inline := |*
141
+ post_id => {
142
+ append(sm, "<a href=\"/posts/");
143
+ append_segment(sm, sm->a1, sm->a2 - 1);
144
+ append(sm, "\">post #");
145
+ append_segment(sm, sm->a1, sm->a2 - 1);
146
+ append(sm, "</a>");
147
+ };
148
+
149
+ forum_post_id => {
150
+ append(sm, "<a href=\"/forum_posts/");
151
+ append_segment(sm, sm->a1, sm->a2 - 1);
152
+ append(sm, "\">forum #");
153
+ append_segment(sm, sm->a1, sm->a2 - 1);
154
+ append(sm, "</a>");
155
+ };
156
+
157
+ forum_topic_id => {
158
+ append(sm, "<a href=\"/forum_topics/");
159
+ append_segment(sm, sm->a1, sm->a2 - 1);
160
+ append(sm, "\">topic #");
161
+ append_segment(sm, sm->a1, sm->a2 - 1);
162
+ append(sm, "</a>");
163
+ };
164
+
165
+ forum_topic_paged_id => {
166
+ append(sm, "<a href=\"/forum_topics/");
167
+ append_segment(sm, sm->a1, sm->a2 - 1);
168
+ append(sm, "?page=");
169
+ append_segment(sm, sm->b1, sm->b2 - 1);
170
+ append(sm, "\">topic #");
171
+ append_segment(sm, sm->a1, sm->a2 - 1);
172
+ append(sm, "/p");
173
+ append_segment(sm, sm->b1, sm->b2 - 1);
174
+ append(sm, "</a>");
175
+ };
176
+
177
+ comment_id => {
178
+ append(sm, "<a href=\"/comments/");
179
+ append_segment(sm, sm->a1, sm->a2 - 1);
180
+ append(sm, "\">comment #");
181
+ append_segment(sm, sm->a1, sm->a2 - 1);
182
+ append(sm, "</a>");
183
+ };
184
+
185
+ pool_id => {
186
+ append(sm, "<a href=\"/pools/");
187
+ append_segment(sm, sm->a1, sm->a2 - 1);
188
+ append(sm, "\">pool #");
189
+ append_segment(sm, sm->a1, sm->a2 - 1);
190
+ append(sm, "</a>");
191
+ };
192
+
193
+ user_id => {
194
+ append(sm, "<a href=\"/users/");
195
+ append_segment(sm, sm->a1, sm->a2 - 1);
196
+ append(sm, "\">user #");
197
+ append_segment(sm, sm->a1, sm->a2 - 1);
198
+ append(sm, "</a>");
199
+ };
200
+
201
+ artist_id => {
202
+ append(sm, "<a href=\"/artists/");
203
+ append_segment(sm, sm->a1, sm->a2 - 1);
204
+ append(sm, "\">artist #");
205
+ append_segment(sm, sm->a1, sm->a2 - 1);
206
+ append(sm, "</a>");
207
+ };
208
+
209
+ github_issue_id => {
210
+ append(sm, "<a href=\"https://github.com/r888888888/danbooru/issues/");
211
+ append_segment(sm, sm->a1, sm->a2 - 1);
212
+ append(sm, "\">issue #");
213
+ append_segment(sm, sm->a1, sm->a2 - 1);
214
+ append(sm, "</a>");
215
+ };
216
+
217
+ pixiv_id => {
218
+ append(sm, "<a href=\"http://www.pixiv.net/member_illust.php?mode=medium&illust_id=");
219
+ append_segment(sm, sm->a1, sm->a2 - 1);
220
+ append(sm, "\">pixiv #");
221
+ append_segment(sm, sm->a1, sm->a2 - 1);
222
+ append(sm, "</a>");
223
+ };
224
+
225
+ pixiv_paged_id => {
226
+ append(sm, "<a href=\"http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=");
227
+ append_segment(sm, sm->a1, sm->a2 - 1);
228
+ append(sm, "&page=");
229
+ append_segment(sm, sm->b1, sm->b2 - 1);
230
+ append(sm, "\">pixiv #");
231
+ append_segment(sm, sm->a1, sm->a2 - 1);
232
+ append(sm, "/p");
233
+ append_segment(sm, sm->b1, sm->b2 - 1);
234
+ append(sm, "</a>");
235
+ };
236
+
237
+ post_link => {
238
+ append(sm, "<a rel=\"nofollow\" href=\"/posts?tags=");
239
+ append_segment_uri_escaped(sm, sm->a1, sm->a2 - 1);
240
+ append(sm, "\">");
241
+ append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
242
+ append(sm, "</a>");
243
+ };
244
+
245
basic_wiki_link => {
  /* [[Title]]: link to the wiki page named by the lowercased, underscored
     title, and show the title as written. */
  GString * segment = g_string_new_len(sm->a1, sm->a2 - sm->a1);
  underscore_string(segment->str, segment->len);

  /* g_utf8_strdown and g_ascii_strdown return newly allocated strings and
     g_string_new copies its argument, so the intermediate string has to be
     released here; it used to be leaked on every wiki link. */
  gchar * lowered = NULL;

  if (g_utf8_validate(segment->str, -1, NULL)) {
    lowered = g_utf8_strdown(segment->str, -1);
  } else {
    lowered = g_ascii_strdown(segment->str, -1);
  }

  GString * lowercase_segment = g_string_new(lowered);
  g_free(lowered);

  append(sm, "<a href=\"/wiki_pages/show_or_new?title=");
  append_segment_uri_escaped(sm, lowercase_segment->str, lowercase_segment->str + lowercase_segment->len - 1);
  append(sm, "\">");
  append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
  append(sm, "</a>");

  g_string_free(lowercase_segment, TRUE);
  g_string_free(segment, TRUE);
};
265
+
266
aliased_wiki_link => {
  /* [[Title|label]]: link to the wiki page named by the lowercased,
     underscored title (a1..a2) and show the label (b1..b2). */
  GString * segment = g_string_new_len(sm->a1, sm->a2 - sm->a1);
  underscore_string(segment->str, segment->len);

  /* g_utf8_strdown and g_ascii_strdown return newly allocated strings and
     g_string_new copies its argument, so the intermediate string has to be
     released here; it used to be leaked on every wiki link. */
  gchar * lowered = NULL;

  if (g_utf8_validate(segment->str, -1, NULL)) {
    lowered = g_utf8_strdown(segment->str, -1);
  } else {
    lowered = g_ascii_strdown(segment->str, -1);
  }

  GString * lowercase_segment = g_string_new(lowered);
  g_free(lowered);

  append(sm, "<a href=\"/wiki_pages/show_or_new?title=");
  append_segment_uri_escaped(sm, lowercase_segment->str, lowercase_segment->str + lowercase_segment->len - 1);
  append(sm, "\">");
  append_segment_html_escaped(sm, sm->b1, sm->b2 - 1);
  append(sm, "</a>");

  g_string_free(lowercase_segment, TRUE);
  g_string_free(segment, TRUE);
};
286
+
287
+ basic_textile_link => {
288
+ if (is_boundary_c(fc)) {
289
+ sm->d = 2;
290
+ sm->b = true;
291
+ } else {
292
+ sm->d = 1;
293
+ sm->b = false;
294
+ }
295
+
296
+ append(sm, "<a href=\"");
297
+ append_segment_uri_escaped(sm, sm->b1, sm->b2 - sm->d);
298
+ append(sm, "\">");
299
+ append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
300
+ append(sm, "</a>");
301
+
302
+ if (sm->b) {
303
+ append_c_html_escaped(sm, fc);
304
+ }
305
+ };
306
+
307
+ bracketed_textile_link => {
308
+ append(sm, "<a href=\"");
309
+ append_segment_uri_escaped(sm, sm->b1, sm->b2 - 1);
310
+ append(sm, "\">");
311
+ append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
312
+ append(sm, "</a>");
313
+ };
314
+
315
+ url => {
316
+ if (is_boundary_c(fc)) {
317
+ sm->b = true;
318
+ sm->d = 2;
319
+ } else {
320
+ sm->b = false;
321
+ sm->d = 1;
322
+ }
323
+
324
+ append(sm, "<a href=\"");
325
+ append_segment_uri_escaped(sm, sm->ts, sm->te - sm->d);
326
+ append(sm, "\">");
327
+ append_segment_html_escaped(sm, sm->ts, sm->te - sm->d);
328
+ append(sm, "</a>");
329
+
330
+ if (sm->b) {
331
+ append_c_html_escaped(sm, fc);
332
+ }
333
+ };
334
+
335
# probably a tag. examples include @.@ and @_@
'@' graph '@' => {
  /* The middle character can be an angle bracket or an ampersand, so the
     token goes through the HTML escaper instead of being copied raw. */
  append_segment_html_escaped(sm, sm->ts, sm->te - 1);
};
339
+
340
+ mention => {
341
+ if (is_boundary_c(fc)) {
342
+ sm->b = true;
343
+ sm->d = 2;
344
+ } else {
345
+ sm->b = false;
346
+ sm->d = 1;
347
+ }
348
+
349
+ append(sm, "<a rel=\"nofollow\" href=\"/users?name=");
350
+ append_segment_uri_escaped(sm, sm->a1, sm->a2 - sm->d);
351
+ append(sm, "\">@");
352
+ append_segment_html_escaped(sm, sm->a1, sm->a2 - sm->d);
353
+ append(sm, "</a>");
354
+
355
+ if (sm->b) {
356
+ append_c_html_escaped(sm, fc);
357
+ }
358
+ };
359
+
360
+ list_item => {
361
+ g_debug("inline list");
362
+
363
+ if (dstack_check(sm, BLOCK_P)) {
364
+ g_debug(" rewind p");
365
+ dstack_rewind(sm);
366
+ }
367
+
368
+ g_debug(" call list");
369
+ fexec sm->ts;
370
+ fcall list;
371
+ };
372
+
373
+ '[b]' => {
374
+ dstack_push(sm, &INLINE_B);
375
+ append(sm, "<strong>");
376
+ };
377
+
378
+ '[/b]' => {
379
+ if (dstack_check(sm, INLINE_B)) {
380
+ dstack_pop(sm);
381
+ append(sm, "</strong>");
382
+ } else {
383
+ append(sm, "[/b]");
384
+ }
385
+ };
386
+
387
+ '[i]' => {
388
+ dstack_push(sm, &INLINE_I);
389
+ append(sm, "<em>");
390
+ };
391
+
392
+ '[/i]' => {
393
+ if (dstack_check(sm, INLINE_I)) {
394
+ dstack_pop(sm);
395
+ append(sm, "</em>");
396
+ } else {
397
+ append(sm, "[/i]");
398
+ }
399
+ };
400
+
401
+ '[s]' => {
402
+ dstack_push(sm, &INLINE_S);
403
+ append(sm, "<s>");
404
+ };
405
+
406
+ '[/s]' => {
407
+ if (dstack_check(sm, INLINE_S)) {
408
+ dstack_pop(sm);
409
+ append(sm, "</s>");
410
+ } else {
411
+ append(sm, "[/s]");
412
+ }
413
+ };
414
+
415
+ '[u]' => {
416
+ dstack_push(sm, &INLINE_U);
417
+ append(sm, "<u>");
418
+ };
419
+
420
+ '[/u]' => {
421
+ if (dstack_check(sm, INLINE_U)) {
422
+ dstack_pop(sm);
423
+ append(sm, "</u>");
424
+ } else {
425
+ append(sm, "[/u]");
426
+ }
427
+ };
428
+
429
+ '[tn]' => {
430
+ dstack_push(sm, &INLINE_TN);
431
+ append(sm, "<span class=\"tn\">");
432
+ };
433
+
434
+ '[/tn]' => {
435
+ if (dstack_check(sm, BLOCK_TN)) {
436
+ dstack_pop(sm);
437
+ append_closing_p(sm);
438
+ append_newline(sm);
439
+ fret;
440
+ } else if (dstack_check(sm, INLINE_TN)) {
441
+ dstack_pop(sm);
442
+ append(sm, "</span>");
443
+ } else {
444
+ append(sm, "[/tn]");
445
+ }
446
+ };
447
+
448
+ # these are block level elements that should kick us out of the inline
449
+ # scanner
450
+ header => {
451
+ dstack_rewind(sm);
452
+ fexec sm->a1 - 1;
453
+ fret;
454
+ };
455
+
456
+ (space* '[quote]') => {
457
+ g_debug("inline [quote]");
458
+
459
+ if (dstack_check(sm, BLOCK_P)) {
460
+ g_debug(" pop dstack");
461
+ g_debug(" print </p>");
462
+
463
+ dstack_pop(sm);
464
+ append_closing_p(sm);
465
+ append_newline(sm);
466
+ }
467
+
468
+ fexec sm->p - 6;
469
+ fret;
470
+ };
471
+
472
+ (space* '[/quote]') => {
473
+ g_debug("inline [/quote]");
474
+
475
+ if (dstack_check(sm, BLOCK_P)) {
476
+ dstack_rewind(sm);
477
+ }
478
+
479
+ if (dstack_check(sm, BLOCK_QUOTE)) {
480
+ dstack_rewind(sm);
481
+ fret;
482
+ } else {
483
+ append(sm, "[/quote]");
484
+ }
485
+ };
486
+
487
+ '[spoiler]' => {
488
+ g_debug("inline [spoiler]");
489
+ g_debug(" push <span>");
490
+ dstack_push(sm, &INLINE_SPOILER);
491
+ append(sm, "<span class=\"spoiler\">");
492
+ };
493
+
494
+ (space* '[/spoiler]') => {
495
+ g_debug("inline [/spoiler]");
496
+
497
+ if (dstack_check(sm, INLINE_SPOILER)) {
498
+ g_debug(" pop dstack");
499
+ g_debug(" print </span>");
500
+ dstack_pop(sm);
501
+ append(sm, "</span>");
502
+ } else if (dstack_check(sm, BLOCK_P) && dstack_check2(sm, BLOCK_SPOILER)) {
503
+ g_debug(" pop dstack");
504
+ g_debug(" print </p></div>");
505
+ g_debug(" return");
506
+ dstack_pop(sm);
507
+ dstack_pop(sm);
508
+ append_newline(sm);
509
+ append_closing_p(sm);
510
+ append_block(sm, "</div>");
511
+ append_newline(sm);
512
+ append_newline(sm);
513
+
514
+ fret;
515
+ } else {
516
+ append(sm, "[/spoiler]");
517
+ }
518
+ };
519
+
520
(space* '[expand]' space*) => {
  /* Block-level tag: close the current element and hand the tag back to the
     calling scanner. The match may carry any amount of whitespace on either
     side, so the tag is located from the token start; a fixed offset from
     sm->p was only right when exactly one whitespace character followed the
     tag, and with none it re-fed the preceding character indefinitely. */
  const char * tag_start = sm->ts;

  while (*tag_start != '[') {
    ++tag_start;
  }

  dstack_rewind(sm);
  fexec tag_start;
  fret;
};
525
+
526
+ (space* '[/expand]') => {
527
+ if (dstack_check(sm, BLOCK_P)) {
528
+ append_closing_p(sm);
529
+ append_newline(sm);
530
+ dstack_pop(sm);
531
+ }
532
+
533
+ if (dstack_check(sm, BLOCK_EXPAND)) {
534
+ append_newline(sm);
535
+ append_block(sm, "</div></div>");
536
+ append_newline(sm);
537
+ append_newline(sm);
538
+ dstack_pop(sm);
539
+ fret;
540
+ } else {
541
+ append(sm, "[/expand]");
542
+ }
543
+ };
544
+
545
+ '[nodtext]' => {
546
+ dstack_push(sm, &INLINE_NODTEXT);
547
+ fcall nodtext;
548
+ };
549
+
550
+ '[/th]' => {
551
+ if (dstack_check(sm, BLOCK_TH)) {
552
+ dstack_pop(sm);
553
+ append_newline(sm);
554
+ append_block(sm, "</th>");
555
+ append_newline(sm);
556
+ fret;
557
+ } else {
558
+ append(sm, "[/th]");
559
+ }
560
+ };
561
+
562
+ '[/td]' => {
563
+ if (dstack_check(sm, BLOCK_TD)) {
564
+ dstack_pop(sm);
565
+ append_newline(sm);
566
+ append_block(sm, "</td>");
567
+ append_newline(sm);
568
+ fret;
569
+ } else {
570
+ append(sm, "[/td]");
571
+ }
572
+ };
573
+
574
+ '\0' => {
575
+ g_debug("inline 0");
576
+ g_debug(" return");
577
+
578
+ fhold;
579
+ fret;
580
+ };
581
+
582
+ newline{2,} => {
583
+ g_debug("inline newline2");
584
+ g_debug(" return");
585
+
586
+ if (sm->list_mode) {
587
+ if (dstack_check(sm, BLOCK_LI)) {
588
+ dstack_rewind(sm);
589
+ }
590
+
591
+ sm->list_mode = false;
592
+ }
593
+
594
+ fexec sm->ts;
595
+ fret;
596
+ };
597
+
598
+ newline => {
599
+ g_debug("inline newline");
600
+
601
+ if (sm->list_mode && (*(sm->p+1) == '*') && dstack_check(sm, BLOCK_LI)) {
602
+ dstack_rewind(sm);
603
+ } else {
604
+ append(sm, "<br>");
605
+ append_newline(sm);
606
+ }
607
+ };
608
+
609
+ any => {
610
+ g_debug("inline c: %c", fc);
611
+ append_c_html_escaped(sm, fc);
612
+ };
613
+ *|;
614
+
615
+ code := |*
616
+ '[/code]' => {
617
+ if (dstack_check(sm, BLOCK_CODE)) {
618
+ dstack_rewind(sm);
619
+ } else {
620
+ append(sm, "[/code]");
621
+ }
622
+ fret;
623
+ };
624
+
625
+ '\0' => {
626
+ fhold;
627
+ fret;
628
+ };
629
+
630
+ any => {
631
+ append_c_html_escaped(sm, fc);
632
+ };
633
+ *|;
634
+
635
+ nodtext := |*
636
+ '[/nodtext]' => {
637
+ if (dstack_check(sm, BLOCK_NODTEXT)) {
638
+ dstack_pop(sm);
639
+ append_newline(sm);
640
+ append_closing_p(sm);
641
+ append_newline(sm);
642
+ append_newline(sm);
643
+ fret;
644
+ } else if (dstack_check(sm, INLINE_NODTEXT)) {
645
+ dstack_pop(sm);
646
+ fret;
647
+ } else {
648
+ append(sm, "[/nodtext]");
649
+ }
650
+ };
651
+
652
+ '\0' => {
653
+ fhold;
654
+ fret;
655
+ };
656
+
657
+ any => {
658
+ append_c_html_escaped(sm, fc);
659
+ };
660
+ *|;
661
+
662
+ table := |*
663
+ '[thead]' => {
664
+ dstack_push(sm, &BLOCK_THEAD);
665
+ append_newline(sm);
666
+ append_block(sm, "<thead>");
667
+ append_newline(sm);
668
+ };
669
+
670
+ '[/thead]' => {
671
+ if (dstack_check(sm, BLOCK_THEAD)) {
672
+ dstack_pop(sm);
673
+ append_newline(sm);
674
+ append_block(sm, "</thead>");
675
+ append_newline(sm);
676
+ } else {
677
+ append(sm, "[/thead]");
678
+ }
679
+ };
680
+
681
+ '[tbody]' => {
682
+ dstack_push(sm, &BLOCK_TBODY);
683
+ append_newline(sm);
684
+ append_block(sm, "<tbody>");
685
+ append_newline(sm);
686
+ };
687
+
688
+ '[/tbody]' => {
689
+ if (dstack_check(sm, BLOCK_TBODY)) {
690
+ dstack_pop(sm);
691
+ append_newline(sm);
692
+ append_block(sm, "</tbody>");
693
+ append_newline(sm);
694
+ } else {
695
+ append(sm, "[/tbody]");
696
+ }
697
+ };
698
+
699
+ '[th]' => {
700
+ dstack_push(sm, &BLOCK_TH);
701
+ append_newline(sm);
702
+ append_block(sm, "<th>");
703
+ append_newline(sm);
704
+ fcall inline;
705
+ };
706
+
707
+ '[tr]' => {
708
+ dstack_push(sm, &BLOCK_TR);
709
+ append_newline(sm);
710
+ append_block(sm, "<tr>");
711
+ append_newline(sm);
712
+ };
713
+
714
+ '[/tr]' => {
715
+ if (dstack_check(sm, BLOCK_TR)) {
716
+ dstack_pop(sm);
717
+ append_newline(sm);
718
+ append_block(sm, "</tr>");
719
+ append_newline(sm);
720
+ } else {
721
+ append(sm, "[/tr]");
722
+ }
723
+ };
724
+
725
+ '[td]' => {
726
+ dstack_push(sm, &BLOCK_TD);
727
+ append_newline(sm);
728
+ append_block(sm, "<td>");
729
+ append_newline(sm);
730
+ fcall inline;
731
+ };
732
+
733
+ '[/table]' => {
734
+ if (dstack_check(sm, BLOCK_TABLE)) {
735
+ dstack_pop(sm);
736
+ append_newline(sm);
737
+ append_block(sm, "</table>");
738
+ append_newline(sm);
739
+ append_newline(sm);
740
+ fret;
741
+ } else {
742
+ append(sm, "[/table]");
743
+ }
744
+ };
745
+
746
+ '\0' => {
747
+ fhold;
748
+ fret;
749
+ };
750
+
751
+ any;
752
+ *|;
753
+
754
+ list := |*
755
+ list_item => {
756
+ g_debug("list start");
757
+
758
+ int prev_nest = sm->list_nest;
759
+ sm->list_mode = true;
760
+ sm->list_nest = sm->a2 - sm->a1;
761
+ fexec sm->b1;
762
+
763
+ if (sm->list_nest > prev_nest) {
764
+ for (int i=prev_nest; i<sm->list_nest; ++i) {
765
+ g_debug(" dstack push ul");
766
+ g_debug(" print <ul>");
767
+ append_block(sm, "<ul>");
768
+ append_newline(sm);
769
+ dstack_push(sm, &BLOCK_UL);
770
+ }
771
+ } else if (sm->list_nest < prev_nest) {
772
+ for (int i=sm->list_nest; i<prev_nest; ++i) {
773
+ if (dstack_check(sm, BLOCK_UL)) {
774
+ g_debug(" dstack pop");
775
+ g_debug(" print </ul>");
776
+ dstack_pop(sm);
777
+ append_block(sm, "</ul>");
778
+ append_newline(sm);
779
+ }
780
+ }
781
+ }
782
+
783
+ append_block(sm, "<li>");
784
+ dstack_push(sm, &BLOCK_LI);
785
+
786
+ g_debug(" print <li>");
787
+ g_debug(" push li");
788
+ g_debug(" call inline");
789
+
790
+ fcall inline;
791
+ };
792
+
793
+ # exit list
794
+ (newline{2,} | '\0') => {
795
+ dstack_rewind(sm);
796
+ fexec sm->ts;
797
+ fret;
798
+ };
799
+
800
+ newline;
801
+
802
+ any => {
803
+ dstack_rewind(sm);
804
+ fhold;
805
+ fret;
806
+ };
807
+ *|;
808
+
809
+ main := |*
810
header => {
  /* hN. text: a1 marks the level digit, b1..b2 the heading text.
     Inline mode always renders the heading as h6. */
  char header = sm->f_inline ? '6' : *sm->a1;

  append_newline(sm);
  append_newline(sm);
  append(sm, "<h");
  append_c(sm, header);
  append_c(sm, '>');
  /* The heading text comes straight from the input; escape it so markup in
     it cannot reach the output verbatim. */
  append_segment_html_escaped(sm, sm->b1, sm->b2 - 1);
  append(sm, "</h");
  append_c(sm, header);
  append(sm, ">");
  append_newline(sm);
  append_newline(sm);
};
829
+
830
+ ('[quote]' space*) => {
831
+ g_debug("block [quote]");
832
+ g_debug(" push quote");
833
+ g_debug(" push p");
834
+ g_debug(" print <blockquote>");
835
+ g_debug(" print <p>");
836
+ g_debug(" call inline");
837
+
838
+ dstack_push(sm, &BLOCK_QUOTE);
839
+ dstack_push(sm, &BLOCK_P);
840
+ append_newline(sm);
841
+ append_newline(sm);
842
+ append_block(sm, "<blockquote><p>");
843
+ append_newline(sm);
844
+ fcall inline;
845
+ };
846
+
847
+ ('[spoiler]' space*) => {
848
+ g_debug("block [spoiler]");
849
+ g_debug(" push spoiler");
850
+ g_debug(" push p");
851
+ g_debug(" print <div>");
852
+ g_debug(" print <p>");
853
+ g_debug(" call inline");
854
+ dstack_push(sm, &BLOCK_SPOILER);
855
+ dstack_push(sm, &BLOCK_P);
856
+ append_newline(sm);
857
+ append_newline(sm);
858
+ append_block(sm, "<div class=\"spoiler\"><p>");
859
+ append_newline(sm);
860
+ fcall inline;
861
+ };
862
+
863
+ '[/spoiler]' => {
864
+ if (dstack_check(sm, BLOCK_P)) {
865
+ dstack_rewind(sm);
866
+ }
867
+
868
+ if (dstack_check(sm, BLOCK_SPOILER)) {
869
+ dstack_rewind(sm);
870
+ }
871
+ };
872
+
873
+ ('[code]' space*) => {
874
+ dstack_push(sm, &BLOCK_CODE);
875
+ append_newline(sm);
876
+ append_newline(sm);
877
+ append_block(sm, "<pre>");
878
+ append_newline(sm);
879
+ fcall code;
880
+ };
881
+
882
+ ('[expand]' space*) => {
883
+ dstack_push(sm, &BLOCK_EXPAND);
884
+ dstack_push(sm, &BLOCK_P);
885
+ append_newline(sm);
886
+ append_newline(sm);
887
+ append_block(sm, "<div class=\"expandable\"><div class=\"expandable-header\">");
888
+ append_block(sm, "<input type=\"button\" value=\"Show\" class=\"expandable-button\"/></div>");
889
+ append_block(sm, "<div class=\"expandable-content\">");
890
+ append_block(sm, "<p>");
891
+ append_newline(sm);
892
+ fcall inline;
893
+ };
894
+
895
(aliased_expand space*) => {
  /* [expand=title]: same structure as the plain expand block, with the
     title shown in the header. */
  dstack_push(sm, &BLOCK_EXPAND);
  dstack_push(sm, &BLOCK_P);
  append_newline(sm);
  append_newline(sm);
  append_block(sm, "<div class=\"expandable\"><div class=\"expandable-header\">");
  append(sm, "<span>");
  /* mark_a2 is a leaving action, so a2 already points at the closing
     bracket; the title ends one byte earlier. */
  append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
  append(sm, "</span>");
  append_block(sm, "<input type=\"button\" value=\"Show\" class=\"expandable-button\"/></div>");
  append_block(sm, "<div class=\"expandable-content\">");
  /* BLOCK_P was pushed above and will be closed with a closing p tag, so
     the opening tag has to be emitted as the plain expand action does. */
  append_block(sm, "<p>");
  append_newline(sm);
  fcall inline;
};
909
+
910
+ ('[nodtext]' space*) => {
911
+ dstack_push(sm, &BLOCK_NODTEXT);
912
+ append_newline(sm);
913
+ append_block(sm, "<p>");
914
+ fcall nodtext;
915
+ };
916
+
917
+ '[table]' => {
918
+ dstack_push(sm, &BLOCK_TABLE);
919
+ append_newline(sm);
920
+ append_newline(sm);
921
+ append_block(sm, "<table class=\"striped\">");
922
+ fcall table;
923
+ };
924
+
925
+ '[tn]' => {
926
+ dstack_push(sm, &BLOCK_TN);
927
+ append_newline(sm);
928
+ append_newline(sm);
929
+ append_block(sm, "<p class=\"tn\">");
930
+ fcall inline;
931
+ };
932
+
933
+ list_item => {
934
+ g_debug("inline list");
935
+ sm->list_nest = 0;
936
+ sm->list_mode = true;
937
+ if (dstack_check(sm, BLOCK_P)) {
938
+ g_debug(" pop dstack");
939
+ dstack_rewind(sm);
940
+ }
941
+ g_debug(" call list");
942
+ fexec sm->ts;
943
+ fcall list;
944
+ };
945
+
946
+ '\0' => {
947
+ g_debug("block 0");
948
+ g_debug(" close dstack");
949
+ dstack_close(sm);
950
+ };
951
+
952
+ newline{2,} => {
953
+ g_debug("block newline2");
954
+ if (dstack_check(sm, BLOCK_P)) {
955
+ g_debug(" pop p");
956
+ dstack_rewind(sm);
957
+ }
958
+ };
959
+
960
+ newline => {
961
+ g_debug("block newline");
962
+ };
963
+
964
+ any => {
965
+ g_debug("block c: %c", fc);
966
+ fhold;
967
+
968
+ if (g_queue_is_empty(sm->dstack) || dstack_check(sm, BLOCK_QUOTE) || dstack_check(sm, BLOCK_SPOILER)) {
969
+ g_debug(" push p");
970
+ g_debug(" print <p>");
971
+ dstack_push(sm, &BLOCK_P);
972
+ append_newline(sm);
973
+ append_block(sm, "<p>");
974
+ }
975
+
976
+ fcall inline;
977
+ };
978
+ *|;
979
+
980
+ }%%
981
+
982
+ %% write data;
983
+
984
// Replaces every space within the first len bytes of str by '_', in place.
static inline void underscore_string(char * str, size_t len) {
  char * const end = str + len;

  for (char * cursor = str; cursor != end; ++cursor) {
    if (*cursor == ' ') {
      *cursor = '_';
    }
  }
}
991
+
992
+ static inline void append(StateMachine * sm, const char * s) {
993
+ sm->output = g_string_append(sm->output, s);
994
+ }
995
+
996
+ static inline void append_newline(StateMachine * sm) {
997
+ #if (PRETTY_PRINT)
998
+ g_string_append_c(sm->output, '\n');
999
+ #endif
1000
+ }
1001
+
1002
+ static inline void append_c(StateMachine * sm, char s) {
1003
+ sm->output = g_string_append_c(sm->output, s);
1004
+ }
1005
+
1006
+ static inline void append_c_html_escaped(StateMachine * sm, char s) {
1007
+ switch (s) {
1008
+ case '<':
1009
+ sm->output = g_string_append(sm->output, "&lt;");
1010
+ break;
1011
+
1012
+ case '>':
1013
+ sm->output = g_string_append(sm->output, "&gt;");
1014
+ break;
1015
+
1016
+ case '&':
1017
+ sm->output = g_string_append(sm->output, "&amp;");
1018
+ break;
1019
+
1020
+ default:
1021
+ sm->output = g_string_append_c(sm->output, s);
1022
+ break;
1023
+ }
1024
+ }
1025
+
1026
+ static inline void append_segment(StateMachine * sm, const char * a, const char * b) {
1027
+ sm->output = g_string_append_len(sm->output, a, b - a + 1);
1028
+ }
1029
+
1030
+ static inline void append_segment_uri_escaped(StateMachine * sm, const char * a, const char * b) {
1031
+ GString * segment_string = g_string_new_len(a, b - a + 1);
1032
+ char * segment = g_uri_escape_string(segment_string->str, G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "#%?", TRUE);
1033
+ sm->output = g_string_append(sm->output, segment);
1034
+ g_string_free(segment_string, TRUE);
1035
+ g_free(segment);
1036
+ }
1037
+
1038
+ static inline void append_segment_html_escaped(StateMachine * sm, const char * a, const char * b) {
1039
+ gchar * segment = g_markup_escape_text(a, b - a + 1);
1040
+ sm->output = g_string_append(sm->output, segment);
1041
+ g_free(segment);
1042
+ }
1043
+
1044
+ static inline void append_block(StateMachine * sm, const char * s) {
1045
+ if (sm->f_inline) {
1046
+ sm->output = g_string_append_c(sm->output, ' ');
1047
+ } else {
1048
+ sm->output = g_string_append(sm->output, s);
1049
+ }
1050
+ }
1051
+
1052
+ static inline void append_closing_p(StateMachine * sm) {
1053
+ size_t i = sm->output->len;
1054
+
1055
+ if (i > 4 && !strncmp(sm->output->str + i - 4, "<br>", 4)) {
1056
+ sm->output = g_string_truncate(sm->output, sm->output->len - 4);
1057
+ }
1058
+
1059
+ append_block(sm, "</p>");
1060
+ }
1061
+
1062
+ static inline void dstack_push(StateMachine * sm, const int * element) {
1063
+ g_queue_push_tail(sm->dstack, (gpointer)element);
1064
+ }
1065
+
1066
+ static inline int * dstack_pop(StateMachine * sm) {
1067
+ return g_queue_pop_tail(sm->dstack);
1068
+ }
1069
+
1070
+ static inline int * dstack_peek(StateMachine * sm) {
1071
+ return g_queue_peek_tail(sm->dstack);
1072
+ }
1073
+
1074
+ static inline bool dstack_search(StateMachine * sm, const int * element) {
1075
+ return g_queue_find(sm->dstack, (gconstpointer)element);
1076
+ }
1077
+
1078
+ static inline bool dstack_check(StateMachine * sm, int expected_element) {
1079
+ int * top = dstack_peek(sm);
1080
+ return top && *top == expected_element;
1081
+ }
1082
+
1083
+ static inline bool dstack_check2(StateMachine * sm, int expected_element) {
1084
+ if (sm->dstack->length < 2) {
1085
+ return false;
1086
+ }
1087
+
1088
+ int * top2 = g_queue_peek_nth(sm->dstack, sm->dstack->length - 2);
1089
+ return top2 && *top2 == expected_element;
1090
+ }
1091
+
1092
+ static void dstack_print_element(gpointer data, gpointer user_data) {
1093
+ printf("%i\n", *(int *)data);
1094
+ }
1095
+
1096
+ static void dstack_dump(StateMachine * sm) {
1097
+ g_queue_foreach(sm->dstack, dstack_print_element, NULL);
1098
+ }
1099
+
1100
+ static void dstack_rewind(StateMachine * sm) {
1101
+ int * element = dstack_pop(sm);
1102
+
1103
+ if (element == NULL) {
1104
+ return;
1105
+ }
1106
+
1107
+ switch (*element) {
1108
+ case BLOCK_P:
1109
+ append_closing_p(sm);
1110
+ append_newline(sm);
1111
+ break;
1112
+
1113
+ case INLINE_SPOILER:
1114
+ append(sm, "</span>");
1115
+ break;
1116
+
1117
+ case BLOCK_SPOILER:
1118
+ append_block(sm, "</div>");
1119
+ break;
1120
+
1121
+ case BLOCK_QUOTE:
1122
+ append_block(sm, "</blockquote>");
1123
+ break;
1124
+
1125
+ case BLOCK_EXPAND:
1126
+ append_block(sm, "</div></div>");
1127
+ break;
1128
+
1129
+ case BLOCK_NODTEXT:
1130
+ append_closing_p(sm);
1131
+ append_newline(sm);
1132
+ break;
1133
+
1134
+ case BLOCK_CODE:
1135
+ append_block(sm, "</pre>");
1136
+ break;
1137
+
1138
+ case BLOCK_TD:
1139
+ append_block(sm, "</td>");
1140
+ break;
1141
+
1142
+ case INLINE_NODTEXT:
1143
+ break;
1144
+
1145
+ case INLINE_B:
1146
+ append(sm, "</strong>");
1147
+ break;
1148
+
1149
+ case INLINE_I:
1150
+ append(sm, "</em>");
1151
+ break;
1152
+
1153
+ case INLINE_U:
1154
+ append(sm, "</u>");
1155
+ break;
1156
+
1157
+ case INLINE_S:
1158
+ append(sm, "</s>");
1159
+ break;
1160
+
1161
+ case INLINE_TN:
1162
+ append(sm, "</span>");
1163
+ break;
1164
+
1165
+ case BLOCK_TN:
1166
+ append_closing_p(sm);
1167
+ append_newline(sm);
1168
+ break;
1169
+
1170
+ case BLOCK_TABLE:
1171
+ append_block(sm, "</table>");
1172
+ break;
1173
+
1174
+ case BLOCK_THEAD:
1175
+ append_block(sm, "</thead>");
1176
+ break;
1177
+
1178
+ case BLOCK_TBODY:
1179
+ append_block(sm, "</tbody>");
1180
+ break;
1181
+
1182
+ case BLOCK_TR:
1183
+ append_block(sm, "</tr>");
1184
+ break;
1185
+
1186
+ case BLOCK_UL:
1187
+ append_block(sm, "</ul>");
1188
+ append_newline(sm);
1189
+ break;
1190
+
1191
+ case BLOCK_LI:
1192
+ append_block(sm, "</li>");
1193
+ append_newline(sm);
1194
+ break;
1195
+ }
1196
+ }
1197
+
1198
+ static void dstack_close(StateMachine * sm) {
1199
+ while (dstack_peek(sm) != NULL) {
1200
+ dstack_rewind(sm);
1201
+ }
1202
+ }
1203
+
1204
// Reports whether c is one of the punctuation characters that the link and
// mention actions strip from the end of a match and re-emit as plain text.
static inline bool is_boundary_c(char c) {
  static const char boundary_chars[] = {
    ':', ';', '.', ',', '!', '?', ')', ']', '<', '>'
  };

  for (size_t i = 0; i < sizeof boundary_chars; ++i) {
    if (boundary_chars[i] == c) {
      return true;
    }
  }

  return false;
}
1221
+
1222
+ static void init_machine(StateMachine * sm, VALUE input) {
1223
+ sm->p = RSTRING_PTR(input);
1224
+ sm->pe = sm->p + RSTRING_LEN(input);
1225
+ sm->eof = sm->pe;
1226
+ sm->ts = NULL;
1227
+ sm->te = NULL;
1228
+ sm->cs = 0;
1229
+ sm->act = 0;
1230
+ sm->top = 0;
1231
+ size_t output_length = RSTRING_LEN(input);
1232
+ if (output_length < (INT16_MAX / 2)) {
1233
+ output_length *= 2;
1234
+ }
1235
+ sm->output = g_string_sized_new(output_length);
1236
+ sm->a1 = NULL;
1237
+ sm->a2 = NULL;
1238
+ sm->b1 = NULL;
1239
+ sm->b2 = NULL;
1240
+ sm->f_inline = false;
1241
+ sm->stack = g_array_sized_new(FALSE, TRUE, sizeof(int), 16);
1242
+ sm->dstack = g_queue_new();
1243
+ sm->list_nest = 0;
1244
+ sm->list_mode = false;
1245
+ }
1246
+
1247
+ static void free_machine(StateMachine * sm) {
1248
+ g_string_free(sm->output, TRUE);
1249
+ g_array_free(sm->stack, FALSE);
1250
+ g_queue_free(sm->dstack);
1251
+ g_free(sm);
1252
+ }
1253
+
1254
+ static VALUE parse(int argc, VALUE * argv, VALUE self) {
1255
+ g_debug("start\n");
1256
+
1257
+ if (argc == 0) {
1258
+ rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)");
1259
+ }
1260
+
1261
+ VALUE input = argv[0];
1262
+
1263
+ StateMachine * sm = (StateMachine *)g_malloc0(sizeof(StateMachine));
1264
+ input = rb_str_cat(input, "\0", 1);
1265
+ init_machine(sm, input);
1266
+
1267
+ if (argc > 1) {
1268
+ VALUE options = argv[1];
1269
+
1270
+ if (!NIL_P(options)) {
1271
+ VALUE opt_inline = rb_hash_aref(options, ID2SYM(rb_intern("inline")));
1272
+
1273
+ if (RTEST(opt_inline)) {
1274
+ sm->f_inline = true;
1275
+ }
1276
+ }
1277
+ }
1278
+
1279
+ %% write init;
1280
+ %% write exec;
1281
+
1282
+ dstack_close(sm);
1283
+
1284
+ VALUE ret = rb_str_new(sm->output->str, sm->output->len);
1285
+
1286
+ free_machine(sm);
1287
+
1288
+ return ret;
1289
+ }
1290
+
1291
+ void Init_dtext() {
1292
+ VALUE mDTextRagel = rb_define_module("DTextRagel");
1293
+ rb_define_singleton_method(mDTextRagel, "parse", parse, -1);
1294
+ }