jay_flavored_markdown 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1152 @@
1
+ # -*- coding: utf-8 -*-
2
+ #if __FILE__ == $0
3
+ # ################################################################
4
+ # # rbenv support:
5
+ # # If this file is a symlink, and bound to a specific ruby
6
+ # # version via rbenv (indicated by RBENV_VERSION),
7
+ # # I want to resolve the symlink and re-exec
8
+ # # the original executable respecting the .ruby_version
9
+ # # which should indicate the right version.
10
+ # #
11
+ # if File.symlink?(__FILE__) and ENV["RBENV_VERSION"]
12
+ # ENV["RBENV_VERSION"] = nil
13
+ # shims_path = File.expand_path("shims", ENV["RBENV_ROOT"])
14
+ # ENV["PATH"] = shims_path + ":" + ENV["PATH"]
15
+ # exec(File.readlink(__FILE__), *ARGV)
16
+ # end
17
+
18
+ # gemfile = File.expand_path("../../Gemfile", __FILE__)
19
+
20
+ # if File.exists?(gemfile + ".lock")
21
+ # ENV["BUNDLE_GEMFILE"] = gemfile
22
+ # require "bundler/setup"
23
+ # Bundler.require
24
+ # end
25
+ # require "pp"
26
+ #end
27
+
28
+
29
+ require 'kramdown-parser-gfm'
30
+ require 'html/pipeline'
31
+ require 'active_support'
32
+ require 'active_support/core_ext'
33
+ require File.expand_path("../markdown_to_ascii", __FILE__)
34
+
35
+
36
+ ################################################################
37
+ ## Helper classes to manipulate list items
38
# Immutable multi-level counter used to number nested list items and
# sections.
#
# The state is an Array holding one mark (a String, or nil while a level
# has not been counted yet) per nesting level.  Every operation returns a
# NEW counter; the receiver is never modified.
#
# Subclasses must define INIT_VALUES (first mark of each level),
# COUNT_SEPARATOR (joiner used by #full_mark) and COUNTER_TYPE.
class LeveledCounter
  # Build an empty counter for +type+ (:item or :section).
  # Returns nil for any other type.
  def self.create(type)
    case type
    when :item
      ListItemLeveledCounter.new
    when :section
      SectionCounter.new
    end
  end

  # init:: Array of marks, outermost level first (defaults to no levels).
  def initialize(init = [])
    @counter = init
  end

  # Counter advanced by one at the current (innermost) level.
  def next
    new { |c| succ(c, level) }
  end

  # Counter with one more, not yet counted, level appended.
  def next_level
    new { |c| c << nil }
  end

  # Counter with the innermost level dropped.
  def previous_level
    new { |c| c.pop }
  end

  # Counter positioned on a heading of depth +lv+ (1-based):
  # * deeper than now   -> open the missing levels at their initial marks
  # * shallower than now -> drop the extra levels, then advance
  # * same depth        -> advance
  def set_level(lv)
    new do |c|
      diff = lv - c.size
      if diff > 0
        # c.size is the index of the slot being appended, so it is also
        # the index of that level's initial mark.
        diff.times { c << init_value_for(c.size) }
      else
        (-diff).times { c.pop }
        succ(c, c.size - 1)
      end
    end
  end

  # Counter whose current level is back at its initial mark.
  def reset
    new { |c| c[level] = init_value_for(level) }
  end

  # Mark of the current level only (nil if not counted yet).
  def mark
    @counter[level]
  end

  # Marks of all levels joined with COUNT_SEPARATOR.
  def full_mark
    @counter.join(count_separator)
  end

  def type
    self.class::COUNTER_TYPE
  end

  # Zero-based index of the current level (-1 when there are no levels).
  def level
    @counter.size - 1
  end

  private

  def init_values
    self.class::INIT_VALUES
  end

  def count_separator
    self.class::COUNT_SEPARATOR
  end

  # Initial mark for level +idx+.  Levels deeper than INIT_VALUES reuse the
  # last configured style instead of silently becoming nil.  A copy is
  # returned so the constant is never shared with counter state.
  def init_value_for(idx)
    value = init_values.fetch(idx) { init_values.last }
    value && value.dup
  end

  # Advance slot +idx+ in place, or start it at its initial mark.
  def succ(counter, idx)
    counter[idx] ? counter[idx].succ! : (counter[idx] = init_value_for(idx))
  end

  # Yield a deep copy of the state for mutation and wrap it in a new
  # counter of the same class.
  def new
    copy = @counter.map { |m| m && m.dup }
    yield copy
    self.class.new(copy)
  end
end
120
+
121
# Counter for nested list items: the outermost level counts "1", "2", ...,
# the second "A", "B", ... and the third "a", "b", ...
class ListItemLeveledCounter < LeveledCounter
  INIT_VALUES = ["1", "A", "a"]
  COUNT_SEPARATOR = '-'
  COUNTER_TYPE = :item

  # Text inserted after a list bullet, e.g. " (A)".
  # Returns nil while the current level has no mark.
  def label
    current = mark
    current ? " (#{current})" : current
  end
end
130
+
131
# Counter for section headings; every level starts at "1" and the full
# path is rendered dot-separated (e.g. "2.1.3").
class SectionCounter < LeveledCounter
  INIT_VALUES = ["1", "1", "1"]
  COUNT_SEPARATOR = '.'
  COUNTER_TYPE = :section

  # Text inserted after the heading marker, e.g. " 2.1".
  # Returns nil while the current level has no mark.
  def label
    current = mark
    current ? " #{full_mark}" : current
  end
end
140
+
141
# Describes one kind of countable Markdown construct (list item or section).
#
# Subclasses supply START_REGEXP (recognises the first line of the
# construct), INDENT (regexp whose first capture measures nesting depth)
# and FEATURE_TYPE.
class MarkdownFeature
  # Build the feature object for +type+ (:item or :section); nil otherwise.
  def self.create(type)
    case type
    when :item    then ListItemFeature.new
    when :section then SectionFeature.new
    end
  end

  # Match position of START_REGEXP in +string+, or nil when it does not match.
  def match_start_regexp?(string)
    start_regexp =~ string
  end

  # Length of the first capture of INDENT in +line+; 0 when INDENT does
  # not match.
  def indent_length(line)
    found = indent.match(line)
    found ? found[1].length : 0
  end

  # Fresh counter of this feature's type, already opened at its first level.
  def create_counter
    LeveledCounter.create(type).next_level
  end

  # First counter in +counters+ whose type equals this feature's type.
  def select_counter(counters)
    wanted = type
    counters.find { |candidate| candidate.type == wanted }
  end

  private

  def type
    self.class::FEATURE_TYPE
  end

  def start_regexp
    self.class::START_REGEXP
  end

  def indent
    self.class::INDENT
  end
end
181
+
182
# List items introduced by "+" or "-"; depth is the leading whitespace.
class ListItemFeature < MarkdownFeature
  START_REGEXP = /^\s*[+-] /
  INDENT = /^(\s*)/
  FEATURE_TYPE = :item

  # True when +string+ still belongs to the item that started at
  # +current_indent+: it is indented deeper, or it is a blank line.
  # A nil +string+ (end of input) is never inside.
  def inside_of_list?(string, current_indent)
    if string.nil?
      false
    elsif indent_length(string) > current_indent
      true
    else
      (string =~ /^\r*$/) ? true : false
    end
  end
end
194
+
195
# Section headings written with two or more "#"; depth is the number of
# "#" characters after the first one.
class SectionFeature < MarkdownFeature
  START_REGEXP = /^##+ /
  INDENT = /^#(#+)/
  FEATURE_TYPE = :section

  # True when +string+ still belongs to the section that started at
  # +current_indent+: it is a deeper heading, or it is not a heading line
  # at all.  A nil +string+ (end of input) is never inside.
  def inside_of_list?(string, current_indent)
    if string.nil?
      false
    elsif indent_length(string) > current_indent
      true
    else
      match_start_regexp?(string) ? false : true
    end
  end
end
207
+
208
# Walks Markdown lines, recognises list items and section headings, and
# hands each such header line to a block together with the counter that
# applies to it.
class MarkdownEnumerator
  # lines:: Array of source lines (without trailing newlines).
  def initialize(lines)
    @lines = lines
    @features = [:item, :section].map { |kind| MarkdownFeature.create(kind) }
  end

  # Returns a new Array of lines in which every header line has been
  # replaced by the block's result; the block receives (line, counter).
  def filter(&block)
    scan(@lines.dup, @features.map(&:create_counter), &block)
  end

  private

  # Recursive worker.  Consumes +lines+ destructively.
  def scan(lines, counters, &block)
    return [] if lines.empty?

    head = lines.shift
    feature = @features.find { |candidate| candidate.match_start_regexp?(head) }

    unless feature
      # No feature matched, so there is no counter to reset: passing nil
      # makes reset_counters return an unchanged copy of +counters+.
      return [head] + scan(lines, reset_counters(counters, nil), &block)
    end

    counter = feature.select_counter(counters)
    depth = feature.indent_length(head)

    # Collect the lines nested under this header.
    body = []
    body << lines.shift while (peek = lines.first) && feature.inside_of_list?(peek, depth)

    converted = [yield(head, counter)]
    nested    = scan(body, next_level_counters(counters, counter), &block)
    following = scan(lines, next_counters(counters, counter), &block)
    converted + nested + following
  end

  # +counters+ with +counter+ advanced by one.
  def next_counters(counters, counter)
    counters.map { |item| item == counter ? item.next : item }
  end

  # +counters+ with +counter+ opened one level deeper.
  def next_level_counters(counters, counter)
    counters.map { |item| item == counter ? item.next_level : item }
  end

  # +counters+ with +counter+ reset to its initial mark.
  def reset_counters(counters, counter)
    counters.map { |item| item == counter ? item.reset : item }
  end
end
256
+
257
# Parent-link helpers for tree nodes.
#
# The including class must keep its child nodes in @children and expose
# +type+; every child must respond to +parent=+ and +make_parent_link+.
module TreeUtils
  attr_accessor :parent

  # Set +parent+ on every descendant, recursively.  Returns self.
  def make_parent_link
    @children.each do |child|
      child.parent = self
      child.make_parent_link
    end
    self
  end

  # Ancestors ordered from the direct parent up to the root.
  def parents
    chain = []
    node = parent
    while node
      chain << node
      node = node.parent
    end
    chain
  end

  # Nearest ancestor whose type is +type+, or nil.
  def find_first_ancestor(type)
    parents.find { |ancestor| ancestor.type == type }
  end

  # Truthy when the direct parent exists and has the given +type+.
  def parent?(type)
    up = parent
    up && up.type == type
  end

  # True when any ancestor has the given +type+.
  def ancestor?(type)
    parents.any? { |ancestor| ancestor.type == type }
  end
end
291
+
292
# Minimal tree visitor.  For an element of type :foo the method
# +visit_foo+ is called when the visitor responds to it; otherwise the
# traversal simply descends into the element's children.
class Visitor
  # Memoising map from element type to handler method name.
  DISPATCHER = Hash.new {|h,k| h[k] = "visit_#{k}"}

  # Visit +el+ (and, by default, its subtree).  Always returns +el+.
  def traverse(el)
    handler = DISPATCHER[el.type]
    if respond_to?(handler)
      send(handler, el)
    else
      el.children.each { |child| traverse(child) }
    end
    el
  end
end
307
+
308
# Assigns counters to the tree: every <li> directly inside an :ol and
# every :header gets its counter stored in +el.value+.
class NumberingVisitor < Visitor
  def initialize
    @label_counter   = LeveledCounter.create(:item)
    @section_counter = LeveledCounter.create(:section)
  end

  # Open a new list level for the children, close it afterwards.
  def visit_ol(el)
    enter_ol(el)
    el.children.each { |child| traverse(child) }
    exit_ol(el)
  end

  # Number the item when its parent is an ordered list, then descend.
  def visit_li(el)
    if el.parent.type == :ol
      @label_counter = @label_counter.next
      el.value = @label_counter
      el.attr[:class] = "bullet-list-item"
    end
    el.children.each { |child| traverse(child) }
  end

  # Move the section counter to the header's level and attach it.
  # Header children are not traversed.
  def visit_header(el)
    @section_counter = @section_counter.set_level(el.options[:level])
    el.value = @section_counter
  end

  private

  def enter_ol(el)
    @label_counter = @label_counter.next_level
  end

  def exit_ol(el)
    @label_counter = @label_counter.previous_level
  end
end
348
+
349
# Collects cross-reference data from a numbered tree:
# * xref_table    - label name => counter of the enclosing header / list item
# * item_table    - every :li element in document order
# * section_table - every :header element in document order
# Each recorded element also gets its index stored in
# options[:relative_position].
class ReferenceVisitor < Visitor
  attr_reader :xref_table, :item_table, :section_table

  def initialize
    @xref_table = {}
    @item_table = []
    @section_table = []
  end

  # Bind the label to the counter of its nearest header, or else of its
  # nearest list item.  A label with no such ancestor, or whose ancestor
  # carries no counter, is left unbound instead of raising.
  def visit_label(el)
    ref = el.find_first_ancestor(:header) || el.find_first_ancestor(:li)
    @xref_table[el.value] = ref.value if ref && ref.value
    visit_children(el)
  end

  def visit_li(el)
    record(el, @item_table)
  end

  def visit_header(el)
    record(el, @section_table)
  end

  private

  # Append +el+ to +table+, remember its position, then descend.
  def record(el, table)
    el.options[:relative_position] = table.size
    table << el
    visit_children(el)
  end

  # Traverse all children of +el+; returns +el+.
  def visit_children(el)
    el.children.each { |child| traverse(child) }
    el
  end
end
384
+
385
# Reopen Kramdown::Element so that every parsed node gains the
# parent-link helpers from TreeUtils (parent, parents,
# find_first_ancestor, ...).
module Kramdown
  class Element
    include TreeUtils
  end
end
390
+
391
+ ################################################################
392
+ ## Kramdown parser with Jay hack
393
+
394
module Kramdown
  module Parser
    # GFM parser extended with Jay's span-level notations:
    #   <<name>>               label           (:label element)
    #   [[name]]               reference       (:ref element)
    #   -->(who !:0001)        action item     (:action_item element)
    #   org/repo/#123          GitHub issue    (:issue_link element)
    # After parsing, nodes are linked to their parents and numbered by
    # NumberingVisitor.
    class JayKramdown < GFM

      JAY_LIST_START_UL = /^(#{OPT_SPACE}[*])([\t| ].*?\n)/
      JAY_LIST_START_OL = /^(#{OPT_SPACE}(?:\d+\.|[+-]))([\t| ].*?\n)/

      # Registers the Jay span parsers in front of the inherited ones.
      # Because each one is unshifted, the last registered
      # (:issue_link_tags) ends up first in @span_parsers.
      def initialize(source, options)
        super
        @span_parsers.unshift(:label_tags)
        @span_parsers.unshift(:ref_tags)
        @span_parsers.unshift(:action_item_tags)
        @span_parsers.unshift(:issue_link_tags)
      end

      # Runs the inherited parse, then sets parent links on the whole tree
      # and lets NumberingVisitor attach counters to it.
      def parse
        super
        @root.make_parent_link
        @root = NumberingVisitor.new.traverse(@root)
      end

      # Override element type:
      # Original Kramdown parser recognizes '+' and '-' as UL.
      # However, Jay takes them as OL.
      def new_block_el(*args)
        if args[0] == :ul && @src.check(JAY_LIST_START_OL)
          args[0] = :ol
          super(*args)
        else
          super(*args)
        end
      end

      private

      # <<name>> : the captured name becomes the value of a :label span.
      LABEL_TAGS_START = /<<([^<>]+)>>/
      def parse_label_tags
        @src.pos += @src.matched_size
        @tree.children << Element.new(:label, @src[1], nil, category: :span)
      end
      define_parser(:label_tags, LABEL_TAGS_START, '<<')

      # [[name]] : the captured name becomes the value of a :ref span.
      REF_TAGS_START = /\[\[(.*?)\]\]/
      def parse_ref_tags
        @src.pos += @src.matched_size
        @tree.children << Element.new(:ref, @src[1], nil, category: :span)
      end
      define_parser(:ref_tags, REF_TAGS_START, '\[\[')

      # -->(assignee !:NNNN) : captures the assignee and a four-digit
      # action number; both are stripped before being stored.
      ACTION_ITEM_TAGS_START = /-->\((.+?)!:([0-9]{4})\)/
      def parse_action_item_tags
        assignee, action = @src[1].strip, @src[2].strip
        @src.pos += @src.matched_size
        @tree.children << Element.new(:action_item, nil, {"class" => "action-item", "data-action-item" => action}, :assignee => assignee, :action => action, :location => @src.current_line_number)
      end
      define_parser(:action_item_tags, ACTION_ITEM_TAGS_START, '-->')

      # FIXME: once omitting the organization is supported, use the
      # commented-out regexp below instead.
      # ISSUE_LINK_TAGS_START = /(?:([\w.-]+)\/)??(?:([\w.-]+)\/)?#(\d+)/
      ISSUE_LINK_TAGS_START = /([\w.-]+)\/([\w.-]+)\/#(\d+)/
      # organization/repository/#number : builds the GitHub issue URL from
      # the three captures and keeps the matched text for rendering.
      def parse_issue_link_tags
        url = "https://github.com/#{@src[1]}/#{@src[2]}/issues/#{@src[3]}"
        @src.pos += @src.matched_size
        @tree.children << Element.new(:issue_link, nil, {"class" => "github-issue", "href" => url}, :match => @src[0], :location => @src.current_line_number)
      end
      # NOTE(review): the empty span-start string presumably lets this
      # parser be tried at any position -- confirm against kramdown's
      # define_parser contract.
      define_parser(:issue_link_tags, ISSUE_LINK_TAGS_START, '')
    end
  end
end
463
+
464
+ ################################################################
465
+ ## Kramdown to HTML converter with additions
466
+
467
module Kramdown
  module Converter
    #
    # Convert a parsed tree to line-numbered HTML.
    # Kramdown::Document reaches this class via +to_line_numbered_html+.
    #
    # On construction the tree is annotated with "data-linenum" attributes
    # (from each element's :location option) and scanned by
    # ReferenceVisitor so that references can be resolved while rendering.
    #
    class LineNumberedHtml < Html

      def initialize(root, options)
        super
        @xref_table = {}
        @root = options_to_attributes(@root, :location, "data-linenum")
        # @root = add_numbers_to_li_text(@root)
        ref_visitor = ReferenceVisitor.new
        @root = ref_visitor.traverse(@root)
        @xref_table = ref_visitor.xref_table
        @item_table = ref_visitor.item_table
        @section_table = ref_visitor.section_table
        debug_dump_tree(@root) if $JAY_DEBUG
        @root
      end

      private

      # Print the element tree (type and value, indented by depth) to STDERR.
      def debug_dump_tree(tree, indent = 0)
        STDERR.print " " * indent
        STDERR.print "#{tree.type} #{tree.value}\n"
        tree.children.each do |c|
          debug_dump_tree(c, indent + 2)
        end
      end

      # Render [[name]].
      # * A known label renders as "(<full mark of its target>)".
      # * A run of "+" or "-" is a relative reference: N characters mean
      #   the Nth following ("+") or preceding ("-") list item / section,
      #   counted from the enclosing header or list item.
      # * Anything that cannot be resolved renders as "(???)".
      def convert_ref(el, indent)
        target = @xref_table[el.value]
        return "(#{target.full_mark})" if target

        if el.value =~ /^(\++|-+)$/
          marks = $1
          parent = el.find_first_ancestor(:header) || el.find_first_ancestor(:li)
          # A relative reference outside any header / list item has no
          # base position and stays unresolved.
          if parent
            table = parent.type == :li ? @item_table : @section_table
            rel_pos = (marks.include?("+") ? 1 : -1) * marks.length
            idx = parent.options[:relative_position] + rel_pos
            ref_el = idx >= 0 ? table[idx] : nil
            # Items of unordered lists are in the table too but carry no
            # counter, so only render targets that can produce a mark.
            if ref_el && ref_el.value.respond_to?(:full_mark)
              return "(#{ref_el.value.full_mark})"
            end
          end
        end
        "(???)"
      end

      # Labels only define reference targets; they render as nothing.
      def convert_label(el, indent)
        ""
      end

      def convert_action_item(el, indent)
        el.attr[:href] = ""
        format_as_span_html(:a, el.attr, "-->(#{el.options[:assignee]} !:#{el.options[:action]})")
      end

      def convert_issue_link(el, indent)
        format_as_span_html(:a, el.attr, el.options[:match])
      end

      # Recursively bind every :label under +el+ to the counter of its
      # nearest header / list item (same rule as ReferenceVisitor).
      # Labels without such an ancestor are skipped.  Returns +el+.
      def make_xref(el)
        if el.type == :label
          ref = el.find_first_ancestor(:header) || el.find_first_ancestor(:li)
          @xref_table[el.value] = ref.value if ref && ref.value
        end
        el.children.each do |child|
          make_xref(child)
        end
        return el
      end

      # def add_numbers_to_li_text(el)
      #   if el.type == :li && el.value && (text = find_first_type(el, [:ref, :text]))
      #     STDERR.print "TEXT #{text.type}\n"
      #     if text.type == :text
      #       text.value = "(#{el.value.mark}) #{text.value}"
      #     else
      #       # :ref
      #       # XXX
      #     end
      #   end
      #   el.children.each do |child|
      #     add_numbers_to_li_text(child)
      #   end
      #   return el
      # end

      #
      # Add span tags and css classes to list headers.
      #
      # Original HTML:
      #   <ul>
      #     <li>(1) item header1</li>
      #     <li>(2) item header2</li>
      #   </ul>
      #
      # This method:
      #   <ul>
      #     <li class="bullet-list-item">
      #       <span class="bullet-list-marker">(1)</span> item header1
      #     </li>
      #     <li class="bullet-list-item">
      #       <span class="bullet-list-marker">(2)</span> item header2
      #     </li>
      #   </ul>
      #
      # Items without a counter (el.value is nil) get class "ul_list_item"
      # instead.
      #
      def convert_li(el, indent)
        attr = el.attr
        unless el.value
          # Merge with an existing "class" so the tag never carries two
          # class attributes; "class" stays the first attribute.
          merged = ['ul_list_item', el.attr['class']].compact.join(' ')
          attr = { 'class' => merged }.merge(el.attr.reject { |k, _| k == 'class' })
        end
        output = ' '*indent << "<#{el.type}" << html_attributes(attr) << ">"

        if el.value.respond_to?(:mark)
          output << "<span class=\"bullet-list-marker\">(#{el.value.mark})</span>"
        end

        res = inner(el, indent)
        if el.children.empty? || (el.children.first.type == :p && el.children.first.options[:transparent])
          output << res << (res =~ /\n\Z/ ? ' '*indent : '')
        else
          output << "\n" << res << ' '*indent
        end
        output << "</#{el.type}>\n"
        # Trace output belongs to debug mode only.
        STDERR.puts "LI: #{output}" if $JAY_DEBUG
        output
      end

      # Like the stock header conversion, but the header text is prefixed
      # with the label of the counter stored in el.value.
      def convert_header(el, indent)
        attr = el.attr.dup
        if @options[:auto_ids] && !attr['id']
          attr['id'] = generate_id(el.options[:raw_text])
        end
        @toc << [el.options[:level], attr['id'], el.children] if attr['id'] && in_toc?(el)
        level = output_header_level(el.options[:level])
        format_as_block_html("h#{level}", attr, "#{el.value.label} #{inner(el, indent)}", indent)
      end

      # Copy options[option_name] into attr[attr_name] for +el+ and all of
      # its descendants (only where the option is set).  Returns +el+.
      def options_to_attributes(el, option_name, attr_name)
        if el.options[option_name]
          el.attr[attr_name] = el.options[option_name]
        end
        el.children.each do |child|
          options_to_attributes(child, option_name, attr_name)
        end
        return el
      end

    end # class LineNumberedHtml
  end # module Converter
end # module Kramdown
617
+
618
+ #
619
+ # Convert Text to HTML filter conformed to HTML::Pipeline
620
+ # https://github.com/jch/html-pipeline
621
+ #
622
# Text filter that renders Jay-flavored Markdown to line-numbered HTML.
class JayFlavoredMarkdownFilter < HTML::Pipeline::TextFilter
  # Returns the rendered HTML, stripped and tagged as UTF-8.
  def call
    kramdown_options = {
      input: "JayKramdown",
      # syntax_highlighter: :rouge,
      # syntax_highlighter_opts: {
      #   line_numbers: true,
      #   css_class: 'codehilite'
      # }
    }
    html = Kramdown::Document.new(@text, kramdown_options).to_line_numbered_html
    html.strip.force_encoding("utf-8")
  end
end
635
+
636
+ #
637
+ # Convert Text to HTML filter conformed to HTML::Pipeline
638
+ # https://github.com/jch/html-pipeline
639
+ #
640
# Text filter that renders Jay-flavored Markdown to plain ASCII text.
class JayFlavoredMarkdownToAsciiFilter < HTML::Pipeline::TextFilter
  # Returns the rendered text, stripped and tagged as UTF-8.
  def call
    kramdown_options = {
      input: "JayKramdown",
      # hard_wrap is honoured by GFM (and therefore by JayKramdown, which
      # inherits from it).  It changes how a newline inside a :text
      # element is handled.  For "aaaaa\nbbbbb":
      #   hard_wrap: false -> text: aaaaa, text: "\nbbbbb"
      #   hard_wrap: true  -> text:(aaaaa), :br, :text:("\nbbbbb")
      # The GFM default is true.
      # hard_wrap: false,
      # syntax_highlighter: :rouge,
      # syntax_highlighter_opts: {
      #   line_numbers: true,
      #   css_class: 'codehilite'
      # }
    }
    ascii = Kramdown::Document.new(@text, kramdown_options).to_ascii
    ascii.strip.force_encoding("utf-8")
  end
end
660
+
661
+ ################################################################
662
+ ## Markdown to Markdown filters
663
+
664
+ #
665
+ # Normalize the indent depth of list items to a multiple of 4 (INDENT_DEPTH)
666
+ #
667
# Rewrites the leading whitespace of every list-item header so that it is
# INDENT_DEPTH spaces per nesting level.
class JayFixIndentDepth < HTML::Pipeline::TextFilter
  INDENT_DEPTH = 4

  def call
    enumerator = MarkdownEnumerator.new(@text.split("\n"))

    fixed = enumerator.filter do |header, count|
      header.sub(/^(\s*)([*+-])(\s+)/) do
        bullet = Regexp.last_match(2)
        gap = Regexp.last_match(3)
        "#{valid_indent(count)}#{bullet}#{gap}"
      end
    end
    @text = fixed.join("\n")
  end

  private

  # Indentation string for the nesting level held by +count+.
  def valid_indent(count)
    " " * INDENT_DEPTH * count.level
  end
end
685
+
686
+ #
687
+ # Convert list item header ``+`` text to ``+ (A)``
688
+ #
689
# Inserts the counter label right after the marker of every list item
# ("+" / "-") and section heading ("##...").
class JayAddLabelToListItems < HTML::Pipeline::TextFilter
  def call
    enumerator = MarkdownEnumerator.new(@text.split("\n"))

    labelled = enumerator.filter do |header, count|
      header.sub(/^(\s*[+-]|##+)(\s+)/) do
        marker = Regexp.last_match(1)
        gap = Regexp.last_match(2)
        "#{marker}#{count.label}#{gap}"
      end
    end
    @text = labelled.join("\n")
  end
end
700
+
701
+ #
702
+ # Org-mode like label and ref converter
703
+ #
704
+ # + (1)
705
+ # + (A) item title <<title>>
706
+ # ...
707
+ # item [[title]] is...
708
+ #
709
+ # is converted to:
710
+ #
711
+ # + (1)
712
+ # + (A) item title
713
+ # ...
714
+ # item (1-A) is...
715
+ #
716
# Resolves Org-mode-like labels and references: "<<name>>" on a header
# line is removed and remembered, "[[name]]" anywhere is replaced by the
# remembered mark in parentheses, or "(???)" when the name is unknown.
class JayAddCrossReference < HTML::Pipeline::TextFilter
  def call
    @labels = {}

    # Pass 1: strip "<<name>>" from header lines, recording name => mark.
    stripped = MarkdownEnumerator.new(@text.split("\n")).filter do |header, count|
      header.gsub(/<<([^<>]+)>>/) do
        store_label(Regexp.last_match(1), count.full_mark)
        ""
      end
    end

    # Pass 2: replace "[[name]]" by "(mark)".
    resolved = stripped.map do |line|
      line.gsub(/\[\[([^\[\]]+)\]\]/) do
        "(#{lookup_label(Regexp.last_match(1)) || '???'})"
      end
    end
    @text = resolved.join("\n")
  end

  private

  def store_label(key, value)
    @labels[key] = value
  end

  def lookup_label(key)
    @labels[key]
  end
end
747
+
748
+ #
749
+ # Remove markup elements(*, +, -, #, [])
750
+ #
751
# Strips Markdown markup from every line, in this order: emphasis,
# heading markers, link targets, list bullets, strikethrough.
class JayRemoveMarkupElements < HTML::Pipeline::TextFilter
  # Cleaning steps, applied to each line from first to last.
  STEPS = [
    :remove_emphasis,
    :remove_header,
    :remove_link,
    :remove_list,
    :remove_strikethrough
  ]

  def call
    cleaned = @text.split("\n").map do |line|
      STEPS.inject(line) { |current, step| send(step, current) }
    end
    @text = cleaned.join("\n")
  end

  private

  # Remove " _hoge_ ", " *fuga* " (the surrounding whitespace goes too).
  # NOTE(review): inside the character class "\1" is presumably not a
  # back-reference but an escaped character -- confirm before relying on it.
  def remove_emphasis(line)
    line.gsub(/\s([\_\*])([^\1]+?)\1\s/, '\2')
  end

  # Remove leading "#" heading markers.
  def remove_header(line)
    line.gsub(/\A#+\s+(.*)/, '\1')
  end

  # Turn "[title](link)" into "[title]".
  def remove_link(line)
    line.gsub(/(\[.*\])\(.*?\)/, '\1')
  end

  # Remove "*", "+", "-" followed by whitespace.
  def remove_list(line)
    line.gsub(/[\*\+\-]\s+/, '')
  end

  # Remove " ~hoge~ " (the surrounding whitespace goes too).
  def remove_strikethrough(line)
    line.gsub(/\s~([^~]+?)~\s/, '\1')
  end
end
789
+
790
+ #
791
+ # Fill columns with MAX_COLUMN characters in one line
792
+ #
793
+ # (1) One sheep, two sheep, three sheep, four sheep, five sheep.
794
+ # (A) Six sheep, seven sheep, eight sheep, nine sheep, ten sheep.
795
+ #
796
+ # is converted to:
797
+ #
798
+ # (1) One sheep, two sheep, three sheep, four
799
+ # sheep, five sheep.
800
+ # (A) Six sheep, seven sheep, eight sheep,
801
+ # nine sheep, ten sheep.
802
+ #
803
# Wraps every line so that it is at most about MAX_COLUMN display columns
# wide.  Continuation lines are indented to the column where the text of
# the first line starts (after a "(A) " label or a "name :: " prefix).
# Non-ASCII characters count as two columns.
class JayFillColumns < HTML::Pipeline::TextFilter
  MAX_COLUMN = 70

  def call
    wrapped = @text.split("\n").map do |line|
      fill_column(line, MAX_COLUMN, paragraph_position(line), ' ')
    end
    @text = wrapped.join("\n")
  end

  private

  # True when a line break must not be placed in front of +c+: ASCII
  # letters (do not split a word), line terminators, the listed
  # punctuation and small kana.  nil (end of string) is false.
  def character_not_to_allow_newline_in_word?(c)
    newline = "\n\r"
    symbol = "-,.,."
    small_kana = "ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ"
    # Escape the literals before placing them in the character class:
    # unescaped, "\r" followed by "-," forms the range \r-, which contains
    # the space and prevented any break between ordinary words.
    literals = Regexp.escape(newline + symbol)
    return !!(c =~ /[a-zA-Z#{literals}#{small_kana}]/)
  end

  # Get position of beginning of line after second line
  def paragraph_position(str)
    # Example1: "  No.100-01 :: Minutes of GN meeting"
    #                           ^
    # Example2: "  (A) This is ...."
    #                  ^
    if /(\A\s*([^\s]+ ::|\(.+\)) +)/ =~ str
      return str_mb_width($1)
    else
      return 0
    end
  end

  # Get width of a character considering multibyte character
  def char_mb_width(c)
    return 0 if c == "\r" || c == "\n" || c.empty?
    return c.ascii_only? ? 1 : 2
  end

  # Get width of string considering multibyte character
  def str_mb_width(str)
    return 0 if str.empty?
    return str.each_char.map{|c| char_mb_width(c)}.inject(:+)
  end

  # str       : String, not including newline
  # max_width : Max width in one line
  # position  : Position of beginning of line after second line
  # padding   : Character used padding
  #
  # Returns +str+ with "\n" inserted at the chosen break points.
  def fill_column(str, max_width, position, padding)
    return str if max_width >= str_mb_width(str)

    # A continuation line needs room for at least one (possibly
    # double-width) character after the padding; otherwise each recursion
    # would emit padding only and never terminate.  Fall back to no
    # hanging indent in that case.
    position = 0 if position > max_width - 2

    # Index of the first character that no longer fits into max_width.
    i = 0
    width = 0
    loop do
      width += char_mb_width(str[i])
      break if width > max_width
      i += 1
    end

    # Do not break in front of a character that must stay attached.
    i += 1 while character_not_to_allow_newline_in_word?(str[i])

    if str.length > i + 1
      head = str[0..(i - 1)] + "\n"
      rest = "#{padding * position}" + str[i..(str.length - 1)]
      return head + fill_column(rest, max_width, position, padding)
    else
      return str
    end
  end
end
871
+
872
+ #
873
+ # Shorten 4 indent to 2 indent.
874
+ #
875
# Halves the leading whitespace of every line (4-space indent -> 2).
class JayShortenIndent < HTML::Pipeline::TextFilter
  def call
    shortened = @text.split("\n").map { |line| shorten_indent(line) }
    @text = shortened.join("\n")
  end

  private

  # Lines without leading whitespace are returned unchanged; otherwise the
  # indent is replaced by half as many spaces (integer division).
  def shorten_indent(line)
    found = /\A(\s+)(.*)/.match(line)
    return line unless found

    "#{' ' * (found[1].length / 2)}#{found[2]}"
  end
end
890
+
891
# Turns Jay shorthand into explicit kramdown links, line by line:
# GitHub issue notation first, then action-item notation.
class JayAddLink < HTML::Pipeline::TextFilter
  def call
    linked = @text.split("\n").map do |line|
      link_action_items(link_issues(line))
    end
    @text = linked.join("\n")
  end

  private

  # Replace GitHub issue notation into solid markdown link.
  # Example:
  #   nomlab/jay/#10
  # becomes:
  #   [nomlab/jay/#10](https://github.com/nomlab/jay/issues/10){:.github-issue}
  # When the organization part is absent, context[:organization] is used.
  #
  # NOTE: {:.foo} is a kramdown dialect to add class="foo" to HTML.
  def link_issues(line)
    line.gsub(%r{(?:([\w.-]+)/)??(?:([\w.-]+)/)?#(\d+)}i) do |match|
      found = Regexp.last_match
      url = "https://github.com/#{found[1] || context[:organization]}/#{found[2]}/issues/#{found[3]}"
      "[#{match}](#{url}){:.github-issue}"
    end
  end

  # Replace action-item notation into solid markdown link.
  # Example:
  #   -->(name !:0001) becomes [-->(name !:0001)](){:data-action-item="0001"}
  #
  # NOTE: {:attr=val} is a kramdown dialect to add attribute to DOM element.
  def link_action_items(line)
    line.gsub(/-->\((.+?)!:([0-9]{4})\)/) do
      found = Regexp.last_match
      assignee = found[1].strip
      action = found[2].strip
      "[-->(#{assignee} !:#{action})](){:.action-item}{:data-action-item=\"#{action}\"}"
    end
  end
end
922
+
923
+ ################################################################
924
+ ## HTML to HTML filters
925
+
926
+ #
927
+ # Add span tags and css classes to list headers.
928
+ #
929
+ # before:
930
+ # <ul>
931
+ # <li>(1) item header1</li>
932
+ # <li>(2) item header2</li>
933
+ # </ul>
934
+ #
935
+ # after:
936
+ # <ul>
937
+ # <li class="bullet-list-item">
938
+ # <span class="bullet-list-marker">(1)</span> item header1
939
+ # </li>
940
+ # <li class="bullet-list-item">
941
+ # <span class="bullet-list-marker">(2)</span> item header2
942
+ # </li>
943
+ # </ul>
944
+ #
945
# HTML filter that wraps a leading "(1)" / "(A)" marker of a list item in
# <span class="bullet-list-marker"> and tags the <li> with the class
# "bullet-list-item".
class JayCustomItemBullet
  # Convenience entry point; forwards all arguments to Filter.call.
  def self.filter(*args)
    Filter.call(*args)
  end

  class Filter < HTML::Pipeline::Filter
    BulletPattern = /\(([a-zA-Z]|\d+)\)/.freeze

    # Pattern used to identify all ``+ (1)`` style
    # Useful when you need iterate over all items.
    ItemPattern = /
      ^
      (?:\s*[-+*]|(?:\d+\.))? # optional list prefix
      \s*                     # optional whitespace prefix
      (                       # checkbox
        #{BulletPattern}
      )
      (?=\s)                  # followed by whitespace
    /x

    ListItemSelector = ".//li[bullet_list_item(.)]".freeze

    # Custom XPath function used by ListItemSelector: keeps the node set
    # only when its text matches ItemPattern.
    class XPathSelectorFunction
      def self.bullet_list_item(nodes)
        nodes if nodes.text =~ ItemPattern
      end
    end

    # Selects first P tag of an LI, if present
    ItemParaSelector = "./p[1]".freeze

    # Accumulator stored in the result hash under :bullet_list_items
    # (created as an empty Array on first access).
    def bullet_list_items
      result[:bullet_list_items] ||= []
    end

    # Public: Select all <li> nodes of +doc+ accepted by ListItemSelector.
    def list_items
      doc.xpath(ListItemSelector, XPathSelectorFunction)
    end

    # Filters the source for bullet list items.
    #
    # Each matching item is tagged with a CSS class and gets its marker
    # wrapped in a span.  Items are processed in reverse document order.
    #
    # Modifications apply to the parsed document directly.
    def filter!
      list_items.reverse.each do |li|
        # add_css_class(li.parent, 'bullet-list')

        # Work on the first <p> of the item when there is one, else on
        # the <li> itself.
        container = li.xpath(ItemParaSelector)[0] || li
        markup = container.inner_html

        next unless markup.chomp =~ ItemPattern && Regexp.last_match(1)

        add_css_class(li, 'bullet-list-item')
        container.inner_html = render_bullet_list_item(markup)
      end
    end

    # Fragment of +item+ with its leading marker wrapped in a span.
    def render_bullet_list_item(item)
      wrapped = item.sub(ItemPattern, '<span class="bullet-list-marker">\1</span>')
      Nokogiri::HTML.fragment(wrapped, 'utf-8')
    end

    def call
      filter!
      doc
    end

    # Private: adds a CSS class name to a node, respecting existing class
    # names.
    def add_css_class(node, *new_class_names)
      existing = (node['class'] || '').split(' ')
      missing = new_class_names.reject { |klass| existing.include?(klass) }
      return if missing.empty?

      node['class'] = (existing + new_class_names).uniq.join(' ')
    end
  end
end
1041
+
1042
+ #
1043
+ # Jay Flavored Markdown to HTML converter
1044
+ #
1045
+ # Octodown is a good example of how to build a custom converter.
1046
+ # https://github.com/ianks/octodown/blob/master/lib/octodown/renderer/github_markdown.rb
1047
+ #
1048
# Converts Jay-flavored Markdown text to HTML through an HTML::Pipeline.
class JayFlavoredMarkdownConverter

  # text::    Markdown source
  # options:: merged over the default pipeline context
  def initialize(text, options = {})
    @text = text
    @options = options
  end

  # Rendered HTML as a String.
  def content
    result = pipeline.call(@text)
    result[:output].to_s
  end

  private

  # Default pipeline context; "data-linenum" is added to the attributes
  # allowed on all elements.
  def context
    whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST.deep_dup
    whitelist[:attributes][:all] << "data-linenum"

    highlighter_opts = {inline_theme: true, line_numbers: true, code_class: 'codehilite'}
    {
      input: "GFM",
      asset_root: 'https://github.githubassets.com/images/icons/',
      whitelist: whitelist,
      syntax_highlighter: :rouge,
      syntax_highlighter_opts: highlighter_opts
    }
  end

  # Filters in execution order.
  def filters
    [
      JayFixIndentDepth,
      # JayAddLink,
      JayFlavoredMarkdownFilter,
      HTML::Pipeline::AutolinkFilter,
      # HTML::Pipeline::SanitizationFilter,
      HTML::Pipeline::ImageMaxWidthFilter,
      HTML::Pipeline::MentionFilter,
      HTML::Pipeline::EmojiFilter,
      HTML::Pipeline::SyntaxHighlightFilter,
    ]
  end

  def pipeline
    HTML::Pipeline.new(filters, context.merge(@options))
  end
end
1087
+
1088
+ #
1089
+ # Jay Flavored Markdown to Plain Text converter
1090
+ #
1091
# Converts Jay-flavored Markdown text to wrapped plain text through an
# HTML::Pipeline.
class JayFlavoredMarkdownToPlainTextConverter

  # text::    Markdown source
  # options:: merged over the default pipeline context
  def initialize(text, options = {})
    @text = text
    @options = options
  end

  # Rendered plain text as a String.
  def content
    result = pipeline.call(@text)
    result[:output].to_s
  end

  private

  # Default pipeline context; "data-linenum" is added to the attributes
  # allowed on all elements.
  def context
    whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST.deep_dup
    whitelist[:attributes][:all] << "data-linenum"
    {
      input: "GFM",
      # hard_wrap: false,
      asset_root: 'https://assets-cdn.github.com/images/icons/',
      whitelist: whitelist
    }
  end

  # Filters in execution order.
  def filters
    [
      JayFixIndentDepth,
      JayFlavoredMarkdownToAsciiFilter,
      # JayAddLabelToListItems,
      # JayAddCrossReference,
      # JayRemoveMarkupElements,
      # JayShortenIndent,
      JayFillColumns,
    ]
  end

  def pipeline
    HTML::Pipeline.new(filters, context.merge(@options))
  end
end
1127
+
1128
# Command-line entry point: reads Markdown from ARGF / STDIN and prints
# the converted document.
#
#   --output TYPE   "html" (default) prints HTML preceded by a small
#                   <style> block; any other value prints plain text
#   --debug         sets $JAY_DEBUG
if __FILE__ == $0

  # Kept as a String because it is compared with the String taken from
  # ARGV below; a Symbol default would never equal "html".
  output_type = "html"

  while ARGV[0] =~ /^--(.*)/
    ARGV.shift
    case $1
    when "output"
      output_type = ARGV.shift
    when "debug"
      $JAY_DEBUG = true
    end
  end

  if output_type == "html"
    puts <<-EOF
<style>
  ol {list-style-type: none;}
</style>
    EOF
    puts JayFlavoredMarkdownConverter.new(gets(nil)).content
  else
    puts JayFlavoredMarkdownToPlainTextConverter.new(gets(nil)).content
  end
end