ya_multilingual_markdown 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 461ba09dd9d70bb8fdc0c7faf1ec95c34ad28a608eac30cd5c6eb1f7e039e7aa
4
+ data.tar.gz: 5312109dee3c9600782046f26e72dd73c08bdfbde3e73cba88a7b3d13c8563fe
5
+ SHA512:
6
+ metadata.gz: a39a6be3c2f75f93b65833d9f9dee104a6c663a80cfc6b7914be62e5456bf70aa75f23c23f1bb946e578595387ba7b199aab4ec3ba9d8b17adbdb9f52b423881
7
+ data.tar.gz: a55008caa80072df3b578bb075d5667340d936f1f7b3589a51ec3afbd747d6ef026d8561d746efad94fa88bb7e560f9b091ef0cb9cf51a38c6c7032c7ee46bf9
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "ya_multilingual_markdown"
5
+
6
+ YAMultilingualMarkdown::CLI.run
@@ -0,0 +1,937 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "kramdown"
4
+ require "kramdown-parser-gfm"
5
+ require "kramdown-math-katex"
6
+ require "yaml"
7
+ require "uri"
8
+ require "pathname"
9
+ require "logger"
10
+ require "optparse"
11
+
12
+ module Kramdown
13
+ module Parser
14
+ class YaMultilingualMarkdown < ::Kramdown::Parser::GFM
15
+ module Common
16
+ ALD_ANY_CHARS = ::Kramdown::Parser::Kramdown::ALD_ANY_CHARS
17
+ OPT_SPACE = ::Kramdown::Parser::Kramdown::OPT_SPACE
18
+
19
+ ML_IAL_BLOCK = %r{\{:(?!:|/)(?<ald>#{ALD_ANY_CHARS}+)\}\s*?\n}
20
+ ML_IAL_BLOCK_START = /^#{OPT_SPACE}#{ML_IAL_BLOCK}/
21
+
22
+ ML_DEFAULT_HEADING_LANG_SEP = " / "
23
+ end
24
+
25
+ module FrontMatter
26
+ class InvalidInputError < StandardError; end
27
+
28
+ ML_FRONT_MATTER = %r{
29
+ \A---(?:\r\n|\r|\n)
30
+ (?<contents>.*?)(?:\r\n|\r|\n)
31
+ ---(?:\r\n|\r|\n)
32
+ }mx
33
+
34
+ private
35
+
36
# Block parser for the YAML front matter matched by ML_FRONT_MATTER.
#
# Takes the text between the opening and closing `---` lines of the current
# scanner match, parses it as YAML and, when the result is a Hash, stores it
# in `@root.metadata`. A document that evaluates to nil or false leaves the
# metadata untouched. The scanner is then advanced past the matched text.
#
# The front matter is part of the input document, so it is parsed with
# `YAML.safe_load` (Symbols permitted) instead of `YAML.load`; this keeps
# custom `!ruby/object` tags from instantiating arbitrary classes on Psych
# versions where `load` is not already restricted.
#
# Returns the new scanner position.
# Raises InvalidInputError when the front matter is neither empty nor a Hash.
def parse_ml_front_matter
  front_matter = ML_FRONT_MATTER.match(@src.matched) { |m| m[:contents] }
  yaml = YAML.safe_load(front_matter, permitted_classes: [Symbol])

  if yaml && !yaml.is_a?(Hash)
    raise InvalidInputError, <<~EOS
      front matter must be Hash, but was \
      #{yaml.class}: #{yaml.inspect}
    EOS
  end

  @root.metadata = yaml if yaml
  @src.pos += @src.matched_size
end
53
+
54
+ ::Kramdown::Parser::Kramdown.define_parser(:ml_front_matter, ML_FRONT_MATTER)
55
+ end
56
+
57
+ module HeadingCommon
58
+ HEADER_ID = ::Kramdown::Parser::Kramdown::HEADER_ID
59
+
60
+ private
61
+
62
# Splits raw heading contents into the visible text and an optional id.
#
# Trailing whitespace is removed first. When HEADER_ID matches, the matched
# suffix is cut off the end of the contents and the captured "id" is returned
# alongside the remaining (right-stripped) text.
#
# Returns a two-element Array `[text, id]`; `id` is nil when no id is present.
def parse_ml_heading_contents(contents)
  stripped = contents.rstrip
  header_id = HEADER_ID.match(stripped)
  return [stripped, nil] unless header_id

  suffix_length = header_id[0].length
  [stripped[0...-suffix_length].rstrip, header_id["id"]]
end
75
+
76
# Consumes the current scanner match and appends a :header element holding
# `text` to the children of the current tree node.
#
# level - heading level stored in the element options.
# text  - merged heading text; stored as `raw_text` and added as the
#         element's text child.
#
# Returns the children Array of the current tree node.
def add_ml_heading(level, text)
  # Read the line number before the scanner is advanced past the match.
  location = @src.current_line_number
  @src.pos += @src.matched_size

  header = new_block_el(:header, nil, nil, level: level, raw_text: text, location: location)
  add_text(text, header)
  @tree.children << header
end
93
+ end
94
+
95
+ module ATXHeading
96
+ include Common
97
+ include HeadingCommon
98
+
99
+ # NOTE: ALD for each heading must be placed _after_ the
100
+ # heading. This compromise allows us to write id at the beginning of
101
+ # headings as well as at the end, with less code.
102
+
103
+ ML_CONSECUTIVE_ATX_HEADING = %r{
104
+ (?<num_signs>\#{1,6}) # Number signs
105
+ \s
106
+ (?<contents>[^\r\n]+) # Contents
107
+ (?:\r\n|\r|\n) # EOL
108
+ #{ML_IAL_BLOCK_START}? # IAL
109
+ }x
110
+
111
+ ML_CONSECUTIVE_ATX_HEADINGS = %r{
112
+ (?<num_signs>\#{1,6}) # Number signs
113
+ \s
114
+ ([^\r\n]+) # Contents
115
+ (?:\r\n|\r|\n) # EOL
116
+ #{ML_IAL_BLOCK_START}? # IAL
117
+ (?:
118
+ (?:\r\n|\r|\n) # EOL
119
+ \k<num_signs> # Number signs
120
+ \s
121
+ [^\r\n]+ # Contents
122
+ (?:\r\n|\r|\n) # EOL
123
+ #{ML_IAL_BLOCK_START}? # IAL
124
+ )+}mx
125
+
126
+ private
127
+
128
# Block parser that merges a run of consecutive ATX headings (one per
# language) into a single header element.
#
# The matched text is cut into blank-line separated chunks. For each chunk
# the heading contents, an optional trailing header id and an optional
# attribute list line are read; when any attribute is present the text is
# wrapped in a `<span>` carrying those attributes. The fragments are
# concatenated and added as one heading whose level is the number of `#`
# signs of the first heading.
#
# Chunks that do not match ML_CONSECUTIVE_ATX_HEADING are skipped; previously
# such a chunk turned the accumulator into nil and the next append raised
# NoMethodError.
#
# NOTE(review): the fragments are joined without a separator here, whereas
# the setext variant joins with the heading language separator — confirm
# which of the two is intended.
#
# Returns false when the parser does not apply, otherwise the result of
# `add_ml_heading`.
def parse_ml_consecutive_atx_headings
  return false unless after_block_boundary?

  matched_str = @src.matched
  chunks = matched_str.scan(/.+?(?:(?:\r\n|\r|\n){2,}|\z)/m)
  fragments = chunks.filter_map do |chunk|
    ML_CONSECUTIVE_ATX_HEADING.match(chunk) do |m|
      text, id = parse_ml_heading_contents(m[:contents])

      attrs = id ? { id: id } : {}
      parse_attribute_list(m[:ald], attrs) if m[:ald]

      if attrs.empty?
        text
      else
        attrs_str = attrs.map { |k, v| "#{k}=\"#{v}\"" }.join(" ")
        "<span #{attrs_str}>#{text}</span>"
      end
    end
  end
  merged_text = fragments.join

  return false if merged_text.empty?

  first_num_signs = ML_CONSECUTIVE_ATX_HEADINGS.match(matched_str) { |m| m[:num_signs] }
  add_ml_heading(first_num_signs.size, merged_text)
end
161
+
162
+ ::Kramdown::Parser::Kramdown.define_parser(:ml_consecutive_atx_headings, ML_CONSECUTIVE_ATX_HEADINGS)
163
+ end
164
+
165
+ module SetextHeading
166
+ include Common
167
+ include HeadingCommon
168
+
169
+ # NOTE: ALD for each heading must be placed _after_ the
170
+ # heading. This compromise allows us to write id at the beginning of
171
+ # headings as well as at the end, with less code.
172
+
173
+ ML_CONSECUTIVE_SETEXT_HEADING = %r{
174
+ (?<contents>[^\r\n]+?) # Contents
175
+ (?:\r\n|\r|\n) # EOL
176
+ (?<underline>[-=]+) # Underline
177
+ (?:\r\n|\r|\n) # EOL
178
+ #{ML_IAL_BLOCK_START}? # IAL
179
+ }x
180
+
181
+ ML_CONSECUTIVE_SETEXT_HEADINGS = %r{
182
+ [^\r\n]+? # Contents
183
+ (?:\r\n|\r|\n) # EOL
184
+ (?<underline>[-=]+) # Underline
185
+ (?:\r\n|\r|\n) # EOL
186
+ #{ML_IAL_BLOCK_START}? # IAL
187
+ (?:
188
+ (?:\r\n|\r|\n)+ # EOL (Blank line)
189
+ [^\r\n]+? # Contents
190
+ (?:\r\n|\r|\n) # EOL
191
+ \k<underline> # Underline
192
+ (?:\r\n|\r|\n) # EOL
193
+ #{ML_IAL_BLOCK_START}? # IAL
194
+ )+}mx
195
+
196
+ private
197
+
198
# Block parser that merges a run of consecutive setext headings (one per
# language) into a single header element.
#
# The matched text is cut into blank-line separated chunks. For each chunk
# the heading contents, an optional trailing header id and an optional
# attribute list line are read; when any attribute is present the text is
# wrapped in a `<span>` carrying those attributes. The fragments are joined
# with the `:ml_heading_lang_sep` option (or ML_DEFAULT_HEADING_LANG_SEP) and
# added as one heading: level 1 for `=` underlines, level 2 for `-`.
#
# Chunks that do not match ML_CONSECUTIVE_SETEXT_HEADING are skipped;
# previously such a chunk turned the accumulator into nil and the next append
# raised NoMethodError.
#
# Returns false when the parser does not apply, otherwise the result of
# `add_ml_heading`.
# Raises RuntimeError when the first underline starts with neither `=` nor `-`.
def parse_ml_consecutive_setext_headings
  return false unless after_block_boundary?

  matched_str = @src.matched
  chunks = matched_str.scan(/.+?(?:(?:\r\n|\r|\n){2,}|\z)/m)
  lang_sep = @options[:ml_heading_lang_sep] || ML_DEFAULT_HEADING_LANG_SEP
  fragments = chunks.filter_map do |chunk|
    ML_CONSECUTIVE_SETEXT_HEADING.match(chunk) do |m|
      text, id = parse_ml_heading_contents(m[:contents])

      attrs = id ? { id: id } : {}
      parse_attribute_list(m[:ald], attrs) if m[:ald]

      if attrs.empty?
        text
      else
        attrs_str = attrs.map { |k, v| "#{k}=\"#{v}\"" }.join(" ")
        "<span #{attrs_str}>#{text}</span>"
      end
    end
  end
  merged_text = fragments.join(lang_sep)

  return false if merged_text.empty?

  first_underline = ML_CONSECUTIVE_SETEXT_HEADINGS.match(matched_str) { |m| m[:underline] }
  level =
    case first_underline.chars.first
    when "=" then 1
    when "-" then 2
    else raise "Invalid setext headings: #{matched_str.inspect}"
    end

  add_ml_heading(level, merged_text)
end
238
+
239
+ ::Kramdown::Parser::Kramdown.define_parser(:ml_consecutive_setext_headings, ML_CONSECUTIVE_SETEXT_HEADINGS)
240
+ end
241
+
242
+ module EmphasisForNonASCIILanguages
243
+ EMPHASIS_START = ::Kramdown::Parser::Kramdown::EMPHASIS_START
244
+
245
+ # NOTE: This method is a derivation from kramdown's
246
+ # `parse_emphasis`, slightly modified so that emphasis using
247
+ # underbar (`_`) works for non-ASCII languages such as Japanese.
248
+ #
249
+ # Change(s):
250
+ # * `[[:alpha;]]` and `[[:alnum:]]` are replaced with `[\w]`
251
+ #
252
+ # Original: https://github.com/gettalong/kramdown/blob/fc051a9d93e4dc3ff05bf41b70a79297ebdb669f/lib/kramdown/parser/kramdown/emphasis.rb#L17
253
# Span parser for `*` / `_` emphasis.
#
# Scans the opening delimiter, then either emits it as plain text (when it
# cannot open emphasis at this position) or parses the enclosed spans into an
# :em / :strong element that is appended to the current tree node. When a
# two-character delimiter finds no closing match, a second attempt is made
# with a single-character :em delimiter. If nothing closes, the delimiter is
# added as text.
def parse_ml_emphasis
  location = @src.current_line_number
  checkpoint = @src.save_pos

  delimiter = @src.scan(EMPHASIS_START)
  kind = delimiter.length == 2 ? :strong : :em
  marker = delimiter[0..0]

  # Same short-circuit order as the combined condition: the whitespace check
  # on the scanner only runs when the underscore test is falsy.
  inside_word = marker == '_' && @src.pre_match =~ /[\w]-?[\w]*_*\z/
  if inside_word || @src.check(/\s/) ||
     @tree.type == kind || @stack.any? { |el, _| el.type == kind }
    add_text(delimiter)
    return
  end

  warning_count = @warnings.size
  attempt = lambda do |delim, elem|
    node = Element.new(elem, nil, nil, location: location)
    closer = /#{Regexp.escape(delim)}/
    closed = parse_spans(node, closer) do
      (@src.pre_match[-1, 1] !~ /\s/) &&
        (elem != :em || !@src.match?(/#{Regexp.escape(delim * 2)}(?!#{Regexp.escape(delim)})/)) &&
        (marker != '_' || !@src.match?(/#{Regexp.escape(delim)}[\w]/)) && !node.children.empty?
    end
    [closed, node, closer]
  end

  closed, node, closer = attempt.call(delimiter, kind)
  if !closed && kind == :strong && @tree.type != :em
    # Retry from one character further on, treating the delimiter as single.
    @src.revert_pos(checkpoint)
    @src.pos += 1
    closed, node, closer = attempt.call(marker, :em)
  end

  unless closed
    @warnings.slice!(0...warning_count)
    @src.revert_pos(checkpoint)
    @src.pos += delimiter.length
    return add_text(delimiter)
  end

  # Useful for implementing underlines.
  node.options[:char] = marker

  @src.scan(closer)
  @tree.children << node
end
298
+
299
+ ::Kramdown::Parser::Kramdown.define_parser(:ml_emphasis, EMPHASIS_START, '\*|_')
300
+ end
301
+
302
+ include FrontMatter
303
+ include ATXHeading
304
+ include SetextHeading
305
+ include EmphasisForNonASCIILanguages
306
+
307
+ module HasMetadata
308
+ attr_accessor :metadata
309
+ end
310
+
311
+ private
312
+
313
# Sets up the parser and registers the multilingual block and span parsers
# ahead of the inherited ones.
#
# After the call the block parser list starts with the setext-headings
# parser, followed by the ATX-headings parser and the front matter parser;
# the emphasis parser is first in the span parser list. The root element is
# extended with HasMetadata.
def initialize(source, options)
  super
  # `unshift` keeps its arguments in the order given.
  @block_parsers.unshift(
    :ml_consecutive_setext_headings,
    :ml_consecutive_atx_headings,
    :ml_front_matter,
  )
  @span_parsers.unshift(:ml_emphasis)
  @root.extend(HasMetadata)
end
321
+ end
322
+ end
323
+
324
+ module Converter
325
+ class YaMultilingualHtml < ::Kramdown::Converter::Html
326
+ module Metadata
327
+ module Utils
328
+ include ::Kramdown::Utils::Html
329
+ module_function :escape_html
330
+ end
331
+
332
+ private
333
+
334
# Builds the `<title>` element for the "title" front matter entry.
#
# value - the title; must be a String.
#
# Returns the element as a String with the title text HTML-escaped.
# Raises RuntimeError when `value` is not a String.
def title_element(value)
  raise "title must be String but was #{value.inspect}" unless value.is_a?(String)

  "<title>#{Utils.escape_html(value, :text)}</title>"
end
342
+
343
# Builds `<meta>` elements for the "meta" front matter entry.
#
# value - an Array of Hashes; each Hash maps attribute names (String) to
#         attribute values (String) and becomes one `<meta ... />` element.
#
# Every key and value is checked individually. The earlier check flattened
# all keys/values first, so nested Arrays of Strings passed validation and
# then failed inside the escaping helper with an unrelated error.
#
# Returns an Array of element Strings with attribute values HTML-escaped.
# Raises RuntimeError when `value` does not have the shape described above.
def meta_elements(value)
  well_formed = value.is_a?(Array) && value.all? { |item|
    item.is_a?(Hash) && item.all? { |k, v| k.is_a?(String) && v.is_a?(String) }
  }
  unless well_formed
    raise <<~EOS.chomp
      meta value must be Array(Hash(String, String)) \
      but was #{value.inspect}
    EOS
  end

  value.map do |item|
    attributes = item.map { |k, v| "#{k}=\"#{Utils.escape_html(v, :attribute)}\"" }.join(" ")
    "<meta #{attributes} />"
  end
end
361
+
362
# Builds `<link>` elements for the "link" front matter entry.
#
# value - an Array of Hashes. A String key with a String value becomes
#         `key="escaped value"`; a String key with a nil value becomes a bare
#         attribute name.
#
# Returns an Array of element Strings.
# Raises RuntimeError when `value` is not an Array of Hashes or when an entry
# is neither String => String nor String => nil.
def link_elements(value)
  reject = lambda do
    raise <<~EOS.chomp
      link value must be Array(Hash(String, String|NilClass)) \
      but was #{value.inspect}
    EOS
  end

  reject.call unless value.is_a?(Array) && value.all?(Hash)

  value.map do |item|
    attributes = item.map do |name, content|
      if name.is_a?(String) && content.is_a?(String)
        "#{name}=\"#{Utils.escape_html(content, :attribute)}\""
      elsif name.is_a?(String) && content.nil?
        name
      else
        reject.call
      end
    end
    "<link #{attributes.compact.join(" ")} />"
  end
end
388
+
389
# Builds `<script>` elements for the "script" front matter entry.
#
# value - either a String (inline script text) or an Array of Hashes. In a
#         Hash, String => String entries become `key="escaped value"`,
#         String => nil entries become bare attribute names, and the entry
#         stored under the nil key is the inline script text.
#
# Elements without inline text are emitted as `<script ...></script>`. The
# self-closing form `<script ... />` is not honoured for `script` in HTML, so
# the rest of the page would be read as script content.
#
# NOTE(review): inline text is passed through `escape_html(..., :text)`;
# confirm that entity-escaped script text is what consumers expect.
#
# Returns an Array of element Strings.
# Raises RuntimeError when `value` has none of the accepted shapes.
def script_elements(value)
  reject = lambda do
    raise <<~EOS.chomp
      script value must be String or Array of Hash with \
      Null to String entry but was #{value.inspect}
    EOS
  end

  case value
  in String
    ["<script>#{Utils.escape_html(value, :text)}</script>"]
  in Array if value.all?(Hash)
    value.map do |item|
      attributes = item.map do |k, v|
        case [k, v]
        in String, String
          "#{k}=\"#{Utils.escape_html(v, :attribute)}\""
        in String, NilClass
          k
        in NilClass, String
          # The inline text is rendered as the element body below.
          nil
        else
          reject.call
        end
      end.compact.join(" ")

      inline = item[nil] ? Utils.escape_html(item[nil], :text) : ""
      "<script #{attributes}>#{inline}</script>"
    end
  else
    reject.call
  end
end
424
+
425
# Builds `<meta name="KEY" content="...">` elements for a front matter entry
# that has no dedicated handling.
#
# key   - the front matter key, used as the `name` attribute.
# value - a String (one element) or an Array of Strings (one element each).
#
# Both the key and the contents are escaped for use in an attribute. The
# error message names the shapes that are actually accepted.
#
# Returns an Array of element Strings.
# Raises RuntimeError when `value` is neither a String nor an Array of Strings.
def guessed_meta_elements(key, value)
  contents =
    case value
    in String
      [value]
    in Array if value.all?(String)
      value
    else
      raise <<~EOS.chomp
        meta value must be String | Array(String) \
        but was #{value.inspect}
      EOS
    end

  name = Utils.escape_html(key.to_s, :attribute)
  contents.map do |content|
    "<meta name=\"#{name}\" content=\"#{Utils.escape_html(content, :attribute)}\" />"
  end
end
442
+
443
+ public
444
+
445
# Converts front matter metadata into HTML head elements.
#
# metadata - a Hash of front matter entries; defaults to the receiver.
#
# "title", "meta", "link" and "script" entries are handled by their dedicated
# builders; any other String key becomes `<meta name=... content=...>`
# elements. Elements are returned in the order of the entries.
#
# Returns an Array of element Strings. For nil or empty metadata an empty
# Array is returned, so callers can call `join` / `unshift` on the result
# without a nil check.
# Raises RuntimeError when a key is not a String, or when a builder rejects
# its value.
def to_html_elements(metadata: self)
  return [] if metadata.nil? || metadata.empty?

  metadata.flat_map do |key, value|
    case key
    when "title" then [title_element(value)]
    when "meta" then meta_elements(value)
    when "link" then link_elements(value)
    when "script" then script_elements(value)
    when String then guessed_meta_elements(key, value)
    else raise "front matter key must be String"
    end
  end
end
460
+ end
461
+
462
+ module RewriteLinkSuffixes
463
# Rewrites the file extension of a URL's path according to `suffixes`.
#
# url_str               - the URL (typically a link target) as a String.
# suffixes              - Hash mapping an extension to its replacement, e.g.
#                         `{ ".md" => ".html" }`; defaults to the
#                         `:ml_link_suffixes` option or an empty Hash.
# rewrite_relative_only - when true (default) only relative paths are
#                         rewritten.
#
# Returns a new URI with the rewritten path, or nil when nothing is to be
# rewritten: the URL has no path, the path is absolute while only relative
# ones are wanted, or the extension is not listed. nil is also returned when
# the URL cannot be handled by the URI library (for example link targets
# containing non-ASCII characters or spaces), so such links are left as they
# are instead of aborting the conversion.
def edit_url_path_suffix(url_str,
                         suffixes = @options[:ml_link_suffixes] || {},
                         rewrite_relative_only = true)
  uri = URI(url_str)
  return unless uri.path

  path = Pathname.new(uri.path)
  return if rewrite_relative_only && !path.relative?
  return unless suffixes.key?(path.extname)

  rewritten = uri.dup
  # Assign a fresh String; the duplicate shares its path String with `uri`.
  rewritten.path = path.sub_ext(suffixes[path.extname]).to_s
  rewritten
rescue URI::Error
  nil
end
477
+
478
# Converts a link element after rewriting the suffix of its `href`.
#
# When the element has an `href` and `edit_url_path_suffix` returns a
# rewritten URL for it, the attribute is replaced with that result before the
# inherited converter runs.
def convert_a(el, indent)
  href = el.attr["href"]
  rewritten = href && edit_url_path_suffix(href)
  el.attr["href"] = rewritten if rewritten
  super
end
485
+ end
486
+
487
+ module LangSelectorCommon
488
# Tells whether `el` carries a language attribute that is not among the
# requested languages.
#
# el             - element whose `attr` Hash is inspected.
# langs          - list of requested languages; defaults to the `:ml_langs`
#                  option. A nil list means nothing is removed.
# lang_attr_name - name of the language attribute; defaults to the
#                  `:ml_lang_attr_name` option.
#
# Returns false when any element on `@stack` already has the language
# attribute (the decision was made for that ancestor). Otherwise returns a
# truthy value only when `el` has the attribute, `langs` is set and `langs`
# does not include the attribute's value.
def has_lang_to_remove?(el,
                        langs: @options[:ml_langs],
                        lang_attr_name: @options[:ml_lang_attr_name])
  return false if @stack.any? { |ancestor| ancestor.attr[lang_attr_name] }

  attributes = el.attr
  lang = attributes && attributes[lang_attr_name]
  lang && langs && !langs.include?(lang)
end
501
+ end
502
+
503
# Converter mixin that drops elements written in a language that was not
# requested.
#
# Every overridden method first asks `has_lang_to_remove?`; when the answer
# is truthy the element is rendered as an empty String, otherwise the
# inherited implementation runs. `convert_header` additionally rebuilds
# merged multilingual headings so that only the remaining language variants,
# separated by the heading language separator, are rendered.
module ElementsToBeLangSelected
  include LangSelectorCommon

  # Converters that need nothing beyond the language check.
  LANG_FILTERED_CONVERTERS = %i[
    convert_blank
    convert_text
    convert_p
    convert_standalone_image
    convert_codeblock
    convert_blockquote
    convert_hr
    convert_ul
    convert_dl
    convert_li
    convert_dt
    convert_html_element
    convert_xml_comment
    convert_table
    convert_td
    convert_comment
    convert_br
    convert_a
    convert_img
    convert_codespan
    convert_footnote
    convert_raw
    convert_em
    convert_entity
    convert_typographic_sym
    convert_smart_quote
    convert_math
    convert_abbreviation
    convert_root
  ].freeze

  def convert(el, indent = -@indent)
    has_lang_to_remove?(el) ? "" : super
  end

  def inner(el, indent)
    has_lang_to_remove?(el) ? "" : super
  end

  LANG_FILTERED_CONVERTERS.each do |converter|
    # `super` needs explicit arguments inside `define_method`.
    define_method(converter) do |el, indent|
      has_lang_to_remove?(el) ? "" : super(el, indent)
    end
  end

  # Converts a header element.
  #
  # A header whose children are all `<span>` HTML elements carrying the
  # language attribute is treated as a merged multilingual heading: a warning
  # is logged for every language that occurs more than once, children in
  # unwanted languages are removed, and a text element holding the separator
  # is inserted between the remaining ones. Any other header is converted by
  # the inherited implementation unchanged.
  def convert_header(el, indent)
    return "" if has_lang_to_remove?(el)

    lang_attr_name = @options[:ml_lang_attr_name]
    all_lang_spans = el.children&.all? do |c|
      c.type == :html_element && c.value == "span" && c.attr && c.attr[lang_attr_name]
    end
    return super unless all_lang_spans

    # Check redundant lang values. The logger is optional.
    logger = @options[:ml_logger]
    el.children.group_by { |c| c.attr[lang_attr_name] }.each do |lang, spans|
      next unless spans.size > 1

      logger&.warn(<<~EOS.chomp)
        lang="#{lang}" appears more than once in a single sequence \
        of multilingual headings: #{spans.map{|e| convert(e)}}
      EOS
    end

    # The default separator lives in the parser namespace and is not visible
    # from here by plain constant lookup, hence the qualified name.
    lang_sep = @options[:ml_heading_lang_sep] ||
               ::Kramdown::Parser::YaMultilingualMarkdown::Common::ML_DEFAULT_HEADING_LANG_SEP

    kept = el.children.reject { |c| has_lang_to_remove?(c) }
    el.children = kept.each_with_index.flat_map do |child, index|
      index.zero? ? [child] : [Element.new(:text, lang_sep), child]
    end
    super
  end
end
676
+
677
+ module UseMetadata
678
# Sets up the converter and extends the root's metadata (when present) with
# Metadata, which provides `to_html_elements`.
#
# Roots that do not respond to `metadata` are accepted and left untouched, so
# the converter does not fail with NoMethodError on such trees.
def initialize(root, options)
  super

  metadata = root.metadata if root.respond_to?(:metadata)
  metadata&.extend(Metadata)
end
683
+ end
684
+
685
+ include RewriteLinkSuffixes
686
+ include ElementsToBeLangSelected
687
+ include UseMetadata
688
+ end
689
+ end
690
+ end
691
+
692
+ module YAMultilingualMarkdown
693
+ VERSION = "0.0.2"
694
+ LOG_LEVEL = {
695
+ unknown: Logger::UNKNOWN,
696
+ fatal: Logger::FATAL,
697
+ error: Logger::ERROR,
698
+ warn: Logger::WARN,
699
+ info: Logger::INFO,
700
+ debug: Logger::DEBUG,
701
+ }
702
+
703
+ class Document
704
+ DEFAULT_OPTIONS = {
705
+ langs: nil,
706
+ lang_attr_name: "lang",
707
+ heading_lang_sep: " / ",
708
+ output_type: :auto,
709
+ template: nil,
710
+ link_suffixes: {".md" => ".html"},
711
+ log_level: LOG_LEVEL[:warn],
712
+ }
713
+ DEFAULT_KRAMDOWN_OPTIONS = {
714
+ auto_ids: false,
715
+ smart_quotes: ["apos", "apos", "quot", "quot"],
716
+ typographic_symbols: {
717
+ hellip: "...",
718
+ mdash: "---",
719
+ ndash: "--",
720
+ laquo: "<<",
721
+ raquo: ">>",
722
+ laquo_space: "<< ",
723
+ raquo_space: " >>",
724
+ },
725
+ input: "YaMultilingualMarkdown",
726
+ math_engine: "katex",
727
+ syntax_highlighter: "rouge",
728
+ syntax_highlighter_opts: {},
729
+ }
730
+ # we want to keep "\n" as-is for --show-default-template
731
+ DEFAULT_TEMPLATE = '
732
+ <!DOCTYPE html>
733
+ <html>
734
+ <head>
735
+ <%=
736
+ if metadata = @converter.root.metadata
737
+ elements = metadata.to_html_elements
738
+ elements.unshift("<title></title>") if not(metadata["title"])
739
+ elements.join("\n")
740
+ else
741
+ "<title></title>"
742
+ end
743
+ %>
744
+ </head>
745
+ <body>
746
+ <%= @body.chomp %>
747
+ </body>
748
+ </html>
749
+ '.gsub(/^ {6}/, "").strip + "\n"
750
+
751
+ module Utils
752
+ include ::Kramdown::Utils::Html
753
+ module_function :escape_html
754
+ end
755
+
756
+ class Error < StandardError; end
757
+
758
# Creates a document from Markdown source.
#
# source           - the Markdown text.
# options          - Hash of document options (see DEFAULT_OPTIONS); when nil
#                    a copy of DEFAULT_OPTIONS is used.
# kramdown_options - Hash of options handed to kramdown; when nil
#                    DEFAULT_KRAMDOWN_OPTIONS is used.
#
# Document options that are set are translated into the corresponding
# kramdown options (`:ml_langs`, `:ml_lang_attr_name`,
# `:ml_heading_lang_sep`, `:ml_link_suffixes`, `:template`, `:ml_logger`).
# The translation is written into a copy, so a Hash passed by the caller is
# never modified.
def initialize(source, options: nil, kramdown_options: nil)
  @source = source

  options ||= DEFAULT_OPTIONS.dup
  kramdown_options = (kramdown_options || DEFAULT_KRAMDOWN_OPTIONS).dup

  # Options that are passed through under a different name.
  {
    langs: :ml_langs,
    lang_attr_name: :ml_lang_attr_name,
    heading_lang_sep: :ml_heading_lang_sep,
    link_suffixes: :ml_link_suffixes,
  }.each do |name, kramdown_name|
    value = options[name]
    kramdown_options[kramdown_name] = value if value
  end

  if (template = options[:template])
    kramdown_options[:template] = "string://#{template}"
  end

  if (log_level = options[:log_level])
    kramdown_options[:ml_logger] = Logger.new($stderr, level: log_level)
  end

  @options = options
  @kramdown_options = kramdown_options
end
791
+
792
+ private
793
+
794
# Renders the source as an HTML fragment.
#
# When the document has front matter that yields head elements, they are put
# in front of the body, one per line. Without front matter, or when it
# yields no elements (for example an empty mapping), only the body is
# returned; previously that case called `join` on nil.
def to_html_fragment
  doc = Kramdown::Document.new(@source, @kramdown_options)
  # Convert first: instantiating the converter is what extends the root's
  # metadata with Metadata and makes #to_html_elements callable.
  body = doc.to_ya_multilingual_html

  head_contents = Array(doc.root.metadata&.to_html_elements)
  return body if head_contents.empty?

  head_contents.join("\n") + "\n" + body
end
807
+
808
# Renders the source as a complete HTML document.
#
# When no `:template` document option is set, a copy of DEFAULT_TEMPLATE is
# passed to kramdown as a `string://` template; otherwise the kramdown
# options are used as they are.
def to_html_document
  kramdown_options = @kramdown_options
  unless @options[:template]
    template = "string://#{Document::DEFAULT_TEMPLATE.dup}"
    kramdown_options = kramdown_options.merge({ template: template })
  end

  Kramdown::Document.new(@source, kramdown_options).to_ya_multilingual_html
end
819
+
820
+ public
821
+
822
# Renders the source as HTML according to the `:output_type` option.
#
# :fragment and :document select the respective renderer. :auto parses the
# source once to look at the root's metadata and renders a full document
# when metadata is present, a fragment otherwise.
#
# Raises Error for any other output type.
def to_html
  case @options[:output_type]
  when :fragment then to_html_fragment
  when :document then to_html_document
  when :auto
    parsed = Kramdown::Document.new(@source, @kramdown_options)
    parsed.root.metadata ? to_html_document : to_html_fragment
  else
    raise Error, <<~EOS
      output_type must be :auto, :fragment or :document but was \
      #{@options[:output_type].inspect}
    EOS
  end
end
842
+ end
843
+
844
+ module CLI
845
+ DEFAULT_OPTIONS = {
846
+ show_default_template: nil,
847
+ help: nil,
848
+ version: nil,
849
+ }
850
+
851
+ class Error < StandardError
852
+ end
853
+
854
+ class << self
855
# Command line entry point.
#
# Parses the command line options, then either prints the default template,
# the help text or the version, or reads the input (the files named on the
# command line, or standard input) as UTF-8 and prints the resulting HTML.
#
# Options restricted to a candidate list (`--output-type`, `--log-level`)
# are normalized with `to_sym`, which both String and Symbol provide.
#
# Raises Error when the template file does not exist or is not readable.
def run
  options = Document::DEFAULT_OPTIONS.merge(DEFAULT_OPTIONS)

  option_parser = OptionParser.new do |o|
    o.banner = <<~EOS
      Convert Yet Another Multilingual Markdown to HTML

      Usage:
      #{o.program_name} [OPTIONS] [FILE]

      Options:
    EOS
    o.on("--langs=LANG1,LANG2,...", String, <<~EOS.chomp) do |s|
      Languages to be included in the output \
      (default: #{options[:langs].to_s.inspect}; empty implies all)
    EOS
      options[:langs] = s.split(",")
    end
    o.on("--lang-attr-name=STRING", String, <<~EOS.chomp) do |s|
      Attribute name for language (default: #{options[:lang_attr_name].inspect})
    EOS
      options[:lang_attr_name] = s
    end
    o.on("--heading-lang-sep=STRING", String, <<~EOS.chomp) do |s|
      Languages separator in headings (default: #{options[:heading_lang_sep].inspect})
    EOS
      options[:heading_lang_sep] = s
    end
    o.on("--output-type=auto|fragment|document", [:auto, :fragment, :document], <<~EOS.chomp) do |s|
      Output type (default: #{options[:output_type].to_s.inspect})
    EOS
      options[:output_type] = s.to_sym
    end
    o.on("--template-file=FILE", String, <<~EOS.chomp) do |s|
      Document template file in eRuby format to embed contents
    EOS
      raise Error, "template file does not exist: #{s.inspect}" unless File.exist?(s)
      raise Error, "template file not readable: #{s.inspect}" unless File.readable?(s)

      options[:template] = File.read(s)
    end
    o.on("--link-suffixes=FROM:TO,...", String, <<~EOS.chomp) do |s|
      Link suffixes to rewrite (default: #{options[:link_suffixes]&.map { |from, to| "#{from}:#{to}" }&.join(",").to_s.inspect})
    EOS
      options[:link_suffixes] = s.split(",").map { |pair| pair.split(":") }.to_h
    end
    o.on("--show-default-template", "Show default document template") {
      options[:show_default_template] = true
    }
    o.on("--log-level=#{LOG_LEVEL.keys.join("|")}", LOG_LEVEL.keys, "Log level (default: warn)") do |s|
      options[:log_level] = LOG_LEVEL[s.to_sym]
    end
    o.on("--help", "Show help message") { options[:help] = true }
    o.on("--version", "Show version number") { options[:version] = true }
  end
  option_parser.parse!

  if options[:show_default_template]
    puts Document::DEFAULT_TEMPLATE
  elsif options[:help]
    puts option_parser
  elsif options[:version]
    v = ::YAMultilingualMarkdown::VERSION
    $stdout.tty? ? puts(v) : print(v)
  else
    doc = Document.new(ARGF.set_encoding("UTF-8").read, options: options)
    print(doc.to_html)
  end
end
931
+ end
932
+ end
933
+ end
934
+
935
+ if __FILE__ == $PROGRAM_NAME
936
+ YAMultilingualMarkdown::CLI.run
937
+ end
metadata ADDED
@@ -0,0 +1,166 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ya_multilingual_markdown
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Hisashi Morita
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 2025-11-11 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: kramdown
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '2.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '2.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: kramdown-parser-gfm
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: kramdown-math-katex
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: rouge
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.0'
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '3.0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: logger
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.0'
75
+ type: :runtime
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.0'
82
+ - !ruby/object:Gem::Dependency
83
+ name: rake
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '13.0'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '13.0'
96
+ - !ruby/object:Gem::Dependency
97
+ name: bundler
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '2.5'
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '2.5'
110
+ - !ruby/object:Gem::Dependency
111
+ name: minitest
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '5.0'
117
+ type: :development
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '5.0'
124
+ - !ruby/object:Gem::Dependency
125
+ name: rubocop-shopify
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '1.0'
131
+ type: :development
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '1.0'
138
+ executables:
139
+ - ya_multilingual_markdown
140
+ extensions: []
141
+ extra_rdoc_files: []
142
+ files:
143
+ - bin/ya_multilingual_markdown
144
+ - lib/ya_multilingual_markdown.rb
145
+ homepage: https://github.com/hisashim/ya_multilingual_markdown
146
+ licenses:
147
+ - MIT
148
+ metadata: {}
149
+ rdoc_options: []
150
+ require_paths:
151
+ - lib
152
+ required_ruby_version: !ruby/object:Gem::Requirement
153
+ requirements:
154
+ - - ">="
155
+ - !ruby/object:Gem::Version
156
+ version: '3.0'
157
+ required_rubygems_version: !ruby/object:Gem::Requirement
158
+ requirements:
159
+ - - ">="
160
+ - !ruby/object:Gem::Version
161
+ version: '0'
162
+ requirements: []
163
+ rubygems_version: 3.6.2
164
+ specification_version: 4
165
+ summary: Yet Another Multilingual Markdown processor
166
+ test_files: []