coradoc 1.0.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Makefile +35 -0
- data/{.docker/readme.md → README.Docker.adoc} +21 -25
- data/README.adoc +121 -0
- data/coradoc.gemspec +4 -0
- data/docker-compose.yml +14 -0
- data/exe/coradoc +5 -0
- data/exe/reverse_adoc +24 -34
- data/exe/w2a +16 -28
- data/lib/coradoc/cli.rb +105 -0
- data/lib/coradoc/converter.rb +145 -0
- data/lib/coradoc/document.rb +13 -0
- data/lib/coradoc/element/admonition.rb +5 -0
- data/lib/coradoc/element/attribute_list.rb +1 -1
- data/lib/coradoc/element/author.rb +4 -4
- data/lib/coradoc/element/bibliography.rb +24 -0
- data/lib/coradoc/element/bibliography_entry.rb +24 -0
- data/lib/coradoc/element/block/core.rb +6 -4
- data/lib/coradoc/element/block/example.rb +1 -0
- data/lib/coradoc/element/block/pass.rb +21 -0
- data/lib/coradoc/element/block/quote.rb +1 -1
- data/lib/coradoc/element/block/reviewer_comment.rb +19 -0
- data/lib/coradoc/element/block/side.rb +4 -2
- data/lib/coradoc/element/block/sourcecode.rb +1 -0
- data/lib/coradoc/element/block.rb +2 -0
- data/lib/coradoc/element/comment_block.rb +22 -0
- data/lib/coradoc/element/comment_line.rb +18 -0
- data/lib/coradoc/element/document_attributes.rb +8 -1
- data/lib/coradoc/element/header.rb +1 -1
- data/lib/coradoc/element/image/block_image.rb +8 -0
- data/lib/coradoc/element/image/core.rb +7 -4
- data/lib/coradoc/element/include.rb +18 -0
- data/lib/coradoc/element/inline/citation.rb +24 -0
- data/lib/coradoc/element/inline/cross_reference.rb +29 -8
- data/lib/coradoc/element/inline.rb +1 -0
- data/lib/coradoc/element/list/core.rb +16 -3
- data/lib/coradoc/element/list/definition.rb +3 -1
- data/lib/coradoc/element/list_item.rb +15 -5
- data/lib/coradoc/element/paragraph.rb +8 -5
- data/lib/coradoc/element/revision.rb +1 -1
- data/lib/coradoc/element/section.rb +11 -3
- data/lib/coradoc/element/table.rb +6 -1
- data/lib/coradoc/element/tag.rb +19 -0
- data/lib/coradoc/element/term.rb +21 -0
- data/lib/coradoc/element/text_element.rb +9 -1
- data/lib/coradoc/element/title.rb +2 -2
- data/lib/coradoc/generator.rb +2 -0
- data/lib/coradoc/input/adoc.rb +28 -0
- data/lib/coradoc/input/docx.rb +35 -0
- data/lib/coradoc/{reverse_adoc → input/html}/README.adoc +9 -9
- data/lib/coradoc/{reverse_adoc → input/html}/cleaner.rb +19 -6
- data/lib/coradoc/input/html/config.rb +75 -0
- data/lib/coradoc/{reverse_adoc → input/html}/converters/a.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/aside.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/audio.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/base.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/blockquote.rb +2 -2
- data/lib/coradoc/{reverse_adoc → input/html}/converters/br.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/bypass.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/code.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/div.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/dl.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/drop.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/em.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/figure.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/h.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/head.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/hr.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/ignore.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/img.rb +17 -10
- data/lib/coradoc/{reverse_adoc → input/html}/converters/li.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/mark.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/markup.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/math.rb +3 -3
- data/lib/coradoc/{reverse_adoc → input/html}/converters/ol.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/p.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/pass_through.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/pre.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/q.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/strong.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/sub.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/sup.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/table.rb +36 -11
- data/lib/coradoc/{reverse_adoc → input/html}/converters/td.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/text.rb +2 -2
- data/lib/coradoc/{reverse_adoc → input/html}/converters/th.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/tr.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/video.rb +1 -1
- data/lib/coradoc/input/html/converters.rb +57 -0
- data/lib/coradoc/input/html/errors.rb +12 -0
- data/lib/coradoc/{reverse_adoc → input/html}/html_converter.rb +37 -22
- data/lib/coradoc/{reverse_adoc → input/html}/plugin.rb +6 -6
- data/lib/coradoc/{reverse_adoc → input/html}/plugins/plateau.rb +62 -30
- data/lib/coradoc/{reverse_adoc → input/html}/postprocessor.rb +13 -9
- data/lib/coradoc/input/html.rb +59 -0
- data/lib/coradoc/input.rb +12 -0
- data/lib/coradoc/output/adoc.rb +17 -0
- data/lib/coradoc/output/coradoc_tree_debug.rb +19 -0
- data/lib/coradoc/output.rb +11 -0
- data/lib/coradoc/parser/asciidoc/admonition.rb +24 -0
- data/lib/coradoc/parser/asciidoc/attribute_list.rb +67 -0
- data/lib/coradoc/parser/asciidoc/base.rb +101 -13
- data/lib/coradoc/parser/asciidoc/bibliography.rb +30 -0
- data/lib/coradoc/parser/asciidoc/block.rb +82 -0
- data/lib/coradoc/parser/asciidoc/citation.rb +48 -0
- data/lib/coradoc/parser/asciidoc/content.rb +15 -120
- data/lib/coradoc/parser/asciidoc/document_attributes.rb +12 -5
- data/lib/coradoc/parser/asciidoc/header.rb +1 -4
- data/lib/coradoc/parser/asciidoc/inline.rb +72 -0
- data/lib/coradoc/parser/asciidoc/list.rb +81 -0
- data/lib/coradoc/parser/asciidoc/paragraph.rb +33 -0
- data/lib/coradoc/parser/asciidoc/section.rb +36 -31
- data/lib/coradoc/parser/asciidoc/table.rb +32 -0
- data/lib/coradoc/parser/asciidoc/term.rb +23 -0
- data/lib/coradoc/parser/base.rb +39 -4
- data/lib/coradoc/transformer.rb +353 -82
- data/lib/coradoc/util.rb +10 -0
- data/lib/coradoc/version.rb +1 -1
- data/lib/coradoc.rb +8 -4
- data/lib/reverse_adoc.rb +6 -6
- data/utils/parser_analyzer.rb +66 -0
- data/utils/round_trip.rb +37 -0
- metadata +113 -54
- data/.docker/Makefile +0 -35
- data/.docker/docker-compose.yml +0 -14
- data/Makefile +0 -1
- data/README.md +0 -73
- data/docker-compose.yml +0 -1
- data/lib/coradoc/reverse_adoc/config.rb +0 -73
- data/lib/coradoc/reverse_adoc/converters.rb +0 -53
- data/lib/coradoc/reverse_adoc/errors.rb +0 -10
- data/lib/coradoc/reverse_adoc.rb +0 -30
- /data/{.docker/Dockerfile → Dockerfile} +0 -0
- /data/lib/coradoc/{reverse_adoc → input/html}/LICENSE.txt +0 -0
@@ -1,9 +1,11 @@
|
|
1
1
|
module Coradoc
|
2
2
|
module Element
|
3
3
|
module List
|
4
|
-
class Definition <
|
4
|
+
class Definition < Base
|
5
5
|
attr_accessor :items, :delimiter
|
6
6
|
|
7
|
+
declare_children :items
|
8
|
+
|
7
9
|
def initialize(items, options = {})
|
8
10
|
@items = items
|
9
11
|
@delimiter = options.fetch(:delimiter, "::")
|
@@ -1,23 +1,33 @@
|
|
1
1
|
module Coradoc
|
2
2
|
module Element
|
3
3
|
class ListItem < Base
|
4
|
-
attr_accessor :id
|
4
|
+
attr_accessor :marker, :id, :anchor, :content, :line_break
|
5
5
|
|
6
6
|
declare_children :content, :id, :anchor
|
7
7
|
|
8
8
|
def initialize(content, options = {})
|
9
|
-
@
|
9
|
+
@marker = options.fetch(:marker, nil)
|
10
10
|
@id = options.fetch(:id, nil)
|
11
11
|
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
12
|
+
@content = content
|
13
|
+
@line_break = options.fetch(:line_break, "\n")
|
12
14
|
end
|
13
15
|
|
14
16
|
def to_adoc
|
15
17
|
anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
|
16
18
|
content = Array(@content).map do |subitem|
|
17
|
-
|
18
|
-
|
19
|
+
next if subitem.is_a? Inline::HardLineBreak
|
20
|
+
|
21
|
+
subcontent = Coradoc::Generator.gen_adoc(subitem)
|
22
|
+
# Only try to postprocess elements that are text,
|
23
|
+
# otherwise we could strip markup.
|
24
|
+
if Coradoc.a_single?(subitem, Coradoc::Element::TextElement)
|
25
|
+
subcontent = Coradoc.strip_unicode(subcontent)
|
26
|
+
end
|
27
|
+
subcontent.chomp
|
28
|
+
end.compact.join("\n+\n")
|
19
29
|
|
20
|
-
" #{anchor}#{content.chomp}
|
30
|
+
" #{anchor}#{content.chomp}#{@line_break}"
|
21
31
|
end
|
22
32
|
end
|
23
33
|
end
|
@@ -7,9 +7,10 @@ module Coradoc
|
|
7
7
|
|
8
8
|
def initialize(content, options = {})
|
9
9
|
@content = content
|
10
|
-
@
|
11
|
-
|
12
|
-
@
|
10
|
+
@id = options.fetch(:id, nil)
|
11
|
+
@anchor = Inline::Anchor.new(@id) if @id
|
12
|
+
@title = options.fetch(:title, nil)
|
13
|
+
@attributes = options.fetch(:attributes, nil)
|
13
14
|
@tdsinglepara = options.fetch(:tdsinglepara, nil)
|
14
15
|
end
|
15
16
|
|
@@ -22,11 +23,13 @@ module Coradoc
|
|
22
23
|
end
|
23
24
|
|
24
25
|
def to_adoc
|
26
|
+
title = @title.nil? ? "" : ".#{Coradoc::Generator.gen_adoc(@title)}\n"
|
25
27
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
28
|
+
attrs = @attributes.nil? ? "" : "#{@attributes.to_adoc}\n"
|
26
29
|
if @tdsinglepara
|
27
|
-
anchor
|
30
|
+
"#{title}#{anchor}" << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content))
|
28
31
|
else
|
29
|
-
"\n\n#{anchor}" << Coradoc::Generator.gen_adoc(@content)
|
32
|
+
"\n\n#{title}#{anchor}#{attrs}" << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content)) << "\n\n"
|
30
33
|
end
|
31
34
|
end
|
32
35
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Coradoc
|
2
2
|
module Element
|
3
3
|
class Section < Base
|
4
|
-
attr_accessor :id, :title, :contents, :sections
|
4
|
+
attr_accessor :id, :title, :attrs, :contents, :sections
|
5
5
|
|
6
6
|
declare_children :id, :title, :contents, :sections
|
7
7
|
|
@@ -9,9 +9,10 @@ module Coradoc
|
|
9
9
|
@title = title
|
10
10
|
@id = options.fetch(:id, nil)
|
11
11
|
@id = nil if @id == ""
|
12
|
+
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
13
|
+
@attrs = options.fetch(:attribute_list, "")
|
12
14
|
@contents = options.fetch(:contents, [])
|
13
15
|
@sections = options.fetch(:sections, [])
|
14
|
-
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
15
16
|
end
|
16
17
|
|
17
18
|
def glossaries
|
@@ -27,6 +28,7 @@ module Coradoc
|
|
27
28
|
def to_adoc
|
28
29
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
29
30
|
title = Coradoc::Generator.gen_adoc(@title)
|
31
|
+
attrs = @attrs.to_s.empty? ? "" : "#{@attrs.to_adoc}\n"
|
30
32
|
content = Coradoc::Generator.gen_adoc(@contents)
|
31
33
|
sections = Coradoc::Generator.gen_adoc(@sections)
|
32
34
|
|
@@ -34,7 +36,13 @@ module Coradoc
|
|
34
36
|
# with something.
|
35
37
|
content = " #{content}" if content.start_with?(" +\n")
|
36
38
|
|
37
|
-
|
39
|
+
# Only try to postprocess elements that are text,
|
40
|
+
# otherwise we could strip markup.
|
41
|
+
if Coradoc.a_single?(@contents, Coradoc::Element::TextElement)
|
42
|
+
content = Coradoc.strip_unicode(content)
|
43
|
+
end
|
44
|
+
|
45
|
+
"\n#{anchor}" << attrs << title << content << sections << "\n"
|
38
46
|
end
|
39
47
|
|
40
48
|
# Check for cases when Section is simply an equivalent of an empty <DIV>
|
@@ -10,7 +10,7 @@ module Coradoc
|
|
10
10
|
@title = title
|
11
11
|
@id = options.fetch(:id, nil)
|
12
12
|
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
13
|
-
@attrs = options.fetch(:
|
13
|
+
@attrs = options.fetch(:attributes, nil)
|
14
14
|
end
|
15
15
|
|
16
16
|
def to_adoc
|
@@ -76,6 +76,11 @@ module Coradoc
|
|
76
76
|
anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
|
77
77
|
content = simplify_block_content(@content)
|
78
78
|
content = Coradoc::Generator.gen_adoc(content)
|
79
|
+
# Only try to postprocess elements that are text,
|
80
|
+
# otherwise we could strip markup.
|
81
|
+
if Coradoc.a_single?(@content, Coradoc::Element::TextElement)
|
82
|
+
content = Coradoc.strip_unicode(content)
|
83
|
+
end
|
79
84
|
"#{@colrowattr}#{@alignattr}#{@style}| #{anchor}#{content}"
|
80
85
|
end
|
81
86
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Coradoc
|
2
|
+
module Element
|
3
|
+
class Tag < Base
|
4
|
+
attr_accessor :name, :prefix, :attrs, :line_break
|
5
|
+
|
6
|
+
def initialize(name, options = {})
|
7
|
+
@name = name
|
8
|
+
@prefix = options.fetch(:prefix, "tag")
|
9
|
+
@attrs = options.fetch(:attribute_list, AttributeList.new)
|
10
|
+
@line_break = options.fetch(:line_break, "\n")
|
11
|
+
end
|
12
|
+
|
13
|
+
def to_adoc
|
14
|
+
attrs = @attrs.to_adoc
|
15
|
+
"// #{@prefix}::#{@name}#{attrs}#{@line_break}"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Coradoc
|
2
|
+
module Element
|
3
|
+
class Term < Base
|
4
|
+
attr_accessor :term, :options
|
5
|
+
|
6
|
+
declare_children :term, :options
|
7
|
+
|
8
|
+
def initialize(term, options = {})
|
9
|
+
@term = term
|
10
|
+
@type = options.fetch(:type, nil)
|
11
|
+
@lang = options.fetch(:lang, :en)
|
12
|
+
@line_break = options.fetch(:line_break, "")
|
13
|
+
end
|
14
|
+
|
15
|
+
def to_adoc
|
16
|
+
return "#{@type}:[#{@term}]#{@line_break}" if @lang == :en
|
17
|
+
"[#{@type}]##{@term}##{@line_break}"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -9,10 +9,14 @@ module Coradoc
|
|
9
9
|
@content = content # .to_s
|
10
10
|
@id = options.fetch(:id, nil)
|
11
11
|
@line_break = options.fetch(:line_break, "")
|
12
|
+
@html_cleanup = options.fetch(:html_cleanup, false)
|
13
|
+
if @html_cleanup
|
14
|
+
@content = treat_text_to_adoc(@content)
|
15
|
+
end
|
12
16
|
end
|
13
17
|
|
14
18
|
def to_adoc
|
15
|
-
Coradoc::Generator.gen_adoc(
|
19
|
+
Coradoc::Generator.gen_adoc(@content) + @line_break
|
16
20
|
end
|
17
21
|
|
18
22
|
def treat_text_to_adoc(text)
|
@@ -64,6 +68,10 @@ module Coradoc
|
|
64
68
|
def initialize(line_break)
|
65
69
|
@line_break = line_break
|
66
70
|
end
|
71
|
+
|
72
|
+
def to_adoc
|
73
|
+
@line_break
|
74
|
+
end
|
67
75
|
end
|
68
76
|
|
69
77
|
class Highlight < Element::TextElement
|
@@ -7,7 +7,7 @@ module Coradoc
|
|
7
7
|
|
8
8
|
def initialize(content, level, options = {})
|
9
9
|
@level_int = level
|
10
|
-
@level_int = level.length if level.is_a?(String)
|
10
|
+
# @level_int = level.length - 1 if level.is_a?(String)
|
11
11
|
@content = content
|
12
12
|
@id = options.fetch(:id, nil)
|
13
13
|
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
@@ -21,7 +21,7 @@ module Coradoc
|
|
21
21
|
|
22
22
|
def to_adoc
|
23
23
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
24
|
-
content = Coradoc::Generator.gen_adoc(@content)
|
24
|
+
content = Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content))
|
25
25
|
<<~HERE
|
26
26
|
|
27
27
|
#{anchor}#{style_str}#{level_str} #{content}
|
data/lib/coradoc/generator.rb
CHANGED
@@ -0,0 +1,28 @@
|
|
1
|
+
require "coradoc/input"
|
2
|
+
|
3
|
+
module Coradoc
|
4
|
+
module Input::Adoc
|
5
|
+
def self.processor_id
|
6
|
+
:adoc
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.processor_match?(filename)
|
10
|
+
%w[.adoc].any? { |i| filename.downcase.end_with?(i) }
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.processor_execute(input, _options = {})
|
14
|
+
ast = Coradoc::Parser::Base.new.parse(input)
|
15
|
+
Coradoc::Transformer.transform(ast[:document])
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.processor_postprocess(input, options)
|
19
|
+
if options[:output_processor] == :adoc
|
20
|
+
Coradoc::Input::HTML::Cleaner.new.tidy(input)
|
21
|
+
else
|
22
|
+
input
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Coradoc::Input.define(self)
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "word-to-markdown"
|
2
|
+
require "coradoc/input/html"
|
3
|
+
require "fileutils"
|
4
|
+
|
5
|
+
module Coradoc
|
6
|
+
module Input::Docx
|
7
|
+
def self.processor_id
|
8
|
+
:docx
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.processor_match?(filename)
|
12
|
+
%w[.docx .doc].any? { |i| filename.downcase.end_with?(i) }
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.processor_execute(input, options = {})
|
16
|
+
image_dir = Dir.mktmpdir
|
17
|
+
options = options.merge(sourcedir: image_dir)
|
18
|
+
doc = WordToMarkdown.new(input, image_dir)
|
19
|
+
doc = Coradoc::Input::HTML.cleaner.preprocess_word_html(doc.document.html)
|
20
|
+
options = WordToMarkdown::REVERSE_MARKDOWN_OPTIONS.merge(options)
|
21
|
+
Coradoc::Input::HTML.to_coradoc(doc, options)
|
22
|
+
ensure
|
23
|
+
FileUtils.rm_rf(image_dir)
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.processor_postprocess(data, options)
|
27
|
+
Coradoc::Input::HTML.processor_postprocess(data, options)
|
28
|
+
end
|
29
|
+
|
30
|
+
# This processor prefers to work on original files.
|
31
|
+
def self.processor_wants_filenames; true; end
|
32
|
+
|
33
|
+
Coradoc::Input.define(self)
|
34
|
+
end
|
35
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
= AsciiDoc from HTML and Microsoft Word: reverse_adoc
|
1
|
+
= AsciiDoc from HTML and Microsoft Word: formerly reverse_adoc
|
2
2
|
|
3
3
|
== Purpose
|
4
4
|
|
@@ -253,9 +253,9 @@ Simple to use.
|
|
253
253
|
|
254
254
|
[source,ruby]
|
255
255
|
----
|
256
|
-
require 'coradoc/reverse_adoc'
|
256
|
+
require 'coradoc/input/html'
|
257
257
|
|
258
|
-
result = Coradoc::ReverseAdoc.convert input
|
258
|
+
result = Coradoc::Input::HTML.convert input
|
259
259
|
result.inspect # " *feelings* "
|
260
260
|
----
|
261
261
|
|
@@ -265,9 +265,9 @@ Just pass your chosen configuration options in after the input. The given option
|
|
265
265
|
|
266
266
|
[source,ruby]
|
267
267
|
----
|
268
|
-
require 'coradoc/reverse_adoc'
|
268
|
+
require 'coradoc/input/html'
|
269
269
|
|
270
|
-
Coradoc::ReverseAdoc.convert(input, unknown_tags: :raise, mathml2asciimath: true)
|
270
|
+
Coradoc::Input::HTML.convert(input, unknown_tags: :raise, mathml2asciimath: true)
|
271
271
|
----
|
272
272
|
|
273
273
|
|
@@ -277,9 +277,9 @@ Or configure it block style on a initializer level. These configurations will la
|
|
277
277
|
|
278
278
|
[source,ruby]
|
279
279
|
----
|
280
|
-
require 'coradoc/reverse_adoc'
|
280
|
+
require 'coradoc/input/html'
|
281
281
|
|
282
|
-
Coradoc::ReverseAdoc.config do |config|
|
282
|
+
Coradoc::Input::HTML.config do |config|
|
283
283
|
config.unknown_tags = :bypass
|
284
284
|
config.mathml2asciimath = true
|
285
285
|
config.tag_border = ''
|
@@ -290,10 +290,10 @@ end
|
|
290
290
|
|
291
291
|
[source,ruby]
|
292
292
|
----
|
293
|
-
require 'coradoc/reverse_adoc'
|
293
|
+
require 'coradoc/input/html'
|
294
294
|
|
295
295
|
# Options can be supplied as keyword arguments
|
296
|
-
Coradoc::ReverseAdoc::HtmlConverter.to_coradoc("<b><i>Some input</i></b>")
|
296
|
+
Coradoc::Input::HTML::HtmlConverter.to_coradoc("<b><i>Some input</i></b>")
|
297
297
|
----
|
298
298
|
|
299
299
|
|
@@ -1,6 +1,10 @@
|
|
1
|
-
module Coradoc::ReverseAdoc
|
1
|
+
module Coradoc::Input::HTML
|
2
2
|
class Cleaner
|
3
3
|
def tidy(string)
|
4
|
+
if string.is_a? Hash
|
5
|
+
return string.transform_values { |i| tidy(i) }
|
6
|
+
end
|
7
|
+
|
4
8
|
result = HtmlConverter.track_time "Removing inner whitespace" do
|
5
9
|
remove_inner_whitespaces(String.new(string))
|
6
10
|
end
|
@@ -16,6 +20,16 @@ module Coradoc::ReverseAdoc
|
|
16
20
|
result = HtmlConverter.track_time "Cleaning punctuation characters" do
|
17
21
|
clean_punctuation_characters(result)
|
18
22
|
end
|
23
|
+
result = remove_block_leading_newlines(result)
|
24
|
+
result = remove_section_attribute_newlines(result)
|
25
|
+
end
|
26
|
+
|
27
|
+
def remove_block_leading_newlines(string)
|
28
|
+
string.gsub("]\n****\n\n", "]\n****\n")
|
29
|
+
end
|
30
|
+
|
31
|
+
def remove_section_attribute_newlines(string)
|
32
|
+
string.gsub("]\n\n==", "]\n==")
|
19
33
|
end
|
20
34
|
|
21
35
|
def remove_newlines(string)
|
@@ -47,20 +61,20 @@ module Coradoc::ReverseAdoc
|
|
47
61
|
# Same for underscores and brackets.
|
48
62
|
def clean_tag_borders(string)
|
49
63
|
# result = string.gsub(/\s?\*{2,}.*?\*{2,}\s?/) do |match|
|
50
|
-
# preserve_border_whitespaces(match, default_border: Coradoc::ReverseAdoc.config.tag_border) do
|
64
|
+
# preserve_border_whitespaces(match, default_border: Coradoc::Input::HTML.config.tag_border) do
|
51
65
|
# match.strip.sub("** ", "**").sub(" **", "**")
|
52
66
|
# end
|
53
67
|
# end
|
54
68
|
|
55
69
|
# result = string.gsub(/\s?_{2,}.*?_{2,}\s?/) do |match|
|
56
|
-
# preserve_border_whitespaces(match, default_border: Coradoc::ReverseAdoc.config.tag_border) do
|
70
|
+
# preserve_border_whitespaces(match, default_border: Coradoc::Input::HTML.config.tag_border) do
|
57
71
|
# match.strip.sub("__ ", "__").sub(" __", "__")
|
58
72
|
# end
|
59
73
|
# end
|
60
74
|
|
61
75
|
result = string.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match|
|
62
76
|
preserve_border_whitespaces(match,
|
63
|
-
default_border: Coradoc::ReverseAdoc.config.tag_border) do
|
77
|
+
default_border: Coradoc::Input::HTML.config.tag_border) do
|
64
78
|
match.strip.sub("~~ ", "~~").sub(" ~~", "~~")
|
65
79
|
end
|
66
80
|
end
|
@@ -83,8 +97,7 @@ module Coradoc::ReverseAdoc
|
|
83
97
|
|
84
98
|
def scrub_whitespace(string)
|
85
99
|
string.gsub!(/&nbsp;|&#160;|\u00a0/i, "&#160;") # HTML encoded spaces
|
86
|
-
string.
|
87
|
-
string.sub!(/[[:space:]]+\z$/m, "") # document trailing whitespace
|
100
|
+
string = Coradoc.strip_unicode(string) # Strip document-level leading and trailing whitespace
|
88
101
|
string.gsub!(/( +)$/, " ") # line trailing whitespace
|
89
102
|
string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
|
90
103
|
# string.delete!('?| ') # Unicode non-breaking spaces, injected as tabs
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require "tmpdir"
|
2
|
+
|
3
|
+
module Coradoc
|
4
|
+
module Input::HTML
|
5
|
+
class Config
|
6
|
+
def initialize
|
7
|
+
@unknown_tags = :pass_through
|
8
|
+
@input_format = :html
|
9
|
+
@mathml2asciimath = false
|
10
|
+
@external_images = false
|
11
|
+
|
12
|
+
# Destination to save file and images
|
13
|
+
@destination = nil
|
14
|
+
|
15
|
+
# Source of HTML
|
16
|
+
# @sourcedir = nil
|
17
|
+
|
18
|
+
# Image counter, assuming there are max 999 images
|
19
|
+
@image_counter = 1
|
20
|
+
# pad with 0s
|
21
|
+
@image_counter_pattern = "%03d"
|
22
|
+
|
23
|
+
@em_delimiter = "_".freeze
|
24
|
+
@strong_delimiter = "*".freeze
|
25
|
+
@inline_options = {}
|
26
|
+
@tag_border = " ".freeze
|
27
|
+
|
28
|
+
@split_sections = nil
|
29
|
+
|
30
|
+
# Document width - used to compute table sizes.
|
31
|
+
# This is an assumption for screen size in input document.
|
32
|
+
# If column widths are specified in absolute values, then we
|
33
|
+
# have to convert them to relative values, as AsciiDoc only
|
34
|
+
# supports those.
|
35
|
+
@doc_width = 1000
|
36
|
+
|
37
|
+
# Plugin system
|
38
|
+
@plugins = []
|
39
|
+
|
40
|
+
# Debugging options
|
41
|
+
@track_time = false
|
42
|
+
end
|
43
|
+
|
44
|
+
def with(options = {})
|
45
|
+
old_options = @inline_options
|
46
|
+
@inline_options = options
|
47
|
+
result = yield
|
48
|
+
@inline_options = old_options
|
49
|
+
result
|
50
|
+
end
|
51
|
+
|
52
|
+
def self.declare_option(option)
|
53
|
+
define_method(option) do
|
54
|
+
@inline_options[option] || instance_variable_get(:"@#{option}")
|
55
|
+
end
|
56
|
+
|
57
|
+
attr_writer option
|
58
|
+
end
|
59
|
+
|
60
|
+
declare_option :unknown_tags
|
61
|
+
declare_option :tag_border
|
62
|
+
declare_option :mathml2asciimath
|
63
|
+
declare_option :external_images
|
64
|
+
declare_option :destination
|
65
|
+
declare_option :sourcedir
|
66
|
+
declare_option :image_counter
|
67
|
+
declare_option :image_counter_pattern
|
68
|
+
declare_option :input_format
|
69
|
+
declare_option :split_sections
|
70
|
+
declare_option :doc_width
|
71
|
+
declare_option :plugins
|
72
|
+
declare_option :track_time
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
module Coradoc::ReverseAdoc
|
1
|
+
module Coradoc::Input::HTML
|
2
2
|
module Converters
|
3
3
|
class Blockquote < Base
|
4
4
|
def to_coradoc(node, state = {})
|
@@ -7,7 +7,7 @@ module Coradoc::ReverseAdoc
|
|
7
7
|
attributes = Coradoc::Element::AttributeList.new
|
8
8
|
attributes.add_positional("quote", cite) if !cite.nil?
|
9
9
|
content = treat_children(node, state).strip
|
10
|
-
content = Coradoc::ReverseAdoc.cleaner.remove_newlines(content)
|
10
|
+
content = Coradoc::Input::HTML.cleaner.remove_newlines(content)
|
11
11
|
Coradoc::Element::Block::Quote.new(nil, lines: content,
|
12
12
|
attributes: attributes)
|
13
13
|
end
|