coradoc 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +35 -0
- data/{.docker/readme.md → README.Docker.adoc} +21 -25
- data/README.adoc +121 -0
- data/coradoc.gemspec +4 -0
- data/docker-compose.yml +14 -0
- data/exe/coradoc +5 -0
- data/exe/reverse_adoc +24 -34
- data/exe/w2a +16 -28
- data/lib/coradoc/cli.rb +105 -0
- data/lib/coradoc/converter.rb +145 -0
- data/lib/coradoc/document.rb +13 -0
- data/lib/coradoc/element/admonition.rb +5 -0
- data/lib/coradoc/element/attribute_list.rb +1 -1
- data/lib/coradoc/element/author.rb +4 -4
- data/lib/coradoc/element/bibliography.rb +24 -0
- data/lib/coradoc/element/bibliography_entry.rb +24 -0
- data/lib/coradoc/element/block/core.rb +6 -4
- data/lib/coradoc/element/block/example.rb +1 -0
- data/lib/coradoc/element/block/pass.rb +21 -0
- data/lib/coradoc/element/block/quote.rb +1 -1
- data/lib/coradoc/element/block/reviewer_comment.rb +19 -0
- data/lib/coradoc/element/block/side.rb +4 -2
- data/lib/coradoc/element/block/sourcecode.rb +1 -0
- data/lib/coradoc/element/block.rb +2 -0
- data/lib/coradoc/element/comment_block.rb +22 -0
- data/lib/coradoc/element/comment_line.rb +18 -0
- data/lib/coradoc/element/document_attributes.rb +8 -1
- data/lib/coradoc/element/header.rb +1 -1
- data/lib/coradoc/element/image/block_image.rb +8 -0
- data/lib/coradoc/element/image/core.rb +6 -5
- data/lib/coradoc/element/include.rb +18 -0
- data/lib/coradoc/element/inline/citation.rb +24 -0
- data/lib/coradoc/element/inline/cross_reference.rb +29 -8
- data/lib/coradoc/element/inline.rb +1 -0
- data/lib/coradoc/element/list/core.rb +10 -2
- data/lib/coradoc/element/list_item.rb +7 -5
- data/lib/coradoc/element/paragraph.rb +8 -5
- data/lib/coradoc/element/revision.rb +1 -1
- data/lib/coradoc/element/section.rb +6 -4
- data/lib/coradoc/element/table.rb +2 -2
- data/lib/coradoc/element/tag.rb +19 -0
- data/lib/coradoc/element/term.rb +21 -0
- data/lib/coradoc/element/text_element.rb +9 -1
- data/lib/coradoc/element/title.rb +1 -1
- data/lib/coradoc/generator.rb +2 -0
- data/lib/coradoc/input/adoc.rb +28 -0
- data/lib/coradoc/input/docx.rb +35 -0
- data/lib/coradoc/{reverse_adoc → input/html}/README.adoc +9 -9
- data/lib/coradoc/{reverse_adoc → input/html}/cleaner.rb +18 -4
- data/lib/coradoc/input/html/config.rb +75 -0
- data/lib/coradoc/{reverse_adoc → input/html}/converters/a.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/aside.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/audio.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/base.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/blockquote.rb +2 -2
- data/lib/coradoc/{reverse_adoc → input/html}/converters/br.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/bypass.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/code.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/div.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/dl.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/drop.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/em.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/figure.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/h.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/head.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/hr.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/ignore.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/img.rb +7 -7
- data/lib/coradoc/{reverse_adoc → input/html}/converters/li.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/mark.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/markup.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/math.rb +3 -3
- data/lib/coradoc/{reverse_adoc → input/html}/converters/ol.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/p.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/pass_through.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/pre.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/q.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/strong.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/sub.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/sup.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/table.rb +4 -4
- data/lib/coradoc/{reverse_adoc → input/html}/converters/td.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/text.rb +2 -2
- data/lib/coradoc/{reverse_adoc → input/html}/converters/th.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/tr.rb +1 -1
- data/lib/coradoc/{reverse_adoc → input/html}/converters/video.rb +1 -1
- data/lib/coradoc/input/html/converters.rb +57 -0
- data/lib/coradoc/input/html/errors.rb +12 -0
- data/lib/coradoc/{reverse_adoc → input/html}/html_converter.rb +37 -22
- data/lib/coradoc/{reverse_adoc → input/html}/plugin.rb +6 -6
- data/lib/coradoc/{reverse_adoc → input/html}/plugins/plateau.rb +3 -3
- data/lib/coradoc/{reverse_adoc → input/html}/postprocessor.rb +3 -3
- data/lib/coradoc/input/html.rb +59 -0
- data/lib/coradoc/input.rb +12 -0
- data/lib/coradoc/output/adoc.rb +17 -0
- data/lib/coradoc/output/coradoc_tree_debug.rb +19 -0
- data/lib/coradoc/output.rb +11 -0
- data/lib/coradoc/parser/asciidoc/admonition.rb +24 -0
- data/lib/coradoc/parser/asciidoc/attribute_list.rb +67 -0
- data/lib/coradoc/parser/asciidoc/base.rb +101 -13
- data/lib/coradoc/parser/asciidoc/bibliography.rb +30 -0
- data/lib/coradoc/parser/asciidoc/block.rb +82 -0
- data/lib/coradoc/parser/asciidoc/citation.rb +48 -0
- data/lib/coradoc/parser/asciidoc/content.rb +15 -120
- data/lib/coradoc/parser/asciidoc/document_attributes.rb +12 -5
- data/lib/coradoc/parser/asciidoc/header.rb +1 -4
- data/lib/coradoc/parser/asciidoc/inline.rb +72 -0
- data/lib/coradoc/parser/asciidoc/list.rb +81 -0
- data/lib/coradoc/parser/asciidoc/paragraph.rb +33 -0
- data/lib/coradoc/parser/asciidoc/section.rb +36 -31
- data/lib/coradoc/parser/asciidoc/table.rb +32 -0
- data/lib/coradoc/parser/asciidoc/term.rb +23 -0
- data/lib/coradoc/parser/base.rb +39 -4
- data/lib/coradoc/transformer.rb +353 -82
- data/lib/coradoc/util.rb +1 -1
- data/lib/coradoc/version.rb +1 -1
- data/lib/coradoc.rb +8 -5
- data/lib/reverse_adoc.rb +6 -6
- data/utils/parser_analyzer.rb +66 -0
- data/utils/round_trip.rb +37 -0
- metadata +112 -54
- data/.docker/Makefile +0 -35
- data/.docker/docker-compose.yml +0 -14
- data/Makefile +0 -1
- data/README.md +0 -73
- data/docker-compose.yml +0 -1
- data/lib/coradoc/reverse_adoc/config.rb +0 -73
- data/lib/coradoc/reverse_adoc/converters.rb +0 -55
- data/lib/coradoc/reverse_adoc/errors.rb +0 -10
- data/lib/coradoc/reverse_adoc.rb +0 -30
- /data/{.docker/Dockerfile → Dockerfile} +0 -0
- /data/lib/coradoc/{reverse_adoc → input/html}/LICENSE.txt +0 -0
|
@@ -1,31 +1,33 @@
|
|
|
1
1
|
module Coradoc
  module Element
    # One item of a list. Stores the item's marker, an optional id (wrapped
    # in an inline anchor when present), its content and the line break that
    # is emitted after the item.
    class ListItem < Base
      attr_accessor :marker, :id, :anchor, :content, :line_break

      declare_children :content, :id, :anchor

      # content - a single element or an array of elements.
      # options - :marker (default nil), :id (default nil) and
      #           :line_break (default "\n").
      def initialize(content, options = {})
        @marker = options.fetch(:marker, nil)
        @id = options.fetch(:id, nil)
        @anchor = (Inline::Anchor.new(@id) unless @id.nil?)
        @content = content
        @line_break = options.fetch(:line_break, "\n")
      end

      # Returns a String: a leading space, the anchor (if any), every
      # rendered sub-item joined with "\n+\n", then the configured line
      # break. Hard line breaks inside the content are left out.
      def to_adoc
        anchor_text = @anchor.nil? ? "" : @anchor.to_adoc.to_s

        printable = Array(@content).reject do |subitem|
          subitem.is_a?(Inline::HardLineBreak)
        end

        rendered = printable.map do |subitem|
          text = Coradoc::Generator.gen_adoc(subitem)
          # Only try to postprocess elements that are text,
          # otherwise we could strip markup.
          if Coradoc.a_single?(subitem, Coradoc::Element::TextElement)
            text = Coradoc.strip_unicode(text)
          end
          text.chomp
        end

        body = rendered.join("\n+\n")
        " #{anchor_text}#{body.chomp}#{@line_break}"
      end
    end
  end
end
|
|
@@ -7,9 +7,10 @@ module Coradoc
|
|
|
7
7
|
|
|
8
8
|
def initialize(content, options = {})
|
|
9
9
|
@content = content
|
|
10
|
-
@
|
|
11
|
-
|
|
12
|
-
@
|
|
10
|
+
@id = options.fetch(:id, nil)
|
|
11
|
+
@anchor = Inline::Anchor.new(@id) if @id
|
|
12
|
+
@title = options.fetch(:title, nil)
|
|
13
|
+
@attributes = options.fetch(:attributes, nil)
|
|
13
14
|
@tdsinglepara = options.fetch(:tdsinglepara, nil)
|
|
14
15
|
end
|
|
15
16
|
|
|
@@ -22,11 +23,13 @@ module Coradoc
|
|
|
22
23
|
end
|
|
23
24
|
|
|
24
25
|
def to_adoc
|
|
26
|
+
title = @title.nil? ? "" : ".#{Coradoc::Generator.gen_adoc(@title)}\n"
|
|
25
27
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
|
28
|
+
attrs = @attributes.nil? ? "" : "#{@attributes.to_adoc}\n"
|
|
26
29
|
if @tdsinglepara
|
|
27
|
-
anchor
|
|
30
|
+
"#{title}#{anchor}" << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content))
|
|
28
31
|
else
|
|
29
|
-
"\n\n#{anchor}" << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content)) << "\n\n"
|
|
32
|
+
"\n\n#{title}#{anchor}#{attrs}" << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content)) << "\n\n"
|
|
30
33
|
end
|
|
31
34
|
end
|
|
32
35
|
end
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
module Coradoc
|
|
2
2
|
module Element
|
|
3
3
|
class Section < Base
|
|
4
|
-
attr_accessor :id, :title, :contents, :sections
|
|
4
|
+
attr_accessor :id, :title, :attrs, :contents, :sections
|
|
5
5
|
|
|
6
6
|
declare_children :id, :title, :contents, :sections
|
|
7
7
|
|
|
@@ -9,9 +9,10 @@ module Coradoc
|
|
|
9
9
|
@title = title
|
|
10
10
|
@id = options.fetch(:id, nil)
|
|
11
11
|
@id = nil if @id == ""
|
|
12
|
+
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
|
13
|
+
@attrs = options.fetch(:attribute_list, "")
|
|
12
14
|
@contents = options.fetch(:contents, [])
|
|
13
15
|
@sections = options.fetch(:sections, [])
|
|
14
|
-
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
|
15
16
|
end
|
|
16
17
|
|
|
17
18
|
def glossaries
|
|
@@ -27,6 +28,7 @@ module Coradoc
|
|
|
27
28
|
def to_adoc
|
|
28
29
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
|
29
30
|
title = Coradoc::Generator.gen_adoc(@title)
|
|
31
|
+
attrs = @attrs.to_s.empty? ? "" : "#{@attrs.to_adoc}\n"
|
|
30
32
|
content = Coradoc::Generator.gen_adoc(@contents)
|
|
31
33
|
sections = Coradoc::Generator.gen_adoc(@sections)
|
|
32
34
|
|
|
@@ -36,11 +38,11 @@ module Coradoc
|
|
|
36
38
|
|
|
37
39
|
# Only try to postprocess elements that are text,
|
|
38
40
|
# otherwise we could strip markup.
|
|
39
|
-
if Coradoc.
|
|
41
|
+
if Coradoc.a_single?(@contents, Coradoc::Element::TextElement)
|
|
40
42
|
content = Coradoc.strip_unicode(content)
|
|
41
43
|
end
|
|
42
44
|
|
|
43
|
-
"\n#{anchor}" << title << content << sections << "\n"
|
|
45
|
+
"\n#{anchor}" << attrs << title << content << sections << "\n"
|
|
44
46
|
end
|
|
45
47
|
|
|
46
48
|
# Check for cases when Section is simply an equivalent of an empty <DIV>
|
|
@@ -10,7 +10,7 @@ module Coradoc
|
|
|
10
10
|
@title = title
|
|
11
11
|
@id = options.fetch(:id, nil)
|
|
12
12
|
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
|
13
|
-
@attrs = options.fetch(:
|
|
13
|
+
@attrs = options.fetch(:attributes, nil)
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
def to_adoc
|
|
@@ -78,7 +78,7 @@ module Coradoc
|
|
|
78
78
|
content = Coradoc::Generator.gen_adoc(content)
|
|
79
79
|
# Only try to postprocess elements that are text,
|
|
80
80
|
# otherwise we could strip markup.
|
|
81
|
-
if Coradoc.
|
|
81
|
+
if Coradoc.a_single?(@content, Coradoc::Element::TextElement)
|
|
82
82
|
content = Coradoc.strip_unicode(content)
|
|
83
83
|
end
|
|
84
84
|
"#{@colrowattr}#{@alignattr}#{@style}| #{anchor}#{content}"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Coradoc
  module Element
    # A tag marker that is written out as a line comment of the form
    # "// <prefix>::<name><attrs>" followed by the configured line break.
    class Tag < Base
      attr_accessor :name, :prefix, :attrs, :line_break

      # name    - the tag name placed after "::".
      # options - :prefix (default "tag"), :attribute_list (default: a new
      #           empty AttributeList) and :line_break (default "\n").
      def initialize(name, options = {})
        @name = name
        @line_break = options.fetch(:line_break, "\n")
        @prefix = options.fetch(:prefix, "tag")
        @attrs = options.fetch(:attribute_list) { AttributeList.new }
      end

      # Returns the comment line as a String.
      def to_adoc
        rendered_attrs = @attrs.to_adoc
        format("// %s::%s%s%s", @prefix, @name, rendered_attrs, @line_break)
      end
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
module Coradoc
  module Element
    # A term reference. Rendered as "<type>:[<term>]" when the language is
    # :en and as "[<type>]#<term>#" for every other language.
    class Term < Base
      # NOTE(review): `options` is exposed and declared as a child, but this
      # class never assigns @options, so it reads as nil unless something
      # else sets it - confirm whether that is intended.
      attr_accessor :term, :options

      declare_children :term, :options

      # term    - the term content.
      # options - :type (default nil), :lang (default :en) and
      #           :line_break (default "").
      def initialize(term, options = {})
        @term = term
        @lang = options.fetch(:lang, :en)
        @type = options.fetch(:type, nil)
        @line_break = options.fetch(:line_break, "")
      end

      # Returns the term markup as a String, followed by the line break.
      def to_adoc
        if @lang == :en
          "#{@type}:[#{@term}]#{@line_break}"
        else
          "[#{@type}]##{@term}##{@line_break}"
        end
      end
    end
  end
end
|
|
@@ -9,10 +9,14 @@ module Coradoc
|
|
|
9
9
|
@content = content # .to_s
|
|
10
10
|
@id = options.fetch(:id, nil)
|
|
11
11
|
@line_break = options.fetch(:line_break, "")
|
|
12
|
+
@html_cleanup = options.fetch(:html_cleanup, false)
|
|
13
|
+
if @html_cleanup
|
|
14
|
+
@content = treat_text_to_adoc(@content)
|
|
15
|
+
end
|
|
12
16
|
end
|
|
13
17
|
|
|
14
18
|
def to_adoc
|
|
15
|
-
Coradoc::Generator.gen_adoc(
|
|
19
|
+
Coradoc::Generator.gen_adoc(@content) + @line_break
|
|
16
20
|
end
|
|
17
21
|
|
|
18
22
|
def treat_text_to_adoc(text)
|
|
@@ -64,6 +68,10 @@ module Coradoc
|
|
|
64
68
|
def initialize(line_break)
|
|
65
69
|
@line_break = line_break
|
|
66
70
|
end
|
|
71
|
+
|
|
72
|
+
def to_adoc
|
|
73
|
+
@line_break
|
|
74
|
+
end
|
|
67
75
|
end
|
|
68
76
|
|
|
69
77
|
class Highlight < Element::TextElement
|
|
@@ -7,7 +7,7 @@ module Coradoc
|
|
|
7
7
|
|
|
8
8
|
def initialize(content, level, options = {})
|
|
9
9
|
@level_int = level
|
|
10
|
-
@level_int = level.length if level.is_a?(String)
|
|
10
|
+
# @level_int = level.length - 1 if level.is_a?(String)
|
|
11
11
|
@content = content
|
|
12
12
|
@id = options.fetch(:id, nil)
|
|
13
13
|
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
data/lib/coradoc/generator.rb
CHANGED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
require "coradoc/input"
|
|
2
|
+
|
|
3
|
+
module Coradoc
  # Input processor for AsciiDoc sources. It is registered with
  # Coradoc::Input when this file is loaded.
  module Input::Adoc
    class << self
      # Identifier of this processor.
      def processor_id
        :adoc
      end

      # True when the file name ends in ".adoc" (case-insensitive).
      def processor_match?(filename)
        filename.downcase.end_with?(".adoc")
      end

      # Parses the input and transforms the :document entry of the
      # resulting AST. The options argument is accepted but not used.
      def processor_execute(input, _options = {})
        parsed = Coradoc::Parser::Base.new.parse(input)
        Coradoc::Transformer.transform(parsed[:document])
      end

      # Runs the HTML cleaner's tidy step when the output processor is
      # :adoc; any other output is returned untouched.
      def processor_postprocess(input, options)
        return input unless options[:output_processor] == :adoc

        Coradoc::Input::HTML::Cleaner.new.tidy(input)
      end
    end

    Coradoc::Input.define(self)
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require "word-to-markdown"
|
|
2
|
+
require "coradoc/input/html"
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module Coradoc
  # Input processor for Microsoft Word documents. It is registered with
  # Coradoc::Input when this file is loaded.
  module Input::Docx
    # Identifier of this processor.
    def self.processor_id
      :docx
    end

    # True when the file name ends in ".docx" or ".doc" (case-insensitive).
    def self.processor_match?(filename)
      filename.downcase.end_with?(".docx", ".doc")
    end

    # Converts a Word document into a Coradoc tree.
    #
    # input   - passed straight to WordToMarkdown.new (this processor asks
    #           for file names, see processor_wants_filenames).
    # options - conversion options; :sourcedir is overridden with a
    #           temporary directory that is also handed to WordToMarkdown.
    #
    # The document's HTML is run through the HTML cleaner's
    # preprocess_word_html step and then through
    # Coradoc::Input::HTML.to_coradoc. The temporary directory is removed
    # afterwards, whether or not the conversion succeeded.
    def self.processor_execute(input, options = {})
      image_dir = Dir.mktmpdir
      options = options.merge(sourcedir: image_dir)
      doc = WordToMarkdown.new(input, image_dir)
      doc = Coradoc::Input::HTML.cleaner.preprocess_word_html(doc.document.html)
      options = WordToMarkdown::REVERSE_MARKDOWN_OPTIONS.merge(options)
      Coradoc::Input::HTML.to_coradoc(doc, options)
    ensure
      # image_dir is still nil when Dir.mktmpdir itself failed; calling
      # rm_rf(nil) would raise and hide the original error.
      FileUtils.rm_rf(image_dir) if image_dir
    end

    # Delegates postprocessing to the HTML input processor.
    def self.processor_postprocess(data, options)
      Coradoc::Input::HTML.processor_postprocess(data, options)
    end

    # This processor prefers to work on original files.
    def self.processor_wants_filenames; true; end

    Coradoc::Input.define(self)
  end
end
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
= AsciiDoc from HTML and Microsoft Word: reverse_adoc
|
|
1
|
+
= AsciiDoc from HTML and Microsoft Word: formerly reverse_adoc
|
|
2
2
|
|
|
3
3
|
== Purpose
|
|
4
4
|
|
|
@@ -253,9 +253,9 @@ Simple to use.
|
|
|
253
253
|
|
|
254
254
|
[source,ruby]
|
|
255
255
|
----
|
|
256
|
-
require 'coradoc/
|
|
256
|
+
require 'coradoc/input/html'
|
|
257
257
|
|
|
258
|
-
result = Coradoc::
|
|
258
|
+
result = Coradoc::Input::HTML.convert input
|
|
259
259
|
result.inspect # " *feelings* "
|
|
260
260
|
----
|
|
261
261
|
|
|
@@ -265,9 +265,9 @@ Just pass your chosen configuration options in after the input. The given option
|
|
|
265
265
|
|
|
266
266
|
[source,ruby]
|
|
267
267
|
----
|
|
268
|
-
require 'coradoc/
|
|
268
|
+
require 'coradoc/input/html'
|
|
269
269
|
|
|
270
|
-
Coradoc::
|
|
270
|
+
Coradoc::Input::HTML.convert(input, unknown_tags: :raise, mathml2asciimath: true)
|
|
271
271
|
----
|
|
272
272
|
|
|
273
273
|
|
|
@@ -277,9 +277,9 @@ Or configure it block style on a initializer level. These configurations will la
|
|
|
277
277
|
|
|
278
278
|
[source,ruby]
|
|
279
279
|
----
|
|
280
|
-
require 'coradoc/
|
|
280
|
+
require 'coradoc/input/html'
|
|
281
281
|
|
|
282
|
-
Coradoc::
|
|
282
|
+
Coradoc::Input::HTML.config do |config|
|
|
283
283
|
config.unknown_tags = :bypass
|
|
284
284
|
config.mathml2asciimath = true
|
|
285
285
|
config.tag_border = ''
|
|
@@ -290,10 +290,10 @@ end
|
|
|
290
290
|
|
|
291
291
|
[source,ruby]
|
|
292
292
|
----
|
|
293
|
-
require 'coradoc/
|
|
293
|
+
require 'coradoc/input/html'
|
|
294
294
|
|
|
295
295
|
# Options can be supplied as keyword arguments
|
|
296
|
-
Coradoc::
|
|
296
|
+
Coradoc::Input::HTML::HtmlConverter.to_coradoc("<b><i>Some input</i></b>")
|
|
297
297
|
----
|
|
298
298
|
|
|
299
299
|
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
module Coradoc::
|
|
1
|
+
module Coradoc::Input::HTML
|
|
2
2
|
class Cleaner
|
|
3
3
|
def tidy(string)
|
|
4
|
+
if string.is_a? Hash
|
|
5
|
+
return string.transform_values { |i| tidy(i) }
|
|
6
|
+
end
|
|
7
|
+
|
|
4
8
|
result = HtmlConverter.track_time "Removing inner whitespace" do
|
|
5
9
|
remove_inner_whitespaces(String.new(string))
|
|
6
10
|
end
|
|
@@ -16,6 +20,16 @@ module Coradoc::ReverseAdoc
|
|
|
16
20
|
result = HtmlConverter.track_time "Cleaning punctuation characters" do
|
|
17
21
|
clean_punctuation_characters(result)
|
|
18
22
|
end
|
|
23
|
+
result = remove_block_leading_newlines(result)
|
|
24
|
+
result = remove_section_attribute_newlines(result)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def remove_block_leading_newlines(string)
|
|
28
|
+
string.gsub("]\n****\n\n", "]\n****\n")
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def remove_section_attribute_newlines(string)
|
|
32
|
+
string.gsub("]\n\n==", "]\n==")
|
|
19
33
|
end
|
|
20
34
|
|
|
21
35
|
def remove_newlines(string)
|
|
@@ -47,20 +61,20 @@ module Coradoc::ReverseAdoc
|
|
|
47
61
|
# Same for underscores and brackets.
|
|
48
62
|
def clean_tag_borders(string)
|
|
49
63
|
# result = string.gsub(/\s?\*{2,}.*?\*{2,}\s?/) do |match|
|
|
50
|
-
# preserve_border_whitespaces(match, default_border: Coradoc::
|
|
64
|
+
# preserve_border_whitespaces(match, default_border: Coradoc::Input::HTML.config.tag_border) do
|
|
51
65
|
# match.strip.sub("** ", "**").sub(" **", "**")
|
|
52
66
|
# end
|
|
53
67
|
# end
|
|
54
68
|
|
|
55
69
|
# result = string.gsub(/\s?_{2,}.*?_{2,}\s?/) do |match|
|
|
56
|
-
# preserve_border_whitespaces(match, default_border: Coradoc::
|
|
70
|
+
# preserve_border_whitespaces(match, default_border: Coradoc::Input::HTML.config.tag_border) do
|
|
57
71
|
# match.strip.sub("__ ", "__").sub(" __", "__")
|
|
58
72
|
# end
|
|
59
73
|
# end
|
|
60
74
|
|
|
61
75
|
result = string.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match|
|
|
62
76
|
preserve_border_whitespaces(match,
|
|
63
|
-
default_border: Coradoc::
|
|
77
|
+
default_border: Coradoc::Input::HTML.config.tag_border) do
|
|
64
78
|
match.strip.sub("~~ ", "~~").sub(" ~~", "~~")
|
|
65
79
|
end
|
|
66
80
|
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require "tmpdir"
|
|
2
|
+
|
|
3
|
+
module Coradoc
  module Input
    module HTML
      # Settings for the HTML input converter.
      #
      # Every option has a global value (set through the writer) and may be
      # overridden for the duration of a block through #with.
      class Config
        def initialize
          @unknown_tags = :pass_through
          @input_format = :html
          @mathml2asciimath = false
          @external_images = false

          # Destination to save file and images
          @destination = nil

          # Source of HTML
          @sourcedir = nil

          # Image counter, assuming there are max 999 images
          @image_counter = 1
          # pad with 0s
          @image_counter_pattern = "%03d"

          @em_delimiter = "_".freeze
          @strong_delimiter = "*".freeze
          @inline_options = {}
          @tag_border = " ".freeze

          @split_sections = nil

          # Document width - used to compute table sizes.
          # This is an assumption for screen size in input document.
          # If column widths are specified in absolute values, then we
          # have to convert them to relative values, as AsciiDoc only
          # supports those.
          @doc_width = 1000

          # Plugin system
          @plugins = []

          # Debugging options
          @track_time = false
        end

        # Runs the block with `options` as temporary overrides and returns
        # the block's value.
        #
        # The previous overrides are restored in an `ensure`, so a block that
        # raises does not leave its options active for later calls.
        def with(options = {})
          old_options = @inline_options
          @inline_options = options
          yield
        ensure
          @inline_options = old_options
        end

        # Defines a reader and a writer for `option`.
        #
        # The reader returns the temporary override when one is present and
        # the global value otherwise. Only a nil override falls through, so
        # an explicit `false` override can switch off a global `true`.
        def self.declare_option(option)
          ivar = :"@#{option}"

          define_method(option) do
            inline = @inline_options[option]
            inline.nil? ? instance_variable_get(ivar) : inline
          end

          attr_writer option
        end

        declare_option :unknown_tags
        declare_option :tag_border
        declare_option :mathml2asciimath
        declare_option :external_images
        declare_option :destination
        declare_option :sourcedir
        declare_option :image_counter
        declare_option :image_counter_pattern
        declare_option :input_format
        declare_option :split_sections
        declare_option :doc_width
        declare_option :plugins
        declare_option :track_time
      end
    end
  end
end
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
module Coradoc::
|
|
1
|
+
module Coradoc::Input::HTML
|
|
2
2
|
module Converters
|
|
3
3
|
class Blockquote < Base
|
|
4
4
|
def to_coradoc(node, state = {})
|
|
@@ -7,7 +7,7 @@ module Coradoc::ReverseAdoc
|
|
|
7
7
|
attributes = Coradoc::Element::AttributeList.new
|
|
8
8
|
attributes.add_positional("quote", cite) if !cite.nil?
|
|
9
9
|
content = treat_children(node, state).strip
|
|
10
|
-
content = Coradoc::
|
|
10
|
+
content = Coradoc::Input::HTML.cleaner.remove_newlines(content)
|
|
11
11
|
Coradoc::Element::Block::Quote.new(nil, lines: content,
|
|
12
12
|
attributes: attributes)
|
|
13
13
|
end
|
|
@@ -4,18 +4,18 @@ require "tempfile"
|
|
|
4
4
|
require "base64"
|
|
5
5
|
require "marcel"
|
|
6
6
|
|
|
7
|
-
module Coradoc::
|
|
7
|
+
module Coradoc::Input::HTML
|
|
8
8
|
module Converters
|
|
9
9
|
class Img < Base
|
|
10
10
|
def image_number
|
|
11
11
|
sprintf(
|
|
12
|
-
Coradoc::
|
|
13
|
-
Coradoc::
|
|
12
|
+
Coradoc::Input::HTML.config.image_counter_pattern,
|
|
13
|
+
Coradoc::Input::HTML.config.image_counter,
|
|
14
14
|
)
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def image_number_increment
|
|
18
|
-
Coradoc::
|
|
18
|
+
Coradoc::Input::HTML.config.image_counter += 1
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
def datauri2file(src)
|
|
@@ -23,7 +23,7 @@ module Coradoc::ReverseAdoc
|
|
|
23
23
|
|
|
24
24
|
%r{^data:image/(?:[^;]+);base64,(?<imgdata>.+)$} =~ src
|
|
25
25
|
|
|
26
|
-
dest_dir = Pathname.new(Coradoc::
|
|
26
|
+
dest_dir = Pathname.new(Coradoc::Input::HTML.config.destination).dirname
|
|
27
27
|
images_dir = dest_dir.join("images")
|
|
28
28
|
FileUtils.mkdir_p(images_dir)
|
|
29
29
|
|
|
@@ -51,7 +51,7 @@ module Coradoc::ReverseAdoc
|
|
|
51
51
|
return copy_temp_file(imgdata) if imgdata
|
|
52
52
|
|
|
53
53
|
ext = File.extname(src).strip.downcase[1..-1]
|
|
54
|
-
[ext, Pathname.new(Coradoc::
|
|
54
|
+
[ext, Pathname.new(Coradoc::Input::HTML.config.sourcedir).join(src)]
|
|
55
55
|
end
|
|
56
56
|
|
|
57
57
|
def copy_temp_file(imgdata)
|
|
@@ -76,7 +76,7 @@ module Coradoc::ReverseAdoc
|
|
|
76
76
|
|
|
77
77
|
title = extract_title(node)
|
|
78
78
|
|
|
79
|
-
if Coradoc::
|
|
79
|
+
if Coradoc::Input::HTML.config.external_images
|
|
80
80
|
# puts "external image conversion #{id}, #{src}"
|
|
81
81
|
src = datauri2file(src)
|
|
82
82
|
end
|