isodoc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ # This project follows the Ribose OSS style guide.
2
+ # https://github.com/riboseinc/oss-guides
3
+ # All project-specific additions and overrides should be specified in this file.
4
+
5
+ inherit_from:
6
+ # Thoughtbot's style guide from: https://github.com/thoughtbot/guides
7
+ - ".rubocop.tb.yml"
8
+ # Overrides from Ribose
9
+ - ".rubocop.ribose.yml"
10
+ AllCops:
11
+ DisplayCopNames: false
12
+ StyleGuideCopsOnly: false
13
+ TargetRubyVersion: 2.4
14
+ Rails:
15
+ Enabled: true
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in isodoc.gemspec
4
+ gem "html2doc",
5
+ git: "https://github.com/riboseinc/html2doc.git"
6
+ gemspec
@@ -0,0 +1,32 @@
1
+ = isodoc
2
+
3
+ This Gem converts documents in the https://github.com/riboseinc/isodoc-models[ISODoc document model] into HTML and Microsoft Word.
4
+
5
+ The Gem provides a class, `IsoDoc::Convert`, which is instantiated with a hash of file locations:
6
+
7
+ htmlstylesheet:: Generic stylesheet for HTML
8
+ wordstylesheet:: Generic stylesheet for Word
9
+ standardstylesheet:: Stylesheet specific to Standard
10
+ header:: Header file for Word
11
+ htmlcoverpage:: Cover page for HTML
12
+ wordcoverpage:: Cover page for Word
13
+ htmlintropage:: Introductory page for HTML
14
+ wordintropage:: Introductory page for Word
15
+
16
+ e.g.
17
+
18
+ [source,ruby]
19
+ --
20
+ IsoDoc::Convert.new(
21
+ htmlstylesheet: html_doc_path("htmlstyle.css"),
22
+ wordstylesheet: nil,
23
+ standardstylesheet: html_doc_path("isodoc.css"),
24
+ header: html_doc_path("header.html"),
25
+ htmlcoverpage: html_doc_path("iso_titlepage.html"),
26
+ wordcoverpage: html_doc_path("iso_titlepage.html"),
27
+ htmlintropage: html_doc_path("iso_intro.html"),
28
+ wordintropage: html_doc_path("iso_intro.html"),
29
+ )
30
+ --
31
+
32
+ NOTE: Cover page and Intro page must be XHTML fragments, not HTML fragments. In particular, unlike Word HTML, all HTML attributes need to be quoted: `<p class="MsoToc2">`, not `<p class=MsoToc2>`.
@@ -0,0 +1,51 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path("../lib", __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "isodoc/version"
6
+
7
# Gem specification for isodoc: metadata, packaged files and dependencies.
Gem::Specification.new do |spec|
  spec.name          = "isodoc"
  spec.version       = IsoDoc::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  # The gem emits Word and HTML; the earlier summary wrongly said the output
  # was "in AsciiDoc", contradicting the description below.
  spec.summary       = "Convert documents in the IsoDoc document model "\
                       "into Microsoft Word and HTML."
  spec.description   = <<~DESCRIPTION
    isodoc converts documents in the IsoDoc document model into
    Microsoft Word and HTML.

    This gem is in active development.
  DESCRIPTION

  spec.homepage      = "https://github.com/riboseinc/isodoc"
  spec.license       = "MIT"

  spec.bindir        = "bin"
  spec.require_paths = ["lib"]
  spec.files         = `git ls-files`.split("\n")
  # A one-element brace group such as "{spec}" is not expanded by the shell,
  # so the previous pathspec "{spec}/*" was passed to git literally and
  # matched nothing. Name the directory directly instead.
  spec.test_files    = `git ls-files -- spec/*`.split("\n")
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.add_dependency "asciimath"
  spec.add_dependency "htmlentities", "~> 4.3.4"
  spec.add_dependency "image_size"
  spec.add_dependency "mime-types"
  spec.add_dependency "nokogiri", "= 1.8.1"
  spec.add_dependency "ruby-xslt"
  spec.add_dependency "thread_safe"
  spec.add_dependency "uuidtools"
  spec.add_dependency "html2doc"

  spec.add_development_dependency "bundler", "~> 1.15"
  spec.add_development_dependency "byebug", "~> 9.1"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "guard", "~> 2.14"
  spec.add_development_dependency "guard-rspec", "~> 4.7"
  spec.add_development_dependency "rake", "~> 12.0"
  spec.add_development_dependency "rspec", "~> 3.6"
  spec.add_development_dependency "rubocop", "~> 0.50"
  spec.add_development_dependency "simplecov", "~> 0.15"
  spec.add_development_dependency "timecop", "~> 0.9"
end
@@ -0,0 +1,74 @@
1
+ require_relative "isodoc/version"
2
+
3
+ require "nokogiri"
4
+ require "asciimath"
5
+ require "xml/xslt"
6
+ require "uuidtools"
7
+ require "base64"
8
+ require "mime/types"
9
+ require "image_size"
10
+ require "set"
11
+ require_relative "isodoc/iso2wordhtml"
12
+ require_relative "isodoc/cleanup"
13
+ require_relative "isodoc/postprocessing"
14
+ require_relative "isodoc/utils"
15
+ require_relative "isodoc/metadata"
16
+ require_relative "isodoc/section"
17
+ require_relative "isodoc/references"
18
+ require_relative "isodoc/terms"
19
+ require_relative "isodoc/blocks"
20
+ require_relative "isodoc/lists"
21
+ require_relative "isodoc/table"
22
+ require_relative "isodoc/inline"
23
+ require_relative "isodoc/xref_gen"
24
+ require_relative "isodoc/html"
25
+ require "pp"
26
+
27
+ module IsoDoc
28
+ class Convert
29
+
30
# Builds a converter from a hash of file locations.
#
# Keys read from +options+ (looked up with Hash#[], so an absent key
# leaves the corresponding setting nil):
#   :htmlstylesheet     - generic stylesheet for HTML
#   :wordstylesheet     - generic stylesheet for Word
#   :standardstylesheet - stylesheet specific to the standard
#   :header             - header file for Word
#   :htmlcoverpage      - cover page for HTML
#   :wordcoverpage      - cover page for Word
#   :htmlintropage      - introductory page for HTML
#   :wordintropage      - introductory page for Word
def initialize(options)
  %i[
    htmlstylesheet wordstylesheet standardstylesheet header
    htmlcoverpage wordcoverpage htmlintropage wordintropage
  ].each do |key|
    instance_variable_set("@#{key}", options[key])
  end

  # Rendering state flags, all starting in their "off" position.
  @termdomain = ""
  @termexample = false
  @note = false
  @sourcecode = false
  @in_footnote = false
  @in_table = false
  @in_figure = false

  # Empty collections; presumably populated while a document is converted.
  @anchors = {}
  @meta = {}
  @footnotes = []
  @comments = []
  @seen_footnote = Set.new
end
60
+
61
# Converts the XML document stored at +filename+.
#
# The file is parsed with Nokogiri, its root element's default namespace
# is blanked, an <html> tree is generated through the noko helper, and the
# joined output is handed to postprocess together with the filename and
# directory returned by init_file. Returns whatever postprocess returns.
def convert(filename)
  docxml = Nokogiri::XML(File.read(filename))
  filename, dir = init_file(filename)
  docxml.root.default_namespace = ""
  chunks = noko do |xml|
    xml.html do |html|
      html_header(html, docxml, filename, dir)
      make_body(html, docxml)
    end
  end
  postprocess(chunks.join("\n"), filename, dir)
end
73
+ end
74
+ end
@@ -0,0 +1,184 @@
1
+ module IsoDoc
2
+ class Convert
3
+ #attr_accessor :termdomain, :termexample, :sourcecode, :note
4
# Stores +termdomain+ in @termdomain; para_parse prints a non-empty value
# once and then clears it.
def set_termdomain(termdomain)
  @termdomain = termdomain
end

# Current value of the @termexample flag.
def get_termexample
  @termexample
end

# Replaces the @termexample flag with +value+.
def set_termexample(value)
  @termexample = value
end

# Current value of @sourcecode, which sourcecode_parse holds at true while
# it renders the children of a sourcecode node.
def in_sourcecode
  @sourcecode
end

# Current value of @note, which note_parse holds at true while it renders
# a note.
def is_note
  @note
end
23
+
24
# Returns the label to print in front of a note: the :label stored in the
# anchors table under the node's "id", or the literal "NOTE" when the table
# has no entry for that id.
def note_label(node)
  anchors = get_anchors
  entry = anchors[node["id"]]
  entry.nil? ? "NOTE" : entry[:label]
end
29
+
30
# Renders a note whose first element child is a paragraph.
#
# The note label and a tab are written into a <p class="Note">, followed by
# the children of that first element; every remaining element child of the
# note is then parsed into +div+.
def note_p_parse(node, div)
  div.p(class: "Note") do |p|
    p << note_label(node)
    insert_tab(p, 1)
    node.first_element_child.children.each { |child| parse(child, p) }
  end
  node.element_children.drop(1).each { |sibling| parse(sibling, div) }
end
38
+
39
# Renders a note node as <div class="Note"> carrying the node's id.
#
# @note is true for the duration of the rendering and false afterwards.
# When the note starts with a paragraph the work is delegated to
# note_p_parse; otherwise the label gets a paragraph of its own and all of
# the note's children are parsed into the div.
def note_parse(node, out)
  @note = true
  out.div(id: node["id"], class: "Note") do |div|
    leads_with_para = node.first_element_child.name == "p"
    if leads_with_para
      note_p_parse(node, div)
    else
      div.p(class: "Note") do |p|
        p << note_label(node)
        insert_tab(p, 1)
      end
      node.children.each { |child| parse(child, div) }
    end
  end
  @note = false
end
54
+
55
# Writes a figure caption into +div+: a centred, bold "FigureTitle"
# paragraph made of the figure's anchor label, a non-breaking space and em
# dash, and the text of +name+.
def figure_name_parse(node, div, name)
  div.p(class: "FigureTitle", align: "center") do |p|
    p.b do |b|
      label = get_anchors[node["id"]][:label]
      b << "#{label}&nbsp;&mdash; "
      b << name.text
    end
  end
end
63
+
64
# Emits a paragraph containing the bold heading "Key" into +out+.
def figure_key(out)
  out.p { |p| p.b { |b| b << "Key" } }
end
69
+
70
# Renders a figure node as <div class="figure">.
#
# @in_figure is true while the figure is rendered and false afterwards.
# Children are parsed in order, except the "name" child, which is skipped
# here and rendered last as the caption. A "Key" heading is emitted just
# before a "dl" child.
def figure_parse(node, out)
  @in_figure = true
  name = node.at(ns("./name"))
  out.div(**attr_code(id: node["id"], class: "figure")) do |div|
    node.children.each do |child|
      case child.name
      when "name" then next
      when "dl" then figure_key(out)
      end
      parse(child, div)
    end
    figure_name_parse(node, div, name) unless name.nil?
  end
  @in_figure = false
end
82
+
83
# Writes the title of a sourcecode block into +div+ as a centred, bold
# "FigureTitle" paragraph holding the text of +name+.
def sourcecode_name_parse(node, div, name)
  div.p(class: "FigureTitle", align: "center") do |p|
    p.b { |b| b << name.text }
  end
end
90
+
91
# Renders a sourcecode node as <p class="Sourcecode">.
#
# @sourcecode is true only while the node's children (other than "name")
# are being parsed; the optional name is rendered afterwards as a title.
def sourcecode_parse(node, out)
  name = node.at(ns("./name"))
  out.p(**attr_code(id: node["id"], class: "Sourcecode")) do |div|
    @sourcecode = true
    node.children.each do |child|
      next if child.name == "name"
      parse(child, div)
    end
    @sourcecode = false
    sourcecode_name_parse(node, div, name) unless name.nil?
  end
end
102
+
103
# Renders an annotation as a <p class="Sourcecode"> wrapping the parsed
# children of +node+.
def annotation_parse(node, out)
  out.p(class: "Sourcecode") do |para|
    node.children.each { |child| parse(child, para) }
  end
end
108
+
109
# Renders an admonition as <div class="Admonition">.
#
# When the node has a "type" attribute, its upper-cased value is written
# first as a bold paragraph; the node's children are then parsed into the
# div.
def admonition_parse(node, out)
  name = node["type"]
  out.div(class: "Admonition") do |t|
    # Nest the tags explicitly, as figure_key does. Chaining `t.p.b` on a
    # Nokogiri builder uses its class-attribute shortcut and produces
    # <p class="b"> rather than <p><b>.
    t.p { |p| p.b { |b| b << name.upcase } } if name
    node.children.each { |child| parse(child, t) }
  end
end
118
+
119
# Renders a formula node as <div class="formula">: the parsed "stem"
# child, a tab, and the formula's anchor label in parentheses. When the
# node has a "dl" child, a "where" paragraph and the parsed list follow
# the div.
def formula_parse(node, out)
  dl = node.at(ns("./dl"))
  out.div(**attr_code(id: node["id"], class: "formula")) do |div|
    parse(node.at(ns("./stem")), out)
    insert_tab(div, 1)
    label = get_anchors[node["id"]][:label]
    div << "(#{label})"
  end
  return if dl.nil?

  out.p { |p| p << "where" }
  parse(dl, out)
end
131
+
132
# Builds the attribute hash for a paragraph element.
#
# :class is "MsoFootnoteText" inside a footnote, otherwise "Note" inside a
# note, otherwise nil. When the node has an "align" attribute, :style is
# set to the matching text-align declaration, and :align is set as well
# unless the alignment is "justify".
def para_attrs(node)
  classtype =
    if in_footnote then "MsoFootnoteText"
    elsif @note then "Note"
    end
  attrs = { class: classtype }
  align = node["align"]
  return attrs if align.nil?

  attrs[:align] = align unless align == "justify"
  attrs[:style] = "text-align:#{align}"
  attrs
end
143
+
144
# Renders a paragraph node as <p> with the attributes from para_attrs.
#
# A pending term domain (non-empty @termdomain) is written first, wrapped
# in escaped angle brackets, and then cleared so it is printed only once.
def para_parse(node, out)
  out.p(**attr_code(para_attrs(node))) do |para|
    unless @termdomain.empty?
      para << "&lt;#{@termdomain}&gt; "
      @termdomain = ""
    end
    node.children.each { |child| parse(child, para) }
  end
end
153
+
154
# Placeholder for rendering the attribution of a quote.
#
# Looks up the quote's author full name and source elements but does not
# render them yet. The author path previously ended in a slash
# ("./author/fullname/"), which is not a valid XPath location path, so the
# lookup would fail for every quote.
def quote_attribution(node, out)
  author = node.at(ns("./author/fullname"))
  source = node.at(ns("./source"))
  # TODO: implement
end
159
+
160
# Renders a quote node as <p class="Quote">, using the paragraph
# attributes from para_attrs with the class overridden. "author" and
# "source" children are skipped here and left to quote_attribution.
def quote_parse(node, out)
  attrs = para_attrs(node)
  attrs[:class] = "Quote"
  out.p(**attr_code(attrs)) do |para|
    node.children.each do |child|
      next if %w[author source].include?(child.name)
      parse(child, para)
    end
    quote_attribution(node, out)
  end
end
170
+
171
# Writes +caption+ as a centred, bold "FigureTitle" paragraph into +out+.
# Does nothing and returns nil when +caption+ is nil.
def image_title_parse(out, caption)
  return if caption.nil?

  out.p(class: "FigureTitle", align: "center") do |p|
    p.b { |b| b << caption.to_s }
  end
end
178
+
179
# Emits an <img> whose src is +url+, followed by the optional caption
# rendered by image_title_parse.
def image_parse(url, out, caption)
  out.img(**attr_code(src: url))
  image_title_parse(out, caption)
end
183
+ end
184
+ end
@@ -0,0 +1,155 @@
1
+ require "html2doc"
2
+ require "htmlentities"
3
+ require "nokogiri"
4
+ require "pp"
5
+
6
+ module IsoDoc
7
+ class Convert
8
# Runs every clean-up pass over +docxml+ in a fixed order (comments,
# footnotes, inline headers, figures, tables) and returns +docxml+.
def cleanup(docxml)
  %i[
    comment_cleanup footnote_cleanup inline_header_cleanup
    figure_cleanup table_cleanup
  ].each { |pass| send(pass, docxml) }
  docxml
end
16
+
17
# Returns the first <dl> found under +t+. When there is none, a "Key"
# heading and an empty <dl> are appended to +t+ and that new list is
# returned.
def figure_get_or_make_dl(t)
  existing = t.at(".//dl")
  return existing unless existing.nil?

  t.add_child("<p><b>Key</b></p><dl></dl>")
  t.at(".//dl")
end
25
+
26
# XPath selecting every figure div that contains an <aside> and has no
# figure div nested inside it.
FIGURE_WITH_FOOTNOTES = [
  "//div[@class = 'figure']",
  "[descendant::aside]",
  "[not(descendant::div[@class = 'figure'])]",
].join.freeze
29
+
30
# Moves one figure footnote (+aside+) into the figure's key list.
#
# The footnote link that is a direct child of the figure +f+ is removed
# (it already appears in the diagram). The first remaining footnote link
# under +f+ becomes the content of a new <dt> in +key+, and every
# paragraph of the aside loses its class and is moved into a new <dd>.
def figure_aside_process(f, aside, key)
  # get rid of footnote link, it is in diagram
  f.at("./a[@class='zzFootnote']").remove
  reference = f.at(".//a[@class='zzFootnote']")
  term = key.add_child("<dt></dt>").first
  definition = key.add_child("<dd></dd>").first
  reference.parent = term
  aside.xpath(".//p").each do |para|
    para.delete("class")
    para.parent = definition
  end
end
42
+
43
# Moves figure footnotes into the figure's key and drops the footnote
# reference (it is already shown in the diagram). Applies to every figure
# matched by FIGURE_WITH_FOOTNOTES.
def figure_cleanup(docxml)
  docxml.xpath(FIGURE_WITH_FOOTNOTES).each do |figure|
    key = figure_get_or_make_dl(figure)
    figure.xpath(".//aside").each do |aside|
      figure_aside_process(figure, aside, key)
    end
  end
end
53
+
54
# Relocates every span marked "zzMoveToFollowing".
#
# When the span has a following element sibling, the span is moved to the
# front of that sibling's children. Otherwise a new <p> fragment is
# attached to the span's parent and the span is re-parented to it.
def inline_header_cleanup(docxml)
  docxml.xpath('//span[@class="zzMoveToFollowing"]').each do |span|
    following = span.next_element
    if following
      following.children.first.add_previous_sibling(span.remove)
    else
      wrapper = Nokogiri::XML.fragment("<p></p>")
      wrapper.parent = span.parent
      span.parent = wrapper
    end
  end
end
66
+
67
# Re-parents each matched comment-reference span (a span that is a direct
# child of a div) to its preceding element sibling, when it has one.
# Returns +docxml+.
def comment_cleanup(docxml)
  references = docxml.xpath('//div/span[@style="MsoCommentReference"]')
  references.each do |reference|
    preceding = reference.previous_element
    reference.parent = preceding unless preceding.nil?
  end
  docxml
end
77
+
78
# For every link that is a direct child of a footnote div, moves the link
# to the front of its next element sibling's children. Links with no
# following element are left in place. Returns +docxml+.
def footnote_cleanup(docxml)
  docxml.xpath('//div[@style="mso-element:footnote"]/a').each do |link|
    body = link.next_element
    next if body.nil?

    body.children.first.add_previous_sibling(link.remove)
  end
  docxml
end
88
+
89
# Moves the footnote link found under +a+ to the front of the children of
# the link's next element sibling. Returns nil without changing anything
# when the link has no following element.
def merge_fnref_into_fn_text(a)
  link = a.at('.//a[@class="zzFootnote"]')
  body = link.next_element
  return if body.nil?

  body.children.first.add_previous_sibling(link.remove)
end
94
+
95
# XPath selecting every table that contains an <aside>.
TABLE_WITH_FOOTNOTES = "//table[descendant::aside]".freeze

# Turns each footnote <aside> inside a table into a <div class="Note">
# appended to the end of that table, after merging the footnote link into
# the footnote text.
def table_footnote_cleanup(docxml)
  docxml.xpath(TABLE_WITH_FOOTNOTES).each do |table|
    table.xpath(".//aside").each do |footnote|
      merge_fnref_into_fn_text(footnote)
      footnote.name = "div"
      footnote["class"] = "Note"
      table << footnote.remove
    end
  end
end
107
+
108
# Zeroes the bottom border of a table cell by rewriting the border-bottom
# and mso-border-bottom-alt declarations in its "style" attribute to 0pt.
#
# A cell without a "style" attribute is left untouched and nil is
# returned; previously such a cell raised NoMethodError on nil. Otherwise
# the rewritten style string is returned.
def remove_bottom_border(td)
  style = td["style"]
  return if style.nil?

  td["style"] = style.
    gsub(/border-bottom:[^;]+;/, "border-bottom:0pt;").
    gsub(/mso-border-bottom-alt:[^;]+;/, "mso-border-bottom-alt:0pt;")
end
113
+
114
# Returns the <tfoot> of table +t+, appending an empty one when the table
# has none. An already existing <tfoot> has the bottom border removed from
# each of its td/th cells before it is returned.
def table_get_or_make_tfoot(t)
  existing = t.at(".//tfoot")
  if existing
    # nuke its bottom border
    existing.xpath(".//td | .//th").each { |cell| remove_bottom_border(cell) }
    return existing
  end

  t.add_child("<tfoot></tfoot>")
  t.at(".//tfoot")
end
127
+
128
# Appends to +tfoot+ a row with a single cell spanning the full width of
# table +t+ and returns that cell (the last <td> under +tfoot+).
#
# The width is the sum of the colspans of the td/th cells in the table's
# first row, a cell without a colspan counting as 1. The cell's style uses
# the SW constant, which is defined outside this block.
def new_fullcolspan_row(t, tfoot)
  first_row_cells = t.at(".//tr").xpath("./td | ./th")
  cols = first_row_cells.sum do |cell|
    cell["colspan"] ? cell["colspan"].to_i : 1
  end
  style = "border-top:0pt;mso-border-top-alt:0pt;\n" \
          "border-bottom:#{SW} 1.5pt;mso-border-bottom-alt:#{SW} 1.5pt;"
  tfoot.add_child("<tr><td colspan='#{cols}' style='#{style}'/></tr>")
  tfoot.xpath(".//td").last
end
139
+
140
# Moves the Note divs that are direct children of a table into a new
# full-width row of that table's <tfoot>.
def table_note_cleanup(docxml)
  docxml.xpath("//table[div[@class = 'Note']]").each do |table|
    tfoot = table_get_or_make_tfoot(table)
    cell = new_fullcolspan_row(table, tfoot)
    table.xpath("div[@class = 'Note']").each { |note| note.parent = cell }
  end
end
149
+
150
# Runs the table clean-up passes: footnotes are first converted to Note
# divs, which are then moved into the table footer.
def table_cleanup(docxml)
  table_footnote_cleanup(docxml)
  table_note_cleanup(docxml)
end
154
+ end
155
+ end