isodoc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ # This project follows the Ribose OSS style guide.
2
+ # https://github.com/riboseinc/oss-guides
3
+ # All project-specific additions and overrides should be specified in this file.
4
+
5
+ inherit_from:
6
+ # Thoughtbot's style guide from: https://github.com/thoughtbot/guides
7
+ - ".rubocop.tb.yml"
8
+ # Overrides from Ribose
9
+ - ".rubocop.ribose.yml"
10
+ AllCops:
11
+ DisplayCopNames: false
12
+ StyleGuideCopsOnly: false
13
+ TargetRubyVersion: 2.4
14
+ Rails:
15
+ Enabled: true
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in isodoc.gemspec
4
+ gem "html2doc",
5
+ git: "https://github.com/riboseinc/html2doc.git"
6
+ gemspec
@@ -0,0 +1,32 @@
1
+ = isodoc
2
+
3
+ This Gem converts documents in the https://github.com/riboseinc/isodoc-models[ISODoc document model] into HTML and Microsoft Word.
4
+
5
+ The Gem provides the class `IsoDoc::Convert`, which is instantiated with a hash of file locations:
6
+
7
+ htmlstylesheet:: Generic stylesheet for HTML
8
+ wordstylesheet:: Generic stylesheet for Word
9
+ standardstylesheet:: Stylesheet specific to Standard
10
+ header:: Header file for Word
11
+ htmlcoverpage:: Cover page for HTML
12
+ wordcoverpage:: Cover page for Word
13
+ htmlintropage:: Introductory page for HTML
14
+ wordintropage:: Introductory page for Word
15
+
16
+ e.g.
17
+
18
+ [source,ruby]
19
+ --
20
+ IsoDoc::Convert.new(
21
+ htmlstylesheet: html_doc_path("htmlstyle.css"),
22
+ wordstylesheet: nil,
23
+ standardstylesheet: html_doc_path("isodoc.css"),
24
+ header: html_doc_path("header.html"),
25
+ htmlcoverpage: html_doc_path("iso_titlepage.html"),
26
+ wordcoverpage: html_doc_path("iso_titlepage.html"),
27
+ htmlintropage: html_doc_path("iso_intro.html"),
28
+ wordintropage: html_doc_path("iso_intro.html"),
29
+ )
30
+ --
31
+
32
+ NOTE: Cover page and Intro page must be XHTML fragments, not HTML fragments. In particular, unlike Word HTML, all HTML attributes need to be quoted: `<p class="MsoToc2">`, not `<p class=MsoToc2>`.
@@ -0,0 +1,51 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path("../lib", __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "isodoc/version"
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = "isodoc"
9
+ spec.version = IsoDoc::VERSION
10
+ spec.authors = ["Ribose Inc."]
11
+ spec.email = ["open.source@ribose.com"]
12
+
13
+ spec.summary = "Convert documents in IsoDoc into Word and HTML "\
14
+ "in AsciiDoc."
15
+ spec.description = <<~DESCRIPTION
16
+ isodoc converts documents in the IsoDoc document model into
17
+ Microsoft Word and HTML.
18
+
19
+ This gem is in active development.
20
+ DESCRIPTION
21
+
22
+ spec.homepage = "https://github.com/riboseinc/isodoc"
23
+ spec.license = "MIT"
24
+
25
+ spec.bindir = "bin"
26
+ spec.require_paths = ["lib"]
27
+ spec.files = `git ls-files`.split("\n")
28
+ spec.test_files = `git ls-files -- {spec}/*`.split("\n")
29
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
30
+
31
+ spec.add_dependency "asciimath"
32
+ spec.add_dependency "htmlentities", "~> 4.3.4"
33
+ spec.add_dependency "image_size"
34
+ spec.add_dependency "mime-types"
35
+ spec.add_dependency "nokogiri", "= 1.8.1"
36
+ spec.add_dependency "ruby-xslt"
37
+ spec.add_dependency "thread_safe"
38
+ spec.add_dependency "uuidtools"
39
+ spec.add_dependency "html2doc"
40
+
41
+ spec.add_development_dependency "bundler", "~> 1.15"
42
+ spec.add_development_dependency "byebug", "~> 9.1"
43
+ spec.add_development_dependency "equivalent-xml", "~> 0.6"
44
+ spec.add_development_dependency "guard", "~> 2.14"
45
+ spec.add_development_dependency "guard-rspec", "~> 4.7"
46
+ spec.add_development_dependency "rake", "~> 12.0"
47
+ spec.add_development_dependency "rspec", "~> 3.6"
48
+ spec.add_development_dependency "rubocop", "~> 0.50"
49
+ spec.add_development_dependency "simplecov", "~> 0.15"
50
+ spec.add_development_dependency "timecop", "~> 0.9"
51
+ end
@@ -0,0 +1,74 @@
1
+ require_relative "isodoc/version"
2
+
3
+ require "nokogiri"
4
+ require "asciimath"
5
+ require "xml/xslt"
6
+ require "uuidtools"
7
+ require "base64"
8
+ require "mime/types"
9
+ require "image_size"
10
+ require "set"
11
+ require_relative "isodoc/iso2wordhtml"
12
+ require_relative "isodoc/cleanup"
13
+ require_relative "isodoc/postprocessing"
14
+ require_relative "isodoc/utils"
15
+ require_relative "isodoc/metadata"
16
+ require_relative "isodoc/section"
17
+ require_relative "isodoc/references"
18
+ require_relative "isodoc/terms"
19
+ require_relative "isodoc/blocks"
20
+ require_relative "isodoc/lists"
21
+ require_relative "isodoc/table"
22
+ require_relative "isodoc/inline"
23
+ require_relative "isodoc/xref_gen"
24
+ require_relative "isodoc/html"
25
+ require "pp"
26
+
27
+ module IsoDoc
28
+ class Convert
29
+
30
+ # htmlstylesheet: Generic stylesheet for HTML
31
+ # wordstylesheet: Generic stylesheet for Word
32
+ # standardstylesheet: Stylesheet specific to Standard
33
+ # header: Header file for Word
34
+ # htmlcoverpage: Cover page for HTML
35
+ # wordcoverpage: Cover page for Word
36
+ # htmlintropage: Introductory page for HTML
37
+ # wordintropage: Introductory page for Word
38
+ def initialize(options) # options: Hash read by symbol key; a key that is absent simply leaves that setting nil
39
+ @htmlstylesheet = options[:htmlstylesheet]
40
+ @wordstylesheet = options[:wordstylesheet]
41
+ @standardstylesheet = options[:standardstylesheet] # the key read here is :standardstylesheet
42
+ @header = options[:header]
43
+ @htmlcoverpage = options[:htmlcoverpage]
44
+ @wordcoverpage = options[:wordcoverpage]
45
+ @htmlintropage = options[:htmlintropage]
46
+ @wordintropage = options[:wordintropage]
47
+ @termdomain = "" # everything from here down is per-conversion state, starting out empty/false
48
+ @termexample = false
49
+ @note = false
50
+ @sourcecode = false
51
+ @anchors = {}
52
+ @meta = {}
53
+ @footnotes = []
54
+ @comments = []
55
+ @in_footnote = false
56
+ @in_table = false
57
+ @in_figure = false
58
+ @seen_footnote = Set.new
59
+ end
60
+
61
+ def convert(filename) # parse the XML file at filename, build an <html> tree from it, and pass the markup to postprocess
62
+ docxml = Nokogiri::XML(File.read(filename))
63
+ filename, dir = init_file(filename) # init_file is defined outside this file; it returns the filename and dir used below
64
+ docxml.root.default_namespace = ""
65
+ result = noko do |xml| # noko is defined elsewhere; its return value is joined into one string below
66
+ xml.html do |html|
67
+ html_header(html, docxml, filename, dir)
68
+ make_body(html, docxml)
69
+ end
70
+ end.join("\n")
71
+ postprocess(result, filename, dir) # the method's return value is whatever postprocess returns
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,184 @@
1
+ module IsoDoc
2
+ class Convert
3
+ #attr_accessor :termdomain, :termexample, :sourcecode, :note
4
+ def set_termdomain(termdomain) # store a term domain; para_parse prints it in angle brackets and then resets it to ""
5
+ @termdomain = termdomain
6
+ end
7
+
8
+ def get_termexample # reader for the @termexample flag
9
+ @termexample
10
+ end
11
+
12
+ def set_termexample(value) # writer for the @termexample flag
13
+ @termexample = value
14
+ end
15
+
16
+ def in_sourcecode # value of @sourcecode; sourcecode_parse sets it to true while it renders a listing's children
17
+ @sourcecode
18
+ end
19
+
20
+ def is_note # value of @note; note_parse sets it to true while it renders a note
21
+ @note
22
+ end
23
+
24
+ def note_label(node) # label text printed at the start of a note
25
+ n = get_anchors()[node["id"]] # entry recorded for this note's id, if any (get_anchors is defined elsewhere)
26
+ return "NOTE" if n.nil? # no entry for the id: fall back to the plain label
27
+ n[:label]
28
+ end
29
+
30
+ def note_p_parse(node, div) # note that opens with a paragraph: the label and that paragraph's content share one <p class="Note">
31
+ div.p **{ class: "Note" } do |p|
32
+ p << note_label(node)
33
+ insert_tab(p, 1)
34
+ node.first_element_child.children.each { |n| parse(n, p) }
35
+ end
36
+ node.element_children[1..-1].each { |n| parse(n, div) } # the remaining element children are rendered after that paragraph
37
+ end
38
+
39
+ def note_parse(node, out) # render a note as <div class="Note"> carrying the node's id, label first
40
+ @note = true # stays set while the note's content is parsed
41
+ out.div **{ id: node["id"], class: "Note" } do |div|
42
+ if node.first_element_child.name == "p"
43
+ note_p_parse(node, div)
44
+ else # first element is not a paragraph: the label gets a <p> of its own, then all children follow
45
+ div.p **{ class: "Note" } do |p|
46
+ p << note_label(node)
47
+ insert_tab(p, 1)
48
+ end
49
+ node.children.each { |n| parse(n, div) }
50
+ end
51
+ end
52
+ @note = false
53
+ end
54
+
55
+ def figure_name_parse(node, div, name) # centred bold caption: the figure's anchor label, an em dash, then the name text
56
+ div.p **{ class: "FigureTitle", align: "center" } do |p|
57
+ p.b do |b| # NOTE(review): the next line assumes get_anchors has an entry for node['id'] - confirm
58
+ b << "#{get_anchors()[node['id']][:label]}&nbsp;&mdash; "
59
+ b << name.text
60
+ end
61
+ end
62
+ end
63
+
64
+ def figure_key(out) # emit a paragraph holding the bold word "Key"
65
+ out.p do |p|
66
+ p.b { |b| b << "Key" }
67
+ end
68
+ end
69
+
70
+ def figure_parse(node, out) # render a figure as <div class="figure">: content first, caption last
71
+ @in_figure = true # stays set while the figure's children are parsed
72
+ name = node.at(ns("./name"))
73
+ out.div **attr_code(id: node["id"], class: "figure") do |div|
74
+ node.children.each do |n|
75
+ figure_key(out) if n.name == "dl" # a definition list gets a "Key" heading emitted before it
76
+ parse(n, div) unless n.name == "name" # the name element is held back for the caption
77
+ end
78
+ figure_name_parse(node, div, name) if name
79
+ end
80
+ @in_figure = false
81
+ end
82
+
83
+ def sourcecode_name_parse(node, div, name) # centred bold caption holding only the name text; node is not read here
84
+ div.p **{ class: "FigureTitle", align: "center" } do |p|
85
+ p.b do |b|
86
+ b << name.text
87
+ end
88
+ end
89
+ end
90
+
91
+ def sourcecode_parse(node, out) # render a listing as <p class="Sourcecode">, with its caption (if any) after the content
92
+ name = node.at(ns("./name"))
93
+ out.p **attr_code(id: node["id"], class: "Sourcecode") do |div|
94
+ @sourcecode = true # set only while the listing's children are parsed
95
+ node.children.each do |n|
96
+ parse(n, div) unless n.name == "name"
97
+ end
98
+ @sourcecode = false
99
+ sourcecode_name_parse(node, div, name) if name # the caption is emitted inside the same <p> block
100
+ end
101
+ end
102
+
103
+ def annotation_parse(node, out) # render an annotation's children inside <p class="Sourcecode">
104
+ out.p **{ class: "Sourcecode" } do |li|
105
+ node.children.each { |n| parse(n, li) }
106
+ end
107
+ end
108
+
109
+ def admonition_parse(node, out) # render an admonition as <div class="Admonition">
110
+ name = node["type"]
111
+ out.div **{ class: "Admonition" } do |t|
112
+ t.p.b { |b| b << name.upcase } if name # upper-cased type attribute as a bold heading, when the attribute is present
113
+ node.children.each do |n|
114
+ parse(n, t)
115
+ end
116
+ end
117
+ end
118
+
119
+ def formula_parse(node, out) # render a formula: the stem, a tab, its label in parentheses, then an optional "where" list
120
+ dl = node.at(ns("./dl"))
121
+ out.div **attr_code(id: node["id"], class: "formula") do |div|
122
+ parse(node.at(ns("./stem")), out)
123
+ insert_tab(div, 1)
124
+ div << "(#{get_anchors()[node['id']][:label]})"
125
+ end
126
+ if dl # a definition list child is introduced by a "where" paragraph
127
+ out.p { |p| p << "where" }
128
+ parse(dl, out)
129
+ end
130
+ end
131
+
132
+ def para_attrs(node) # build the attribute hash (class, align, style) for a paragraph
133
+ classtype = nil
134
+ classtype = "Note" if @note
135
+ classtype = "MsoFootnoteText" if in_footnote # assigned last, so it replaces "Note" when both apply
136
+ attrs = { class: classtype }
137
+ unless node["align"].nil?
138
+ attrs[:align] = node["align"] unless node["align"] == "justify" # "justify" is expressed through the style only
139
+ attrs[:style] = "text-align:#{node["align"]}"
140
+ end
141
+ attrs
142
+ end
143
+
144
+ def para_parse(node, out) # render a paragraph, prefixed by the pending term domain if one is set
145
+ out.p **attr_code(para_attrs(node)) do |p|
146
+ unless @termdomain.empty?
147
+ p << "&lt;#{@termdomain}&gt; "
148
+ @termdomain = "" # cleared so the domain is printed only once
149
+ end
150
+ node.children.each { |n| parse(n, p) }
151
+ end
152
+ end
153
+
154
+ def quote_attribution(node, out)
155
+ author = node.at(ns("./author/fullname/"))
156
+ source = node.at(ns("./source"))
157
+ # TODO implement
158
+ end
159
+
160
+ def quote_parse(node, out) # render a quotation as <p class="Quote">
161
+ attrs = para_attrs(node)
162
+ attrs[:class] = "Quote" # replaces whatever class para_attrs chose
163
+ out.p **attr_code(attrs) do |p|
164
+ node.children.each do
165
+ |n| parse(n, p) unless ["author", "source"].include? n.name
166
+ end
167
+ quote_attribution(node, out) # author and source are skipped above and left to this call
168
+ end
169
+ end
170
+
171
+ def image_title_parse(out, caption) # emit a centred bold caption paragraph; nothing is emitted when caption is nil
172
+ unless caption.nil?
173
+ out.p **{ class: "FigureTitle", align: "center" } do |p|
174
+ p.b { |b| b << caption.to_s }
175
+ end
176
+ end
177
+ end
178
+
179
+ def image_parse(url, out, caption) # emit an <img> for url, followed by its caption paragraph if caption is given
180
+ out.img **attr_code(src: url)
181
+ image_title_parse(out, caption)
182
+ end
183
+ end
184
+ end
@@ -0,0 +1,155 @@
1
+ require "html2doc"
2
+ require "htmlentities"
3
+ require "nokogiri"
4
+ require "pp"
5
+
6
+ module IsoDoc
7
+ class Convert
8
+ def cleanup(docxml) # run every cleanup pass over the document in the fixed order below, then return the document
9
+ comment_cleanup(docxml)
10
+ footnote_cleanup(docxml)
11
+ inline_header_cleanup(docxml)
12
+ figure_cleanup(docxml)
13
+ table_cleanup(docxml)
14
+ docxml
15
+ end
16
+
17
+ def figure_get_or_make_dl(t) # return the first <dl> found under t, creating one when there is none
18
+ dl = t.at(".//dl")
19
+ if dl.nil?
20
+ t.add_child("<p><b>Key</b></p><dl></dl>") # a new list is appended together with a bold "Key" heading
21
+ dl = t.at(".//dl")
22
+ end
23
+ dl
24
+ end
25
+
26
+ FIGURE_WITH_FOOTNOTES = # XPath: figure divs containing an <aside> and no nested figure div
27
+ "//div[@class = 'figure'][descendant::aside]"\
28
+ "[not(descendant::div[@class = 'figure'])]".freeze
29
+
30
+ def figure_aside_process(f, aside, key)
31
+ # get rid of footnote link, it is in diagram
32
+ f.at("./a[@class='zzFootnote']").remove
33
+ fnref = f.at(".//a[@class='zzFootnote']")
34
+ dt = key.add_child("<dt></dt>").first
35
+ dd = key.add_child("<dd></dd>").first
36
+ fnref.parent = dt
37
+ aside.xpath(".//p").each do |a|
38
+ a.delete("class")
39
+ a.parent = dd
40
+ end
41
+ end
42
+
43
+ def figure_cleanup(docxml) # for every figure matched by FIGURE_WITH_FOOTNOTES, fold its asides into the figure's key list
44
+ # move footnotes into key, and get rid of footnote reference
45
+ # since it is in diagram
46
+ docxml.xpath(FIGURE_WITH_FOOTNOTES).each do |f|
47
+ key = figure_get_or_make_dl(f) # existing <dl>, or a newly appended one
48
+ f.xpath(".//aside").each do |aside|
49
+ figure_aside_process(f, aside, key)
50
+ end
51
+ end
52
+ end
53
+
54
+ def inline_header_cleanup(docxml)
55
+ docxml.xpath('//span[@class="zzMoveToFollowing"]').each do |x|
56
+ n = x.next_element
57
+ if n.nil?
58
+ html = Nokogiri::XML.fragment("<p></p>")
59
+ html.parent = x.parent
60
+ x.parent = html
61
+ else
62
+ n.children.first.add_previous_sibling(x.remove)
63
+ end
64
+ end
65
+ end
66
+
67
+ def comment_cleanup(docxml) # move each matched comment-reference span into the element just before it; returns docxml
68
+ docxml.xpath('//div/span[@style="MsoCommentReference"]').
69
+ each do |x| # NOTE(review): the XPath selects on @style, not @class - confirm which attribute these spans carry
70
+ prev = x.previous_element
71
+ if !prev.nil? # a span with no preceding element is left where it is
72
+ x.parent = prev
73
+ end
74
+ end
75
+ docxml
76
+ end
77
+
78
+ def footnote_cleanup(docxml)
79
+ docxml.xpath('//div[@style="mso-element:footnote"]/a').
80
+ each do |x|
81
+ n = x.next_element
82
+ if !n.nil?
83
+ n.children.first.add_previous_sibling(x.remove)
84
+ end
85
+ end
86
+ docxml
87
+ end
88
+
89
+ def merge_fnref_into_fn_text(a)
90
+ fn = a.at('.//a[@class="zzFootnote"]')
91
+ n = fn.next_element
92
+ n.children.first.add_previous_sibling(fn.remove) unless n.nil?
93
+ end
94
+
95
+ TABLE_WITH_FOOTNOTES = "//table[descendant::aside]".freeze # XPath: tables that contain at least one <aside>
96
+
97
+ def table_footnote_cleanup(docxml) # turn each <aside> inside a table into a <div class="Note"> appended to that table
98
+ docxml.xpath(TABLE_WITH_FOOTNOTES).each do |t|
99
+ t.xpath(".//aside").each do |a|
100
+ merge_fnref_into_fn_text(a)
101
+ a.name = "div"
102
+ a["class"] = "Note"
103
+ t << a.remove # reattached as a direct child of the table, where table_note_cleanup looks for it
104
+ end
105
+ end
106
+ end
107
+
108
+ def remove_bottom_border(td)
109
+ td["style"] =
110
+ td["style"].gsub(/border-bottom:[^;]+;/, "border-bottom:0pt;").
111
+ gsub(/mso-border-bottom-alt:[^;]+;/, "mso-border-bottom-alt:0pt;")
112
+ end
113
+
114
+ def table_get_or_make_tfoot(t) # return the table's <tfoot>, appending an empty one when there is none
115
+ tfoot = t.at(".//tfoot")
116
+ if tfoot.nil?
117
+ t.add_child("<tfoot></tfoot>")
118
+ tfoot = t.at(".//tfoot")
119
+ else
120
+ # nuke its bottom border
121
+ tfoot.xpath(".//td | .//th").each do |td| # applies to every cell of the existing footer
122
+ remove_bottom_border(td)
123
+ end
124
+ end
125
+ tfoot
126
+ end
127
+
128
+ def new_fullcolspan_row(t, tfoot) # append to tfoot a row with one cell spanning every column; returns that cell
129
+ # how many columns in the table?
130
+ cols = 0
131
+ t.at(".//tr").xpath("./td | ./th").each do |td| # the width is counted from the first row found in the table
132
+ cols += ( td["colspan"] ? td["colspan"].to_i : 1 )
133
+ end
134
+ style = %{border-top:0pt;mso-border-top-alt:0pt;
135
+ border-bottom:#{SW} 1.5pt;mso-border-bottom-alt:#{SW} 1.5pt;}
136
+ tfoot.add_child("<tr><td colspan='#{cols}' style='#{style}'/></tr>") # SW is a constant defined outside this file
137
+ tfoot.xpath(".//td").last
138
+ end
139
+
140
+ def table_note_cleanup(docxml) # move Note divs that are direct children of a table into a new full-width footer cell
141
+ docxml.xpath("//table[div[@class = 'Note']]").each do |t|
142
+ tfoot = table_get_or_make_tfoot(t)
143
+ insert_here = new_fullcolspan_row(t, tfoot) # the cell that receives the notes
144
+ t.xpath("div[@class = 'Note']").each do |d|
145
+ d.parent = insert_here
146
+ end
147
+ end
148
+ end
149
+
150
+ def table_cleanup(docxml) # footnotes first: that pass creates the Note divs which the second pass moves into the footer
151
+ table_footnote_cleanup(docxml)
152
+ table_note_cleanup(docxml)
153
+ end
154
+ end
155
+ end