isodoc 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +46 -0
  3. data/LICENSE +25 -0
  4. data/README.adoc +1 -1
  5. data/Rakefile +6 -0
  6. data/isodoc.gemspec +1 -0
  7. data/lib/isodoc.rb +4 -95
  8. data/lib/isodoc/cleanup.rb +14 -10
  9. data/lib/isodoc/{notes.rb → comments.rb} +0 -73
  10. data/lib/isodoc/convert.rb +97 -0
  11. data/lib/isodoc/footnotes.rb +74 -0
  12. data/lib/isodoc/html.rb +41 -4
  13. data/lib/isodoc/i18n-en.yaml +1 -0
  14. data/lib/isodoc/i18n-fr.yaml +1 -0
  15. data/lib/isodoc/i18n-zh-Hans.yaml +1 -0
  16. data/lib/isodoc/i18n.rb +1 -0
  17. data/lib/isodoc/inline.rb +4 -12
  18. data/lib/isodoc/iso2wordhtml.rb +26 -13
  19. data/lib/isodoc/metadata.rb +23 -10
  20. data/lib/isodoc/references.rb +20 -22
  21. data/lib/isodoc/section.rb +4 -3
  22. data/lib/isodoc/table.rb +0 -2
  23. data/lib/isodoc/terms.rb +2 -13
  24. data/lib/isodoc/utils.rb +24 -3
  25. data/lib/isodoc/version.rb +1 -1
  26. data/lib/isodoc/wordconvert/comments.rb +155 -0
  27. data/lib/isodoc/wordconvert/convert.rb +31 -0
  28. data/lib/isodoc/wordconvert/footnotes.rb +80 -0
  29. data/lib/isodoc/wordconvert/wordconvertmodule.rb +212 -0
  30. data/lib/isodoc/xref_gen.rb +50 -79
  31. data/lib/isodoc/xref_sect_gen.rb +82 -0
  32. data/spec/assets/header.html +7 -0
  33. data/spec/assets/html.css +2 -0
  34. data/spec/assets/htmlcover.html +4 -0
  35. data/spec/assets/htmlintro.html +5 -0
  36. data/spec/assets/i18n.yaml +2 -0
  37. data/spec/assets/iso.xml +8 -0
  38. data/spec/assets/rice_image1.png +0 -0
  39. data/spec/assets/std.css +2 -0
  40. data/spec/assets/word.css +2 -0
  41. data/spec/assets/wordcover.html +3 -0
  42. data/spec/assets/wordintro.html +4 -0
  43. data/spec/isodoc/blocks_spec.rb +130 -47
  44. data/spec/isodoc/cleanup_spec.rb +693 -0
  45. data/spec/isodoc/footnotes_spec.rb +282 -0
  46. data/spec/isodoc/i18n_spec.rb +662 -0
  47. data/spec/isodoc/inline_spec.rb +344 -0
  48. data/spec/isodoc/lists_spec.rb +81 -18
  49. data/spec/isodoc/metadata_spec.rb +141 -0
  50. data/spec/isodoc/postproc_spec.rb +444 -0
  51. data/spec/isodoc/ref_spec.rb +158 -0
  52. data/spec/isodoc/section_spec.rb +275 -112
  53. data/spec/isodoc/table_spec.rb +146 -8
  54. data/spec/isodoc/terms_spec.rb +118 -0
  55. data/spec/isodoc/xref_spec.rb +490 -114
  56. metadata +46 -4
  57. data/lib/isodoc/postprocessing.rb +0 -176
@@ -0,0 +1,74 @@
1
+ require "uuidtools"
2
+
3
+ module IsoDoc
4
+ class Convert
5
# Appends every collected footnote in @footnotes to the parent of +div+.
# Does nothing (and returns nil) when no footnotes have been collected.
def footnotes(div)
  return if @footnotes.empty?

  @footnotes.each do |note|
    div.parent << note
  end
end
9
+
10
# Emits an <a> element on +out+ that links to "#<fnid>", carries the
# class "TableFootnoteRef", and contains +fnref+ as its content.
def make_table_footnote_link(out, fnid, fnref)
  out.a(href: "##{fnid}", class: "TableFootnoteRef") { |link| link << fnref }
end
16
+
17
# Emits the <a> element that serves as the anchor of a table footnote:
# its id is +fnid+, its class is "TableFootnoteRef", and it contains
# +fnref+ followed by whatever insert_tab(anchor, 1) adds.
def make_table_footnote_target(out, fnid, fnref)
  out.a(id: fnid, class: "TableFootnoteRef") do |anchor|
    anchor << fnref
    insert_tab(anchor, 1)
  end
end
24
+
25
# Renders the body of a table footnote: a <div id="ftn<fnid>"> holding
# the footnote anchor followed by the parsed children of +node+.
# Returns the pieces produced by noko joined with newlines.
def make_table_footnote_text(node, fnid, fnref)
  rendered = noko do |xml|
    xml.div(**attr_code({ id: "ftn#{fnid}" })) do |div|
      make_table_footnote_target(div, fnid, fnref)
      node.children.each { |child| parse(child, div) }
    end
  end
  rendered.join("\n")
end
34
+
35
# Renders a non-table footnote as an <aside id="ftn<fnid>"> containing
# the parsed children of +node+. Returns the pieces produced by noko
# joined with newlines.
def make_generic_footnote_text(node, fnid)
  rendered = noko do |xml|
    xml.aside(id: "ftn#{fnid}") do |aside|
      node.children.each { |child| parse(child, aside) }
    end
  end
  rendered.join("\n")
end
42
+
43
# Returns an identifier for the table (or, failing that, the figure)
# enclosing +node+, used by callers as a prefix for footnote ids.
#
# The "table" ancestors are looked up first; the "figure" ancestors are
# consulted only when there is no table ancestor. An explicit empty?
# check is needed here: the result of ancestors is a collection, and an
# empty collection is still truthy, so `a || b` would never reach `b`.
#
# A random UUID string is returned when there is no such ancestor, or
# when the ancestor carries no id attribute (callers concatenate the
# result with a string, so nil must not leak out).
def get_table_ancestor_id(node)
  container = node.ancestors("table")
  container = node.ancestors("figure") if container.empty?
  return UUIDTools::UUID.random_create.to_s if container.empty?

  container.last["id"] || UUIDTools::UUID.random_create.to_s
end
48
+
49
# Handles a footnote that occurs inside a table or figure.
# A link to the footnote is always emitted; the footnote text itself is
# emitted (inside an <aside>) only the first time a given
# container-id + reference combination is encountered, after which the
# combination is recorded in @seen_footnote. @in_footnote is set to true
# while the text is being generated and reset to false afterwards.
def table_footnote_parse(node, out)
  ref = node["reference"]
  key = get_table_ancestor_id(node) + ref
  make_table_footnote_link(out, key, ref)
  unless @seen_footnote.include?(key)
    @in_footnote = true
    out.aside { |aside| aside << make_table_footnote_text(node, key, ref) }
    @in_footnote = false
    @seen_footnote << key
  end
end
60
+
61
# Handles a footnote reference.
# Inside a table or figure (@in_table / @in_figure) the work is handed
# to table_footnote_parse. Otherwise a superscripted link to
# "#ftn<reference>" is emitted, and on the first occurrence of the
# reference the footnote text is rendered and queued in @footnotes,
# with @in_footnote set to true for the duration of the rendering.
def footnote_parse(node, out)
  return table_footnote_parse(node, out) if @in_table || @in_figure

  ref = node["reference"]
  out.a("epub:type": "footnote", href: "#ftn#{ref}") do |link|
    link.sup { |sup| sup << ref }
  end
  return if @seen_footnote.include?(ref)

  @in_footnote = true
  @footnotes << make_generic_footnote_text(node, ref)
  @in_footnote = false
  @seen_footnote << ref
end
73
+ end
74
+ end
@@ -1,5 +1,10 @@
1
1
  module IsoDoc
2
2
  class Convert
3
# Runs the generic cleanup over +result+ (converting to XHTML for the
# cleanup and back again) and hands the outcome to toHTML together with
# +filename+. The +dir+ argument is not used by this method.
def postprocess(result, filename, dir)
  cleaned = cleanup(to_xhtml(result))
  toHTML(from_xhtml(cleaned), filename)
end
7
+
3
8
  def toHTML(result, filename)
4
9
  result = from_xhtml(html_cleanup(to_xhtml(result)))
5
10
  result = populate_template(result, :html)
@@ -10,7 +15,7 @@ module IsoDoc
10
15
 
11
16
  def html_cleanup(x)
12
17
  footnote_backlinks(
13
- move_images(html_footnote_filter(html_preface(htmlstyle(x))))
18
+ html_toc(move_images(html_footnote_filter(html_preface(htmlstyle(x)))))
14
19
  )
15
20
  end
16
21
 
@@ -32,15 +37,23 @@ module IsoDoc
32
37
  end
33
38
 
34
39
  def html_preface(docxml)
40
+ html_cover(docxml) if @htmlcoverpage
41
+ html_intro(docxml) if @htmlintropage
42
+ docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
43
+ @closemathdelim)
44
+ docxml
45
+ end
46
+
47
# Reads the file named by @htmlcoverpage as UTF-8, parses it as HTML,
# and inserts its US-ASCII serialisation before the first child of the
# <div class="WordSection1"> element of +docxml+.
def html_cover(docxml)
  markup = File.read(@htmlcoverpage, encoding: "UTF-8")
  cover = Nokogiri::HTML(markup)
  section = docxml.at('//div[@class="WordSection1"]')
  section.children.first.add_previous_sibling(cover.to_xml(encoding: "US-ASCII"))
end
52
+
53
+ def html_intro(docxml)
38
54
  cover = Nokogiri::HTML(File.read(@htmlintropage, encoding: "UTF-8"))
39
55
  d = docxml.at('//div[@class="WordSection2"]')
40
56
  d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
41
- docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
42
- @closemathdelim)
43
- docxml
44
57
  end
45
58
 
46
59
  def htmlstylesheet
@@ -110,5 +123,29 @@ module IsoDoc
110
123
  end
111
124
  docxml
112
125
  end
126
+
127
# Appends the table-of-contents entry for heading +h+ to the markup
# accumulated so far in +ret+ and returns the new string.
#
# h        - heading element; it is given a random UUID as id if it has
#            none, so that the entry can link to it.
# ret      - TOC markup built from the preceding headings.
# prevname - element name of the preceding heading ("h1", "h2"), or ""
#            when there is none.
#
# An h1 that follows an h2 closes the nested list. Any other heading
# opens a nested list unless the preceding heading was an h2, i.e.
# unless one is already open. This includes an h2 with no preceding
# heading at all, which previously produced a nested item without an
# opening <ul> and left the list markup unbalanced.
def html_toc1(h, ret, prevname)
  h["id"] = UUIDTools::UUID.random_create.to_s unless h["id"]
  li = "<li><a href='##{h["id"]}'>#{header_strip(h)}</a></li>"
  if h.name == "h1"
    ret += "</ul>" if prevname == "h2"
  elsif prevname != "h2"
    ret += "<ul>"
  end
  ret + li
end
137
+
138
# Builds a nested table of contents from the h1 and h2 headings of
# +docxml+ and substitutes it for the element whose id is "toc-list".
# Returns +docxml+.
#
# Nothing is done unless an intro page is configured (@htmlintropage).
# Headings with class "toc-contents" get no entry, but are still
# remembered as the preceding heading for the nesting logic of
# html_toc1.
#
# Fixes relative to the previous text: a nested list still open after
# the last heading is now closed with </ul> (it was "closed" with a
# second <ul>), the outer list is closed with </ul> (it was closed with
# the non-existent tag </ret>), and a document without a "toc-list"
# placeholder is returned as is instead of raising on nil.
def html_toc(docxml)
  return docxml unless @htmlintropage
  ret = ""
  prevname = ""
  docxml.xpath("//h1 | //h2").each do |h|
    ret = html_toc1(h, ret, prevname) unless h["class"] == "toc-contents"
    prevname = h.name
  end
  ret += "</ul>" if prevname == "h2"
  docxml.at("//*[@id='toc-list']")&.replace("<ul>#{ret}</ul>")
  docxml
end
113
150
  end
114
151
  end
@@ -40,6 +40,7 @@ internal_external_terms_boilerplate: |
40
40
  note: NOTE
41
41
  note_xref: Note
42
42
  termnote: Note % to entry
43
+ list: List
43
44
  figure: Figure
44
45
  formula: Formula
45
46
  table: Table
@@ -35,6 +35,7 @@ note_xref: Note
35
35
  termnote: Note % à l'article
36
36
  figure: Figure
37
37
  formula: Formule
38
+ list: Liste
38
39
  table: Tableau
39
40
  key: Légende
40
41
  example: EXEMPLE
@@ -35,6 +35,7 @@ note_xref: 注
35
35
  termnote: 注%
36
36
  figure: 图
37
37
  formula: 公式
38
+ list: 列项
38
39
  table: 表
39
40
  key: 说明
40
41
  example: 示例
@@ -39,6 +39,7 @@ module IsoDoc
39
39
  @note_xref_lbl = y["note_xref"]
40
40
  @termnote_lbl = y["termnote"]
41
41
  @figure_lbl = y["figure"]
42
+ @list_lbl = y["list"]
42
43
  @formula_lbl = y["formula"]
43
44
  @table_lbl = y["table"]
44
45
  @key_lbl = y["key"]
@@ -3,14 +3,11 @@ require "uuidtools"
3
3
  module IsoDoc
4
4
  class Convert
5
5
  def section_break(body)
6
- body.br **{ clear: "all", class: "section" }
6
+ body.br
7
7
  end
8
8
 
9
- def page_break(body)
10
- body.br **{
11
- clear: "all",
12
- style: "mso-special-character:line-break;page-break-before:always",
13
- }
9
+ def page_break(out)
10
+ out.br
14
11
  end
15
12
 
16
13
  def link_parse(node, out)
@@ -57,7 +54,7 @@ module IsoDoc
57
54
  l10n(", #{@whole_of_text}")
58
55
  else
59
56
  eref_localities1(r["type"], r.at(ns("./referenceFrom")),
60
- r.at(ns("./referenceTo")))
57
+ r.at(ns("./referenceTo")), @lang)
61
58
  end
62
59
  end
63
60
  ret
@@ -86,11 +83,6 @@ module IsoDoc
86
83
  end
87
84
  end
88
85
 
89
- def pagebreak_parse(_node, out)
90
- attrs = { clear: all, class: "pagebreak" }
91
- out.br **attrs
92
- end
93
-
94
86
  def error_parse(node, out)
95
87
  text = node.to_xml.gsub(/</, "&lt;").gsub(/>/, "&gt;")
96
88
  out.para do |p|
@@ -6,14 +6,6 @@ module IsoDoc
6
6
  @termdomain = termdomain
7
7
  end
8
8
 
9
- def get_termexample
10
- @termexample
11
- end
12
-
13
- def set_termexample(value)
14
- @termexample = value
15
- end
16
-
17
9
  def in_sourcecode
18
10
  @sourcecode
19
11
  end
@@ -32,6 +24,27 @@ module IsoDoc
32
24
  [filename, dir]
33
25
  end
34
26
 
27
+ # these are in fact preprocess,
28
+ # but they are extraneous to main HTML file
29
# Computes the anchor names for +docxml+, then writes the <head> of the
# output via define_head and returns its result.
def html_header(html, docxml, filename, dir)
  anchor_names(docxml)
  define_head(html, filename, dir)
end
33
+
34
+ # isodoc.css overrides any CSS injected by Html2Doc, which
35
+ # is inserted before this CSS.
36
# Writes the <head> element: a <title> containing +filename+ and, when
# @standardstylesheet is set, a <style> element whose content is that
# stylesheet file wrapped in a comment, with every occurrence of
# "FILENAME" replaced by +filename+.
# When no stylesheet is configured the method returns nil straight
# after writing the title.
def define_head(html, filename, _dir)
  html.head do |head|
    head.title { |title| title << filename }
    return unless @standardstylesheet

    head.style do |style|
      css = File.read(@standardstylesheet).gsub("FILENAME", filename)
      style.comment("\n#{css}\n")
    end
  end
end
47
+
35
48
  def make_body(xml, docxml)
36
49
  body_attr = { lang: "EN-US", link: "blue", vlink: "#954F72" }
37
50
  xml.body **body_attr do |body|
@@ -51,6 +64,8 @@ module IsoDoc
51
64
  def make_body2(body, docxml)
52
65
  body.div **{ class: "WordSection2" } do |div2|
53
66
  info docxml, div2
67
+ foreword docxml, div2
68
+ introduction docxml, div2
54
69
  div2.p { |p| p << "&nbsp;" } # placeholder
55
70
  end
56
71
  section_break(body)
@@ -65,7 +80,6 @@ module IsoDoc
65
80
  end
66
81
 
67
82
  def info(isoxml, out)
68
- # intropage(out)
69
83
  title isoxml, out
70
84
  subtitle isoxml, out
71
85
  id isoxml, out
@@ -73,8 +87,7 @@ module IsoDoc
73
87
  bibdate isoxml, out
74
88
  relations isoxml, out
75
89
  version isoxml, out
76
- foreword isoxml, out
77
- introduction isoxml, out
90
+ get_metadata
78
91
  end
79
92
 
80
93
  def middle_title(out)
@@ -125,7 +138,7 @@ module IsoDoc
125
138
  when "br" then out.br
126
139
  when "hr" then out.hr
127
140
  when "bookmark" then bookmark_parse(node, out)
128
- when "pagebreak" then pagebreak_parse(node, out)
141
+ when "pagebreak" then page_break(out)
129
142
  when "callout" then callout_parse(node, out)
130
143
  when "stem" then stem_parse(node, out)
131
144
  when "clause" then clause_parse(node, out)
@@ -161,7 +174,7 @@ module IsoDoc
161
174
  when "termsource" then termref_parse(node, out)
162
175
  when "modification" then modification_parse(node, out)
163
176
  when "termnote" then termnote_parse(node, out)
164
- when "termexample" then termexample_parse(node, out)
177
+ when "termexample" then example_parse(node, out)
165
178
  when "terms" then terms_parse(node, out)
166
179
  when "symbols-abbrevs" then symbols_parse(node, out)
167
180
  when "references" then bibliography_parse(node, out)
@@ -65,17 +65,35 @@ module IsoDoc
65
65
  set_metadata(:secretariat, sec.text) if sec
66
66
  end
67
67
 
68
# Formats a date element as text.
#
# With a <from> child the result is its text, followed by "&ndash;" and
# the text of the <to> child when there is one. A date element without
# a <from> child falls back to the element's own text, which is what
# callers used before this helper existed; previously such an element
# raised NoMethodError on nil.
def date_range(date)
  from = date.at(ns("./from"))
  return date.text unless from

  to = date.at(ns("./to"))
  to ? "#{from.text}&ndash;#{to.text}" : from.text
end
75
+
68
76
  def bibdate(isoxml, _out)
69
77
  isoxml.xpath(ns("//bibdata/date")).each do |d|
70
- set_metadata("#{d['type']}date".to_sym, d.text)
78
+ set_metadata("#{d['type']}date".to_sym, date_range(d))
71
79
  end
72
80
  end
73
81
 
82
# True when the organization element +org+ has the abbreviation "ISO"
# or the name "International Organization for Standardization";
# false otherwise, including when +org+ is nil.
def iso?(org)
  return true if org&.at(ns("./abbreviation"))&.text == "ISO"

  org&.at(ns("./name"))&.text == "International Organization for Standardization"
end
88
+
74
89
  def agency(xml)
75
90
  agency = ""
76
91
  xml.xpath(ns("//bibdata/contributor[xmlns:role/@type = 'publisher']/"\
77
- "organization/name")).each do |org|
78
- agency = org.text == "ISO" ? "ISO/#{agency}" : "#{agency}#{org.text}/"
92
+ "organization")).each do |org|
93
+ name = org&.at(ns("./name"))&.text
94
+ abbrev = org&.at(ns("./abbreviation"))&.text
95
+ agency1 = abbrev || name
96
+ agency = iso?(org) ? "ISO/#{agency}" : "#{agency}#{agency1}/"
79
97
  end
80
98
  set_metadata(:agency, agency.sub(%r{/$}, ""))
81
99
  end
@@ -105,8 +123,8 @@ module IsoDoc
105
123
  def draftinfo(draft, revdate)
106
124
  draftinfo = ""
107
125
  if draft
108
- draftinfo = " (#{@draft_lbl} #{draft.text}"
109
- draftinfo += ", #{revdate.text}" if revdate
126
+ draftinfo = " (#{@draft_lbl} #{draft}"
127
+ draftinfo += ", #{revdate}" if revdate
110
128
  draftinfo += ")"
111
129
  end
112
130
  l10n(draftinfo)
@@ -114,13 +132,8 @@ module IsoDoc
114
132
 
115
133
  def version(isoxml, _out)
116
134
  set_metadata(:docyear, isoxml&.at(ns("//copyright/from"))&.text)
117
- # draft = isoxml.at(ns("//version/draft"))
118
- # set_metadata(:draft, draft.nil? ? nil : draft.text)
119
135
  set_metadata(:draft, isoxml&.at(ns("//version/draft"))&.text)
120
- # revdate = isoxml.at(ns("//version/revision-date"))
121
- #set_metadata(:revdate, revdate.nil? ? nil : revdate.text)
122
136
  set_metadata(:revdate, isoxml&.at(ns("//version/revision-date"))&.text)
123
- #set_metadata(:draftinfo, draftinfo(draft, revdate))
124
137
  set_metadata(:draftinfo,
125
138
  draftinfo(get_metadata[:draft], get_metadata[:revdate]))
126
139
  end
@@ -8,7 +8,7 @@ module IsoDoc
8
8
  isocode = b.at(ns("./docidentifier")).text
9
9
  isodate = b.at(ns("./date[@type = 'published']"))
10
10
  reference = docid_l10n(isocode)
11
- reference += ": #{isodate.text}" if isodate
11
+ reference += ": #{date_range(isodate)}" if isodate
12
12
  reference
13
13
  end
14
14
 
@@ -48,15 +48,6 @@ module IsoDoc
48
48
  end
49
49
  end
50
50
 
51
- def ref_entry(list, b, ordinal, _bibliography)
52
- ref = b.at(ns("./ref"))
53
- para = b.at(ns("./p"))
54
- list.p **attr_code("id": ref["id"], class: "Biblio") do |r|
55
- ref_entry_code(r, ordinal, ref.text.gsub(/[\[\]]/, ""))
56
- para.children.each { |n| parse(n, r) }
57
- end
58
- end
59
-
60
51
  # TODO generate formatted ref if not present
61
52
  def noniso_bibitem(list, b, ordinal, bibliography)
62
53
  list.p **attr_code("id": b["id"], class: "Biblio") do |r|
@@ -66,13 +57,15 @@ module IsoDoc
66
57
  else
67
58
  r << "#{iso_bibitem_ref_code(b)}, "
68
59
  end
69
- b.at(ns("./formattedref")).children.each { |n| parse(n, r) }
60
+ b.at(ns("./formattedref"))&.children&.each { |n| parse(n, r) }
70
61
  end
71
62
  end
72
63
 
73
64
  ISO_PUBLISHER_XPATH =
74
65
  "./contributor[xmlns:role/@type = 'publisher']/"\
75
- "organization[name = 'ISO' or xmlns:name = 'IEC']".freeze
66
+ "organization[abbreviation = 'ISO' or xmlns:abbreviation = 'IEC' or "\
67
+ "xmlns:name = 'International Organization for Standardization' or "\
68
+ "xmlns:name = 'International Electrotechnical Commission']".freeze
76
69
 
77
70
  def split_bibitems(f)
78
71
  iso_bibitem = []
@@ -142,8 +135,13 @@ module IsoDoc
142
135
  end
143
136
  end
144
137
 
145
- def format_ref(ref, isopub)
146
- return "ISO #{ref}" if isopub
138
# Formats the cross-reference text for a bibliographic item.
#
# For an ISO-published item (+isopub+ truthy) the reference is returned
# unchanged when there is no +date+ or when the text of the date's
# <from> child is "--"; otherwise ": " and the formatted date range are
# appended. For any other item a reference consisting only of digits is
# wrapped in square brackets, and everything else is returned as is.
def format_ref(ref, isopub, date)
  unless isopub
    bare_number = /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
    return bare_number ? "[#{ref}]" : ref
  end
  return ref unless date
  return ref if date.at(ns("./from"))&.text == "--"

  ref + ": #{date_range(date)}"
end
@@ -151,17 +149,17 @@ module IsoDoc
151
149
  def reference_names(ref)
152
150
  isopub = ref.at(ns(ISO_PUBLISHER_XPATH))
153
151
  docid = ref.at(ns("./docidentifier"))
154
- return ref_names(ref) unless docid
152
+ # return ref_names(ref) unless docid
155
153
  date = ref.at(ns("./date[@type = 'published']"))
156
- reference = format_ref(docid_l10n(docid.text), isopub)
157
- reference += ": #{date.text}" if date && isopub && date != "--"
154
+ reference = format_ref(docid_l10n(docid.text), isopub, date)
155
+ # reference += ": #{date_range(date)}" if date && isopub && from.text != "--"
158
156
  @anchors[ref["id"]] = { xref: reference }
159
157
  end
160
158
 
161
- def ref_names(ref)
162
- linkend = ref.text
163
- linkend.gsub!(/[\[\]]/, "") unless /^\[\d+\]$/.match? linkend
164
- @anchors[ref["id"]] = { xref: linkend }
165
- end
159
+ # def ref_names(ref)
160
+ # linkend = ref.text
161
+ # linkend.gsub!(/[\[\]]/, "") unless /^\[\d+\]$/.match? linkend
162
+ # @anchors[ref["id"]] = { xref: linkend }
163
+ # end
166
164
  end
167
165
  end