isodoc 0.5.5 → 0.5.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +46 -0
  3. data/LICENSE +25 -0
  4. data/README.adoc +1 -1
  5. data/Rakefile +6 -0
  6. data/isodoc.gemspec +1 -0
  7. data/lib/isodoc.rb +4 -95
  8. data/lib/isodoc/cleanup.rb +14 -10
  9. data/lib/isodoc/{notes.rb → comments.rb} +0 -73
  10. data/lib/isodoc/convert.rb +97 -0
  11. data/lib/isodoc/footnotes.rb +74 -0
  12. data/lib/isodoc/html.rb +41 -4
  13. data/lib/isodoc/i18n-en.yaml +1 -0
  14. data/lib/isodoc/i18n-fr.yaml +1 -0
  15. data/lib/isodoc/i18n-zh-Hans.yaml +1 -0
  16. data/lib/isodoc/i18n.rb +1 -0
  17. data/lib/isodoc/inline.rb +4 -12
  18. data/lib/isodoc/iso2wordhtml.rb +26 -13
  19. data/lib/isodoc/metadata.rb +23 -10
  20. data/lib/isodoc/references.rb +20 -22
  21. data/lib/isodoc/section.rb +4 -3
  22. data/lib/isodoc/table.rb +0 -2
  23. data/lib/isodoc/terms.rb +2 -13
  24. data/lib/isodoc/utils.rb +24 -3
  25. data/lib/isodoc/version.rb +1 -1
  26. data/lib/isodoc/wordconvert/comments.rb +155 -0
  27. data/lib/isodoc/wordconvert/convert.rb +31 -0
  28. data/lib/isodoc/wordconvert/footnotes.rb +80 -0
  29. data/lib/isodoc/wordconvert/wordconvertmodule.rb +212 -0
  30. data/lib/isodoc/xref_gen.rb +50 -79
  31. data/lib/isodoc/xref_sect_gen.rb +82 -0
  32. data/spec/assets/header.html +7 -0
  33. data/spec/assets/html.css +2 -0
  34. data/spec/assets/htmlcover.html +4 -0
  35. data/spec/assets/htmlintro.html +5 -0
  36. data/spec/assets/i18n.yaml +2 -0
  37. data/spec/assets/iso.xml +8 -0
  38. data/spec/assets/rice_image1.png +0 -0
  39. data/spec/assets/std.css +2 -0
  40. data/spec/assets/word.css +2 -0
  41. data/spec/assets/wordcover.html +3 -0
  42. data/spec/assets/wordintro.html +4 -0
  43. data/spec/isodoc/blocks_spec.rb +130 -47
  44. data/spec/isodoc/cleanup_spec.rb +693 -0
  45. data/spec/isodoc/footnotes_spec.rb +282 -0
  46. data/spec/isodoc/i18n_spec.rb +662 -0
  47. data/spec/isodoc/inline_spec.rb +344 -0
  48. data/spec/isodoc/lists_spec.rb +81 -18
  49. data/spec/isodoc/metadata_spec.rb +141 -0
  50. data/spec/isodoc/postproc_spec.rb +444 -0
  51. data/spec/isodoc/ref_spec.rb +158 -0
  52. data/spec/isodoc/section_spec.rb +275 -112
  53. data/spec/isodoc/table_spec.rb +146 -8
  54. data/spec/isodoc/terms_spec.rb +118 -0
  55. data/spec/isodoc/xref_spec.rb +490 -114
  56. metadata +46 -4
  57. data/lib/isodoc/postprocessing.rb +0 -176
@@ -0,0 +1,74 @@
1
+ require "uuidtools"
2
+
3
module IsoDoc
  # Footnote rendering for the converter.
  #
  # Body footnotes are accumulated in @footnotes (see #footnote_parse) and
  # flushed by #footnotes; footnotes met while @in_table or @in_figure is set
  # are rendered inline next to their reference (see #table_footnote_parse).
  # `out`/`xml`/`div` are presumably Nokogiri builder nodes — confirm against
  # the callers, which are outside this file.
  class Convert
    # Appends every collected footnote to the parent of +div+.
    # Does nothing when no footnote has been collected.
    def footnotes(div)
      return if @footnotes.empty?
      @footnotes.each { |fn| div.parent << fn }
    end

    # Emits the in-text link (<a href="#fnid">) pointing at a table footnote.
    def make_table_footnote_link(out, fnid, fnref)
      attrs = { href: "##{fnid}", class: "TableFootnoteRef" }
      out.a(**attrs) do |a|
        a << fnref
      end
    end

    # Emits the anchor (<a id="fnid">) that heads the table footnote text,
    # followed by one tab (via insert_tab, defined elsewhere).
    def make_table_footnote_target(out, fnid, fnref)
      attrs = { id: fnid, class: "TableFootnoteRef" }
      out.a(**attrs) do |a|
        a << fnref
        insert_tab(a, 1)
      end
    end

    # Builds the markup for a table footnote body: a div with id "ftn<fnid>"
    # holding the target anchor and the parsed children of +node+.
    # Returns the fragments produced by noko joined with newlines.
    def make_table_footnote_text(node, fnid, fnref)
      attrs = { id: "ftn#{fnid}" }
      noko do |xml|
        xml.div(**attr_code(attrs)) do |div|
          make_table_footnote_target(div, fnid, fnref)
          node.children.each { |n| parse(n, div) }
        end
      end.join("\n")
    end

    # Builds the markup for a body footnote: an aside with id "ftn<fnid>"
    # holding the parsed children of +node+.
    def make_generic_footnote_text(node, fnid)
      noko do |xml|
        xml.aside(id: "ftn#{fnid}") do |div|
          node.children.each { |n| parse(n, div) }
        end
      end.join("\n")
    end

    # Returns an identifier used to namespace footnote ids within their
    # enclosing container: the id of the last matching "table" ancestor, or,
    # when the node is not inside a table, of the last "figure" ancestor.
    # Falls back to a random UUID when there is no such ancestor or it
    # carries no id.
    def get_table_ancestor_id(node)
      container = node.ancestors("table")
      # An empty node set is still truthy, so `tables || figures` would
      # never consult the figure ancestors; test for emptiness explicitly.
      container = node.ancestors("figure") if container.empty?
      return UUIDTools::UUID.random_create.to_s if container.empty?
      # A container without an id would make `tid + fn` in the caller raise.
      container.last["id"] || UUIDTools::UUID.random_create.to_s
    end

    # Renders a footnote reference found inside a table or figure: always
    # emits the link, and emits the footnote text only the first time this
    # container/reference pair is seen.
    def table_footnote_parse(node, out)
      fn = node["reference"]
      fnid = get_table_ancestor_id(node) + fn
      make_table_footnote_link(out, fnid, fn)
      # do not output footnote text if we have already seen it for this table
      return if @seen_footnote.include?(fnid)
      @in_footnote = true
      out.aside { |a| a << make_table_footnote_text(node, fnid, fn) }
      @in_footnote = false
      @seen_footnote << fnid
    end

    # Renders a footnote reference. Inside a table or figure this delegates
    # to #table_footnote_parse; otherwise it emits a superscript link and
    # queues the footnote text in @footnotes the first time the reference
    # is seen.
    def footnote_parse(node, out)
      return table_footnote_parse(node, out) if @in_table || @in_figure
      fn = node["reference"]
      out.a("epub:type": "footnote", href: "#ftn#{fn}") do |a|
        a.sup { |sup| sup << fn }
      end
      return if @seen_footnote.include?(fn)
      @in_footnote = true
      @footnotes << make_generic_footnote_text(node, fn)
      @in_footnote = false
      @seen_footnote << fn
    end
  end
end
@@ -1,5 +1,10 @@
1
1
  module IsoDoc
2
2
  class Convert
3
# Runs the generic cleanup pass over +result+ (round-tripping it through
# to_xhtml/from_xhtml) and hands the cleaned markup to toHTML together with
# +filename+. +dir+ is accepted but not used here.
def postprocess(result, filename, dir)
  xhtml_doc = to_xhtml(result)
  cleaned = from_xhtml(cleanup(xhtml_doc))
  toHTML(cleaned, filename)
end
7
+
3
8
  def toHTML(result, filename)
4
9
  result = from_xhtml(html_cleanup(to_xhtml(result)))
5
10
  result = populate_template(result, :html)
@@ -10,7 +15,7 @@ module IsoDoc
10
15
 
11
16
  def html_cleanup(x)
12
17
  footnote_backlinks(
13
- move_images(html_footnote_filter(html_preface(htmlstyle(x))))
18
+ html_toc(move_images(html_footnote_filter(html_preface(htmlstyle(x)))))
14
19
  )
15
20
  end
16
21
 
@@ -32,15 +37,23 @@ module IsoDoc
32
37
  end
33
38
 
34
39
  def html_preface(docxml)
40
+ html_cover(docxml) if @htmlcoverpage
41
+ html_intro(docxml) if @htmlintropage
42
+ docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
43
+ @closemathdelim)
44
+ docxml
45
+ end
46
+
47
# Reads the cover page file named by @htmlcoverpage (as UTF-8), parses it as
# HTML and inserts its US-ASCII serialisation before the first child of the
# div with class "WordSection1".
def html_cover(docxml)
  cover_source = File.read(@htmlcoverpage, encoding: "UTF-8")
  cover = Nokogiri::HTML(cover_source)
  section = docxml.at('//div[@class="WordSection1"]')
  first_child = section.children.first
  first_child.add_previous_sibling(cover.to_xml(encoding: "US-ASCII"))
end
52
+
53
+ def html_intro(docxml)
38
54
  cover = Nokogiri::HTML(File.read(@htmlintropage, encoding: "UTF-8"))
39
55
  d = docxml.at('//div[@class="WordSection2"]')
40
56
  d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
41
- docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
42
- @closemathdelim)
43
- docxml
44
57
  end
45
58
 
46
59
  def htmlstylesheet
@@ -110,5 +123,29 @@ module IsoDoc
110
123
  end
111
124
  docxml
112
125
  end
126
+
127
# Appends one table-of-contents entry for heading +h+ to the markup built so
# far (+ret+) and returns the new string; +ret+ itself is not modified.
# Gives +h+ a random UUID id when it has none, so the entry can link to it.
# +prevname+ is the element name of the previously visited heading: a
# sub-list is closed when an h1 follows an h2, and opened when any other
# heading follows an h1.
def html_toc1(h, ret, prevname)
  h["id"] ||= UUIDTools::UUID.random_create.to_s
  entry = "<li><a href='##{h["id"]}'>#{header_strip(h)}</a></li>"
  list_tag =
    if h.name == "h1"
      prevname == "h2" ? "</ul>" : ""
    else
      prevname == "h1" ? "<ul>" : ""
    end
  ret + list_tag + entry
end
137
+
138
# Builds a two-level table of contents from the document's h1/h2 headings
# and substitutes it for the element whose id is "toc-list".
#
# Does nothing unless @htmlintropage is set. Headings with class
# "toc-contents" get no entry, but still count as the "previous heading"
# when deciding whether to open or close a sub-list (see html_toc1).
# If the document has no "toc-list" element the TOC is silently dropped.
# Returns +docxml+.
def html_toc(docxml)
  return docxml unless @htmlintropage
  ret = ""
  prevname = ""
  docxml.xpath("//h1 | //h2").each do |h|
    ret = html_toc1(h, ret, prevname) unless h["class"] == "toc-contents"
    prevname = h.name
  end
  # A trailing run of h2 entries leaves a sub-list open: close it (the
  # closing tag, not another opening one).
  ret += "</ul>" if prevname == "h2"
  # Wrap in a properly closed <ul>; tolerate a missing placeholder.
  docxml.at("//*[@id='toc-list']")&.replace("<ul>#{ret}</ul>")
  docxml
end
113
150
  end
114
151
  end
@@ -40,6 +40,7 @@ internal_external_terms_boilerplate: |
40
40
  note: NOTE
41
41
  note_xref: Note
42
42
  termnote: Note % to entry
43
+ list: List
43
44
  figure: Figure
44
45
  formula: Formula
45
46
  table: Table
@@ -35,6 +35,7 @@ note_xref: Note
35
35
  termnote: Note % à l'article
36
36
  figure: Figure
37
37
  formula: Formule
38
+ list: Liste
38
39
  table: Tableau
39
40
  key: Légende
40
41
  example: EXEMPLE
@@ -35,6 +35,7 @@ note_xref: 注
35
35
  termnote: 注%
36
36
  figure: 图
37
37
  formula: 公式
38
+ list: 列项
38
39
  table: 表
39
40
  key: 说明
40
41
  example: 示例
@@ -39,6 +39,7 @@ module IsoDoc
39
39
  @note_xref_lbl = y["note_xref"]
40
40
  @termnote_lbl = y["termnote"]
41
41
  @figure_lbl = y["figure"]
42
+ @list_lbl = y["list"]
42
43
  @formula_lbl = y["formula"]
43
44
  @table_lbl = y["table"]
44
45
  @key_lbl = y["key"]
@@ -3,14 +3,11 @@ require "uuidtools"
3
3
  module IsoDoc
4
4
  class Convert
5
5
  def section_break(body)
6
- body.br **{ clear: "all", class: "section" }
6
+ body.br
7
7
  end
8
8
 
9
- def page_break(body)
10
- body.br **{
11
- clear: "all",
12
- style: "mso-special-character:line-break;page-break-before:always",
13
- }
9
+ def page_break(out)
10
+ out.br
14
11
  end
15
12
 
16
13
  def link_parse(node, out)
@@ -57,7 +54,7 @@ module IsoDoc
57
54
  l10n(", #{@whole_of_text}")
58
55
  else
59
56
  eref_localities1(r["type"], r.at(ns("./referenceFrom")),
60
- r.at(ns("./referenceTo")))
57
+ r.at(ns("./referenceTo")), @lang)
61
58
  end
62
59
  end
63
60
  ret
@@ -86,11 +83,6 @@ module IsoDoc
86
83
  end
87
84
  end
88
85
 
89
- def pagebreak_parse(_node, out)
90
- attrs = { clear: all, class: "pagebreak" }
91
- out.br **attrs
92
- end
93
-
94
86
  def error_parse(node, out)
95
87
  text = node.to_xml.gsub(/</, "&lt;").gsub(/>/, "&gt;")
96
88
  out.para do |p|
@@ -6,14 +6,6 @@ module IsoDoc
6
6
  @termdomain = termdomain
7
7
  end
8
8
 
9
- def get_termexample
10
- @termexample
11
- end
12
-
13
- def set_termexample(value)
14
- @termexample = value
15
- end
16
-
17
9
  def in_sourcecode
18
10
  @sourcecode
19
11
  end
@@ -32,6 +24,27 @@ module IsoDoc
32
24
  [filename, dir]
33
25
  end
34
26
 
27
+ # these are in fact preprocess,
28
+ # but they are extraneous to main HTML file
29
# Computes the cross-reference anchors for +docxml+ (anchor_names), then
# writes the HTML head via define_head and returns its result.
def html_header(html, docxml, filename, dir)
  anchor_names(docxml)
  define_head(html, filename, dir)
end
33
+
34
+ # isodoc.css overrides any CSS injected by Html2Doc, which
35
+ # is inserted before this CSS.
36
# Writes the <head> element: a <title> holding +filename+ and, when
# @standardstylesheet is set, a <style> element whose content is that
# stylesheet file (with every "FILENAME" replaced by +filename+) emitted as
# a comment. +_dir+ is unused.
def define_head(html, filename, _dir)
  html.head do |head|
    head.title { |title| title << filename }
    # Bare return: exits the whole method (with nil), not just this block.
    return unless @standardstylesheet
    head.style do |style_el|
      css = File.read(@standardstylesheet)
      style_el.comment "\n#{css.gsub("FILENAME", filename)}\n"
    end
  end
end
47
+
35
48
  def make_body(xml, docxml)
36
49
  body_attr = { lang: "EN-US", link: "blue", vlink: "#954F72" }
37
50
  xml.body **body_attr do |body|
@@ -51,6 +64,8 @@ module IsoDoc
51
64
  def make_body2(body, docxml)
52
65
  body.div **{ class: "WordSection2" } do |div2|
53
66
  info docxml, div2
67
+ foreword docxml, div2
68
+ introduction docxml, div2
54
69
  div2.p { |p| p << "&nbsp;" } # placeholder
55
70
  end
56
71
  section_break(body)
@@ -65,7 +80,6 @@ module IsoDoc
65
80
  end
66
81
 
67
82
  def info(isoxml, out)
68
- # intropage(out)
69
83
  title isoxml, out
70
84
  subtitle isoxml, out
71
85
  id isoxml, out
@@ -73,8 +87,7 @@ module IsoDoc
73
87
  bibdate isoxml, out
74
88
  relations isoxml, out
75
89
  version isoxml, out
76
- foreword isoxml, out
77
- introduction isoxml, out
90
+ get_metadata
78
91
  end
79
92
 
80
93
  def middle_title(out)
@@ -125,7 +138,7 @@ module IsoDoc
125
138
  when "br" then out.br
126
139
  when "hr" then out.hr
127
140
  when "bookmark" then bookmark_parse(node, out)
128
- when "pagebreak" then pagebreak_parse(node, out)
141
+ when "pagebreak" then page_break(out)
129
142
  when "callout" then callout_parse(node, out)
130
143
  when "stem" then stem_parse(node, out)
131
144
  when "clause" then clause_parse(node, out)
@@ -161,7 +174,7 @@ module IsoDoc
161
174
  when "termsource" then termref_parse(node, out)
162
175
  when "modification" then modification_parse(node, out)
163
176
  when "termnote" then termnote_parse(node, out)
164
- when "termexample" then termexample_parse(node, out)
177
+ when "termexample" then example_parse(node, out)
165
178
  when "terms" then terms_parse(node, out)
166
179
  when "symbols-abbrevs" then symbols_parse(node, out)
167
180
  when "references" then bibliography_parse(node, out)
@@ -65,17 +65,35 @@ module IsoDoc
65
65
  set_metadata(:secretariat, sec.text) if sec
66
66
  end
67
67
 
68
# Formats a date element as text.
#
# With a `from` child the result is its text, followed by "&ndash;" and the
# text of the `to` child when there is one. A date element without a `from`
# child falls back to the element's own text (what callers used before this
# helper existed) instead of raising NoMethodError on nil.
def date_range(date)
  from = date.at(ns("./from"))
  to = date.at(ns("./to"))
  return date.text unless from
  ret = from.text
  ret += "&ndash;#{to.text}" if to
  ret
end
75
+
68
76
  def bibdate(isoxml, _out)
69
77
  isoxml.xpath(ns("//bibdata/date")).each do |d|
70
- set_metadata("#{d['type']}date".to_sym, d.text)
78
+ set_metadata("#{d['type']}date".to_sym, date_range(d))
71
79
  end
72
80
  end
73
81
 
82
# True when the organization element +org+ is ISO, recognised either by the
# abbreviation "ISO" or by its full name. A nil +org+ yields false.
def iso?(org)
  return false if org.nil?
  full_name = org.at(ns("./name"))&.text
  abbreviation = org.at(ns("./abbreviation"))&.text
  return true if abbreviation == "ISO"
  full_name == "International Organization for Standardization"
end
88
+
74
89
  def agency(xml)
75
90
  agency = ""
76
91
  xml.xpath(ns("//bibdata/contributor[xmlns:role/@type = 'publisher']/"\
77
- "organization/name")).each do |org|
78
- agency = org.text == "ISO" ? "ISO/#{agency}" : "#{agency}#{org.text}/"
92
+ "organization")).each do |org|
93
+ name = org&.at(ns("./name"))&.text
94
+ abbrev = org&.at(ns("./abbreviation"))&.text
95
+ agency1 = abbrev || name
96
+ agency = iso?(org) ? "ISO/#{agency}" : "#{agency}#{agency1}/"
79
97
  end
80
98
  set_metadata(:agency, agency.sub(%r{/$}, ""))
81
99
  end
@@ -105,8 +123,8 @@ module IsoDoc
105
123
  def draftinfo(draft, revdate)
106
124
  draftinfo = ""
107
125
  if draft
108
- draftinfo = " (#{@draft_lbl} #{draft.text}"
109
- draftinfo += ", #{revdate.text}" if revdate
126
+ draftinfo = " (#{@draft_lbl} #{draft}"
127
+ draftinfo += ", #{revdate}" if revdate
110
128
  draftinfo += ")"
111
129
  end
112
130
  l10n(draftinfo)
@@ -114,13 +132,8 @@ module IsoDoc
114
132
 
115
133
  def version(isoxml, _out)
116
134
  set_metadata(:docyear, isoxml&.at(ns("//copyright/from"))&.text)
117
- # draft = isoxml.at(ns("//version/draft"))
118
- # set_metadata(:draft, draft.nil? ? nil : draft.text)
119
135
  set_metadata(:draft, isoxml&.at(ns("//version/draft"))&.text)
120
- # revdate = isoxml.at(ns("//version/revision-date"))
121
- #set_metadata(:revdate, revdate.nil? ? nil : revdate.text)
122
136
  set_metadata(:revdate, isoxml&.at(ns("//version/revision-date"))&.text)
123
- #set_metadata(:draftinfo, draftinfo(draft, revdate))
124
137
  set_metadata(:draftinfo,
125
138
  draftinfo(get_metadata[:draft], get_metadata[:revdate]))
126
139
  end
@@ -8,7 +8,7 @@ module IsoDoc
8
8
  isocode = b.at(ns("./docidentifier")).text
9
9
  isodate = b.at(ns("./date[@type = 'published']"))
10
10
  reference = docid_l10n(isocode)
11
- reference += ": #{isodate.text}" if isodate
11
+ reference += ": #{date_range(isodate)}" if isodate
12
12
  reference
13
13
  end
14
14
 
@@ -48,15 +48,6 @@ module IsoDoc
48
48
  end
49
49
  end
50
50
 
51
- def ref_entry(list, b, ordinal, _bibliography)
52
- ref = b.at(ns("./ref"))
53
- para = b.at(ns("./p"))
54
- list.p **attr_code("id": ref["id"], class: "Biblio") do |r|
55
- ref_entry_code(r, ordinal, ref.text.gsub(/[\[\]]/, ""))
56
- para.children.each { |n| parse(n, r) }
57
- end
58
- end
59
-
60
51
  # TODO generate formatted ref if not present
61
52
  def noniso_bibitem(list, b, ordinal, bibliography)
62
53
  list.p **attr_code("id": b["id"], class: "Biblio") do |r|
@@ -66,13 +57,15 @@ module IsoDoc
66
57
  else
67
58
  r << "#{iso_bibitem_ref_code(b)}, "
68
59
  end
69
- b.at(ns("./formattedref")).children.each { |n| parse(n, r) }
60
+ b.at(ns("./formattedref"))&.children&.each { |n| parse(n, r) }
70
61
  end
71
62
  end
72
63
 
73
64
  ISO_PUBLISHER_XPATH =
74
65
  "./contributor[xmlns:role/@type = 'publisher']/"\
75
- "organization[name = 'ISO' or xmlns:name = 'IEC']".freeze
66
+ "organization[abbreviation = 'ISO' or xmlns:abbreviation = 'IEC' or "\
67
+ "xmlns:name = 'International Organization for Standardization' or "\
68
+ "xmlns:name = 'International Electrotechnical Commission']".freeze
76
69
 
77
70
  def split_bibitems(f)
78
71
  iso_bibitem = []
@@ -142,8 +135,13 @@ module IsoDoc
142
135
  end
143
136
  end
144
137
 
145
- def format_ref(ref, isopub)
146
- return "ISO #{ref}" if isopub
138
# Formats the cross-reference label for a bibliographic item.
#
# When +isopub+ is falsy: a purely numeric +ref+ is wrapped in square
# brackets, anything else is returned unchanged.
# When +isopub+ is truthy: +ref+ is returned as is if there is no +date+ or
# the date's `from` text is "--"; otherwise ": " and the formatted date
# (date_range) are appended.
def format_ref(ref, isopub, date)
  unless isopub
    bare_number = /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
    return bare_number ? "[#{ref}]" : ref
  end
  return ref unless date
  start = date.at(ns("./from"))
  return ref if start&.text == "--"
  ref + ": #{date_range(date)}"
end
@@ -151,17 +149,17 @@ module IsoDoc
151
149
  def reference_names(ref)
152
150
  isopub = ref.at(ns(ISO_PUBLISHER_XPATH))
153
151
  docid = ref.at(ns("./docidentifier"))
154
- return ref_names(ref) unless docid
152
+ # return ref_names(ref) unless docid
155
153
  date = ref.at(ns("./date[@type = 'published']"))
156
- reference = format_ref(docid_l10n(docid.text), isopub)
157
- reference += ": #{date.text}" if date && isopub && date != "--"
154
+ reference = format_ref(docid_l10n(docid.text), isopub, date)
155
+ # reference += ": #{date_range(date)}" if date && isopub && from.text != "--"
158
156
  @anchors[ref["id"]] = { xref: reference }
159
157
  end
160
158
 
161
- def ref_names(ref)
162
- linkend = ref.text
163
- linkend.gsub!(/[\[\]]/, "") unless /^\[\d+\]$/.match? linkend
164
- @anchors[ref["id"]] = { xref: linkend }
165
- end
159
+ # def ref_names(ref)
160
+ # linkend = ref.text
161
+ # linkend.gsub!(/[\[\]]/, "") unless /^\[\d+\]$/.match? linkend
162
+ # @anchors[ref["id"]] = { xref: linkend }
163
+ # end
166
164
  end
167
165
  end