html2doc 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.hound.yml +3 -1
- data/.rubocop.yml +4 -6
- data/README.adoc +2 -1
- data/bin/html2doc +1 -1
- data/bin/rspec +1 -1
- data/html2doc.gemspec +8 -9
- data/lib/html2doc/base.rb +4 -3
- data/lib/html2doc/lists.rb +24 -27
- data/lib/html2doc/math.rb +35 -18
- data/lib/html2doc/mime.rb +16 -17
- data/lib/html2doc/notes.rb +12 -9
- data/lib/html2doc/version.rb +1 -1
- data/lib/html2doc.rb +0 -3
- data/spec/html2doc_spec.rb +167 -121
- metadata +42 -42
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 64cb262d3176610f0887cc69bab66fa2a3a7ca8445f8ad493d9d65c455a091d6
         | 
| 4 | 
            +
              data.tar.gz: 468dc7a8fb687cdbf6db1497cf9d9b5e164687b7d460a5eac1fb983b4673672b
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 2a47bbe5df7ae0767ad2f4ccf52c1f96b8e27fc32d08b3b7b25e5051a3d229b29b9852a51c052a701990f9be6dbf0efc97795ea0c0ff4b3745b63f5a0c7adb4f
         | 
| 7 | 
            +
              data.tar.gz: c4c10a84141889d820fd8d2afc273122b28372794edd9fdb3e60aee28773350d3e545f2a3efb75c4d28eff350d367b020d01d6e5cc2874a957ca612124e78fd4
         | 
    
        data/.github/workflows/rake.yml
    CHANGED
    
    | @@ -16,19 +16,9 @@ jobs: | |
| 16 16 | 
             
                strategy:
         | 
| 17 17 | 
             
                  fail-fast: false
         | 
| 18 18 | 
             
                  matrix:
         | 
| 19 | 
            -
                    ruby: [ ' | 
| 19 | 
            +
                    ruby: [ '3.0', '2.7', '2.6', '2.5' ]
         | 
| 20 20 | 
             
                    os: [ ubuntu-latest, windows-latest, macos-latest ]
         | 
| 21 21 | 
             
                    experimental: [ false ]
         | 
| 22 | 
            -
                    include:
         | 
| 23 | 
            -
                      - ruby: '3.0'
         | 
| 24 | 
            -
                        os: 'ubuntu-latest'
         | 
| 25 | 
            -
                        experimental: true
         | 
| 26 | 
            -
                      - ruby: '3.0'
         | 
| 27 | 
            -
                        os: 'windows-latest'
         | 
| 28 | 
            -
                        experimental: true
         | 
| 29 | 
            -
                      - ruby: '3.0'
         | 
| 30 | 
            -
                        os: 'macos-latest'
         | 
| 31 | 
            -
                        experimental: true
         | 
| 32 22 | 
             
                steps:
         | 
| 33 23 | 
             
                  - uses: actions/checkout@v2
         | 
| 34 24 | 
             
                    with:
         | 
    
        data/.hound.yml
    CHANGED
    
    
    
        data/.rubocop.yml
    CHANGED
    
    | @@ -1,12 +1,10 @@ | |
| 1 | 
            -
            #  | 
| 2 | 
            -
            # https://github.com/ | 
| 3 | 
            -
            # All project-specific additions and overrides should be specified in this file.
         | 
| 1 | 
            +
            # Auto-generated by Cimas: Do not edit it manually!
         | 
| 2 | 
            +
            # See https://github.com/metanorma/cimas
         | 
| 4 3 | 
             
            inherit_from:
         | 
| 5 4 | 
             
              - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
         | 
| 6 5 |  | 
| 7 6 | 
             
            # local repo-specific modifications
         | 
| 7 | 
            +
            # ...
         | 
| 8 8 |  | 
| 9 9 | 
             
            AllCops:
         | 
| 10 | 
            -
               | 
| 11 | 
            -
              StyleGuideCopsOnly: false
         | 
| 12 | 
            -
              TargetRubyVersion: 2.4
         | 
| 10 | 
            +
              TargetRubyVersion: 2.5
         | 
    
        data/README.adoc
    CHANGED
    
    | @@ -58,11 +58,12 @@ There are two other Microsoft Word vendors in the Ruby ecosystem. | |
| 58 58 | 
             
            --
         | 
| 59 59 | 
             
            require "html2doc"
         | 
| 60 60 |  | 
| 61 | 
            -
            Html2Doc.process(result, filename: filename, stylesheet: stylesheet, header_filename: header_filename, dir: dir, asciimathdelims: asciimathdelims, liststyles: liststyles)
         | 
| 61 | 
            +
            Html2Doc.process(result, filename: filename, imagedir: imagedir, stylesheet: stylesheet, header_filename: header_filename, dir: dir, asciimathdelims: asciimathdelims, liststyles: liststyles)
         | 
| 62 62 | 
             
            --
         | 
| 63 63 |  | 
| 64 64 | 
             
            result:: is the Html document to be converted into Word, as a string.
         | 
| 65 65 | 
             
            filename:: is the name the document is to be saved as, without a file suffix
         | 
| 66 | 
            +
            imagedir:: base directory for local image file names in source XML
         | 
| 66 67 | 
             
            stylesheet:: is the full path filename of the CSS stylesheet for Microsoft Word-specific styles. If this is not provided, the program will use the default stylesheet included in the gem, `lib/html2doc/wordstyle.css`. The stylesheet provided must match this stylesheet; you can obtain one by saving a Word document with your desired styles to HTML, and extracting the style definitions from the HTML document header.
         | 
| 67 68 | 
             
            header_filename:: is the filename of the HTML document containing header and footer for the document, as well as footnote/endnote separators; if there is none, use nil. To generate your own such document, save a Word document with headers/footers and/or footnote/endnote separators as an HTML document; the `header.html` will be in the `{filename}.fld` folder generated along with the HTML. A sample file is available at https://github.com/metanorma/metanorma-iso/blob/master/lib/asciidoctor/iso/word/header.html
         | 
| 68 69 | 
             
            dir:: is the folder that any ancillary files (images, headers, filelist) are to be saved to. If not provided, it will be created as `{filename}_files`. Anything in the directory will be attached to the Word document; so this folder should only contain the images that accompany the document. (If the images are elsewhere on the local drive, the gem will move them into the folder. External URL images are left alone, and are not downloaded.)
         | 
    
        data/bin/html2doc
    CHANGED
    
    
    
        data/bin/rspec
    CHANGED
    
    
    
        data/html2doc.gemspec
    CHANGED
    
    | @@ -1,5 +1,4 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
            lib = File.expand_path("../lib", __FILE__)
         | 
| 1 | 
            +
            lib = File.expand_path("lib", __dir__)
         | 
| 3 2 | 
             
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         | 
| 4 3 | 
             
            require "html2doc/version"
         | 
| 5 4 |  | 
| @@ -16,23 +15,23 @@ Gem::Specification.new do |spec| | |
| 16 15 | 
             
                This gem is in active development.
         | 
| 17 16 | 
             
              DESCRIPTION
         | 
| 18 17 |  | 
| 19 | 
            -
              spec.homepage | 
| 20 | 
            -
              spec.licenses | 
| 18 | 
            +
              spec.homepage = "https://github.com/metanorma/html2doc"
         | 
| 19 | 
            +
              spec.licenses = ["CC-BY-SA-3.0", "BSD-2-Clause"]
         | 
| 21 20 |  | 
| 22 21 | 
             
              spec.bindir        = "bin"
         | 
| 23 22 | 
             
              spec.require_paths = ["lib"]
         | 
| 24 23 | 
             
              spec.files         = `git ls-files`.split("\n")
         | 
| 25 24 | 
             
              spec.test_files    = `git ls-files -- {spec}/*`.split("\n")
         | 
| 26 | 
            -
              spec.required_ruby_version = Gem::Requirement.new(">= 2. | 
| 25 | 
            +
              spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
         | 
| 27 26 |  | 
| 27 | 
            +
              spec.add_dependency "asciimath", "~> 2.0.2"
         | 
| 28 28 | 
             
              spec.add_dependency "htmlentities", "~> 4.3.4"
         | 
| 29 29 | 
             
              spec.add_dependency "image_size"
         | 
| 30 30 | 
             
              spec.add_dependency "mime-types"
         | 
| 31 | 
            -
              spec.add_dependency "nokogiri", "~> 1. | 
| 31 | 
            +
              spec.add_dependency "nokogiri", "~> 1.12"
         | 
| 32 | 
            +
              spec.add_dependency "plane1converter", "~> 0.0.1"
         | 
| 32 33 | 
             
              spec.add_dependency "thread_safe"
         | 
| 33 34 | 
             
              spec.add_dependency "uuidtools"
         | 
| 34 | 
            -
              spec.add_dependency "asciimath", "~> 2.0.2"
         | 
| 35 | 
            -
              spec.add_dependency "plane1converter", "~> 0.0.1"
         | 
| 36 35 |  | 
| 37 36 | 
             
              spec.add_development_dependency "byebug", "~> 9.1"
         | 
| 38 37 | 
             
              spec.add_development_dependency "equivalent-xml", "~> 0.6"
         | 
| @@ -40,8 +39,8 @@ Gem::Specification.new do |spec| | |
| 40 39 | 
             
              spec.add_development_dependency "guard-rspec", "~> 4.7"
         | 
| 41 40 | 
             
              spec.add_development_dependency "rake", "~> 12.0"
         | 
| 42 41 | 
             
              spec.add_development_dependency "rspec", "~> 3.6"
         | 
| 42 | 
            +
              spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
         | 
| 43 43 | 
             
              spec.add_development_dependency "rubocop", "~> 1.5.2"
         | 
| 44 44 | 
             
              spec.add_development_dependency "simplecov", "~> 0.15"
         | 
| 45 45 | 
             
              spec.add_development_dependency "timecop", "~> 0.9"
         | 
| 46 | 
            -
              spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
         | 
| 47 46 | 
             
            end
         | 
    
        data/lib/html2doc/base.rb
    CHANGED
    
    | @@ -53,7 +53,7 @@ module Html2Doc | |
| 53 53 |  | 
| 54 54 | 
             
              def self.cleanup(docxml, hash)
         | 
| 55 55 | 
             
                namespace(docxml.root)
         | 
| 56 | 
            -
                image_cleanup(docxml, hash[:dir1],  | 
| 56 | 
            +
                image_cleanup(docxml, hash[:dir1], hash[:imagedir])
         | 
| 57 57 | 
             
                mathml_to_ooml(docxml)
         | 
| 58 58 | 
             
                lists(docxml, hash[:liststyles])
         | 
| 59 59 | 
             
                footnotes(docxml)
         | 
| @@ -106,6 +106,7 @@ module Html2Doc | |
| 106 106 | 
             
                doc.gsub!(%r{></o:lock>}, "/>")
         | 
| 107 107 | 
             
                doc.gsub!(%r{></v:imagedata>}, "/>")
         | 
| 108 108 | 
             
                doc.gsub!(%r{></w:wrap>}, "/>")
         | 
| 109 | 
            +
                doc.gsub!(%r{<(/)?m:(span|em)\b}, "<\\1\\2")
         | 
| 109 110 | 
             
                doc.gsub!(%r{&tab;|&tab;},
         | 
| 110 111 | 
             
                          '<span style="mso-tab-count:1">  </span>')
         | 
| 111 112 | 
             
                doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
         | 
| @@ -127,7 +128,7 @@ module Html2Doc | |
| 127 128 | 
             
                <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
         | 
| 128 129 | 
             
              XML
         | 
| 129 130 |  | 
| 130 | 
            -
              def self.define_head1(docxml,  | 
| 131 | 
            +
              def self.define_head1(docxml, _dir)
         | 
| 131 132 | 
             
                docxml.xpath("//*[local-name() = 'head']").each do |h|
         | 
| 132 133 | 
             
                  h.children.first.add_previous_sibling <<~XML
         | 
| 133 134 | 
             
                    #{PRINT_VIEW}
         | 
| @@ -147,7 +148,7 @@ module Html2Doc | |
| 147 148 | 
             
                end
         | 
| 148 149 | 
             
              end
         | 
| 149 150 |  | 
| 150 | 
            -
              def self.stylesheet( | 
| 151 | 
            +
              def self.stylesheet(_filename, _header_filename, fn)
         | 
| 151 152 | 
             
                (fn.nil? || fn.empty?) and
         | 
| 152 153 | 
             
                  fn = File.join(File.dirname(__FILE__), "wordstyle.css")
         | 
| 153 154 | 
             
                stylesheet = File.read(fn, encoding: "UTF-8")
         | 
    
        data/lib/html2doc/lists.rb
    CHANGED
    
    | @@ -2,30 +2,29 @@ require "uuidtools" | |
| 2 2 | 
             
            require "asciimath"
         | 
| 3 3 | 
             
            require "htmlentities"
         | 
| 4 4 | 
             
            require "nokogiri"
         | 
| 5 | 
            -
            require "uuidtools"
         | 
| 6 5 |  | 
| 7 6 | 
             
            module Html2Doc
         | 
| 8 | 
            -
              def self.style_list( | 
| 7 | 
            +
              def self.style_list(elem, level, liststyle, listnumber)
         | 
| 9 8 | 
             
                return unless liststyle
         | 
| 10 9 |  | 
| 11 | 
            -
                if  | 
| 12 | 
            -
                   | 
| 10 | 
            +
                if elem["style"]
         | 
| 11 | 
            +
                  elem["style"] += ";"
         | 
| 13 12 | 
             
                else
         | 
| 14 | 
            -
                   | 
| 13 | 
            +
                  elem["style"] = ""
         | 
| 15 14 | 
             
                end
         | 
| 16 | 
            -
                 | 
| 15 | 
            +
                elem["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
         | 
| 17 16 | 
             
              end
         | 
| 18 17 |  | 
| 19 | 
            -
              def self.list_add1( | 
| 18 | 
            +
              def self.list_add1(elem, liststyles, listtype, level)
         | 
| 20 19 | 
             
                if %i[ul ol].include? listtype
         | 
| 21 | 
            -
                  list_add( | 
| 20 | 
            +
                  list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
         | 
| 22 21 | 
             
                           liststyles, :ul, level + 1)
         | 
| 23 | 
            -
                  list_add( | 
| 22 | 
            +
                  list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
         | 
| 24 23 | 
             
                           liststyles, :ol, level + 1)
         | 
| 25 24 | 
             
                else
         | 
| 26 | 
            -
                  list_add( | 
| 25 | 
            +
                  list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
         | 
| 27 26 | 
             
                           liststyles, listtype, level + 1)
         | 
| 28 | 
            -
                  list_add( | 
| 27 | 
            +
                  list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
         | 
| 29 28 | 
             
                           liststyles, listtype, level + 1)
         | 
| 30 29 | 
             
                end
         | 
| 31 30 | 
             
              end
         | 
| @@ -47,45 +46,43 @@ module Html2Doc | |
| 47 46 | 
             
                end
         | 
| 48 47 | 
             
              end
         | 
| 49 48 |  | 
| 50 | 
            -
              def self.list2para( | 
| 51 | 
            -
                return if  | 
| 49 | 
            +
              def self.list2para(list)
         | 
| 50 | 
            +
                return if list.xpath("./li").empty?
         | 
| 52 51 |  | 
| 53 | 
            -
                 | 
| 54 | 
            -
                 | 
| 55 | 
            -
                 | 
| 56 | 
            -
                 | 
| 52 | 
            +
                list.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
         | 
| 53 | 
            +
                list.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
         | 
| 54 | 
            +
                list.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
         | 
| 55 | 
            +
                list.xpath("./li").each do |l|
         | 
| 57 56 | 
             
                  l.name = "p"
         | 
| 58 57 | 
             
                  l["class"] ||= "MsoListParagraphCxSpMiddle"
         | 
| 59 58 | 
             
                  l&.first_element_child&.name == "p" and
         | 
| 60 59 | 
             
                    l.first_element_child.replace(l.first_element_child.children)
         | 
| 61 60 | 
             
                end
         | 
| 62 | 
            -
                 | 
| 61 | 
            +
                list.replace(list.children)
         | 
| 63 62 | 
             
              end
         | 
| 64 63 |  | 
| 65 64 | 
             
              TOPLIST = "[not(ancestor::ul) and not(ancestor::ol)]".freeze
         | 
| 66 65 |  | 
| 67 | 
            -
              def self.lists1(docxml, liststyles,  | 
| 68 | 
            -
                case  | 
| 66 | 
            +
              def self.lists1(docxml, liststyles, style)
         | 
| 67 | 
            +
                case style
         | 
| 69 68 | 
             
                when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
         | 
| 70 69 | 
             
                                       liststyles, :ul, 1)
         | 
| 71 70 | 
             
                when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
         | 
| 72 71 | 
             
                                       liststyles, :ol, 1)
         | 
| 73 72 | 
             
                else
         | 
| 74 | 
            -
                  list_add(docxml.xpath("//ol[@class = '#{ | 
| 75 | 
            -
                                        "//ul[@class = '#{ | 
| 76 | 
            -
                  liststyles,  | 
| 73 | 
            +
                  list_add(docxml.xpath("//ol[@class = '#{style}']#{TOPLIST} | "\
         | 
| 74 | 
            +
                                        "//ul[@class = '#{style}']#{TOPLIST}"),
         | 
| 75 | 
            +
                  liststyles, style, 1)
         | 
| 77 76 | 
             
                end
         | 
| 78 77 | 
             
              end
         | 
| 79 78 |  | 
| 80 79 | 
             
              def self.lists_unstyled(docxml, liststyles)
         | 
| 81 | 
            -
                 | 
| 80 | 
            +
                liststyles.has_key?(:ul) and
         | 
| 82 81 | 
             
                  list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
         | 
| 83 82 | 
             
                           liststyles, :ul, 1)
         | 
| 84 | 
            -
                 | 
| 85 | 
            -
                if liststyles.has_key?(:ol)
         | 
| 83 | 
            +
                liststyles.has_key?(:ol) and
         | 
| 86 84 | 
             
                  list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
         | 
| 87 85 | 
             
                           liststyles, :ul, 1)
         | 
| 88 | 
            -
                end
         | 
| 89 86 | 
             
                docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
         | 
| 90 87 | 
             
                  l.delete("seen")
         | 
| 91 88 | 
             
                end
         | 
    
        data/lib/html2doc/math.rb
    CHANGED
    
    | @@ -25,13 +25,18 @@ module Html2Doc | |
| 25 25 |  | 
| 26 26 | 
             
                m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
         | 
| 27 27 | 
             
                m.each_slice(4).map.with_index do |(*a), i|
         | 
| 28 | 
            -
                  i  | 
| 29 | 
            -
                    warn "MathML #{i} of #{(m.size / 4).floor}"
         | 
| 28 | 
            +
                  progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
         | 
| 30 29 | 
             
                  a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
         | 
| 31 30 | 
             
                  a.size > 1 ? a[0] + a[2] : a[0]
         | 
| 32 31 | 
             
                end.join
         | 
| 33 32 | 
             
              end
         | 
| 34 33 |  | 
| 34 | 
            +
              def self.progress_conv(idx, step, total, threshold, msg)
         | 
| 35 | 
            +
                return unless (idx % step).zero? && total > threshold && idx.positive?
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                warn "#{msg} #{idx} of #{total}"
         | 
| 38 | 
            +
              end
         | 
| 39 | 
            +
             | 
| 35 40 | 
             
              def self.unwrap_accents(doc)
         | 
| 36 41 | 
             
                doc.xpath("//*[@accent = 'true']").each do |x|
         | 
| 37 42 | 
             
                  x.elements.length > 1 or next
         | 
| @@ -69,18 +74,20 @@ module Html2Doc | |
| 69 74 | 
             
                math
         | 
| 70 75 | 
             
              end
         | 
| 71 76 |  | 
| 77 | 
            +
              HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
         | 
| 78 | 
            +
             | 
| 72 79 | 
             
              def self.unitalic(math)
         | 
| 73 80 | 
             
                math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
         | 
| 74 | 
            -
                  x.wrap("<span style='font-style:normal;'></span>")
         | 
| 81 | 
            +
                  x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
         | 
| 75 82 | 
             
                end
         | 
| 76 83 | 
             
                math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
         | 
| 77 | 
            -
                  x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
         | 
| 84 | 
            +
                  x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
         | 
| 78 85 | 
             
                end
         | 
| 79 86 | 
             
                math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
         | 
| 80 | 
            -
                  x.wrap("<span class='nostem'><em></em></span>")
         | 
| 87 | 
            +
                  x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
         | 
| 81 88 | 
             
                end
         | 
| 82 89 | 
             
                math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
         | 
| 83 | 
            -
                  x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
         | 
| 90 | 
            +
                  x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
         | 
| 84 91 | 
             
                end
         | 
| 85 92 | 
             
                math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
         | 
| 86 93 | 
             
                  to_plane1(x, :monospace)
         | 
| @@ -128,20 +135,30 @@ module Html2Doc | |
| 128 135 | 
             
                docnamespaces = docxml.collect_namespaces
         | 
| 129 136 | 
             
                m = docxml.xpath("//*[local-name() = 'math']")
         | 
| 130 137 | 
             
                m.each_with_index do |x, i|
         | 
| 131 | 
            -
                  i  | 
| 132 | 
            -
             | 
| 133 | 
            -
                  element = ooxml_cleanup(x, docnamespaces)
         | 
| 134 | 
            -
                  doc = Nokogiri::XML::Document::new
         | 
| 135 | 
            -
                  doc.root = element
         | 
| 136 | 
            -
                  ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
         | 
| 137 | 
            -
                    .gsub(/<\?[^>]+>\s*/, "")
         | 
| 138 | 
            -
                    .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
         | 
| 139 | 
            -
                    .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
         | 
| 140 | 
            -
                  ooxml = uncenter(x, ooxml)
         | 
| 141 | 
            -
                  x.swap(ooxml)
         | 
| 138 | 
            +
                  progress_conv(i, 100, m.size, 500, "Math OOXML")
         | 
| 139 | 
            +
                  mathml_to_ooml1(x, docnamespaces)
         | 
| 142 140 | 
             
                end
         | 
| 143 141 | 
             
              end
         | 
| 144 142 |  | 
| 143 | 
            +
              # We need span and em not to be namespaced. Word can't deal with explicit 
         | 
| 144 | 
            +
              # namespaces.
         | 
| 145 | 
            +
              # We will end up stripping them out again under Nokogiri 1.11, which correctly
         | 
| 146 | 
            +
              # insists on inheriting namespace from parent.
         | 
| 147 | 
            +
              def self.ooml_clean(xml)
         | 
| 148 | 
            +
                xml.to_s
         | 
| 149 | 
            +
                  .gsub(/<\?[^>]+>\s*/, "")
         | 
| 150 | 
            +
                  .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
         | 
| 151 | 
            +
                  .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
         | 
| 152 | 
            +
              end
         | 
| 153 | 
            +
             | 
| 154 | 
            +
              def self.mathml_to_ooml1(xml, docnamespaces)
         | 
| 155 | 
            +
                doc = Nokogiri::XML::Document::new
         | 
| 156 | 
            +
                doc.root = ooxml_cleanup(xml, docnamespaces)
         | 
| 157 | 
            +
                  ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
         | 
| 158 | 
            +
                ooxml = uncenter(xml, ooxml)
         | 
| 159 | 
            +
                xml.swap(ooxml)
         | 
| 160 | 
            +
              end
         | 
| 161 | 
            +
             | 
| 145 162 | 
             
              # escape space as &#x32;; we are removing any spaces generated by
         | 
| 146 163 | 
             
              # XML indentation
         | 
| 147 164 | 
             
              def self.esc_space(xml)
         | 
| @@ -157,7 +174,7 @@ module Html2Doc | |
| 157 174 | 
             
              # left/right if parent is so tagged
         | 
| 158 175 | 
             
              def self.uncenter(math, ooxml)
         | 
| 159 176 | 
             
                alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
         | 
| 160 | 
            -
             | 
| 177 | 
            +
                                    "local-name() = 'div' or local-name() = 'td']/@style")
         | 
| 161 178 | 
             
                return ooxml unless alignnode && (math.next == nil && math.previous == nil)
         | 
| 162 179 |  | 
| 163 180 | 
             
                %w(left right).each do |dir|
         | 
    
        data/lib/html2doc/mime.rb
    CHANGED
    
    | @@ -43,7 +43,7 @@ module Html2Doc | |
| 43 43 | 
             
              def self.mime_type(item)
         | 
| 44 44 | 
             
                types = MIME::Types.type_for(item)
         | 
| 45 45 | 
             
                type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
         | 
| 46 | 
            -
                type = type  | 
| 46 | 
            +
                type = %(#{type} charset="utf-8") if /^text/.match(type) && types
         | 
| 47 47 | 
             
                type
         | 
| 48 48 | 
             
              end
         | 
| 49 49 |  | 
| @@ -77,14 +77,14 @@ module Html2Doc | |
| 77 77 | 
             
              end
         | 
| 78 78 |  | 
| 79 79 | 
             
              # max width for Word document is 400, max height is 680
         | 
| 80 | 
            -
              def self.image_resize( | 
| 81 | 
            -
                 | 
| 82 | 
            -
                s = [ | 
| 83 | 
            -
                s =  | 
| 84 | 
            -
                return [nil, nil] if  | 
| 85 | 
            -
             | 
| 86 | 
            -
                s[1] = s[0] *  | 
| 87 | 
            -
                s[0] = s[1] *  | 
| 80 | 
            +
              def self.image_resize(img, path, maxheight, maxwidth)
         | 
| 81 | 
            +
                realsize = ImageSize.path(path).size
         | 
| 82 | 
            +
                s = [img["width"].to_i, img["height"].to_i]
         | 
| 83 | 
            +
                s = realsize if s[0].zero? && s[1].zero?
         | 
| 84 | 
            +
                return [nil, nil] if realsize.nil? || realsize[0].nil? || realsize[1].nil?
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                s[1] = s[0] * realsize[1] / realsize[0] if s[1].zero? && !s[0].zero?
         | 
| 87 | 
            +
                s[0] = s[1] * realsize[0] / realsize[1] if s[0].zero? && !s[1].zero?
         | 
| 88 88 | 
             
                s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
         | 
| 89 89 | 
             
                s = [maxwidth, (s[1] * maxwidth / s[0]).ceil] if s[0] > maxwidth
         | 
| 90 90 | 
             
                s
         | 
| @@ -100,16 +100,18 @@ module Html2Doc | |
| 100 100 | 
             
                warn "#{src}: SVG not supported" if /\.svg$/i.match?(src)
         | 
| 101 101 | 
             
              end
         | 
| 102 102 |  | 
| 103 | 
            +
              def self.localname(src, localdir)
         | 
| 104 | 
            +
                %r{^([A-Z]:)?/}.match?(src) ? src : File.join(localdir, src)
         | 
| 105 | 
            +
              end
         | 
| 106 | 
            +
             | 
| 103 107 | 
             
              # only processes locally stored images
         | 
| 104 108 | 
             
              def self.image_cleanup(docxml, dir, localdir)
         | 
| 105 109 | 
             
                docxml.traverse do |i|
         | 
| 106 110 | 
             
                  next unless i.element? && %w(img v:imagedata).include?(i.name)
         | 
| 107 | 
            -
                   | 
| 108 | 
            -
                  next if /^http/.match i["src"]
         | 
| 111 | 
            +
                  next if /^http/.match? i["src"]
         | 
| 109 112 | 
             
                  next if %r{^data:(image|application)/[^;]+;base64}.match? i["src"]
         | 
| 110 113 |  | 
| 111 | 
            -
                  local_filename =  | 
| 112 | 
            -
                    File.join(localdir, i["src"])
         | 
| 114 | 
            +
                  local_filename = localname(i["src"], localdir)
         | 
| 113 115 | 
             
                  new_filename = "#{mkuuid}#{File.extname(i['src'])}"
         | 
| 114 116 | 
             
                  FileUtils.cp local_filename, File.join(dir, new_filename)
         | 
| 115 117 | 
             
                  i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
         | 
| @@ -130,12 +132,9 @@ module Html2Doc | |
| 130 132 | 
             
                if a.size == 2 && !(/ src="https?:/.match a[1]) &&
         | 
| 131 133 | 
             
                    !(%r{ src="data:(image|application)/[^;]+;base64}.match a[1])
         | 
| 132 134 | 
             
                  m = / src=['"](?<src>[^"']+)['"]/.match a[1]
         | 
| 133 | 
            -
                  #warnsvg(m[:src])
         | 
| 134 135 | 
             
                  m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
         | 
| 135 136 | 
             
                  new_filename = "#{mkuuid}.#{m2[:suffix]}"
         | 
| 136 | 
            -
                   | 
| 137 | 
            -
                    File.join(localdir, m[:src])
         | 
| 138 | 
            -
                  FileUtils.cp old_filename, File.join(dir, new_filename)
         | 
| 137 | 
            +
                  FileUtils.cp localname(m[:src], localdir), File.join(dir, new_filename)
         | 
| 139 138 | 
             
                  a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='cid:#{new_filename}'")
         | 
| 140 139 | 
             
                end
         | 
| 141 140 | 
             
                a.join
         | 
    
        data/lib/html2doc/notes.rb
    CHANGED
    
    | @@ -56,19 +56,22 @@ module Html2Doc | |
| 56 56 |  | 
| 57 57 | 
             
                set_footnote_link_attrs(elem, idx)
         | 
| 58 58 | 
             
                if elem.at("./span[@class = 'MsoFootnoteReference']")
         | 
| 59 | 
            -
                  elem | 
| 60 | 
            -
             | 
| 61 | 
            -
                      c.replace(FN)
         | 
| 62 | 
            -
                    else
         | 
| 63 | 
            -
                      c.wrap("<span class='MsoFootnoteReference'></span>")
         | 
| 64 | 
            -
                    end
         | 
| 65 | 
            -
                  end
         | 
| 66 | 
            -
                else
         | 
| 67 | 
            -
                  elem.children = FN
         | 
| 59 | 
            +
                  process_footnote_link1(elem)
         | 
| 60 | 
            +
                else elem.children = FN
         | 
| 68 61 | 
             
                end
         | 
| 69 62 | 
             
                footnote << transform_footnote_text(note)
         | 
| 70 63 | 
             
              end
         | 
| 71 64 |  | 
| 65 | 
            +
              def self.process_footnote_link1(elem)
         | 
| 66 | 
            +
                elem.children.each do |c|
         | 
| 67 | 
            +
                  if c.name == "span" && c["class"] == "MsoFootnoteReference"
         | 
| 68 | 
            +
                    c.replace(FN)
         | 
| 69 | 
            +
                  else
         | 
| 70 | 
            +
                    c.wrap("<span class='MsoFootnoteReference'></span>")
         | 
| 71 | 
            +
                  end
         | 
| 72 | 
            +
                end
         | 
| 73 | 
            +
              end
         | 
| 74 | 
            +
             | 
| 72 75 | 
             
              def self.transform_footnote_text(note)
         | 
| 73 76 | 
             
                note["id"] = ""
         | 
| 74 77 | 
             
                note.xpath(".//div").each { |div| div.replace(div.children) }
         | 
    
        data/lib/html2doc/version.rb
    CHANGED
    
    
    
        data/lib/html2doc.rb
    CHANGED