html2doc 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0f88d61ea0bbf6baa2a355ec6d5abfd35399c91e
4
- data.tar.gz: 1f135f2907b6a7d51196dd6774fc0b81b2e51e34
3
+ metadata.gz: 326527587765630ec2b376716c48428cfa946d94
4
+ data.tar.gz: 1690c23c76ce1cea7971d8f399544c5dea36166c
5
5
  SHA512:
6
- metadata.gz: 69b5db648e40a37723b5591caa28c3f3131b40ce777245b80ff578cb80de7e183dbdfad677e13dccbdcb5aa2327520f386d52b975ea930d676f250113c2163a3
7
- data.tar.gz: eca2bc4cb73d72345b362ec2f3eaa6389073aba39c77d20fbe2bd413c73347b10f2c6f4a9b2e216016a3c1f74cf4ed461b6f812ba525d3b44c9083ed3c49684a
6
+ metadata.gz: 0405adec203661a00d411a0c14458493eb58992a5c052f43c4470f7ad8d9943207b8d917ef91eafc4dfa4b79f2dad4db6ff381568682149d19d001cb1aad775b
7
+ data.tar.gz: 6cae33b7a484bf5c41ad55c9355aeebd89fef0a95c59aec68771dac95af64b151a303e2876c8eb31aef43ea9ddc143c5e770d86e8c86d509c4e1a06c13ebc59e
@@ -352,8 +352,18 @@ Style/TrailingCommaInArguments:
352
352
  - no_comma
353
353
  Enabled: true
354
354
 
355
- Style/TrailingCommaInLiteral:
356
- Description: 'Checks for trailing comma in array and hash literals.'
355
+ Style/TrailingCommaInArrayLiteral:
356
+ Description: 'Checks for trailing comma in array literals.'
357
+ StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
358
+ EnforcedStyleForMultiline: comma
359
+ SupportedStylesForMultiline:
360
+ - comma
361
+ - consistent_comma
362
+ - no_comma
363
+ Enabled: true
364
+
365
+ Style/TrailingCommaInHashLiteral:
366
+ Description: 'Checks for trailing comma in hash literals.'
357
367
  StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
358
368
  EnforcedStyleForMultiline: comma
359
369
  SupportedStylesForMultiline:
@@ -398,6 +408,13 @@ Layout/AlignParameters:
398
408
  StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-double-indent'
399
409
  Enabled: false
400
410
 
411
+ Layout/ConditionPosition:
412
+ Description: >-
413
+ Checks for condition placed in a confusing position relative to
414
+ the keyword.
415
+ StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#same-line-condition'
416
+ Enabled: false
417
+
401
418
  Layout/DotPosition:
402
419
  Description: 'Checks the position of the dot in multi-line method calls.'
403
420
  StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#consistent-multi-line-chains'
@@ -450,13 +467,6 @@ Lint/CircularArgumentReference:
450
467
  Description: "Don't refer to the keyword argument in the default value."
451
468
  Enabled: false
452
469
 
453
- Lint/ConditionPosition:
454
- Description: >-
455
- Checks for condition placed in a confusing position relative to
456
- the keyword.
457
- StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#same-line-condition'
458
- Enabled: false
459
-
460
470
  Lint/DeprecatedClassMethods:
461
471
  Description: 'Check for deprecated class method calls.'
462
472
  Enabled: false
@@ -523,7 +533,7 @@ Lint/UnderscorePrefixedVariableName:
523
533
  Description: 'Do not use prefix `_` for a variable that is used.'
524
534
  Enabled: false
525
535
 
526
- Lint/UnneededDisable:
536
+ Lint/UnneededCopDisableDirective:
527
537
  Description: >-
528
538
  Checks for rubocop:disable comments that can be removed.
529
539
  Note: this cop is not disabled when disabling all cops.
@@ -9,7 +9,7 @@ Gem to convert an HTML document into a Word document (.doc) format. This is inte
9
9
 
10
10
  This gem originated out of https://github.com/riboseinc/asciidoctor-iso, which creates a Word document from an automatically generated HTML document (created in turn by processing Asciidoc).
11
11
 
12
- This work is driven by the Word document generation procedure documented in http://sebsauvage.net/wiki/doku.php?id=word_document_generation
12
+ This work is driven by the Word document generation procedure documented in http://sebsauvage.net/wiki/doku.php?id=word_document_generation. For more on the approach taken, and on alternative approaches, see https://github.com/riboseinc/html2doc/wiki/Why-not-docx%3F
13
13
 
14
14
  The gem currently does the following:
15
15
 
@@ -9,14 +9,20 @@ module Html2Doc
9
9
  def self.process(result, hash)
10
10
  hash[:dir1] = create_dir(hash[:filename], hash[:dir])
11
11
  result = process_html(result, hash)
12
- hash[:header_file].nil? ||
13
- system("cp #{hash[:header_file]} #{hash[:dir1]}/header.html")
12
+ process_header(hash[:header_file], hash)
14
13
  generate_filelist(hash[:filename], hash[:dir1])
15
14
  File.open("#{hash[:filename]}.htm", "w") { |f| f.write(result) }
16
15
  mime_package result, hash[:filename], hash[:dir1]
17
16
  rm_temp_files(hash[:filename], hash[:dir], hash[:dir1])
18
17
  end
19
18
 
19
+ def self.process_header(headerfile, hash)
20
+ return if headerfile.nil?
21
+ doc = File.read(headerfile, encoding: "utf-8")
22
+ doc = header_image_cleanup(doc, hash[:dir1], hash[:filename])
23
+ File.open("#{hash[:dir1]}/header.html", "w") { |f| f.write(doc) }
24
+ end
25
+
20
26
  def self.create_dir(filename, dir)
21
27
  return dir if dir
22
28
  dir = "#{filename}_files"
@@ -84,6 +84,27 @@ module Html2Doc
84
84
  docxml
85
85
  end
86
86
 
87
+ # do not parse the header through Nokogiri, since it will contain
88
+ # non-XML like <![if !supportFootnotes]>
89
+ def self.header_image_cleanup(doc, dir, filename)
90
+ doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
91
+ header_image_cleanup1(a, dir, filename)
92
+ end.join
93
+ end
94
+
95
+ def self.header_image_cleanup1(a, dir, filename)
96
+ if a.size == 2
97
+ matched = / src=['"](?<src>[^"']+)['"]/.match a[1]
98
+ matched2 = /\.(?<suffix>\S+)$/.match matched[:src]
99
+ uuid = UUIDTools::UUID.random_create.to_s
100
+ new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}.#{matched2[:suffix]}"
101
+ dest_filename = File.join(dir, "#{uuid}.#{matched2[:suffix]}")
102
+ system "cp #{matched[:src]} #{dest_filename}"
103
+ a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_full_filename}'")
104
+ end
105
+ a.join
106
+ end
107
+
87
108
  def self.generate_filelist(filename, dir)
88
109
  File.open(File.join(dir, "filelist.xml"), "w") do |f|
89
110
  f.write %{<xml xmlns:o="urn:schemas-microsoft-com:office:office">
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "0.7.0".freeze
2
+ VERSION = "0.7.1".freeze
3
3
  end
@@ -364,6 +364,12 @@ RSpec.describe Html2Doc do
364
364
  OUTPUT
365
365
  end
366
366
 
367
+ it "processes a header with an image" do
368
+ Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
369
+ expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).to match(%r{Content-Type: image/png})
370
+ end
371
+
372
+
367
373
  it "processes a populated document" do
368
374
  simple_body = "<h1>Hello word!</h1>
369
375
  <div>This is a very simple document</div>"
@@ -407,7 +413,7 @@ RSpec.describe Html2Doc do
407
413
  to match_fuzzy(<<~OUTPUT)
408
414
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
409
415
  #{word_body('
410
- <div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"></m:jc></m:oMathParaPr><m:oMath>
416
+ <div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"/></m:oMathParaPr><m:oMath>
411
417
  <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub>
412
418
  <m:r><m:t>i=1</m:t></m:r>
413
419
  </m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary>
@@ -432,7 +438,7 @@ RSpec.describe Html2Doc do
432
438
  to match_fuzzy(<<~OUTPUT)
433
439
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
434
440
  #{word_body('
435
- <div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"></m:jc></m:oMathParaPr><m:oMath>
441
+ <div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"/></m:oMathParaPr><m:oMath>
436
442
  <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub>
437
443
  <m:r><m:t>i=1</m:t></m:r>
438
444
  </m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-17 00:00:00.000000000 Z
11
+ date: 2018-05-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities