html2doc 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.tb.yml +20 -10
- data/README.adoc +1 -1
- data/lib/html2doc/base.rb +8 -2
- data/lib/html2doc/mime.rb +21 -0
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +8 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 326527587765630ec2b376716c48428cfa946d94
|
4
|
+
data.tar.gz: 1690c23c76ce1cea7971d8f399544c5dea36166c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0405adec203661a00d411a0c14458493eb58992a5c052f43c4470f7ad8d9943207b8d917ef91eafc4dfa4b79f2dad4db6ff381568682149d19d001cb1aad775b
|
7
|
+
data.tar.gz: 6cae33b7a484bf5c41ad55c9355aeebd89fef0a95c59aec68771dac95af64b151a303e2876c8eb31aef43ea9ddc143c5e770d86e8c86d509c4e1a06c13ebc59e
|
data/.rubocop.tb.yml
CHANGED
@@ -352,8 +352,18 @@ Style/TrailingCommaInArguments:
|
|
352
352
|
- no_comma
|
353
353
|
Enabled: true
|
354
354
|
|
355
|
-
Style/
|
356
|
-
Description: 'Checks for trailing comma in array
|
355
|
+
Style/TrailingCommaInArrayLiteral:
|
356
|
+
Description: 'Checks for trailing comma in array literals.'
|
357
|
+
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
|
358
|
+
EnforcedStyleForMultiline: comma
|
359
|
+
SupportedStylesForMultiline:
|
360
|
+
- comma
|
361
|
+
- consistent_comma
|
362
|
+
- no_comma
|
363
|
+
Enabled: true
|
364
|
+
|
365
|
+
Style/TrailingCommaInHashLiteral:
|
366
|
+
Description: 'Checks for trailing comma in hash literals.'
|
357
367
|
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
|
358
368
|
EnforcedStyleForMultiline: comma
|
359
369
|
SupportedStylesForMultiline:
|
@@ -398,6 +408,13 @@ Layout/AlignParameters:
|
|
398
408
|
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-double-indent'
|
399
409
|
Enabled: false
|
400
410
|
|
411
|
+
Layout/ConditionPosition:
|
412
|
+
Description: >-
|
413
|
+
Checks for condition placed in a confusing position relative to
|
414
|
+
the keyword.
|
415
|
+
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#same-line-condition'
|
416
|
+
Enabled: false
|
417
|
+
|
401
418
|
Layout/DotPosition:
|
402
419
|
Description: 'Checks the position of the dot in multi-line method calls.'
|
403
420
|
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#consistent-multi-line-chains'
|
@@ -450,13 +467,6 @@ Lint/CircularArgumentReference:
|
|
450
467
|
Description: "Don't refer to the keyword argument in the default value."
|
451
468
|
Enabled: false
|
452
469
|
|
453
|
-
Lint/ConditionPosition:
|
454
|
-
Description: >-
|
455
|
-
Checks for condition placed in a confusing position relative to
|
456
|
-
the keyword.
|
457
|
-
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#same-line-condition'
|
458
|
-
Enabled: false
|
459
|
-
|
460
470
|
Lint/DeprecatedClassMethods:
|
461
471
|
Description: 'Check for deprecated class method calls.'
|
462
472
|
Enabled: false
|
@@ -523,7 +533,7 @@ Lint/UnderscorePrefixedVariableName:
|
|
523
533
|
Description: 'Do not use prefix `_` for a variable that is used.'
|
524
534
|
Enabled: false
|
525
535
|
|
526
|
-
Lint/
|
536
|
+
Lint/UnneededCopDisableDirective:
|
527
537
|
Description: >-
|
528
538
|
Checks for rubocop:disable comments that can be removed.
|
529
539
|
Note: this cop is not disabled when disabling all cops.
|
data/README.adoc
CHANGED
@@ -9,7 +9,7 @@ Gem to convert an HTML document into a Word document (.doc) format. This is inte
|
|
9
9
|
|
10
10
|
This gem originated out of https://github.com/riboseinc/asciidoctor-iso, which creates a Word document from an automatically generated HTML document (created in turn by processing Asciidoc).
|
11
11
|
|
12
|
-
This work is driven by the Word document generation procedure documented in http://sebsauvage.net/wiki/doku.php?id=word_document_generation
|
12
|
+
This work is driven by the Word document generation procedure documented in http://sebsauvage.net/wiki/doku.php?id=word_document_generation. For more on the approach taken, and on alternative approaches, see https://github.com/riboseinc/html2doc/wiki/Why-not-docx%3F
|
13
13
|
|
14
14
|
The gem currently does the following:
|
15
15
|
|
data/lib/html2doc/base.rb
CHANGED
@@ -9,14 +9,20 @@ module Html2Doc
|
|
9
9
|
def self.process(result, hash)
|
10
10
|
hash[:dir1] = create_dir(hash[:filename], hash[:dir])
|
11
11
|
result = process_html(result, hash)
|
12
|
-
hash[:header_file]
|
13
|
-
system("cp #{hash[:header_file]} #{hash[:dir1]}/header.html")
|
12
|
+
process_header(hash[:header_file], hash)
|
14
13
|
generate_filelist(hash[:filename], hash[:dir1])
|
15
14
|
File.open("#{hash[:filename]}.htm", "w") { |f| f.write(result) }
|
16
15
|
mime_package result, hash[:filename], hash[:dir1]
|
17
16
|
rm_temp_files(hash[:filename], hash[:dir], hash[:dir1])
|
18
17
|
end
|
19
18
|
|
19
|
+
def self.process_header(headerfile, hash)
|
20
|
+
return if headerfile.nil?
|
21
|
+
doc = File.read(headerfile, encoding: "utf-8")
|
22
|
+
doc = header_image_cleanup(doc, hash[:dir1], hash[:filename])
|
23
|
+
File.open("#{hash[:dir1]}/header.html", "w") { |f| f.write(doc) }
|
24
|
+
end
|
25
|
+
|
20
26
|
def self.create_dir(filename, dir)
|
21
27
|
return dir if dir
|
22
28
|
dir = "#{filename}_files"
|
data/lib/html2doc/mime.rb
CHANGED
@@ -84,6 +84,27 @@ module Html2Doc
|
|
84
84
|
docxml
|
85
85
|
end
|
86
86
|
|
87
|
+
# do not parse the header through Nokogiri, since it will contain
|
88
|
+
# non-XML like <![if !supportFootnotes]>
|
89
|
+
def self.header_image_cleanup(doc, dir, filename)
|
90
|
+
doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
|
91
|
+
header_image_cleanup1(a, dir, filename)
|
92
|
+
end.join
|
93
|
+
end
|
94
|
+
|
95
|
+
def self.header_image_cleanup1(a, dir, filename)
|
96
|
+
if a.size == 2
|
97
|
+
matched = / src=['"](?<src>[^"']+)['"]/.match a[1]
|
98
|
+
matched2 = /\.(?<suffix>\S+)$/.match matched[:src]
|
99
|
+
uuid = UUIDTools::UUID.random_create.to_s
|
100
|
+
new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}.#{matched2[:suffix]}"
|
101
|
+
dest_filename = File.join(dir, "#{uuid}.#{matched2[:suffix]}")
|
102
|
+
system "cp #{matched[:src]} #{dest_filename}"
|
103
|
+
a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_full_filename}'")
|
104
|
+
end
|
105
|
+
a.join
|
106
|
+
end
|
107
|
+
|
87
108
|
def self.generate_filelist(filename, dir)
|
88
109
|
File.open(File.join(dir, "filelist.xml"), "w") do |f|
|
89
110
|
f.write %{<xml xmlns:o="urn:schemas-microsoft-com:office:office">
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -364,6 +364,12 @@ RSpec.describe Html2Doc do
|
|
364
364
|
OUTPUT
|
365
365
|
end
|
366
366
|
|
367
|
+
it "processes a header with an image" do
|
368
|
+
Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
|
369
|
+
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).to match(%r{Content-Type: image/png})
|
370
|
+
end
|
371
|
+
|
372
|
+
|
367
373
|
it "processes a populated document" do
|
368
374
|
simple_body = "<h1>Hello word!</h1>
|
369
375
|
<div>This is a very simple document</div>"
|
@@ -407,7 +413,7 @@ RSpec.describe Html2Doc do
|
|
407
413
|
to match_fuzzy(<<~OUTPUT)
|
408
414
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
409
415
|
#{word_body('
|
410
|
-
<div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"
|
416
|
+
<div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"/></m:oMathParaPr><m:oMath>
|
411
417
|
<m:nary><m:naryPr><m:chr m:val="∑"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub>
|
412
418
|
<m:r><m:t>i=1</m:t></m:r>
|
413
419
|
</m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary>
|
@@ -432,7 +438,7 @@ RSpec.describe Html2Doc do
|
|
432
438
|
to match_fuzzy(<<~OUTPUT)
|
433
439
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
434
440
|
#{word_body('
|
435
|
-
<div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"
|
441
|
+
<div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"/></m:oMathParaPr><m:oMath>
|
436
442
|
<m:nary><m:naryPr><m:chr m:val="∑"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub>
|
437
443
|
<m:r><m:t>i=1</m:t></m:r>
|
438
444
|
</m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|