html2doc 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.tb.yml +20 -10
- data/README.adoc +1 -1
- data/lib/html2doc/base.rb +8 -2
- data/lib/html2doc/mime.rb +21 -0
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +8 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 326527587765630ec2b376716c48428cfa946d94
|
4
|
+
data.tar.gz: 1690c23c76ce1cea7971d8f399544c5dea36166c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0405adec203661a00d411a0c14458493eb58992a5c052f43c4470f7ad8d9943207b8d917ef91eafc4dfa4b79f2dad4db6ff381568682149d19d001cb1aad775b
|
7
|
+
data.tar.gz: 6cae33b7a484bf5c41ad55c9355aeebd89fef0a95c59aec68771dac95af64b151a303e2876c8eb31aef43ea9ddc143c5e770d86e8c86d509c4e1a06c13ebc59e
|
data/.rubocop.tb.yml
CHANGED
@@ -352,8 +352,18 @@ Style/TrailingCommaInArguments:
|
|
352
352
|
- no_comma
|
353
353
|
Enabled: true
|
354
354
|
|
355
|
-
Style/TrailingCommaInLiteral:
|
356
|
-
Description: 'Checks for trailing comma in array and hash literals.'
|
355
|
+
Style/TrailingCommaInArrayLiteral:
|
356
|
+
Description: 'Checks for trailing comma in array literals.'
|
357
|
+
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
|
358
|
+
EnforcedStyleForMultiline: comma
|
359
|
+
SupportedStylesForMultiline:
|
360
|
+
- comma
|
361
|
+
- consistent_comma
|
362
|
+
- no_comma
|
363
|
+
Enabled: true
|
364
|
+
|
365
|
+
Style/TrailingCommaInHashLiteral:
|
366
|
+
Description: 'Checks for trailing comma in hash literals.'
|
357
367
|
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
|
358
368
|
EnforcedStyleForMultiline: comma
|
359
369
|
SupportedStylesForMultiline:
|
@@ -398,6 +408,13 @@ Layout/AlignParameters:
|
|
398
408
|
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-double-indent'
|
399
409
|
Enabled: false
|
400
410
|
|
411
|
+
Layout/ConditionPosition:
|
412
|
+
Description: >-
|
413
|
+
Checks for condition placed in a confusing position relative to
|
414
|
+
the keyword.
|
415
|
+
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#same-line-condition'
|
416
|
+
Enabled: false
|
417
|
+
|
401
418
|
Layout/DotPosition:
|
402
419
|
Description: 'Checks the position of the dot in multi-line method calls.'
|
403
420
|
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#consistent-multi-line-chains'
|
@@ -450,13 +467,6 @@ Lint/CircularArgumentReference:
|
|
450
467
|
Description: "Don't refer to the keyword argument in the default value."
|
451
468
|
Enabled: false
|
452
469
|
|
453
|
-
Lint/ConditionPosition:
|
454
|
-
Description: >-
|
455
|
-
Checks for condition placed in a confusing position relative to
|
456
|
-
the keyword.
|
457
|
-
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#same-line-condition'
|
458
|
-
Enabled: false
|
459
|
-
|
460
470
|
Lint/DeprecatedClassMethods:
|
461
471
|
Description: 'Check for deprecated class method calls.'
|
462
472
|
Enabled: false
|
@@ -523,7 +533,7 @@ Lint/UnderscorePrefixedVariableName:
|
|
523
533
|
Description: 'Do not use prefix `_` for a variable that is used.'
|
524
534
|
Enabled: false
|
525
535
|
|
526
|
-
Lint/UnneededDisable:
|
536
|
+
Lint/UnneededCopDisableDirective:
|
527
537
|
Description: >-
|
528
538
|
Checks for rubocop:disable comments that can be removed.
|
529
539
|
Note: this cop is not disabled when disabling all cops.
|
data/README.adoc
CHANGED
@@ -9,7 +9,7 @@ Gem to convert an HTML document into a Word document (.doc) format. This is inte
|
|
9
9
|
|
10
10
|
This gem originated out of https://github.com/riboseinc/asciidoctor-iso, which creates a Word document from a automatically generated HTML document (created in turn by processing Asciidoc).
|
11
11
|
|
12
|
-
This work is driven by the Word document generation procedure documented in http://sebsauvage.net/wiki/doku.php?id=word_document_generation
|
12
|
+
This work is driven by the Word document generation procedure documented in http://sebsauvage.net/wiki/doku.php?id=word_document_generation. For more on the approach taken, and on alternative approaches, see https://github.com/riboseinc/html2doc/wiki/Why-not-docx%3F
|
13
13
|
|
14
14
|
The gem currently does the following:
|
15
15
|
|
data/lib/html2doc/base.rb
CHANGED
@@ -9,14 +9,20 @@ module Html2Doc
|
|
9
9
|
def self.process(result, hash)
|
10
10
|
hash[:dir1] = create_dir(hash[:filename], hash[:dir])
|
11
11
|
result = process_html(result, hash)
|
12
|
-
hash[:header_file]
|
13
|
-
system("cp #{hash[:header_file]} #{hash[:dir1]}/header.html")
|
12
|
+
process_header(hash[:header_file], hash)
|
14
13
|
generate_filelist(hash[:filename], hash[:dir1])
|
15
14
|
File.open("#{hash[:filename]}.htm", "w") { |f| f.write(result) }
|
16
15
|
mime_package result, hash[:filename], hash[:dir1]
|
17
16
|
rm_temp_files(hash[:filename], hash[:dir], hash[:dir1])
|
18
17
|
end
|
19
18
|
|
19
|
+
def self.process_header(headerfile, hash)
|
20
|
+
return if headerfile.nil?
|
21
|
+
doc = File.read(headerfile, encoding: "utf-8")
|
22
|
+
doc = header_image_cleanup(doc, hash[:dir1], hash[:filename])
|
23
|
+
File.open("#{hash[:dir1]}/header.html", "w") { |f| f.write(doc) }
|
24
|
+
end
|
25
|
+
|
20
26
|
def self.create_dir(filename, dir)
|
21
27
|
return dir if dir
|
22
28
|
dir = "#{filename}_files"
|
data/lib/html2doc/mime.rb
CHANGED
@@ -84,6 +84,27 @@ module Html2Doc
|
|
84
84
|
docxml
|
85
85
|
end
|
86
86
|
|
87
|
+
# do not parse the header through Nokogiri, since it will contain
|
88
|
+
# non-XML like <![if !supportFootnotes]>
|
89
|
+
def self.header_image_cleanup(doc, dir, filename)
|
90
|
+
doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
|
91
|
+
header_image_cleanup1(a, dir, filename)
|
92
|
+
end.join
|
93
|
+
end
|
94
|
+
|
95
|
+
def self.header_image_cleanup1(a, dir, filename)
|
96
|
+
if a.size == 2
|
97
|
+
matched = / src=['"](?<src>[^"']+)['"]/.match a[1]
|
98
|
+
matched2 = /\.(?<suffix>\S+)$/.match matched[:src]
|
99
|
+
uuid = UUIDTools::UUID.random_create.to_s
|
100
|
+
new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}.#{matched2[:suffix]}"
|
101
|
+
dest_filename = File.join(dir, "#{uuid}.#{matched2[:suffix]}")
|
102
|
+
system "cp #{matched[:src]} #{dest_filename}"
|
103
|
+
a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_full_filename}'")
|
104
|
+
end
|
105
|
+
a.join
|
106
|
+
end
|
107
|
+
|
87
108
|
def self.generate_filelist(filename, dir)
|
88
109
|
File.open(File.join(dir, "filelist.xml"), "w") do |f|
|
89
110
|
f.write %{<xml xmlns:o="urn:schemas-microsoft-com:office:office">
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -364,6 +364,12 @@ RSpec.describe Html2Doc do
|
|
364
364
|
OUTPUT
|
365
365
|
end
|
366
366
|
|
367
|
+
it "processes a header with an image" do
|
368
|
+
Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
|
369
|
+
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).to match(%r{Content-Type: image/png})
|
370
|
+
end
|
371
|
+
|
372
|
+
|
367
373
|
it "processes a populated document" do
|
368
374
|
simple_body = "<h1>Hello word!</h1>
|
369
375
|
<div>This is a very simple document</div>"
|
@@ -407,7 +413,7 @@ RSpec.describe Html2Doc do
|
|
407
413
|
to match_fuzzy(<<~OUTPUT)
|
408
414
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
409
415
|
#{word_body('
|
410
|
-
<div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"
|
416
|
+
<div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"/></m:oMathParaPr><m:oMath>
|
411
417
|
<m:nary><m:naryPr><m:chr m:val="∑"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub>
|
412
418
|
<m:r><m:t>i=1</m:t></m:r>
|
413
419
|
</m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary>
|
@@ -432,7 +438,7 @@ RSpec.describe Html2Doc do
|
|
432
438
|
to match_fuzzy(<<~OUTPUT)
|
433
439
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
434
440
|
#{word_body('
|
435
|
-
<div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"
|
441
|
+
<div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"/></m:oMathParaPr><m:oMath>
|
436
442
|
<m:nary><m:naryPr><m:chr m:val="∑"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub>
|
437
443
|
<m:r><m:t>i=1</m:t></m:r>
|
438
444
|
</m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|