htmltoword 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/.travis.yml +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +92 -0
- data/Rakefile +4 -0
- data/htmltoword.gemspec +27 -0
- data/lib/htmltoword.rb +84 -0
- data/lib/htmltoword/htmltoword_helper.rb +35 -0
- data/lib/htmltoword/version.rb +3 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/xslt_spec.rb +51 -0
- data/templates/default.docx +0 -0
- data/xslt/html_to_wordml.xslt +228 -0
- data/xslt/style2.xslt +49 -0
- metadata +161 -0
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Nicholas Frandsen
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
# Ruby Html to word Gem [![Code Climate](https://codeclimate.com/github/nickfrandsen/htmltoword.png)](https://codeclimate.com/github/nickfrandsen/htmltoword) [![Build Status](https://travis-ci.org/nickfrandsen/htmltoword.png)](https://travis-ci.org/nickfrandsen/htmltoword)
|
2
|
+
|
3
|
+
This simple gem allows you to create MS Word docx documents from simple html documents. This makes it easy to create dynamic reports and forms that can be downloaded by your users as simple MS Word docx files.
|
4
|
+
|
5
|
+
Add this line to your application's Gemfile:
|
6
|
+
|
7
|
+
gem 'htmltoword'
|
8
|
+
|
9
|
+
And then execute:
|
10
|
+
|
11
|
+
$ bundle
|
12
|
+
|
13
|
+
Or install it yourself as:
|
14
|
+
|
15
|
+
$ gem install htmltoword
|
16
|
+
|
17
|
+
## Usage
|
18
|
+
|
19
|
+
### Standalone
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
require 'htmltoword'
|
23
|
+
|
24
|
+
file = Htmltoword::Document.create params[:assembly_content_html], file_name
|
25
|
+
```
|
26
|
+
|
27
|
+
### With Rails
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
# Add mime-type in /config/initializers/mime_types.rb:
|
31
|
+
Mime::Type.register "application/vnd.openxmlformats-officedocument.wordprocessingml.document", :docx
|
32
|
+
|
33
|
+
# Add docx responder in your controller
|
34
|
+
def show
|
35
|
+
respond_to do |format|
|
36
|
+
format.docx do
|
37
|
+
file = Htmltoword::Document.create params[:docx_html_source], "file_name.docx"
|
38
|
+
send_file file.path, :disposition => "attachment"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
```
|
43
|
+
|
44
|
+
## Features
|
45
|
+
|
46
|
+
All standard html elements are supported and will create the closest equivalent in wordml. For example spans will create inline elements and divs will create block like elements.
|
47
|
+
|
48
|
+
### Highlighting text
|
49
|
+
|
50
|
+
You can add highlighting to text by wrapping it in a span with class h and adding a data-style attribute with a color that wordml supports (TODO: provide list), i.e.:
|
51
|
+
|
52
|
+
```html
|
53
|
+
<span class="h" data-style="green">This text will have a green highlight</span>
|
54
|
+
```
|
55
|
+
|
56
|
+
### Page breaks
|
57
|
+
|
58
|
+
To create page breaks, simply add a div with class -page-break, i.e.:
|
59
|
+
|
60
|
+
```html
|
61
|
+
<div class="-page-break"></div>
|
62
|
+
```
|
63
|
+
|
64
|
+
## Contributing / Extending
|
65
|
+
|
66
|
+
Word docx files are essentially just a zipped collection of xml files and resources.
|
67
|
+
This gem contains a standard empty MS Word docx file and a stylesheet to transform arbitrary html into wordml.
|
68
|
+
The basic functioning of this gem can be summarised as:
|
69
|
+
|
70
|
+
1. Transform the input html to wordml.
|
71
|
+
2. Unzip empty word docx file bundled with gem and replace its document.xml content with the new transformed result of step 1.
|
72
|
+
3. Zip up contents again into a resulting .docx file.
|
73
|
+
|
74
|
+
For more info about WordML: http://rep.oio.dk/microsoft.com/officeschemas/wordprocessingml_article.htm
|
75
|
+
|
76
|
+
Contributions would be very much appreciated.
|
77
|
+
|
78
|
+
1. Fork it
|
79
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
80
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
81
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
82
|
+
5. Create new Pull Request
|
83
|
+
|
84
|
+
## License
|
85
|
+
|
86
|
+
(The MIT License)
|
87
|
+
|
88
|
+
Copyright © 2013:
|
89
|
+
|
90
|
+
* Cristina Matonte
|
91
|
+
|
92
|
+
* Nicholas Frandsen
|
data/Rakefile
ADDED
data/htmltoword.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'htmltoword/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "htmltoword"
|
8
|
+
spec.version = Htmltoword::VERSION
|
9
|
+
spec.authors = ["Nicholas Frandsen"]
|
10
|
+
spec.email = ["nick.rowe.frandsen@gmail.com"]
|
11
|
+
spec.description = %q{Convert html to word docx document.}
|
12
|
+
spec.summary = %q{This simple gem allows you to create MS Word docx documents from simple html documents. This makes it easy to create dynamic reports and forms that can be downloaded by your users as simple MS Word docx files.}
|
13
|
+
spec.homepage = "http://github.com/nickfrandsen/htmltoword"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_dependency "actionpack"
|
22
|
+
spec.add_dependency "nokogiri"
|
23
|
+
spec.add_dependency "rubyzip"
|
24
|
+
spec.add_development_dependency "rspec"
|
25
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
26
|
+
spec.add_development_dependency "rake"
|
27
|
+
end
|
data/lib/htmltoword.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require "htmltoword/version"
|
3
|
+
require "htmltoword/htmltoword_helper"
|
4
|
+
require "action_controller"
|
5
|
+
require "action_view"
|
6
|
+
require "nokogiri"
|
7
|
+
require "zip/zip"
|
8
|
+
|
9
|
+
module Htmltoword
|
10
|
+
def self.root
|
11
|
+
File.expand_path '../..', __FILE__
|
12
|
+
end
|
13
|
+
def self.templates_path
|
14
|
+
File.join root, "templates"
|
15
|
+
end
|
16
|
+
|
17
|
+
class Document
|
18
|
+
|
19
|
+
DOC_XML_FILE = "word/document.xml"
|
20
|
+
BASIC_PATH = ::Htmltoword.root
|
21
|
+
FILE_EXTENSION = ".docx"
|
22
|
+
XSLT_TEMPLATE = File.join(BASIC_PATH, 'xslt', 'html_to_wordml.xslt')
|
23
|
+
|
24
|
+
class << self
|
25
|
+
include HtmltowordHelper
|
26
|
+
|
27
|
+
def create content, file_name
|
28
|
+
word_file = new(template_file, file_name)
|
29
|
+
word_file.replace_file content
|
30
|
+
word_file.save
|
31
|
+
end
|
32
|
+
|
33
|
+
def create_with_content template, file_name, content, set=nil
|
34
|
+
word_file = new(template_file("#{template}#{FILE_EXTENSION}"), file_name)
|
35
|
+
content = replace_values(content, set) if set
|
36
|
+
word_file.replace_file content
|
37
|
+
word_file.save
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def initialize(template_path, file_name)
|
42
|
+
@file_name = file_name
|
43
|
+
@replaceable_files = {}
|
44
|
+
@template_zip = Zip::ZipFile.open(template_path)
|
45
|
+
end
|
46
|
+
|
47
|
+
def file_name
|
48
|
+
@file_name
|
49
|
+
end
|
50
|
+
|
51
|
+
#
# Writes the resulting word document into a new Tempfile and returns it.
#
# Every entry of the template zip is copied into the output zip; entries
# whose name is a key of @replaceable_files get that stored content instead
# of the template's original content.
#
# The template zip and the tempfile are closed even when writing fails, so
# an error while zipping does not leak open file handles. The returned
# Tempfile is already closed; callers use its #path. The app using the gem
# should decide what to do with it.
#
def save
  output_file = Tempfile.new([file_name, FILE_EXTENSION], type: 'application/zip')
  begin
    Zip::ZipOutputStream.open(output_file.path) do |out|
      @template_zip.each do |entry|
        out.put_next_entry entry.name
        if @replaceable_files[entry.name]
          out.write(@replaceable_files[entry.name])
        else
          out.write(@template_zip.read(entry.name))
        end
      end
    end
  ensure
    # Release both handles on success and on failure alike.
    @template_zip.close
    output_file.close
  end
  output_file
end
|
75
|
+
|
76
|
+
def replace_file html, file_name=DOC_XML_FILE
|
77
|
+
source = Nokogiri::HTML(html.gsub(/>\s+</, "><"))
|
78
|
+
xslt = Nokogiri::XSLT( File.read(XSLT_TEMPLATE) )
|
79
|
+
source = xslt.transform( source ) unless (source/"/html").blank?
|
80
|
+
@replaceable_files[file_name] = source.to_s
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Htmltoword
|
2
|
+
module HtmltowordHelper
|
3
|
+
|
4
|
+
def template_file template_file_name=nil
|
5
|
+
default_path = File.join(::Htmltoword.templates_path, "default.docx")
|
6
|
+
template_path = template_file_name.present? ? File.join(::Htmltoword.templates_path, template_file_name) : ""
|
7
|
+
File.exist?(template_path) ? template_path : default_path
|
8
|
+
end
|
9
|
+
|
10
|
+
def replace_values content, set
|
11
|
+
doc = Nokogiri::HTML(content)
|
12
|
+
set.each_pair do |key, value|
|
13
|
+
fields = (doc/"//span[@data-id='#{key}']")
|
14
|
+
fields.each do |f|
|
15
|
+
date_format = f.attr("date-format") || "long"
|
16
|
+
data_transform = f.attr("data-transform")
|
17
|
+
if value.is_a? Hash
|
18
|
+
view = ActionView::Base.new(ActionController::Base.view_paths, {})
|
19
|
+
final_value = view.render "partials/answer_table", answer: value
|
20
|
+
fragment = doc.root.parse(final_value).first
|
21
|
+
new_node = doc.root.add_child(fragment)
|
22
|
+
f.parent.replace new_node
|
23
|
+
elsif value.is_a? Time
|
24
|
+
f.content = I18n.l(value.to_date, format: date_format.to_sym)
|
25
|
+
elsif data_transform == "capitalized"
|
26
|
+
f.content = value.mb_chars.capitalize rescue value
|
27
|
+
else
|
28
|
+
f.content = value
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
doc.to_s
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/spec/xslt_spec.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "XSLT" do
|
4
|
+
|
5
|
+
it "transforms an empty html doc into an empty docx doc" do
|
6
|
+
html = '<html><head></head><body></body></html>'
|
7
|
+
compare_resulting_wordml_with_expected(html, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<w:document xmlns:wpc=\"http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas\" xmlns:mo=\"http://schemas.microsoft.com/office/mac/office/2008/main\" xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:mv=\"urn:schemas-microsoft-com:mac:vml\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\" xmlns:wp14=\"http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing\" xmlns:wp=\"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:wpg=\"http://schemas.microsoft.com/office/word/2010/wordprocessingGroup\" xmlns:wpi=\"http://schemas.microsoft.com/office/word/2010/wordprocessingInk\" xmlns:wne=\"http://schemas.microsoft.com/office/word/2006/wordml\" xmlns:wps=\"http://schemas.microsoft.com/office/word/2010/wordprocessingShape\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:pkg=\"http://schemas.microsoft.com/office/2006/xmlPackage\" xmlns:str=\"http://exslt.org/common\" xmlns:fn=\"http://www.w3.org/2005/xpath-functions\" mc:Ignorable=\"w14 wp14\">\n <w:body>\n <w:p/> <w:sectPr>\n <w:pgSz w:w=\"11906\" w:h=\"16838\"/>\n <w:pgMar w:top=\"1440\" w:right=\"1440\" w:bottom=\"1440\" w:left=\"1440\" w:header=\"708\" w:footer=\"708\" w:gutter=\"0\"/>\n <w:cols w:space=\"708\"/>\n <w:docGrid w:linePitch=\"360\"/>\n </w:sectPr>\n </w:body>\n</w:document>\n")
|
8
|
+
end
|
9
|
+
|
10
|
+
it "transforms a div into a docx block element." do
|
11
|
+
html = '<html><head></head><body><div>Hello</div></body></html>'
|
12
|
+
compare_resulting_wordml_with_expected(html, "<w:p> <w:r> <w:t xml:space=\"preserve\">Hello</w:t> </w:r> </w:p>")
|
13
|
+
end
|
14
|
+
|
15
|
+
context "transform a span" do
|
16
|
+
|
17
|
+
it "into a docx block elmenet if child of body." do
|
18
|
+
html = '<html><head></head><body><span>Hello</span></body></html>'
|
19
|
+
compare_resulting_wordml_with_expected(html, "<w:p> <w:r> <w:t xml:space=\"preserve\">Hello</w:t> </w:r> </w:p>")
|
20
|
+
end
|
21
|
+
|
22
|
+
it "into a docx inline element if not child of body." do
|
23
|
+
html = '<html><head></head><body><div><span>Hello</span></div></body></html>'
|
24
|
+
compare_resulting_wordml_with_expected(html, "<w:p> <w:r> <w:t xml:space=\"preserve\">Hello</w:t> </w:r> </w:p>")
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
it "transforms a p into a docx block element." do
|
30
|
+
html = '<html><head></head><body><p>Hello</p></body></html>'
|
31
|
+
compare_resulting_wordml_with_expected(html, "<w:p> <w:r> <w:t xml:space=\"preserve\">Hello</w:t> </w:r> </w:p>")
|
32
|
+
end
|
33
|
+
|
34
|
+
protected
|
35
|
+
|
36
|
+
def compare_resulting_wordml_with_expected(html, resulting_wordml)
|
37
|
+
source = Nokogiri::HTML(html.gsub(/>\s+</, "><"))
|
38
|
+
xslt = Nokogiri::XSLT( File.read(Htmltoword::Document::XSLT_TEMPLATE))
|
39
|
+
result = xslt.transform(source)
|
40
|
+
if compare_content_of_body?(resulting_wordml)
|
41
|
+
result.at("//w:sectPr").remove
|
42
|
+
result = result.at("//w:body/*")
|
43
|
+
end
|
44
|
+
result.to_s.gsub(/\s+/, " ").should == resulting_wordml.gsub(/\s+/, " ")
|
45
|
+
end
|
46
|
+
|
47
|
+
# Tells whether the expected wordml is only a body fragment (true) rather
# than a complete document starting with an XML declaration (false).
#
# The "?" of "<?xml" is escaped so the declaration is matched literally;
# unescaped, it made the "<" optional and any fragment that merely
# contained the text "xml version" was treated as a full document.
def compare_content_of_body?(wordml)
  wordml !~ /<\?xml version/
end
|
50
|
+
|
51
|
+
end
|
Binary file
|
@@ -0,0 +1,228 @@
|
|
1
|
+
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
2
|
+
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
3
|
+
xmlns:o="urn:schemas-microsoft-com:office:office"
|
4
|
+
xmlns:v="urn:schemas-microsoft-com:vml"
|
5
|
+
xmlns:WX="http://schemas.microsoft.com/office/word/2003/auxHint"
|
6
|
+
xmlns:aml="http://schemas.microsoft.com/aml/2001/core"
|
7
|
+
xmlns:w10="urn:schemas-microsoft-com:office:word"
|
8
|
+
xmlns:pkg="http://schemas.microsoft.com/office/2006/xmlPackage"
|
9
|
+
xmlns:msxsl="urn:schemas-microsoft-com:xslt"
|
10
|
+
xmlns:ext="http://www.xmllab.net/wordml2html/ext"
|
11
|
+
xmlns:java="http://xml.apache.org/xalan/java"
|
12
|
+
xmlns:str="http://exslt.org/common"
|
13
|
+
xmlns:fn="http://www.w3.org/2005/xpath-functions"
|
14
|
+
version="1.0"
|
15
|
+
exclude-result-prefixes="java msxsl ext w o v WX aml w10">
|
16
|
+
|
17
|
+
|
18
|
+
<xsl:output method="xml" encoding="utf-8" omit-xml-declaration="no" indent="yes" />
|
19
|
+
|
20
|
+
<xsl:template match="/ | html">
|
21
|
+
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mo="http://schemas.microsoft.com/office/mac/office/2008/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:mv="urn:schemas-microsoft-com:mac:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 wp14">
|
22
|
+
<xsl:apply-templates select="//body"/>
|
23
|
+
</w:document>
|
24
|
+
</xsl:template>
|
25
|
+
|
26
|
+
<xsl:template match="body">
|
27
|
+
<w:body>
|
28
|
+
<w:p>
|
29
|
+
<xsl:apply-templates/>
|
30
|
+
</w:p>
|
31
|
+
<w:sectPr>
|
32
|
+
<w:pgSz w:w="11906" w:h="16838"/>
|
33
|
+
<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="708" w:footer="708" w:gutter="0"/>
|
34
|
+
<w:cols w:space="708"/>
|
35
|
+
<w:docGrid w:linePitch="360"/>
|
36
|
+
</w:sectPr>
|
37
|
+
</w:body>
|
38
|
+
</xsl:template>
|
39
|
+
|
40
|
+
<xsl:template match="br">
|
41
|
+
<xsl:choose>
|
42
|
+
<xsl:when test="name(..)='div' or name(..)='small'">
|
43
|
+
<w:r><w:br/></w:r>
|
44
|
+
</xsl:when>
|
45
|
+
<xsl:when test="name(..)='td'">
|
46
|
+
</xsl:when>
|
47
|
+
<xsl:otherwise>
|
48
|
+
<w:pPr><w:pStyle w:val="Afsnit"/></w:pPr><w:r><w:br/></w:r>
|
49
|
+
</xsl:otherwise>
|
50
|
+
</xsl:choose>
|
51
|
+
</xsl:template>
|
52
|
+
|
53
|
+
<xsl:template match="i|em">
|
54
|
+
<w:r>
|
55
|
+
<w:rPr>
|
56
|
+
<w:i />
|
57
|
+
</w:rPr>
|
58
|
+
<xsl:apply-templates />
|
59
|
+
</w:r>
|
60
|
+
</xsl:template>
|
61
|
+
|
62
|
+
<xsl:template match="b|strong">
|
63
|
+
<w:r>
|
64
|
+
<w:rPr>
|
65
|
+
<w:b />
|
66
|
+
</w:rPr>
|
67
|
+
<xsl:apply-templates />
|
68
|
+
</w:r>
|
69
|
+
</xsl:template>
|
70
|
+
|
71
|
+
<xsl:template match="font">
|
72
|
+
<w:r>
|
73
|
+
<xsl:apply-templates />
|
74
|
+
</w:r>
|
75
|
+
</xsl:template>
|
76
|
+
|
77
|
+
<xsl:template match="div[@class='crumbNav']"/>
|
78
|
+
<xsl:template match="small"/>
|
79
|
+
|
80
|
+
<xsl:template match="div[contains(concat(' ', @class, ' '), ' -page-break ')]">
|
81
|
+
<xsl:comment>Making PAGEBREAKS</xsl:comment>
|
82
|
+
<w:r><w:br w:type="page" /></w:r>
|
83
|
+
<xsl:apply-templates select="node()"/>
|
84
|
+
</xsl:template>
|
85
|
+
|
86
|
+
<xsl:template match="div">
|
87
|
+
<xsl:choose>
|
88
|
+
<xsl:when test="name(..)='body'">
|
89
|
+
<xsl:apply-templates select="node()"/>
|
90
|
+
</xsl:when>
|
91
|
+
<xsl:when test="./div">
|
92
|
+
<xsl:apply-templates select="node()"/>
|
93
|
+
</xsl:when>
|
94
|
+
<xsl:otherwise>
|
95
|
+
<w:r><w:br/></w:r>
|
96
|
+
<xsl:apply-templates select="node()"/>
|
97
|
+
<w:r><w:br/></w:r>
|
98
|
+
</xsl:otherwise>
|
99
|
+
</xsl:choose>
|
100
|
+
</xsl:template>
|
101
|
+
|
102
|
+
<xsl:template match="p">
|
103
|
+
<xsl:apply-templates/>
|
104
|
+
</xsl:template>
|
105
|
+
|
106
|
+
<xsl:template match="ol|ul">
|
107
|
+
<w:r><w:br/></w:r>
|
108
|
+
<xsl:apply-templates/>
|
109
|
+
<w:r><w:br/></w:r>
|
110
|
+
</xsl:template>
|
111
|
+
|
112
|
+
<xsl:template match="li">
|
113
|
+
<w:r><w:t xml:space="preserve"> </w:t></w:r>
|
114
|
+
<xsl:apply-templates/>
|
115
|
+
|
116
|
+
<w:r><w:br/></w:r>
|
117
|
+
</xsl:template>
|
118
|
+
|
119
|
+
<xsl:template match="span[contains(concat(' ', @class, ' '), ' h ')]">
|
120
|
+
<xsl:variable name="color">
|
121
|
+
<xsl:choose>
|
122
|
+
<xsl:when test="./@data-style='pink'">magenta</xsl:when>
|
123
|
+
<xsl:when test="./@data-style='blue'">cyan</xsl:when>
|
124
|
+
<xsl:otherwise><xsl:value-of select="./@data-style"/></xsl:otherwise>
|
125
|
+
</xsl:choose>
|
126
|
+
</xsl:variable>
|
127
|
+
<w:r>
|
128
|
+
<w:rPr>
|
129
|
+
<w:highlight w:val="{$color}"/>
|
130
|
+
</w:rPr>
|
131
|
+
<xsl:apply-templates/>
|
132
|
+
</w:r>
|
133
|
+
</xsl:template>
|
134
|
+
|
135
|
+
<xsl:template match="span">
|
136
|
+
<xsl:apply-templates/>
|
137
|
+
</xsl:template>
|
138
|
+
|
139
|
+
<xsl:template match="table">
|
140
|
+
<w:tbl>
|
141
|
+
<w:tblPr>
|
142
|
+
<w:tblStyle w:val="TableGrid"/>
|
143
|
+
<w:tblW w:w="0" w:type="auto"/>
|
144
|
+
<w:tblBorders>
|
145
|
+
<w:top w:val="none" w:sz="0" w:space="0" w:color="auto"/>
|
146
|
+
<w:left w:val="none" w:sz="0" w:space="0" w:color="auto"/>
|
147
|
+
<w:bottom w:val="none" w:sz="0" w:space="0" w:color="auto"/>
|
148
|
+
<w:right w:val="none" w:sz="0" w:space="0" w:color="auto"/>
|
149
|
+
<w:insideH w:val="none" w:sz="0" w:space="0" w:color="auto"/>
|
150
|
+
<w:insideV w:val="none" w:sz="0" w:space="0" w:color="auto"/>
|
151
|
+
</w:tblBorders>
|
152
|
+
<w:tblLook w:val="0600" w:firstRow="0" w:lastRow="0" w:firstColumn="0" w:lastColumn="0" w:noHBand="1" w:noVBand="1"/>
|
153
|
+
</w:tblPr>
|
154
|
+
<w:tblGrid>
|
155
|
+
<w:gridCol w:w="2310"/>
|
156
|
+
<w:gridCol w:w="2310"/>
|
157
|
+
</w:tblGrid>
|
158
|
+
<xsl:apply-templates select="tr"/>
|
159
|
+
</w:tbl>
|
160
|
+
</xsl:template>
|
161
|
+
|
162
|
+
<xsl:template match="tr">
|
163
|
+
<w:tr>
|
164
|
+
<xsl:apply-templates select="td"/>
|
165
|
+
</w:tr>
|
166
|
+
</xsl:template>
|
167
|
+
|
168
|
+
<xsl:template match="td">
|
169
|
+
<w:tc>
|
170
|
+
<xsl:apply-templates/>
|
171
|
+
</w:tc>
|
172
|
+
</xsl:template>
|
173
|
+
|
174
|
+
<xsl:template match="a">
|
175
|
+
<xsl:apply-templates/>
|
176
|
+
</xsl:template>
|
177
|
+
|
178
|
+
<xsl:template match="h1|h2|h3|h4">
|
179
|
+
<w:r>
|
180
|
+
<w:rPr>
|
181
|
+
<w:rStyle w:val="{name(.)}"/>
|
182
|
+
</w:rPr>
|
183
|
+
<w:br/>
|
184
|
+
<xsl:apply-templates />
|
185
|
+
<w:br/>
|
186
|
+
</w:r>
|
187
|
+
</xsl:template>
|
188
|
+
|
189
|
+
<xsl:template match="text()">
|
190
|
+
<xsl:choose>
|
191
|
+
<xsl:when test="name(..)='i' or name(..)='em' or name(..)='b' or name(..)='strong' or name(..)='font' or ancestor::h3 or ancestor::h2 or ancestor::h1 or ancestor::h4">
|
192
|
+
<xsl:if test="string-length(.) > 0">
|
193
|
+
<w:t xml:space="preserve"><xsl:value-of select="."/></w:t>
|
194
|
+
</xsl:if>
|
195
|
+
</xsl:when>
|
196
|
+
<xsl:when test="name(..)='a' or name(..)='div' or name(..)='span' or name(..)='li' or name(..)='td' or name(..)='p'">
|
197
|
+
<xsl:if test="string-length(.) > 0">
|
198
|
+
<w:r>
|
199
|
+
<w:t xml:space="preserve"><xsl:value-of select="."/></w:t>
|
200
|
+
</w:r>
|
201
|
+
</xsl:if>
|
202
|
+
</xsl:when>
|
203
|
+
<xsl:otherwise>
|
204
|
+
<xsl:comment>What to do with text '<xsl:value-of select="."/>' in <xsl:value-of select="name(..)"/> element?</xsl:comment>
|
205
|
+
</xsl:otherwise>
|
206
|
+
</xsl:choose>
|
207
|
+
|
208
|
+
</xsl:template>
|
209
|
+
<!--Need to tokenize the value of the class and remove the useless ones.-->
|
210
|
+
<xsl:template match="@class">
|
211
|
+
<xsl:choose>
|
212
|
+
<xsl:when test="name(..)='span'">
|
213
|
+
<xsl:comment>Is this being used? 1</xsl:comment>
|
214
|
+
<w:rPr>
|
215
|
+
<w:rStyle w:val="{.}"/>
|
216
|
+
</w:rPr>
|
217
|
+
</xsl:when>
|
218
|
+
<xsl:when test="name(..)='div'">
|
219
|
+
<xsl:comment>Is this being used? 2</xsl:comment>
|
220
|
+
<w:pPr>
|
221
|
+
<w:pStyle w:val="{.}"/>
|
222
|
+
</w:pPr>
|
223
|
+
</xsl:when>
|
224
|
+
</xsl:choose>
|
225
|
+
</xsl:template>
|
226
|
+
|
227
|
+
</xsl:stylesheet>
|
228
|
+
|
data/xslt/style2.xslt
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
2
|
+
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
3
|
+
xmlns:o="urn:schemas-microsoft-com:office:office"
|
4
|
+
xmlns:v="urn:schemas-microsoft-com:vml"
|
5
|
+
xmlns:WX="http://schemas.microsoft.com/office/word/2003/auxHint"
|
6
|
+
xmlns:aml="http://schemas.microsoft.com/aml/2001/core"
|
7
|
+
xmlns:w10="urn:schemas-microsoft-com:office:word"
|
8
|
+
xmlns:pkg="http://schemas.microsoft.com/office/2006/xmlPackage"
|
9
|
+
xmlns:msxsl="urn:schemas-microsoft-com:xslt"
|
10
|
+
xmlns:ext="http://www.xmllab.net/wordml2html/ext"
|
11
|
+
xmlns:java="http://xml.apache.org/xalan/java"
|
12
|
+
xmlns:str="http://exslt.org/common"
|
13
|
+
xmlns:fn="http://www.w3.org/2005/xpath-functions"
|
14
|
+
version="1.0"
|
15
|
+
exclude-result-prefixes="java msxsl ext w o v WX aml w10">
|
16
|
+
|
17
|
+
|
18
|
+
<xsl:output method="xml" encoding="utf-8" omit-xml-declaration="no" indent="yes" />
|
19
|
+
|
20
|
+
<xsl:template match="/ | html">
|
21
|
+
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mo="http://schemas.microsoft.com/office/mac/office/2008/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:mv="urn:schemas-microsoft-com:mac:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 wp14">
|
22
|
+
<xsl:apply-templates select="//body"/>
|
23
|
+
</w:document>
|
24
|
+
</xsl:template>
|
25
|
+
|
26
|
+
<xsl:template match="body">
|
27
|
+
<w:body>
|
28
|
+
<w:p>
|
29
|
+
<xsl:apply-templates/>
|
30
|
+
</w:p>
|
31
|
+
<w:sectPr>
|
32
|
+
<w:pgSz w:w="11906" w:h="16838"/>
|
33
|
+
<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="708" w:footer="708" w:gutter="0"/>
|
34
|
+
<w:cols w:space="708"/>
|
35
|
+
<w:docGrid w:linePitch="360"/>
|
36
|
+
</w:sectPr>
|
37
|
+
</w:body>
|
38
|
+
</xsl:template>
|
39
|
+
|
40
|
+
<xsl:template match="h1|h2|h3|li|span">
|
41
|
+
<w:br/>
|
42
|
+
<w:r>
|
43
|
+
<xsl:comment>Im block</xsl:comment>
|
44
|
+
<w:t xml:space="preserve"><xsl:value-of select="."/></w:t>
|
45
|
+
</w:r>
|
46
|
+
<w:br/>
|
47
|
+
</xsl:template>
|
48
|
+
|
49
|
+
</xsl:stylesheet>
|
metadata
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: htmltoword
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Nicholas Frandsen
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-08-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: actionpack
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: nokogiri
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rubyzip
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rspec
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: bundler
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '1.3'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '1.3'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: rake
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
description: Convert html to word docx document.
|
111
|
+
email:
|
112
|
+
- nick.rowe.frandsen@gmail.com
|
113
|
+
executables: []
|
114
|
+
extensions: []
|
115
|
+
extra_rdoc_files: []
|
116
|
+
files:
|
117
|
+
- .gitignore
|
118
|
+
- .travis.yml
|
119
|
+
- Gemfile
|
120
|
+
- LICENSE.txt
|
121
|
+
- README.md
|
122
|
+
- Rakefile
|
123
|
+
- htmltoword.gemspec
|
124
|
+
- lib/htmltoword.rb
|
125
|
+
- lib/htmltoword/htmltoword_helper.rb
|
126
|
+
- lib/htmltoword/version.rb
|
127
|
+
- spec/spec_helper.rb
|
128
|
+
- spec/xslt_spec.rb
|
129
|
+
- templates/default.docx
|
130
|
+
- xslt/html_to_wordml.xslt
|
131
|
+
- xslt/style2.xslt
|
132
|
+
homepage: http://github.com/nickfrandsen/htmltoword
|
133
|
+
licenses:
|
134
|
+
- MIT
|
135
|
+
post_install_message:
|
136
|
+
rdoc_options: []
|
137
|
+
require_paths:
|
138
|
+
- lib
|
139
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
140
|
+
none: false
|
141
|
+
requirements:
|
142
|
+
- - ! '>='
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '0'
|
145
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
|
+
none: false
|
147
|
+
requirements:
|
148
|
+
- - ! '>='
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
requirements: []
|
152
|
+
rubyforge_project:
|
153
|
+
rubygems_version: 1.8.25
|
154
|
+
signing_key:
|
155
|
+
specification_version: 3
|
156
|
+
summary: This simple gem allows you to create MS Word docx documents from simple html
|
157
|
+
documents. This makes it easy to create dynamic reports and forms that can be downloaded
|
158
|
+
by your users as simple MS Word docx files.
|
159
|
+
test_files:
|
160
|
+
- spec/spec_helper.rb
|
161
|
+
- spec/xslt_spec.rb
|