janie-htmltoword 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +194 -0
- data/Rakefile +4 -0
- data/bin/htmltoword +36 -0
- data/lib/htmltoword.rb +29 -0
- data/lib/htmltoword/configuration.rb +12 -0
- data/lib/htmltoword/document.rb +155 -0
- data/lib/htmltoword/helpers/templates_helper.rb +9 -0
- data/lib/htmltoword/helpers/xslt_helper.rb +17 -0
- data/lib/htmltoword/railtie.rb +25 -0
- data/lib/htmltoword/renderer.rb +43 -0
- data/lib/htmltoword/templates/default.docx +0 -0
- data/lib/htmltoword/version.rb +3 -0
- data/lib/htmltoword/xslt/base.xslt +363 -0
- data/lib/htmltoword/xslt/cleanup.xslt +71 -0
- data/lib/htmltoword/xslt/extras.xslt +26 -0
- data/lib/htmltoword/xslt/functions.xslt +37 -0
- data/lib/htmltoword/xslt/header.xslt +34 -0
- data/lib/htmltoword/xslt/htmltoword.xslt +22 -0
- data/lib/htmltoword/xslt/image_functions.xslt +148 -0
- data/lib/htmltoword/xslt/images.xslt +136 -0
- data/lib/htmltoword/xslt/inline_elements.xslt +40 -0
- data/lib/htmltoword/xslt/links.xslt +34 -0
- data/lib/htmltoword/xslt/numbering.xslt +189 -0
- data/lib/htmltoword/xslt/relations.xslt +58 -0
- data/lib/htmltoword/xslt/style2.xslt +49 -0
- data/lib/htmltoword/xslt/tables.xslt +190 -0
- metadata +185 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0d05729149187d3e87a12e6c9fd1375a42774fdb
|
4
|
+
data.tar.gz: 14e590f291d423492d0b59f045120c22eade51ce
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0e209db03dc735e32eec2c55e19243fba57c6d5d565f359119deb351e2f5aaf9152342076dff6a3462858df8e92e7bc62c6be1f01976cc906cd6fb809a15395f
|
7
|
+
data.tar.gz: 1882c1168595571284475ae441cbd7016b94bd91e6105f51a196549b187146756a213d79776e50ee0f29dd257c6ae308fce8bcf21a2b1befab5f411598550978
|
data/README.md
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
# Ruby Html to word Gem
|
2
|
+
|
3
|
+
This simple gem allows you to create MS Word docx documents from simple html documents. This makes it easy to create dynamic reports and forms that can be downloaded by your users as simple MS Word docx files.
|
4
|
+
|
5
|
+
Add this line to your application's Gemfile:
|
6
|
+
|
7
|
+
gem 'htmltoword'
|
8
|
+
|
9
|
+
And then execute:
|
10
|
+
|
11
|
+
$ bundle
|
12
|
+
|
13
|
+
Or install it yourself as:
|
14
|
+
|
15
|
+
$ gem install htmltoword
|
16
|
+
|
17
|
+
|
18
|
+
**Note:** Since version 0.4.0 the ```create``` method returns a string with the contents of the file. If you want to save the file, use ```create_and_save``` instead. See the Usage section below for more details.
|
19
|
+
|
20
|
+
## Usage
|
21
|
+
|
22
|
+
### Standalone
|
23
|
+
|
24
|
+
The ```create_and_save``` function saves the file at the specified location. In case you want to handle the contents of the file
|
25
|
+
as a string and do what suits you best, call the ```create``` function instead.
|
26
|
+
|
27
|
+
Using the default word file as template
|
28
|
+
```ruby
|
29
|
+
require 'htmltoword'
|
30
|
+
|
31
|
+
my_html = '<html><head></head><body><p>Hello</p></body></html>'
|
32
|
+
document = Htmltoword::Document.create(my_html)
|
33
|
+
file = Htmltoword::Document.create_and_save(my_html, file_path)
|
34
|
+
```
|
35
|
+
|
36
|
+
Using your custom word file as a template, where you can setup your own style for normal text, h1,h2, etc.
|
37
|
+
```ruby
|
38
|
+
require 'htmltoword'
|
39
|
+
|
40
|
+
# Configure the location of your custom templates
|
41
|
+
Htmltoword.config.custom_templates_path = 'some_path'
|
42
|
+
|
43
|
+
my_html = '<html><head></head><body><p>Hello</p></body></html>'
|
44
|
+
document = Htmltoword::Document.create(my_html, word_template_file_name)
|
45
|
+
file = Htmltoword::Document.create_and_save(my_html, file_path, word_template_file_name)
|
46
|
+
```
|
47
|
+
|
48
|
+
The ```create``` function will return a string with the file, so you can do with it what you consider best.
|
49
|
+
The ```create_and_save``` function will create the file in the specified file_path.
|
50
|
+
|
51
|
+
### With Rails
|
52
|
+
**For htmltoword version >= 0.2**
|
53
|
+
An action controller renderer has been defined, so there's no need to declare the mime-type and you can just respond to .docx format. It will look then for views with the extension ```.docx.erb``` which will provide the HTML that will be rendered in the Word file.
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
# On your controller.
|
57
|
+
respond_to :docx
|
58
|
+
|
59
|
+
# filename and word_template are optional. By default it will name the file as your action and use the default template provided by the gem. The use of the .docx in the filename and word_template is optional.
|
60
|
+
def my_action
|
61
|
+
# ...
|
62
|
+
respond_with(@object, filename: 'my_file.docx', word_template: 'my_template.docx')
|
63
|
+
# Alternatively, if you don't want to create the .docx.erb template you could
|
64
|
+
respond_with(@object, content: '<html><head></head><body><p>Hello</p></body></html>', filename: 'my_file.docx')
|
65
|
+
end
|
66
|
+
|
67
|
+
def my_action2
|
68
|
+
# ...
|
69
|
+
respond_to do |format|
|
70
|
+
format.docx do
|
71
|
+
render docx: 'my_view', filename: 'my_file.docx'
|
72
|
+
# Alternatively, if you don't want to create the .docx.erb template you could
|
73
|
+
render docx: 'my_file.docx', content: '<html><head></head><body><p>Hello</p></body></html>'
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
```
|
78
|
+
|
79
|
+
Example of my_view.docx.erb
|
80
|
+
```
|
81
|
+
<h1> My custom template </h1>
|
82
|
+
<%= render partial: 'my_partial', collection: @objects, as: :item %>
|
83
|
+
```
|
84
|
+
Example of _my_partial.docx.erb
|
85
|
+
```
|
86
|
+
<h3><%= item.title %></h3>
|
87
|
+
<p> My html for item <%= item.id %> goes here </p>
|
88
|
+
```
|
89
|
+
|
90
|
+
**For htmltoword version <= 0.1.8**
|
91
|
+
```ruby
|
92
|
+
# Add mime-type in /config/initializers/mime_types.rb:
|
93
|
+
Mime::Type.register "application/vnd.openxmlformats-officedocument.wordprocessingml.document", :docx
|
94
|
+
|
95
|
+
# Add docx responder in your controller
|
96
|
+
def show
|
97
|
+
respond_to do |format|
|
98
|
+
format.docx do
|
99
|
+
file = Htmltoword::Document.create params[:docx_html_source], "file_name.docx"
|
100
|
+
send_file file.path, :disposition => "attachment"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
```
|
105
|
+
|
106
|
+
```javascript
|
107
|
+
// OPTIONAL: Use a jquery click handler to store the markup in a hidden form field before the form is submitted.
|
108
|
+
// Using this strategy makes it easy to allow users to dynamically edit the document that will be turned
|
109
|
+
// into a docx file, for example by toggling sections of a document.
|
110
|
+
$('#download-as-docx').on('click', function () {
|
111
|
+
$('input[name="docx_html_source"]').val('<!DOCTYPE html>\n' + $('.delivery').html());
|
112
|
+
});
|
113
|
+
```
|
114
|
+
|
115
|
+
### Configure templates and xslt paths
|
116
|
+
|
117
|
+
From version 2.0 you can configure the location of default and custom templates and xslt files. By default templates are defined under ```lib/htmltoword/templates``` and xslt under ```lib/htmltoword/xslt```
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
Htmltoword.configure do |config|
|
121
|
+
config.custom_templates_path = 'path_for_custom_templates'
|
122
|
+
# If you modify this path, there should be a 'default.docx' file in there
|
123
|
+
config.default_templates_path = 'path_for_default_template'
|
124
|
+
# If you modify this path, there should be a 'html_to_wordml.xslt' file in there
|
125
|
+
config.default_xslt_path = 'some_path'
|
126
|
+
# The use of additional custom xslt will come soon
|
127
|
+
config.custom_xslt_path = 'some_path'
|
128
|
+
end
|
129
|
+
```
|
130
|
+
|
131
|
+
## Features
|
132
|
+
|
133
|
+
All standard html elements are supported and will create the closest equivalent in wordml. For example spans will create inline elements and divs will create block like elements.
|
134
|
+
|
135
|
+
### Highlighting text
|
136
|
+
|
137
|
+
You can add highlighting to text by wrapping it in a span with class h and adding a data style with a color that wordml supports (http://www.schemacentral.com/sc/ooxml/t-w_ST_HighlightColor.html) ie:
|
138
|
+
|
139
|
+
```html
|
140
|
+
<span class="h" data-style="green">This text will have a green highlight</span>
|
141
|
+
```
|
142
|
+
|
143
|
+
### Page breaks
|
144
|
+
|
145
|
+
To create page breaks simply add a div with class -page-break ie:
|
146
|
+
|
147
|
+
```html
|
148
|
+
<div class="-page-break"></div>
|
149
|
+
```
|
150
|
+
|
151
|
+
### Images
|
152
|
+
Support for images is very basic and is only possible for external images (i.e. accessed via URL). If the image doesn't
|
153
|
+
have its width and height correctly defined, it won't be included in the document.
|
154
|
+
|
155
|
+
**Limitations:**
|
156
|
+
- Images are external i.e. pictures accessed via URL, not stored within document
|
157
|
+
- only sizing is customisable
|
158
|
+
|
159
|
+
Examples:
|
160
|
+
```html
|
161
|
+
<img src="http://placehold.it/250x100.png" style="width: 250px; height: 100px">
|
162
|
+
<img src="http://placehold.it/250x100.png" data-width="250px" data-height="100px">
|
163
|
+
<img src="http://placehold.it/250x100.png" data-height="150px" style="width:250px; height:100px">
|
164
|
+
```
|
165
|
+
|
166
|
+
## Contributing / Extending
|
167
|
+
|
168
|
+
Word docx files are essentially just a zipped collection of xml files and resources.
|
169
|
+
This gem contains a standard empty MS Word docx file and a stylesheet to transform arbitrary html into wordml.
|
170
|
+
The basic functioning of this gem can be summarised as:
|
171
|
+
|
172
|
+
1. Transform the input html to wordml.
|
173
|
+
2. Unzip empty word docx file bundled with gem and replace its document.xml content with the new transformed result of step 1.
|
174
|
+
3. Zip up contents again into a resulting .docx file.
|
175
|
+
|
176
|
+
For more info about WordML: http://rep.oio.dk/microsoft.com/officeschemas/wordprocessingml_article.htm
|
177
|
+
|
178
|
+
Contributions would be very much appreciated.
|
179
|
+
|
180
|
+
1. Fork it
|
181
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
182
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
183
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
184
|
+
5. Create new Pull Request
|
185
|
+
|
186
|
+
## License
|
187
|
+
|
188
|
+
(The MIT License)
|
189
|
+
|
190
|
+
Copyright © 2013:
|
191
|
+
|
192
|
+
* Cristina Matonte
|
193
|
+
|
194
|
+
* Nicholas Frandsen
|
data/Rakefile
ADDED
data/bin/htmltoword
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
require 'methadone'
require 'rmultimarkdown'
require_relative '../lib/htmltoword'

include Methadone::Main
include Methadone::CLILogging

# Command line entry point: reads the INPUT file, converts it to HTML first
# when --format markdown was given, and writes the resulting docx to OUTPUT.
main do |input, output|
  puts "Converting #{input} to #{output}" if options[:verbose]
  markup = File.read input
  markup = markdown2html(markup) if options[:format] == 'markdown'
  # options[:template] carries the file name passed to -t/--template
  # (nil when the option is absent, which selects the default template).
  Htmltoword::Document.create_and_save(markup, output, options[:template], options[:extras])
  puts "Done" if options[:verbose]
end

# Renders MultiMarkdown +text+ to an HTML string.
def markdown2html(text)
  MultiMarkdown.new(text.to_s).to_html
end

version Htmltoword::VERSION
description 'Convert simple html input (or markdown) to MS Word (docx)'
arg :input, :required
arg :output, :required

on('--verbose', '-v', 'Be verbose')
on('--extras', '-e', 'Use extra formatting features')
# The template option takes a value; declared like --format below so the
# given file name (not just true/false) ends up in options[:template].
on('-t TEMPLATE_NAME', '--template', 'Use custom word base template (.docx file)')
on('-f FORMAT', '--format', 'Format', /markdown|html/)

# options['ip-address'] = '127.0.0.1'
# on('-i IP_ADDRESS', '--ip-address', 'IP Address', /^\d+\.\d+\.\d+\.\d+$/)

go!
|
data/lib/htmltoword.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'zip'
|
4
|
+
require 'open-uri'
|
5
|
+
require_relative 'htmltoword/configuration'
|
6
|
+
|
7
|
+
module Htmltoword
  # Yields the shared configuration object so callers can set its
  # attributes inside a block.
  def self.configure
    yield(configuration)
  end

  # Returns the shared Configuration, building it on first access and
  # reusing the same instance afterwards.
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Htmltoword.config is a shorter name for Htmltoword.configuration.
  singleton_class.send(:alias_method, :config, :configuration)
end
|
20
|
+
|
21
|
+
require_relative 'htmltoword/version'
|
22
|
+
require_relative 'htmltoword/helpers/templates_helper'
|
23
|
+
require_relative 'htmltoword/helpers/xslt_helper'
|
24
|
+
require_relative 'htmltoword/document'
|
25
|
+
|
26
|
+
if defined?(Rails)
|
27
|
+
require_relative 'htmltoword/renderer'
|
28
|
+
require_relative 'htmltoword/railtie'
|
29
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Htmltoword
  # Holds the directories used to look up docx templates and xslt
  # stylesheets. Every attribute can be overridden by the caller.
  class Configuration
    attr_accessor :default_templates_path, :custom_templates_path, :default_xslt_path, :custom_xslt_path

    # Points all four paths at the 'templates' and 'xslt' directories that
    # sit next to this file; default and custom locations start out equal.
    def initialize
      base_dir = File.expand_path('../', __FILE__)

      @default_templates_path = File.join(base_dir, 'templates')
      @custom_templates_path  = File.join(base_dir, 'templates')
      @default_xslt_path      = File.join(base_dir, 'xslt')
      @custom_xslt_path       = File.join(base_dir, 'xslt')
    end
  end
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
module Htmltoword
  # Builds a docx file from HTML: the HTML is run through xslt stylesheets
  # and the results replace the matching entries of a template docx archive.
  class Document
    include XSLTHelper

    class << self
      include TemplatesHelper

      # Converts +content+ (an HTML string) and returns whatever
      # Document#generate produces: the docx file contents as a string.
      #
      # template_name - optional template file name; '.docx' is appended
      #                 when missing. Resolved through template_file.
      # extras        - forwarded to Document#replace_files.
      def create(content, template_name = nil, extras = false)
        template_name += extension if template_name && !template_name.end_with?(extension)
        document = new(template_file(template_name))
        document.replace_files(content, extras)
        document.generate
      end

      # Same as create, but writes the generated contents to +file_path+
      # (opened in binary mode) instead of returning them.
      def create_and_save(content, file_path, template_name = nil, extras = false)
        File.open(file_path, 'wb') do |out|
          out << create(content, template_name, extras)
        end
      end

      # Variant of create taking the template first. It performs exactly the
      # same steps, so it simply delegates.
      def create_with_content(template, content, extras = false)
        create(content, template, extras)
      end

      # File extension expected on template names.
      def extension
        '.docx'
      end

      # Archive entry holding the main document body.
      def doc_xml_file
        'word/document.xml'
      end

      # Archive entry holding list numbering definitions.
      def numbering_xml_file
        'word/numbering.xml'
      end

      # Archive entry holding the document relationships.
      def relations_xml_file
        'word/_rels/document.xml.rels'
      end

      # Archive entry listing the content types used in the package.
      def content_types_xml_file
        '[Content_Types].xml'
      end
    end

    # template_path - path of the docx archive that generate copies from.
    def initialize(template_path)
      @replaceable_files = {}
      @template_path = template_path
      @image_files = []
    end

    #
    # Generate a string representing the contents of a docx file.
    #
    # Every entry of the template archive is copied into a new in-memory
    # archive (with replacements applied, see entry_content), then each
    # collected image is added under word/media/.
    #
    def generate
      Zip::File.open(@template_path) do |template_zip|
        buffer = Zip::OutputStream.write_buffer do |out|
          template_zip.each do |entry|
            out.put_next_entry entry.name
            out.write(entry_content(template_zip, entry))
          end
          @image_files.each do |image|
            out.put_next_entry("word/media/#{image[:filename]}")
            out.write(read_image(image[:url]))
          end
        end
        buffer.string
      end
    end

    # Transforms +html+ and stores the results that generate will swap into
    # the template: numbering, relations and the document body. Also records
    # the images referenced by the HTML.
    #
    # A nil or empty +html+ is treated as an empty body.
    def replace_files(html, extras = false)
      html = '<body></body>' if html.nil? || html.empty?
      # Whitespace between tags is dropped before parsing.
      original_source = Nokogiri::HTML(html.gsub(/>\s+</, '><'))
      source = xslt(stylesheet_name: 'cleanup').transform(original_source)
      transform_and_replace(source, xslt_path('numbering'), Document.numbering_xml_file)
      transform_and_replace(source, xslt_path('relations'), Document.relations_xml_file)
      transform_doc_xml(source, extras)
      local_images(source)
    end

    # Runs +source+ through the 'cleanup' and 'inline_elements' stylesheets
    # and then through the document stylesheet selected by +extras+, storing
    # the result as the replacement for the document body entry.
    def transform_doc_xml(source, extras = false)
      transformed_source = xslt(stylesheet_name: 'cleanup').transform(source)
      transformed_source = xslt(stylesheet_name: 'inline_elements').transform(transformed_source)
      transform_and_replace(transformed_source, document_xslt(extras), Document.doc_xml_file, extras)
    end

    private

    # Returns the bytes to write for +entry+ of the template archive:
    # - the document body entry keeps the template's wrapper and only has
    #   its body replaced;
    # - any other entry with a stored replacement is replaced entirely;
    # - the content types entry gets image types injected when images exist;
    # - everything else is copied unchanged.
    def entry_content(template_zip, entry)
      replacement = @replaceable_files[entry.name]
      if replacement && entry.name == Document.doc_xml_file
        replace_body(entry.get_input_stream.read, replacement)
      elsif replacement
        replacement
      elsif entry.name == Document.content_types_xml_file
        raw_file = entry.get_input_stream.read
        @image_files.empty? ? raw_file : inject_image_content_types(raw_file)
      else
        template_zip.read(entry.name)
      end
    end

    # Change only the body of document. TODO: Improve this...
    #
    # Replaces everything between the first <w:body> and the following
    # <w:sectPr with +body+. The block form of sub is used on purpose: a
    # replacement *string* would have backslash sequences in the generated
    # content (e.g. "\1" or "\\") interpreted as back-references.
    def replace_body(source, body)
      source.sub(/(<w:body>).*?(<w:sectPr)/m) do
        "#{Regexp.last_match(1)}#{body}#{Regexp.last_match(2)}"
      end
    end

    # Reads the image at +location+ and returns its raw contents.
    #
    # Strings that look like "scheme://..." and parse to a URI that supports
    # open (open-uri) are fetched through that URI; anything else is read as
    # a local file. Kernel#open is deliberately avoided because it would run
    # a subprocess for a location starting with "|".
    def read_image(location)
      uri = URI.parse(location) if location =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
      return uri.open('rb', &:read) if uri && uri.respond_to?(:open)

      File.open(location, 'rb', &:read)
    end

    # Stores +source+ transformed by the stylesheet at +stylesheet_path+ as
    # the replacement content for archive entry +file+. With +remove_ns+
    # set, xmlns:prefix="..." declarations are stripped from the output.
    def transform_and_replace(source, stylesheet_path, file, remove_ns = false)
      stylesheet = xslt(stylesheet_path: stylesheet_path)
      content = stylesheet.apply_to(source)
      content.gsub!(/\s*xmlns:(\w+)="(.*?)\s*"/, '') if remove_ns
      @replaceable_files[file] = content
    end

    # generates an array of hashes with filename and full url
    # for all images to be embeded in the word document
    #
    # The extension comes from the data-filename attribute when present,
    # otherwise from the last path segment of src; files are named
    # image1, image2, ... in document order.
    def local_images(source)
      source.css('img').each_with_index do |image, i|
        filename = image['data-filename'] || image['src'].split('/').last
        ext = File.extname(filename).delete('.').downcase

        @image_files << { filename: "image#{i + 1}.#{ext}", url: image['src'], ext: ext }
      end
    end

    # get extension from filename and clean to match content_types
    def content_type_from_extension(ext)
      ext == 'jpg' ? 'jpeg' : ext
    end

    # inject the required content_types into the [content_types].xml file...
    #
    # Returns +source+ re-serialized with a <Default> element added for each
    # image extension that is not declared yet.
    def inject_image_content_types(source)
      doc = Nokogiri::XML(source)

      # get a list of all extensions currently in content_types file
      # (Default elements without an Extension attribute are ignored)
      existing_exts = doc.css('Default').map { |node| node['Extension'] }.compact

      # get a list of extensions we need for our images
      required_exts = @image_files.map { |i| i[:ext] }

      # workout which required extensions are missing from the content_types file
      missing_exts = (required_exts - existing_exts).uniq

      # inject missing extensions into document
      missing_exts.each do |ext|
        doc.at_css('Types').add_child("<Default Extension='#{ext}' ContentType='image/#{content_type_from_extension(ext)}'/>")
      end

      # return the amended source to be saved into the zip
      doc.to_s
    end
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
module Htmltoword
  module TemplatesHelper
    # Resolves the path of the docx template to use.
    #
    # template_file_name - file name looked up inside the configured custom
    #                      templates path; may be nil.
    #
    # Returns the custom template path when that file exists; otherwise the
    # 'default.docx' inside the configured default templates path.
    def template_file(template_file_name = nil)
      settings = ::Htmltoword.config
      fallback = File.join(settings.default_templates_path, 'default.docx')
      return fallback if template_file_name.nil?

      candidate = File.join(settings.custom_templates_path, template_file_name)
      return candidate if File.exist?(candidate)

      fallback
    end
  end
end
|