html2odt 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +3 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +21 -0
- data/README.md +163 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/html2odt.gemspec +29 -0
- data/lib/html2odt/document.rb +309 -0
- data/lib/html2odt/image.rb +82 -0
- data/lib/html2odt/version.rb +3 -0
- data/lib/html2odt.rb +18 -0
- data/odt/template.odt +0 -0
- data/xsl/LICENSE.txt +502 -0
- data/xsl/README.rst +144 -0
- data/xsl/document-content/block.xsl +169 -0
- data/xsl/document-content/html5.xsl +122 -0
- data/xsl/document-content/ignore.xsl +93 -0
- data/xsl/document-content/inline.xsl +167 -0
- data/xsl/document-content/link.xsl +127 -0
- data/xsl/document-content/lists.xsl +130 -0
- data/xsl/document-content/media.xsl +130 -0
- data/xsl/document-content/paragraph.xsl +151 -0
- data/xsl/document-content/section.xsl +80 -0
- data/xsl/document-content/tables.xsl +334 -0
- data/xsl/document-content.xsl +63 -0
- data/xsl/param.xsl +70 -0
- data/xsl/specific/elyxer.xsl +93 -0
- data/xsl/specific/geshi.xsl +94 -0
- data/xsl/specific/lyx.xsl +101 -0
- data/xsl/specific/pygments.xsl +170 -0
- data/xsl/specific/trac.xsl +61 -0
- data/xsl/specific.xsl +58 -0
- data/xsl/styles/automatic-styles.xsl +388 -0
- data/xsl/styles/fonts.xsl +62 -0
- data/xsl/styles/highlight.xsl +152 -0
- data/xsl/styles/inline.xsl +114 -0
- data/xsl/styles/main-styles.xsl +455 -0
- data/xsl/styles.xsl +143 -0
- data/xsl/xhtml2odt.xsl +116 -0
- metadata +185 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 94e165a5f9ab5a021c0e4b07c79c86ab5ef0213f
|
4
|
+
data.tar.gz: d4f2a69cb1db7d641cc05914d738758782b38d95
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4758a732062fc1483a70a619f1740f56b5d4cc53238b556cc27926d878214e9a5c1bd6cb928f1c44736e9e5adda7fcce01726deea08dcc62d70157c34b3644be
|
7
|
+
data.tar.gz: 075d0444c1b5e61066d87e461edfd3d97711dcb37c36ab277e72901142fd0b83db5a86bd46cd15e863bc328b052c73a4d7518ddadbbf39c6f052f5d45f887fbf
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
/pkg/
|
data/CHANGELOG.md
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, and in the interest of
|
4
|
+
fostering an open and welcoming community, we pledge to respect all people who
|
5
|
+
contribute through reporting issues, posting feature requests, updating
|
6
|
+
documentation, submitting pull requests or patches, and other activities.
|
7
|
+
|
8
|
+
We are committed to making participation in this project a harassment-free
|
9
|
+
experience for everyone, regardless of level of experience, gender, gender
|
10
|
+
identity and expression, sexual orientation, disability, personal appearance,
|
11
|
+
body size, race, ethnicity, age, religion, or nationality.
|
12
|
+
|
13
|
+
Examples of unacceptable behavior by participants include:
|
14
|
+
|
15
|
+
* The use of sexualized language or imagery
|
16
|
+
* Personal attacks
|
17
|
+
* Trolling or insulting/derogatory comments
|
18
|
+
* Public or private harassment
|
19
|
+
* Publishing others' private information, such as physical or electronic
|
20
|
+
addresses, without explicit permission
|
21
|
+
* Other unethical or unprofessional conduct
|
22
|
+
|
23
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
24
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
25
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
26
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
27
|
+
threatening, offensive, or harmful.
|
28
|
+
|
29
|
+
By adopting this Code of Conduct, project maintainers commit themselves to
|
30
|
+
fairly and consistently applying these principles to every aspect of managing
|
31
|
+
this project. Project maintainers who do not follow or enforce the Code of
|
32
|
+
Conduct may be permanently removed from the project team.
|
33
|
+
|
34
|
+
This code of conduct applies both within project spaces and in public spaces
|
35
|
+
when an individual is representing the project or its community.
|
36
|
+
|
37
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
38
|
+
reported by contacting a project maintainer at schmidt@nach-vorne.eu. All
|
39
|
+
complaints will be reviewed and investigated and will result in a response that
|
40
|
+
is deemed necessary and appropriate to the circumstances. Maintainers are
|
41
|
+
obligated to maintain confidentiality with regard to the reporter of an
|
42
|
+
incident.
|
43
|
+
|
44
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
45
|
+
version 1.3.0, available at
|
46
|
+
[http://contributor-covenant.org/version/1/3/0/][version]
|
47
|
+
|
48
|
+
[homepage]: http://contributor-covenant.org
|
49
|
+
[version]: http://contributor-covenant.org/version/1/3/0/
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
html2odt (0.1.0)
|
5
|
+
dimensions (~> 1.3.0)
|
6
|
+
nokogiri (~> 1.6.7.2)
|
7
|
+
rubyzip (~> 1.2.0)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
byebug (9.0.3)
|
13
|
+
dimensions (1.3.0)
|
14
|
+
mini_portile2 (2.0.0)
|
15
|
+
minitest (5.8.4)
|
16
|
+
nokogiri (1.6.7.2)
|
17
|
+
mini_portile2 (~> 2.0.0.rc2)
|
18
|
+
rake (10.5.0)
|
19
|
+
rubyzip (1.2.0)
|
20
|
+
|
21
|
+
PLATFORMS
|
22
|
+
ruby
|
23
|
+
|
24
|
+
DEPENDENCIES
|
25
|
+
bundler (~> 1.12)
|
26
|
+
byebug
|
27
|
+
html2odt!
|
28
|
+
minitest (~> 5.0)
|
29
|
+
rake (~> 10.0)
|
30
|
+
|
31
|
+
BUNDLED WITH
|
32
|
+
1.12.3
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2016 Gregor Schmidt - Planio GmbH, Berlin, Germany
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
# html2odt
|
2
|
+
|
3
|
+
This gem provides a Ruby wrapper around the set of XSLT stylesheets published as
|
4
|
+
[xhtml2odt](https://github.com/abompard/xhtml2odt).
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'html2odt'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install html2odt
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
### Basic usage
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
# Create an Html2Odt::Document instance
|
28
|
+
doc = Html2Odt::Document.new
|
29
|
+
|
30
|
+
# Set the input HTML
|
31
|
+
doc.html = <<HTML
|
32
|
+
<h1>Hello, World!</h1>
|
33
|
+
<p>It works.</p>
|
34
|
+
HTML
|
35
|
+
|
36
|
+
# Set author and title
|
37
|
+
doc.author = "Jane Doe"
|
38
|
+
doc.title = "Example Document"
|
39
|
+
|
40
|
+
|
41
|
+
# Write ODT to disk
|
42
|
+
doc.write_to "demo.odt"
|
43
|
+
|
44
|
+
# Or get binary content as string
|
45
|
+
doc.data
|
46
|
+
```
|
47
|
+
|
48
|
+
### Configuration options
|
49
|
+
|
50
|
+
`html2odt` comes with a basic `template.odt`, which serves as boilerplate to create
|
51
|
+
the desired ODT file. If you like to provide your own styles or additional
|
52
|
+
content next to the content added via the API, you may provide your own template
|
53
|
+
in the `Html2Odt::Document` constructor.
|
54
|
+
|
55
|
+
*Please note:* If the template file cannot be read or if it does not appear to
|
56
|
+
be a valid ODT file, an `ArgumentError` will be raised.
|
57
|
+
|
58
|
+
The template needs to contain an otherwise empty paragraph containing the string
|
59
|
+
`{{content}}`.
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
# Provide optional template file
|
63
|
+
doc = Html2Odt::Document.new(template: "template.odt")
|
64
|
+
```
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
The HTML which should become part of the document may also be provided via the
|
70
|
+
constructor
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
# Provide HTML in constructor
|
74
|
+
doc = Html2Odt::Document.new(html: <<HTML)
|
75
|
+
<h1>Hello, World!</h1>
|
76
|
+
<p>It works.</p>
|
77
|
+
HTML
|
78
|
+
```
|
79
|
+
|
80
|
+
### Image handling
|
81
|
+
|
82
|
+
`html2odt` provides basic image inlining, i.e. images referenced in the HTML
|
83
|
+
code will be embedded into the ODT file by default. This is true for images
|
84
|
+
referenced with a full `file://`, `http://`, or `https://` URL. Absolute URLs
|
85
|
+
(i.e. starting `/`) and relative URLs are not supported, since `html2odt` has no
|
86
|
+
idea which server or document they relate to.
|
87
|
+
|
88
|
+
Images referencing an unsupported resource will be replaced with a link
|
89
|
+
containing the alt text of the image.
|
90
|
+
|
91
|
+
If you are using `html2odt` in a web application context, you will probably want
|
92
|
+
to provide some special handling for resources residing on your own server. This
|
93
|
+
should be done for security reasons or to save roundtrips.
|
94
|
+
|
95
|
+
`html2odt` provides the following API to map image `src` attributes to local
|
96
|
+
file locations.
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
# Provide custom mapping for image locations
|
100
|
+
doc = Html2Odt::Document.new
|
101
|
+
|
102
|
+
doc.image_location_mapping = lambda do |src|
|
103
|
+
# Attention! Add protection against directory traversal attacks
|
104
|
+
"/var/www/mywebsite/#{src}"
|
105
|
+
end
|
106
|
+
```
|
107
|
+
|
108
|
+
Registering an `image_location_mapping` callback will deactivate the default
|
109
|
+
behaviour of including images with `file` and `http` URLs automatically.
|
110
|
+
|
111
|
+
|
112
|
+
## License
|
113
|
+
|
114
|
+
Files within the `xsl` directory belong to the [xhtml2odt
|
115
|
+
project](https://github.com/abompard/xhtml2odt) published by Aurelien Bompard
|
116
|
+
(2009-2010) under the terms of the GNU LGPL v2.1 or later:
|
117
|
+
http://www.gnu.org/licenses/lgpl-2.1.html
|
118
|
+
|
119
|
+
The remaining files are licensed under the terms of the MIT license.
|
120
|
+
|
121
|
+
```
|
122
|
+
Copyright (c) 2016 Gregor Schmidt - Planio GmbH, Berlin, Germany
|
123
|
+
|
124
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
125
|
+
of this software and associated documentation files (the "Software"), to deal
|
126
|
+
in the Software without restriction, including without limitation the rights
|
127
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
128
|
+
copies of the Software, and to permit persons to whom the Software is
|
129
|
+
furnished to do so, subject to the following conditions:
|
130
|
+
|
131
|
+
The above copyright notice and this permission notice shall be included in all
|
132
|
+
copies or substantial portions of the Software.
|
133
|
+
|
134
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
135
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
136
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
137
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
138
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
139
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
140
|
+
SOFTWARE.
|
141
|
+
```
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
## Development
|
146
|
+
|
147
|
+
After checking out the repo, run `bundle install` to install dependencies. Then,
|
148
|
+
run `rake test` to run the tests. You can also run `bin/console` for an
|
149
|
+
interactive prompt that will allow you to experiment.
|
150
|
+
|
151
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To
|
152
|
+
release a new version, update the version number in `version.rb`, and then run
|
153
|
+
`bundle exec rake release`, which will create a git tag for the version, push
|
154
|
+
git commits and tags, and push the `.gem` file to
|
155
|
+
[rubygems.org](https://rubygems.org).
|
156
|
+
|
157
|
+
## Contributing
|
158
|
+
|
159
|
+
Bug reports and pull requests are welcome on GitHub at
|
160
|
+
https://github.com/planio-gmbh/html2odt. This project is intended to be a safe,
|
161
|
+
welcoming space for collaboration, and contributors are expected to adhere to
|
162
|
+
the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
163
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the gem through Bundler and starts an IRB session.

require "bundler/setup"
require "html2odt"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start
|
data/html2odt.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8

# Make lib/ loadable so the version constant can be read without installing
# the gem first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'html2odt/version'

Gem::Specification.new do |spec|
  spec.name          = "html2odt"
  spec.version       = Html2Odt::VERSION
  spec.authors       = ["Gregor Schmidt (Planio)"]
  spec.email         = ["gregor@plan.io", "support@plan.io"]

  spec.summary       = %q{html2odt generates ODT documents based on HTML fragments}
  spec.description   = %q{html2odt generates ODT documents based on HTML fragments using xhtml2odt}
  spec.homepage      = "https://github.com/planio-gmbh/html2odt"

  spec.license       = 'MIT'

  # Package every file tracked by git except the test suites.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "dimensions", "~> 1.3.0"
  spec.add_dependency "nokogiri",   "~> 1.6.7.2"
  spec.add_dependency "rubyzip",    "~> 1.2.0"

  # Development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "byebug"
end
|
@@ -0,0 +1,309 @@
|
|
1
|
+
class Html2Odt::Document
|
2
|
+
# Matches the placeholder paragraph (`<text:p ...>{{content}}</text:p>`) in
# the template's content.xml. The braces are escaped so they are
# unambiguously literal characters.
CONTENT_REGEX = /<text:p[^>]*>\{\{content\}\}<\/text:p>/

# Centimetres per inch; used when converting pixel sizes to ODT lengths.
INCH_TO_CM = 2.54

# The following value was determined by comparing the generated result with
# an image dropped into LibreOffice interactively. Though this might be
# related to the fact, that my screen has a native resolution of 114 dpi.
#
# xhtml2odt uses 96 by default.
DPI = 114.0
|
11
|
+
|
12
|
+
attr_accessor :image_location_mapping
|
13
|
+
|
14
|
+
# Document meta data
|
15
|
+
attr_accessor :author, :title
|
16
|
+
|
17
|
+
# Creates a document based on an ODT template.
#
# template - path of the ODT template file (defaults to
#            Html2Odt::ODT_TEMPLATE)
# html     - optional HTML fragment to convert; may also be set later via
#            #html=
#
# The template is opened immediately via read_xmls, which raises
# ArgumentError when the file cannot be used.
def initialize(template: Html2Odt::ODT_TEMPLATE, html: nil)
  @html     = html
  @template = template

  read_xmls
end
|
23
|
+
|
24
|
+
# Sets the HTML fragment to convert. Cached results derived from the
# previous HTML are discarded first (see reset).
def html=(html)
  reset
  @html = html
end
|
28
|
+
|
29
|
+
# Returns the HTML fragment currently assigned to the document (nil if none
# has been set).
def html
  @html
end
|
32
|
+
|
33
|
+
# Returns the content.xml for the resulting ODT file (memoized).
#
# The prepared HTML is transformed with the xhtml2odt stylesheet, inserted
# into the template in place of the `{{content}}` paragraph, and the merged
# document is then run through the styles stylesheet.
def content_xml
  @content_xml ||= begin
    body = xslt_tranform(prepare_html, Html2Odt::XHTML2ODT_XSL)

    # The fragment is embedded into another document, so drop its XML
    # declaration.
    body = body.sub('<?xml version="1.0" encoding="utf-8"?>', '')

    # Block form on purpose: with a String replacement, `sub` would interpret
    # sequences such as \0, \1 or \\ inside the converted content as
    # back-references and corrupt text containing backslashes.
    merged = @tpl_content_xml.sub(CONTENT_REGEX) { body }

    xslt_tranform(merged, Html2Odt::XHTML2ODT_STYLES_XSL)
  end
end
|
48
|
+
|
49
|
+
# Returns the styles.xml for the resulting ODT file (memoized): the
# template's styles.xml run through the styles stylesheet.
def styles_xml
  @styles_xml ||= xslt_tranform(@tpl_styles_xml, Html2Odt::XHTML2ODT_STYLES_XSL)
end
|
52
|
+
|
53
|
+
# Returns META-INF/manifest.xml for the resulting ODT file (memoized).
#
# When the HTML contained no embeddable images the template's manifest is
# returned unchanged; otherwise one `manifest:file-entry` is appended per
# image.
def manifest_xml
  @manifest_xml ||= begin
    content_xml # trigger HTML parsing

    if @images.nil? || @images.empty?
      @tpl_manifest_xml
    else
      doc = Nokogiri::XML(@tpl_manifest_xml)

      @images.each do |image|
        entry = create_node(doc, "manifest:file-entry")
        entry["manifest:full-path"]  = image.target
        entry["manifest:media-type"] = image.mime_type

        doc.root.add_child entry
      end

      doc.to_xml
    end
  end
end
|
74
|
+
|
75
|
+
# Returns the meta.xml for the resulting ODT file (memoized).
#
# Existing generator, date, editing and author/title elements of the
# template are removed and replaced with fresh values. Author and title
# elements are only written when the corresponding attribute is set.
def meta_xml
  @meta_xml ||= begin
    doc  = Nokogiri::XML(@tpl_meta_xml)
    meta = doc.at_xpath("office:document-meta/office:meta")

    # Use one timestamp for both date fields so that they can never disagree.
    now = Time.now.utc.iso8601

    # Element name => new content; a nil/false value only removes the element.
    fields = [
      ["meta:generator",        "html2odt.rb/#{Html2Odt::VERSION}"],
      ["meta:creation-date",    now],
      ["dc:date",               now],
      ["meta:editing-duration", "P0D"],
      ["meta:editing-cycles",   "1"],
      ["meta:initial-creator",  author],
      ["dc:creator",            author],
      ["dc:title",              title]
    ]

    fields.each do |name, value|
      meta.xpath(name).remove
      meta.add_child create_node(doc, name, value) if value
    end

    doc.to_xml
  end
end
|
108
|
+
|
109
|
+
# Returns the binary content of the resulting ODT file as a String
# (memoized).
#
# All non-directory entries of the template are copied into a new ZIP
# archive; content.xml, meta.xml, styles.xml and META-INF/manifest.xml are
# replaced by their generated counterparts. Images collected while parsing
# the HTML are appended afterwards.
def data
  @data ||= begin
    buffer = Zip::OutputStream.write_buffer do |output_stream|
      # Copy contents from template, while replacing the generated files
      Zip::File.open(@template) do |file|
        file.each do |entry|
          next if entry.directory?

          entry.get_input_stream do |input_stream|
            payload = case entry.name
                      when "content.xml"           then content_xml
                      when "meta.xml"              then meta_xml
                      when "styles.xml"            then styles_xml
                      when "META-INF/manifest.xml" then manifest_xml
                      else input_stream.sysread
                      end

            output_stream.put_next_entry(entry.name)
            output_stream.write payload
          end
        end
      end

      # Adding images found in the HTML sources. Images are binary data, so
      # read them in binary mode to avoid newline/encoding translation.
      (@images || []).each do |image|
        output_stream.put_next_entry(image.target)
        output_stream.write File.binread(image.source)
      end
    end

    buffer.string
  end
end
|
148
|
+
|
149
|
+
# Writes the resulting ODT file to +path+ and returns the number of bytes
# written.
#
# The archive is binary data, so it is written in binary mode; text mode
# could apply newline or encoding conversion and corrupt the file.
def write_to(path)
  File.binwrite(path, data)
end
|
152
|
+
|
153
|
+
protected
|
154
|
+
|
155
|
+
|
156
|
+
# Loads content.xml, META-INF/manifest.xml, meta.xml and styles.xml from the
# template into instance variables.
#
# Raises ArgumentError when the template is not readable, is not a valid ZIP
# archive, lacks one of the expected files, or when its content.xml has no
# `{{content}}` paragraph.
def read_xmls
  unless File.readable?(@template)
    raise ArgumentError, "Cannot read template file #{@template.inspect}"
  end

  Zip::File.open(@template) do |zip_file|
    @tpl_content_xml  = zip_file.read("content.xml")
    @tpl_manifest_xml = zip_file.read("META-INF/manifest.xml")
    @tpl_meta_xml     = zip_file.read("meta.xml")
    @tpl_styles_xml   = zip_file.read("styles.xml")
  end

  unless @tpl_content_xml =~ CONTENT_REGEX
    raise ArgumentError, "Template file does not contain `{{content}}` paragraph"
  end
rescue Zip::Error => e
  raise ArgumentError, "Template file does not look like a ODT file - #{e.message}"
rescue Errno::ENOENT => e
  raise ArgumentError, "Template file does not contain expected file - #{e.message}"
end
|
177
|
+
|
178
|
+
|
179
|
+
# Returns the document's HTML prepared for the XSLT transformation: images
# are processed first (fix_images_in_html), then the fragment is wrapped
# into an XHTML root element (create_document).
def prepare_html
  create_document(fix_images_in_html(html))
end
|
185
|
+
|
186
|
+
|
187
|
+
# Wraps an HTML fragment into an `<html>` root element in the XHTML
# namespace and returns the resulting String.
def create_document(html)
  %Q{<html xmlns="http://www.w3.org/1999/xhtml">#{html}</html>}
end
|
190
|
+
|
191
|
+
# Processes all `img` elements of the HTML fragment and returns the
# resulting markup serialized as XML.
#
# For every image an Html2Odt::Image is created from the location returned
# by file_path_for. Valid images get their tag rewritten (update_img_tag)
# and are collected in @images. Other images are replaced with a link
# carrying the alt text, or removed when there is no alt text.
def fix_images_in_html(html)
  doc = Nokogiri::HTML::DocumentFragment.parse(html)

  @images = []
  doc.css("img").each_with_index do |img, index|
    image = Html2Odt::Image.new(index)
    image.source = file_path_for(img["src"])

    if image.valid?
      update_img_tag(img, image)
      @images << image
      next
    end

    # Replace img with link if alt tag is present
    alt = img["alt"]

    if alt.nil? || alt.empty?
      img.remove
    else
      link = create_node(doc, "a")
      # An img without src has nothing to link to; skip the attribute rather
      # than assigning nil to it.
      link["href"] = img["src"] if img["src"]
      link.content = alt

      img.replace(link)
    end
  end

  doc.to_xml
end
|
221
|
+
|
222
|
+
# Maps the `src` attribute of an image to a local file path.
#
# When an image_location_mapping callback is registered, its result is
# returned and no default handling takes place. Otherwise `file://` URLs
# are turned into their path and `http(s)://` URLs are downloaded into a
# temporary file. Returns nil for anything else.
def file_path_for(src)
  return image_location_mapping.call(src) if image_location_mapping

  case src
  when /\Afile:\/\//
    # local file URL
    #
    # TODO: Verify, that this does not pose a security threat, maybe make
    # this optional. In any case, it's useful for testing.

    src[7..-1]

  when /\Ahttps?:\/\//
    # remote image URL
    #
    # TODO: Verify, that this does not pose a security threat, maybe make
    # this optional.

    uri = URI.parse(src)

    file = Tempfile.new("html2odt")
    file.binmode

    # Keep a reference to the Tempfile: once the object is garbage collected
    # the underlying file is deleted, but only its path is handed out here
    # and the file is read later.
    (@tempfiles ||= []) << file

    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
      # request_uri includes the query string and yields "/" for URLs
      # without a path, both of which uri.path gets wrong.
      resp = http.get(uri.request_uri)

      file.write(resp.body)
      file.flush
    end

    file.path
  else
    # cannot handle image properly, return nil
    nil
  end
end
|
260
|
+
|
261
|
+
# Rewrites an `img` element so that it references the embedded image and
# carries its size in centimetres.
#
# img   - the element; read and written through `[]` / `[]=`
# image - the embedded image; provides target, width and height
#
# Pixel sizes are taken from the width/height attributes when given. If
# only one of them is present, the other is derived from the image's aspect
# ratio; if none is present, the image's own size is used.
def update_img_tag(img, image)
  img["src"] = image.target

  if img["width"] && img["height"]
    # use values supplied in HTML
    width  = img["width"].to_i
    height = img["height"].to_i
  elsif img["width"]
    # compute height based on width keeping aspect ratio
    # (height / width == image.height / image.width)
    width  = img["width"].to_i
    height = width * image.height.to_f / image.width
  elsif img["height"]
    # compute width based on height keeping aspect ratio
    height = img["height"].to_i
    width  = height * image.width.to_f / image.height
  else
    width  = image.width
    height = image.height
  end

  # pixels -> inches -> centimetres
  img["width"]  = "#{(width / DPI * INCH_TO_CM).round(2)}cm"
  img["height"] = "#{(height / DPI * INCH_TO_CM).round(2)}cm"
end
|
284
|
+
|
285
|
+
|
286
|
+
# Applies an XSLT stylesheet to an XML document.
#
# xml - the XML source as String
# xsl - path of the stylesheet file
#
# Returns the transformation result as String.
def xslt_tranform(xml, xsl)
  stylesheet = File.open(xsl) { |file| Nokogiri::XSLT(file) }
  document   = Nokogiri::XML(xml)

  # raises RuntimeError or Nokogiri::XML::SyntaxError if something goes wrong
  stylesheet.transform(document).to_s
end
|
296
|
+
|
297
|
+
# Discards the cached content.xml, manifest, archive data and image list so
# that they are rebuilt on next access.
def reset
  @content_xml  = nil
  @manifest_xml = nil
  @data         = nil
  @images       = nil
end
|
303
|
+
|
304
|
+
# Builds a new Nokogiri::XML::Node named +tagname+ for +doc+. The node's
# content is set when +content+ is not nil. The node is returned without
# being inserted anywhere.
def create_node(doc, tagname, content = nil)
  node = Nokogiri::XML::Node.new(tagname, doc)
  node.content = content unless content.nil?
  node
end
|
309
|
+
end
|