html2odt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +3 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +21 -0
- data/README.md +163 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/html2odt.gemspec +29 -0
- data/lib/html2odt/document.rb +309 -0
- data/lib/html2odt/image.rb +82 -0
- data/lib/html2odt/version.rb +3 -0
- data/lib/html2odt.rb +18 -0
- data/odt/template.odt +0 -0
- data/xsl/LICENSE.txt +502 -0
- data/xsl/README.rst +144 -0
- data/xsl/document-content/block.xsl +169 -0
- data/xsl/document-content/html5.xsl +122 -0
- data/xsl/document-content/ignore.xsl +93 -0
- data/xsl/document-content/inline.xsl +167 -0
- data/xsl/document-content/link.xsl +127 -0
- data/xsl/document-content/lists.xsl +130 -0
- data/xsl/document-content/media.xsl +130 -0
- data/xsl/document-content/paragraph.xsl +151 -0
- data/xsl/document-content/section.xsl +80 -0
- data/xsl/document-content/tables.xsl +334 -0
- data/xsl/document-content.xsl +63 -0
- data/xsl/param.xsl +70 -0
- data/xsl/specific/elyxer.xsl +93 -0
- data/xsl/specific/geshi.xsl +94 -0
- data/xsl/specific/lyx.xsl +101 -0
- data/xsl/specific/pygments.xsl +170 -0
- data/xsl/specific/trac.xsl +61 -0
- data/xsl/specific.xsl +58 -0
- data/xsl/styles/automatic-styles.xsl +388 -0
- data/xsl/styles/fonts.xsl +62 -0
- data/xsl/styles/highlight.xsl +152 -0
- data/xsl/styles/inline.xsl +114 -0
- data/xsl/styles/main-styles.xsl +455 -0
- data/xsl/styles.xsl +143 -0
- data/xsl/xhtml2odt.xsl +116 -0
- metadata +185 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 94e165a5f9ab5a021c0e4b07c79c86ab5ef0213f
|
4
|
+
data.tar.gz: d4f2a69cb1db7d641cc05914d738758782b38d95
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4758a732062fc1483a70a619f1740f56b5d4cc53238b556cc27926d878214e9a5c1bd6cb928f1c44736e9e5adda7fcce01726deea08dcc62d70157c34b3644be
|
7
|
+
data.tar.gz: 075d0444c1b5e61066d87e461edfd3d97711dcb37c36ab277e72901142fd0b83db5a86bd46cd15e863bc328b052c73a4d7518ddadbbf39c6f052f5d45f887fbf
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
/pkg/
|
data/CHANGELOG.md
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, and in the interest of
|
4
|
+
fostering an open and welcoming community, we pledge to respect all people who
|
5
|
+
contribute through reporting issues, posting feature requests, updating
|
6
|
+
documentation, submitting pull requests or patches, and other activities.
|
7
|
+
|
8
|
+
We are committed to making participation in this project a harassment-free
|
9
|
+
experience for everyone, regardless of level of experience, gender, gender
|
10
|
+
identity and expression, sexual orientation, disability, personal appearance,
|
11
|
+
body size, race, ethnicity, age, religion, or nationality.
|
12
|
+
|
13
|
+
Examples of unacceptable behavior by participants include:
|
14
|
+
|
15
|
+
* The use of sexualized language or imagery
|
16
|
+
* Personal attacks
|
17
|
+
* Trolling or insulting/derogatory comments
|
18
|
+
* Public or private harassment
|
19
|
+
* Publishing other's private information, such as physical or electronic
|
20
|
+
addresses, without explicit permission
|
21
|
+
* Other unethical or unprofessional conduct
|
22
|
+
|
23
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
24
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
25
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
26
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
27
|
+
threatening, offensive, or harmful.
|
28
|
+
|
29
|
+
By adopting this Code of Conduct, project maintainers commit themselves to
|
30
|
+
fairly and consistently applying these principles to every aspect of managing
|
31
|
+
this project. Project maintainers who do not follow or enforce the Code of
|
32
|
+
Conduct may be permanently removed from the project team.
|
33
|
+
|
34
|
+
This code of conduct applies both within project spaces and in public spaces
|
35
|
+
when an individual is representing the project or its community.
|
36
|
+
|
37
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
38
|
+
reported by contacting a project maintainer at schmidt@nach-vorne.eu. All
|
39
|
+
complaints will be reviewed and investigated and will result in a response that
|
40
|
+
is deemed necessary and appropriate to the circumstances. Maintainers are
|
41
|
+
obligated to maintain confidentiality with regard to the reporter of an
|
42
|
+
incident.
|
43
|
+
|
44
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
45
|
+
version 1.3.0, available at
|
46
|
+
[http://contributor-covenant.org/version/1/3/0/][version]
|
47
|
+
|
48
|
+
[homepage]: http://contributor-covenant.org
|
49
|
+
[version]: http://contributor-covenant.org/version/1/3/0/
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
html2odt (0.1.0)
|
5
|
+
dimensions (~> 1.3.0)
|
6
|
+
nokogiri (~> 1.6.7.2)
|
7
|
+
rubyzip (~> 1.2.0)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
byebug (9.0.3)
|
13
|
+
dimensions (1.3.0)
|
14
|
+
mini_portile2 (2.0.0)
|
15
|
+
minitest (5.8.4)
|
16
|
+
nokogiri (1.6.7.2)
|
17
|
+
mini_portile2 (~> 2.0.0.rc2)
|
18
|
+
rake (10.5.0)
|
19
|
+
rubyzip (1.2.0)
|
20
|
+
|
21
|
+
PLATFORMS
|
22
|
+
ruby
|
23
|
+
|
24
|
+
DEPENDENCIES
|
25
|
+
bundler (~> 1.12)
|
26
|
+
byebug
|
27
|
+
html2odt!
|
28
|
+
minitest (~> 5.0)
|
29
|
+
rake (~> 10.0)
|
30
|
+
|
31
|
+
BUNDLED WITH
|
32
|
+
1.12.3
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2016 Gregor Schmidt - Planio GmbH, Berlin, Germany
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
# html2odt
|
2
|
+
|
3
|
+
This gem provides a Ruby wrapper around the set of XSLT stylesheets published as
|
4
|
+
[xhtml2odt](https://github.com/abompard/xhtml2odt).
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'html2odt'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install html2odt
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
### Basic usage
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
# Create an Html2Odt::Document instance
|
28
|
+
doc = Html2Odt::Document.new
|
29
|
+
|
30
|
+
# Set the input HTML
|
31
|
+
doc.html = <<HTML
|
32
|
+
<h1>Hello, World!</h1>
|
33
|
+
<p>It works.</p>
|
34
|
+
HTML
|
35
|
+
|
36
|
+
# Set author and title
|
37
|
+
doc.author = "Jane Doe"
|
38
|
+
doc.title = "Example Document"
|
39
|
+
|
40
|
+
|
41
|
+
# Write ODT to disk
|
42
|
+
doc.write_to "demo.odt"
|
43
|
+
|
44
|
+
# Or get binary content as string
|
45
|
+
doc.data
|
46
|
+
```
|
47
|
+
|
48
|
+
### Configuration options
|
49
|
+
|
50
|
+
`html2odt` comes with a basic `template.odt`, which is used as a boilerplate to create
|
51
|
+
the desired ODT file. If you like to provide your own styles or additional
|
52
|
+
content next to the content added via the API, you may provide your own template
|
53
|
+
in the `Html2Odt::Document` constructor.
|
54
|
+
|
55
|
+
*Please note:* If the template file cannot be read or if it does not appear to
|
56
|
+
be a valid ODT file, an `ArgumentError` will be raised.
|
57
|
+
|
58
|
+
The template needs to contain an otherwise empty paragraph containing the string
|
59
|
+
`{{content}}`.
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
# Provide optional template file
|
63
|
+
doc = Html2Odt::Document.new(template: "template.odt")
|
64
|
+
```
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
The HTML which should become part of the document may also be provided via the
|
70
|
+
constructor
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
# Provide HTML in constructor
|
74
|
+
doc = Html2Odt::Document.new(html: <<HTML)
|
75
|
+
<h1>Hello, World!</h1>
|
76
|
+
<p>It works.</p>
|
77
|
+
HTML
|
78
|
+
```
|
79
|
+
|
80
|
+
### Image handling
|
81
|
+
|
82
|
+
`html2odt` provides basic image inlining, i.e. images referenced in the HTML
|
83
|
+
code will be embedded into the ODT file by default. This is true for images
|
84
|
+
referenced with a full `file://`, `http://`, or `https://` URL. Absolute URLs
|
85
|
+
(i.e. starting with `/`) and relative URLs are not supported, since `html2odt` has no
|
86
|
+
idea, which server or document they are relating to.
|
87
|
+
|
88
|
+
Images referencing an unsupported resource will be replaced with a link
|
89
|
+
containing the alt text of the image.
|
90
|
+
|
91
|
+
If you are using `html2odt` in a web application context, you will probably want
|
92
|
+
to provide some special handling for resources residing on your own server. This
|
93
|
+
should be done for security reasons or to save roundtrips.
|
94
|
+
|
95
|
+
`html2odt` provides the following API to map image `src` attributes to local
|
96
|
+
file locations.
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
# Provide custom mapping for image locations
|
100
|
+
doc = Html2Odt::Document.new
|
101
|
+
|
102
|
+
doc.image_location_mapping = lambda do |src|
|
103
|
+
# Attention! Add protection against directory traversal attacks
|
104
|
+
"/var/www/mywebsite/#{src}"
|
105
|
+
end
|
106
|
+
```
|
107
|
+
|
108
|
+
Registering an `image_location_mapping` callback will deactivate the default
|
109
|
+
behaviour of including images with `file` and `http` URLs automatically.
|
110
|
+
|
111
|
+
|
112
|
+
## License
|
113
|
+
|
114
|
+
Files within the `xsl` directory belong to the [xhtml2odt
|
115
|
+
project](https://github.com/abompard/xhtml2odt) published by Aurelien Bompard
|
116
|
+
(2009-2010) under the terms of the GNU LGPL v2.1 or later:
|
117
|
+
http://www.gnu.org/licenses/lgpl-2.1.html
|
118
|
+
|
119
|
+
The remaining files are licensed under the terms of the MIT license.
|
120
|
+
|
121
|
+
```
|
122
|
+
Copyright (c) 2016 Gregor Schmidt - Planio GmbH, Berlin, Germany
|
123
|
+
|
124
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
125
|
+
of this software and associated documentation files (the "Software"), to deal
|
126
|
+
in the Software without restriction, including without limitation the rights
|
127
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
128
|
+
copies of the Software, and to permit persons to whom the Software is
|
129
|
+
furnished to do so, subject to the following conditions:
|
130
|
+
|
131
|
+
The above copyright notice and this permission notice shall be included in all
|
132
|
+
copies or substantial portions of the Software.
|
133
|
+
|
134
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
135
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
136
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
137
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
138
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
139
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
140
|
+
SOFTWARE.
|
141
|
+
```
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
## Development
|
146
|
+
|
147
|
+
After checking out the repo, run `bundle install` to install dependencies. Then,
|
148
|
+
run `rake test` to run the tests. You can also run `bin/console` for an
|
149
|
+
interactive prompt that will allow you to experiment.
|
150
|
+
|
151
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To
|
152
|
+
release a new version, update the version number in `version.rb`, and then run
|
153
|
+
`bundle exec rake release`, which will create a git tag for the version, push
|
154
|
+
git commits and tags, and push the `.gem` file to
|
155
|
+
[rubygems.org](https://rubygems.org).
|
156
|
+
|
157
|
+
## Contributing
|
158
|
+
|
159
|
+
Bug reports and pull requests are welcome on GitHub at
|
160
|
+
https://github.com/planio-gmbh/html2odt. This project is intended to be a safe,
|
161
|
+
welcoming space for collaboration, and contributors are expected to adhere to
|
162
|
+
the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
163
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "html2odt"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/html2odt.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'html2odt/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "html2odt"
|
8
|
+
spec.version = Html2Odt::VERSION
|
9
|
+
spec.authors = ["Gregor Schmidt (Planio)"]
|
10
|
+
spec.email = ["gregor@plan.io", "support@plan.io"]
|
11
|
+
|
12
|
+
spec.summary = %q{html2odt generates ODT documents based on HTML fragments}
|
13
|
+
spec.description = %q{html2odt generates ODT documents based on HTML fragments using xhtml2odt}
|
14
|
+
spec.homepage = "https://github.com/planio-gmbh/html2odt"
|
15
|
+
|
16
|
+
spec.license = 'MIT'
|
17
|
+
|
18
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_dependency "dimensions", "~> 1.3.0"
|
22
|
+
spec.add_dependency "nokogiri", "~> 1.6.7.2"
|
23
|
+
spec.add_dependency "rubyzip", "~> 1.2.0"
|
24
|
+
|
25
|
+
spec.add_development_dependency "bundler", "~> 1.12"
|
26
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
27
|
+
spec.add_development_dependency "minitest", "~> 5.0"
|
28
|
+
spec.add_development_dependency "byebug"
|
29
|
+
end
|
@@ -0,0 +1,309 @@
|
|
1
|
+
class Html2Odt::Document
|
2
|
+
CONTENT_REGEX = /<text:p[^>]*>{{content}}<\/text:p>/
|
3
|
+
INCH_TO_CM = 2.54
|
4
|
+
|
5
|
+
# The following value was determined by comparing the generated result with
|
6
|
+
# an image dropped into LibreOffice interactively. Though this might be
|
7
|
+
# related to the fact, that my screen has a native resolution of 114 dpi.
|
8
|
+
#
|
9
|
+
# xhtml2odt uses 96 by default.
|
10
|
+
DPI = 114.0
|
11
|
+
|
12
|
+
attr_accessor :image_location_mapping
|
13
|
+
|
14
|
+
# Document meta data
|
15
|
+
attr_accessor :author, :title
|
16
|
+
|
17
|
+
# Builds a document from an ODT template and an optional HTML fragment.
#
# template - path of the ODT file used as boilerplate; defaults to
#            Html2Odt::ODT_TEMPLATE.
# html     - HTML fragment to convert; it can also be assigned later
#            through #html=.
#
# The template is opened immediately by read_xmls, which raises
# ArgumentError when the file is unreadable, is not a usable ODT archive or
# lacks the `{{content}}` paragraph.
def initialize(template: Html2Odt::ODT_TEMPLATE, html: nil)
  @template = template
  @html = html

  read_xmls
end
|
23
|
+
|
24
|
+
# Assigns the HTML fragment that will be converted.
#
# Everything cached from a previous conversion is dropped (see reset) before
# the new markup is stored, so later renders work from the new fragment.
def html=(html)
  reset
  @html = html
end
|
28
|
+
|
29
|
+
# Returns the HTML fragment currently assigned to the document, or nil when
# none has been set yet.
def html
  @html
end
|
32
|
+
|
33
|
+
# Returns the content.xml of the generated document. The result is memoized
# until it is cleared again.
#
# Pipeline:
#   1. prepare_html turns the HTML fragment into a full XHTML document
#   2. the xhtml2odt stylesheet converts it into ODT markup
#   3. the markup replaces the template paragraph matching CONTENT_REGEX
#   4. the styles stylesheet is applied to the merged document
def content_xml
  @content_xml ||= begin
    converted = xslt_tranform(prepare_html, Html2Odt::XHTML2ODT_XSL)

    # The converted markup is embedded into another document, so its XML
    # declaration has to go.
    converted = converted.sub('<?xml version="1.0" encoding="utf-8"?>', '')

    # Block form on purpose: with a plain replacement String, sequences such
    # as `\0`, `\1` or `\\` inside the converted markup would be interpreted
    # as back-references and silently corrupt text containing backslashes.
    merged = @tpl_content_xml.sub(CONTENT_REGEX) { converted }

    xslt_tranform(merged, Html2Odt::XHTML2ODT_STYLES_XSL)
  end
end
|
48
|
+
|
49
|
+
# Returns the template's styles.xml after it has been run through the styles
# stylesheet. The transformation happens once; later calls reuse the result.
def styles_xml
  return @styles_xml if @styles_xml

  @styles_xml = xslt_tranform(@tpl_styles_xml, Html2Odt::XHTML2ODT_STYLES_XSL)
end
|
52
|
+
|
53
|
+
# Returns META-INF/manifest.xml for the generated document (memoized).
#
# When the HTML references no embeddable images the template manifest is
# returned as is; otherwise one manifest:file-entry per image is appended to
# the template manifest's root element.
def manifest_xml
  return @manifest_xml if @manifest_xml

  # Rendering the content parses the HTML, which is what fills @images.
  content_xml

  @manifest_xml =
    if @images.nil? || @images.empty?
      @tpl_manifest_xml
    else
      manifest = Nokogiri::XML(@tpl_manifest_xml)

      @images.each do |image|
        file_entry = create_node(manifest, "manifest:file-entry")
        file_entry["manifest:full-path"] = image.target
        file_entry["manifest:media-type"] = image.mime_type

        manifest.root.add_child file_entry
      end

      manifest.to_xml
    end
end
|
74
|
+
|
75
|
+
# Returns meta.xml for the generated document (memoized).
#
# Starting from the template's meta.xml, each managed element below
# office:document-meta/office:meta is removed and, when a value is available,
# appended again with fresh content:
#
#   generator, creation date, modification date, editing duration, editing
#   cycles, initial creator / creator (from #author) and title (from #title).
#
# Elements whose value is nil (no author or title set) are removed only.
def meta_xml
  @meta_xml ||= begin
    doc = Nokogiri::XML(@tpl_meta_xml)
    meta = doc.at_xpath("office:document-meta/office:meta")

    # A single timestamp, so creation date and modification date can never
    # disagree because the clock ticked between two Time.now calls.
    timestamp = Time.now.utc.iso8601

    [
      ["meta:generator", "html2odt.rb/#{Html2Odt::VERSION}"],
      ["meta:creation-date", timestamp],
      ["dc:date", timestamp],
      ["meta:editing-duration", "P0D"],
      ["meta:editing-cycles", "1"],
      ["meta:initial-creator", author],
      ["dc:creator", author],
      ["dc:title", title]
    ].each do |tagname, value|
      meta.xpath(tagname).remove
      meta.add_child create_node(doc, tagname, value) if value
    end

    doc.to_xml
  end
end
|
108
|
+
|
109
|
+
# Returns the generated ODT archive as a String (memoized).
#
# Every non-directory entry of the template archive is copied into a new
# in-memory ZIP. content.xml, meta.xml, styles.xml and META-INF/manifest.xml
# are replaced by their generated counterparts; everything else is copied
# unchanged. The images collected while converting the HTML are appended
# afterwards under their target path.
def data
  @data ||= begin
    buffer = Zip::OutputStream.write_buffer do |output_stream|
      Zip::File.open(@template) do |template_zip|
        template_zip.each do |entry|
          next if entry.directory?

          entry.get_input_stream do |input_stream|
            payload =
              case entry.name
              when "content.xml"
                content_xml
              when "meta.xml"
                meta_xml
              when "styles.xml"
                styles_xml
              when "META-INF/manifest.xml"
                manifest_xml
              else
                input_stream.sysread
              end

            output_stream.put_next_entry(entry.name)
            output_stream.write payload
          end
        end
      end

      # Adding images found in the HTML sources
      (@images || []).each do |image|
        output_stream.put_next_entry(image.target)
        # Binary read: image bytes must be copied verbatim. A text-mode read
        # may translate newlines on some platforms and tags the bytes with a
        # text encoding.
        output_stream.write File.binread(image.source)
      end
    end

    buffer.string
  end
end
|
148
|
+
|
149
|
+
# Writes the generated ODT archive to +path+ and returns the number of bytes
# written.
#
# The archive is binary data, so it is written in binary mode; a text-mode
# write may translate newline bytes on some platforms and break the ZIP.
def write_to(path)
  File.binwrite(path, data)
end
|
152
|
+
|
153
|
+
protected
|
154
|
+
|
155
|
+
|
156
|
+
# Loads the XML parts of the template archive into instance variables
# (@tpl_content_xml, @tpl_manifest_xml, @tpl_meta_xml, @tpl_styles_xml).
#
# Raises ArgumentError when
#   * the template file is not readable,
#   * the archive cannot be processed (Zip::Error),
#   * reading an entry fails with Errno::ENOENT, or
#   * content.xml has no paragraph matching CONTENT_REGEX.
def read_xmls
  raise ArgumentError, "Cannot read template file #{@template.inspect}" unless File.readable?(@template)

  Zip::File.open(@template) do |archive|
    @tpl_content_xml = archive.read("content.xml")
    @tpl_manifest_xml = archive.read("META-INF/manifest.xml")
    @tpl_meta_xml = archive.read("meta.xml")
    @tpl_styles_xml = archive.read("styles.xml")
  end

  return if @tpl_content_xml =~ CONTENT_REGEX

  raise ArgumentError, "Template file does not contain `{{content}}` paragraph"
rescue Zip::Error => e
  raise ArgumentError, "Template file does not look like a ODT file - #{e.message}"
rescue Errno::ENOENT => e
  raise ArgumentError, "Template file does not contain expected file - #{e.message}"
end
|
177
|
+
|
178
|
+
|
179
|
+
# Produces the markup handed to the XSLT conversion: images in the current
# HTML fragment are processed by fix_images_in_html first, then the result is
# wrapped into a document by create_document.
def prepare_html
  create_document(fix_images_in_html(html))
end
|
185
|
+
|
186
|
+
|
187
|
+
# Wraps an HTML fragment in a root <html> element declaring the XHTML
# namespace and returns the resulting String.
def create_document(html)
  opening_tag = '<html xmlns="http://www.w3.org/1999/xhtml">'

  "#{opening_tag}#{html}</html>"
end
|
190
|
+
|
191
|
+
# Processes every <img> of the HTML fragment and returns the fragment
# serialised with to_xml.
#
# For each image an Html2Odt::Image is created whose source is resolved via
# file_path_for. Valid images get their tag rewritten by update_img_tag and
# are collected in @images. Any other image is replaced by a link to the
# original src showing the alt text, or removed when there is no alt text.
def fix_images_in_html(html)
  fragment = Nokogiri::HTML::DocumentFragment.parse(html)
  @images = []

  fragment.css("img").each_with_index do |img, index|
    image = Html2Odt::Image.new(index)
    image.source = file_path_for(img["src"])

    if image.valid?
      update_img_tag(img, image)
      @images << image
      next
    end

    # The image cannot be embedded: fall back to a link, if there is a label
    # to show for it.
    alt = img["alt"]

    if alt && !alt.empty?
      link = create_node(fragment, "a")
      link["href"] = img["src"]
      link.content = alt

      img.replace(link)
    else
      img.remove
    end
  end

  fragment.to_xml
end
|
221
|
+
|
222
|
+
# Resolves the src attribute of an image to the path of a local file.
#
# * If an image_location_mapping callback is registered, its result is
#   returned and the default handling below is skipped entirely.
# * file:// URLs resolve to the path following the scheme.
# * http:// and https:// URLs are downloaded into a temporary file.
# * Anything else (including nil) cannot be handled and yields nil.
def file_path_for(src)
  return image_location_mapping.call(src) if image_location_mapping

  case src
  when /\Afile:\/\//
    # local file URL
    #
    # TODO: Verify, that this does not pose a security threat, maybe make
    # this optional. In any case, it's useful for testing.

    src[7..-1]

  when /\Ahttps?:\/\//
    # remote image URL
    #
    # TODO: Verify, that this does not pose a security threat, maybe make
    # this optional.

    uri = URI.parse(src)
    file = Tempfile.new("html2odt")
    file.binmode

    # A Tempfile deletes its file once the object is garbage collected. Only
    # the path is returned, so hold on to the object for as long as the
    # document lives; otherwise the download may be gone before it is read.
    (@tempfiles ||= []) << file

    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
      # request_uri (unlike path) keeps the query string and is "/" for URLs
      # without a path, which would otherwise be an invalid request.
      response = http.get(uri.request_uri)

      file.write(response.body)
      file.flush
    end

    file.path
  else
    # cannot handle image properly, return nil
    nil
  end
end
|
260
|
+
|
261
|
+
# Points an <img> tag at the embedded copy of the image and converts its size
# to centimetres.
#
# img   - the image node; read and written through [] / []=.
# image - the embedded image, providing target, width and height.
#
# Pixel size resolution:
#   * width and height attributes present: both are used as given
#   * only one present: the other is derived from the image's aspect ratio
#   * none present: the image's own dimensions are used
#
# Pixels are converted to centimetres using DPI and INCH_TO_CM and rounded
# to two decimals.
def update_img_tag(img, image)
  img["src"] = image.target

  if img["width"] && img["height"]
    # use values supplied in HTML
    width = img["width"].to_i
    height = img["height"].to_i
  elsif img["width"]
    # compute height based on width keeping aspect ratio:
    # height / width == image.height / image.width. Float division avoids
    # truncating the ratio.
    width = img["width"].to_i
    height = width * image.height / image.width.to_f
  elsif img["height"]
    # compute width based on height keeping aspect ratio
    height = img["height"].to_i
    width = height * image.width / image.height.to_f
  else
    width = image.width
    height = image.height
  end

  img["width"] = "#{(width / DPI * INCH_TO_CM).round(2)}cm"
  img["height"] = "#{(height / DPI * INCH_TO_CM).round(2)}cm"
end
|
284
|
+
|
285
|
+
|
286
|
+
# Applies the XSLT stylesheet stored at path +xsl+ to the XML String +xml+
# and returns the transformation result as a String.
#
# Raises RuntimeError or Nokogiri::XML::SyntaxError if something goes wrong.
def xslt_tranform(xml, xsl)
  # NOTE(review): the stylesheet is handed over as an open File rather than
  # as a String - presumably so relative includes inside the stylesheet can
  # be resolved from the file's location; confirm before changing this.
  stylesheet = File.open(xsl) { |io| Nokogiri::XSLT(io) }

  stylesheet.transform(Nokogiri::XML(xml)).to_s
end
|
296
|
+
|
297
|
+
# Forgets everything derived from the current HTML: the rendered content,
# the manifest, the packaged archive data and the collected images.
def reset
  @content_xml = @manifest_xml = @data = @images = nil
end
|
303
|
+
|
304
|
+
# Creates a new node named +tagname+ belonging to document +doc+ and returns
# it. The node's content is set only when +content+ is not nil.
def create_node(doc, tagname, content = nil)
  Nokogiri::XML::Node.new(tagname, doc).tap do |node|
    node.content = content unless content.nil?
  end
end
|
309
|
+
end
|