htmls_to_pdf 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ *.swp
3
+ *.html
4
+ *.css
5
+ *.pdf
data/README.markdown ADDED
@@ -0,0 +1,104 @@
1
+ # HtmlsToPdf
2
+
3
+ ## DESCRIPTION
4
+
5
+ HtmlsToPdf enables you to package one or more (ordered) HTML pages as a PDF.
6
+
7
+ ## REQUIREMENTS
8
+
9
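+ HtmlsToPdf uses the PDFKit gem, which itself uses the [wkhtmltopdf](http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf-0.9.9-doc.html) program, which uses qtwebkit. HtmlsToPdf also shells out to the `wget` program (to download the pages and stylesheets) and the `pdftk` program (to join the per-page PDFs), so both must be installed and on your PATH.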
10
+
11
+ Dependency chain summary: HtmlsToPdf -> PDFKit -> wkhtmltopdf -> qtwebkit -> webkit
12
+
13
+ For information on qtwebkit:
14
+
15
+ - [Installing on Linux](http://trac.webkit.org/wiki/BuildingQtOnLinux)
16
+
17
+ - [Installing on MacOS](http://trac.webkit.org/wiki/BuildingQtOnOSX)
18
+
19
+ - [Installing on Windows](http://trac.webkit.org/wiki/BuildingQtOnWindows)
20
+
21
+ For information on wkhtmltopdf:
22
+
23
+ - [Installation guide from PDFKit author](https://github.com/jdpace/PDFKit/wiki/Installing-WKHTMLTOPDF)
24
+
25
+ - [code.google.com](http://code.google.com/p/wkhtmltopdf/)
26
+
27
+ For information on PDFKit:
28
+
29
+ - [Github](https://github.com/jdpace/PDFKit)
30
+
31
+ - [Railscasts](http://railscasts.com/episodes/220-pdfkit)
32
+
33
+ ## BASIC USAGE
34
+
35
+ You will find six example scripts in the /examples directory.
36
+
37
+ After you install HtmlsToPdf and its dependencies, you can write an ordinary Ruby script with the following features:
38
+
39
+ ### EXAMPLE 1
40
+
41
+ Annotated version of /examples/get_rubygems_user_guide.rb:
42
+
43
+ # require the gem
44
+ require 'rubygems'
45
+ require 'htmls_to_pdf'
46
+
47
+ # Get 'RubyGems User Guide' as pdf file
48
+ # Source: 'http://docs.rubygems.org/read/book/1'
49
+
50
+ # create an empty hash to hold your configuration options
51
+ config = {}
52
+
53
+ # set a :urls key with a value of an array containing all the
54
+ # urls you want in your PDF (in the order you want them)
55
+ config[:urls] = ['http://docs.rubygems.org/read/book/1']
56
+ # I have no idea why these chapters are numbered as they are!
57
+ [1,2,3,4,16,7,5,6,21].each do |val|
58
+ config[:urls] << 'http://docs.rubygems.org/read/chapter/' + val.to_s
59
+ end
60
+
61
+ # set a :savedir key with a string value indicating the directory to create
62
+ # your PDF file in. If the directory does not exist, it will be created
63
+ config[:savedir] = '~/Tech/Ruby/GEMS/DOCUMENTATION'
64
+
65
+ # set a :savename key with a string value indicating the name of the PDF file
66
+ config[:savename] = 'RubyGems_User_Guide.pdf'
67
+
68
+ # create a new HtmlsToPdf object, passing in your hash, and then call create_pdf
69
+ # on the new object
70
+ HtmlsToPdf.new(config).create_pdf
71
+
72
+ ### EXAMPLE 2
73
+
74
+ Annotated version of /examples/get_coffeescript_meet_backbone.rb:
75
+
76
+ require 'rubygems'
77
+ require 'htmls_to_pdf'
78
+
79
+ # Get 'CoffeeScript, Meet Backbone.js' as pdf file
80
+ # Source: 'http://adamjspooner.github.com/coffeescript-meet-backbonejs/'
81
+
82
+ config = {}
83
+ config[:urls] = ['http://adamjspooner.github.com/coffeescript-meet-backbonejs/']
84
+ (1..5).each do |val|
85
+ config[:urls] << 'http://adamjspooner.github.com/coffeescript-meet-backbonejs/0' + val.to_s + '/docs/script.html'
86
+ end
87
+ config[:savedir] = '~/Tech/Javascript/COFFEESCRIPT/BACKBONE.JS'
88
+ config[:savename] = 'CoffeeScript_Meet_Backbone.js.pdf'
89
+
90
+ # If a :css key is given with an array value, the CSS files in the array will be used to generate
91
+ # the PDF document. This allows you to modify the CSS file(s) to, for example, hide HTML headers,
92
+ # sidebars and footers you do not wish to appear in your PDF.
93
+ config[:css] = ['http://adamjspooner.github.com/coffeescript-meet-backbonejs/05/docs/docco.css']
94
+
95
+ # If a :options key is passed with a hash value, that hash will be passed to wkhtmltopdf.
96
+ # Many options are available through wkhtmltopdf; see: [the wkhtmltopdf documentation](http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf-0.9.9-doc.html).
97
+ config[:options] = {:page_size => 'Letter', :orientation => 'Landscape'}
98
+
99
+ HtmlsToPdf.new(config).create_pdf
100
+
101
+ ## LEGAL DISCLAIMER
102
+
103
+ Please use at your own risk. I do not guarantee anything about this program.
104
+
@@ -0,0 +1,14 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'CoffeeScript_documentation' as pdf file
5
+ # Source: 'http://jashkenas.github.com/coffee-script/'
6
+
7
# Stylesheets used when rendering the CoffeeScript documentation page.
stylesheets = %w[
  http://jashkenas.github.com/coffee-script/documentation/css/docs.css
  http://jashkenas.github.com/coffee-script/documentation/css/idle.css
]

# All options in one literal: the page to fetch, where to save, and the CSS.
config = {
  :urls     => ['http://jashkenas.github.com/coffee-script/'],
  :savedir  => '~/Tech/Javascript/COFFEESCRIPT/DOCUMENTATION',
  :savename => 'CoffeeScript_documentation.pdf',
  :css      => stylesheets
}

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'CoffeeScript, Meet Backbone.js' as pdf file
5
+ # Source: 'http://adamjspooner.github.com/coffeescript-meet-backbonejs/'
6
+
7
# Landing page first, then the five numbered chapters (01..05) in order.
base_url = 'http://adamjspooner.github.com/coffeescript-meet-backbonejs/'
chapter_urls = (1..5).map { |chapter| "#{base_url}0#{chapter}/docs/script.html" }

config = {
  :urls     => [base_url] + chapter_urls,
  :savedir  => '~/Tech/Javascript/COFFEESCRIPT/BACKBONE.JS',
  :savename => 'CoffeeScript_Meet_Backbone.js.pdf',
  :css      => ['http://adamjspooner.github.com/coffeescript-meet-backbonejs/05/docs/docco.css'],
  # Passed through to wkhtmltopdf via PDFKit.
  :options  => { :page_size => 'Letter', :orientation => 'Landscape' }
}

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'Exploring CoffeeScript' as pdf file
5
+ # Source: 'http://elegantcode.com'
6
+
7
# The six parts of the series, in reading order, relative to the site root.
post_paths = %w[
  2011/06/21/exploring-coffeescript-part-1-and-then-there-was-coffee/
  2011/06/30/exploring-coffeescript-part-2-variables-and-functions/
  2011/07/13/exploring-coffeescript-part-3-more-on-functions/
  2011/07/26/exploring-coffeescript-part-4-objects-and-classes/
  2011/08/02/exploring-coffeescript-part-5-ranges-loops-and-comprehensions/
  2011/08/09/exploring-coffeescript-part-6-show-me-the-goodies/
]

config = {
  :urls     => post_paths.map { |path| 'http://elegantcode.com/' + path },
  :savedir  => '~/Tech/Javascript/COFFEESCRIPT/DOCUMENTATION/Exploring_CoffeeScript',
  :savename => 'Exploring_CoffeeScript.pdf'
}

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,23 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'Learn Python the Hard Way' as pdf file
5
+ # Source: 'http://learnpythonthehardway.org/book/'
6
+
7
# Returns the ordered page URLs for 'Learn Python the Hard Way': the intro,
# exercises ex0 through ex52, then the 'next' and 'advice' closing pages.
def python_hard_way_urls
  base = 'http://learnpythonthehardway.org/book/'
  exercises = (0..52).map { |number| "ex#{number}" }
  (['intro'] + exercises + %w[next advice]).map { |page| "#{base}#{page}.html" }
end
16
+
17
# Options hash handed to HtmlsToPdf. It has to be created before any key is
# assigned; without this line the script fails with a NameError on `config`.
config = {}
config[:savedir] = '~/Tech/Python/Learn_Python_the_Hard_Way'
config[:savename] = 'Learn_Python_the_Hard_Way.pdf'
config[:urls] = python_hard_way_urls
config[:css] = ['http://learnpythonthehardway.org/book/_static/basic.css']
# Keep the downloaded HTML/CSS and per-page PDFs after the final PDF is built.
config[:remove_temp_files] = false

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,23 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'Learn Ruby the Hard Way' as a pdf
5
+ # Source: 'http://ruby.learncodethehardway.org/book/'
6
+
7
# Returns the ordered page URLs for 'Learn Ruby the Hard Way': the intro,
# exercises ex01 through ex52 (zero-padded to two digits), then the 'next'
# and 'advice' closing pages.
def ruby_hard_way_urls
  base = 'http://ruby.learncodethehardway.org/book/'
  exercises = (1..52).map { |number| format('ex%02d', number) }
  (['intro'] + exercises + %w[next advice]).map { |page| "#{base}#{page}.html" }
end
16
+
17
# Options hash handed to HtmlsToPdf. It has to be created before any key is
# assigned; without this line the script fails with a NameError on `config`.
config = {}
config[:savedir] = '~/Ruby_programs/Learn_Ruby_the_Hard_Way'
config[:savename] = 'Learn_Ruby_the_Hard_Way.pdf'
config[:urls] = ruby_hard_way_urls
config[:css] = ['http://ruby.learncodethehardway.org/book/css/syntax.css']
# Keep the downloaded HTML/CSS and per-page PDFs after the final PDF is built.
config[:remove_temp_files] = false

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'RubyGems User Guide' as pdf file
5
+ # Source: 'http://docs.rubygems.org/read/book/1'
6
+
7
# Chapter ids in reading order; the site's numbering is not sequential.
chapter_ids = [1, 2, 3, 4, 16, 7, 5, 6, 21]
chapter_urls = chapter_ids.map { |id| "http://docs.rubygems.org/read/chapter/#{id}" }

config = {
  # Book landing page first, followed by the chapters.
  :urls     => ['http://docs.rubygems.org/read/book/1'] + chapter_urls,
  :savedir  => '~/Tech/Ruby/GEMS/DOCUMENTATION',
  :savename => 'RubyGems_User_Guide.pdf'
}

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,18 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+ require 'htmls_to_pdf/version'
3
+
4
# Packaging metadata for the htmls_to_pdf gem.
Gem::Specification.new do |spec|
  spec.name        = 'htmls_to_pdf'
  spec.version     = HtmlsToPdf::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['James Lavin']
  spec.email       = ['htmls_to_pdf@futureresearch.com']
  spec.summary     = 'Creates single PDF file from 1+ HTML pages'
  spec.description = 'Creates single PDF file from 1+ HTML pages using PDFKit'

  # File lists are read from git, so the gem is built from a git checkout.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'pdfkit', '~> 0.5', '>= 0.5.2'
  spec.add_development_dependency 'rspec'
end
@@ -0,0 +1,144 @@
1
+ require 'rubygems'
2
+ require 'fileutils'
3
+ require 'pdfkit'
4
+ require 'uri'
5
+ include URI
6
+
7
# Downloads a list of web pages (and optional stylesheets) with wget, renders
# each page to a temporary PDF with PDFKit, and joins the temporary PDFs into
# a single PDF with pdftk.
#
# All work happens inside the :savedir directory, which becomes the current
# working directory of the process when an instance is created.
class HtmlsToPdf

  attr_reader :htmlarray, :pdfarray, :cssarray, :urls, :savedir, :savename, :remove_temp_files

  TMP_HTML_PREFIX = 'tmp_html_file_'
  TMP_PDF_PREFIX = 'tmp_pdf_file_'

  # in_config is a hash with these keys:
  #   :urls              - URL string or array of URLs, in output order
  #   :savedir           - directory for the PDF (created if missing)
  #   :savename          - file name of the final PDF
  #   :css               - CSS URL or array of CSS URLs (default [])
  #   :remove_temp_files - delete downloads and per-page PDFs (default true)
  #   :options           - hash passed to PDFKit.new (default {})
  #
  # Side effects: creates and chdirs into :savedir, and exits the process if
  # :savename already exists there.
  def initialize(in_config = {})
    config = {
      :css => [],
      :remove_temp_files => true,
      :options => {}
    }.merge(in_config)
    set_dir(config[:savedir])
    @savename = config[:savename]
    exit_if_pdf_exists
    @urls = clean_urls(config[:urls])
    @pdfarray = create_pdfarray
    # Array() tolerates an explicit nil or a single CSS URL string.
    @cssarray = Array(config[:css])
    @remove_temp_files = config[:remove_temp_files]
    @options = config[:options]
  end

  # Returns the last path segment of every URL in @urls.
  def get_htmlarray
    everything_after_last_slash(@urls)
  end

  # Coerces urls to an array (nil becomes [], a single value is wrapped) and
  # strips one trailing slash from each URL.
  def clean_urls(urls)
    remove_trailing_url_slashes(Array(urls))
  end

  # Returns a new array with a single trailing '/' removed from each URL.
  def remove_trailing_url_slashes(urls)
    urls.map { |url| url.sub(%r{/$}, '') }
  end

  # Returns, for each URL, the text after its last '/'.
  def everything_after_last_slash(urls)
    urls.map { |url| url.match(%r{([^/]+)$})[0] }
  end

  # Appends '.html' to every entry that does not already end in .htm/.html.
  def add_dot_html(urls)
    urls.map { |url| url.match(/\.html?$/) ? url : url + '.html' }
  end

  # Returns one temporary PDF file name per URL, numbered from 0.
  def create_pdfarray
    (0...@urls.length).map { |idx| TMP_PDF_PREFIX + idx.to_s }
  end

  # Prints a message and exits the process when the target PDF already exists.
  def exit_if_pdf_exists
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if File.exist?(@savename)
      puts "File #{@savename} already exists. Please rename or delete and re-run this program."
      exit
    end
  end

  # Remembers savedir, creates it if needed (expanding '~'), and chdirs into it.
  def set_dir(savedir)
    @savedir = savedir
    save_to = File.expand_path(savedir)
    FileUtils.mkdir_p(save_to)
    Dir.chdir(save_to)
  end

  # Fetches every page and stylesheet into the current directory.
  def download_files
    download_html_files
    download_css_files
  end

  # Downloads each URL to TMP_HTML_PREFIX + index, skipping files that are
  # already present, and records the file names in @htmlarray.
  def download_html_files
    @htmlarray = []
    @urls.each_with_index do |url, idx|
      savename = TMP_HTML_PREFIX + idx.to_s
      run_command('wget', '-O', savename, url) unless File.exist?(savename)
      @htmlarray << savename
    end
  end

  # Downloads each stylesheet unless a file with its basename already exists.
  def download_css_files
    @cssarray.each do |css_url|
      run_command('wget', css_url) unless File.exist?(File.basename(css_url))
    end
  end

  # Renders every downloaded HTML file to its matching temporary PDF.
  def generate_pdfs
    @urls.each_with_index { |url, i| html_to_pdf(TMP_HTML_PREFIX + i.to_s, @pdfarray[i]) }
  end

  # Renders html_file to pdf_file with PDFKit, using @options and the
  # downloaded stylesheets. Does nothing if pdf_file already exists.
  def html_to_pdf(html_file, pdf_file)
    puts "creating #{pdf_file} from #{html_file}"
    return if File.exist?(pdf_file)

    kit = PDFKit.new(File.read(html_file), @options)
    @cssarray.each { |cssfile| kit.stylesheets << File.basename(cssfile) }
    kit.to_file(pdf_file)
  end

  # Concatenates the temporary PDFs into @savename with pdftk, unless the
  # final PDF already exists.
  def join_pdfs
    return if File.exist?(@savename)

    command = ['pdftk'] + @pdfarray + ['output', @savename]
    run_command(*command)
  end

  # Removes the temporary PDFs, downloaded HTML files and stylesheets.
  # Files that are already missing (e.g. a failed download) are ignored.
  def delete_temp_files
    @pdfarray.each { |pdffile| FileUtils.rm_f(pdffile) }
    Array(@htmlarray).each { |htmlfile| FileUtils.rm_f(htmlfile) }
    @cssarray.each { |cssfile| FileUtils.rm_f(File.basename(cssfile)) }
  end

  # Runs the whole pipeline: download, render, join, then optional cleanup.
  def create_pdf
    download_files
    generate_pdfs
    join_pdfs
    delete_temp_files if @remove_temp_files
  end

  private

  # Runs an external program without going through a shell, so characters
  # such as '&', '?' or spaces in URLs and file names are passed literally.
  # Warns on stderr (without raising) when the program fails or is missing.
  def run_command(*command)
    succeeded = system(*command)
    warn "Command failed: #{command.join(' ')}" unless succeeded
    succeeded
  end

end
142
+
143
+
144
+
@@ -0,0 +1,6 @@
1
+ require 'pdfkit'
2
+
3
# Point PDFKit at /usr/bin/wkhtmltopdf only when an executable is really
# there. On systems where wkhtmltopdf lives elsewhere (e.g. /usr/local/bin)
# the setting is left untouched so PDFKit's own default lookup applies
# instead of a path that does not exist.
PDFKit.configure do |config|
  default_binary = '/usr/bin/wkhtmltopdf'
  config.wkhtmltopdf = default_binary if File.executable?(default_binary)
end
6
+
@@ -0,0 +1,3 @@
1
# HtmlsToPdf is a class in lib/htmls_to_pdf/htmls_to_pdf.rb, so the version
# constant reopens it as a class. Declaring it as a module here raises a
# TypeError as soon as both files are loaded in the same process.
class HtmlsToPdf
  VERSION = "0.0.4"
end
@@ -0,0 +1,2 @@
1
+ require 'htmls_to_pdf/pdfkit_config'
2
+ require 'htmls_to_pdf/htmls_to_pdf'
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: htmls_to_pdf
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 4
10
+ version: 0.0.4
11
+ platform: ruby
12
+ authors:
13
+ - James Lavin
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-10-07 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: pdfkit
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 1
29
+ segments:
30
+ - 0
31
+ - 5
32
+ version: "0.5"
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ hash: 15
36
+ segments:
37
+ - 0
38
+ - 5
39
+ - 2
40
+ version: 0.5.2
41
+ type: :runtime
42
+ version_requirements: *id001
43
+ - !ruby/object:Gem::Dependency
44
+ name: rspec
45
+ prerelease: false
46
+ requirement: &id002 !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ hash: 3
52
+ segments:
53
+ - 0
54
+ version: "0"
55
+ type: :development
56
+ version_requirements: *id002
57
+ description: Creates single PDF file from 1+ HTML pages using PDFKit
58
+ email:
59
+ - htmls_to_pdf@futureresearch.com
60
+ executables: []
61
+
62
+ extensions: []
63
+
64
+ extra_rdoc_files: []
65
+
66
+ files:
67
+ - .gitignore
68
+ - README.markdown
69
+ - examples/get_coffeescript.rb
70
+ - examples/get_coffeescript_meet_backbone.rb
71
+ - examples/get_exploring_coffeescript.rb
72
+ - examples/get_python_book.rb
73
+ - examples/get_ruby_book.rb
74
+ - examples/get_rubygems_user_guide.rb
75
+ - htmls_to_pdf.gemspec
76
+ - lib/htmls_to_pdf.rb
77
+ - lib/htmls_to_pdf/htmls_to_pdf.rb
78
+ - lib/htmls_to_pdf/pdfkit_config.rb
79
+ - lib/htmls_to_pdf/version.rb
80
+ homepage:
81
+ licenses: []
82
+
83
+ post_install_message:
84
+ rdoc_options: []
85
+
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ hash: 3
103
+ segments:
104
+ - 0
105
+ version: "0"
106
+ requirements: []
107
+
108
+ rubyforge_project:
109
+ rubygems_version: 1.8.7
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: Creates single PDF file from 1+ HTML pages
113
+ test_files: []
114
+