htmls_to_pdf 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ *.swp
3
+ *.html
4
+ *.css
5
+ *.pdf
data/README.markdown ADDED
@@ -0,0 +1,104 @@
1
+ # HtmlsToPdf
2
+
3
+ ## DESCRIPTION
4
+
5
+ HtmlsToPdf enables you to package one or more (ordered) HTML pages as a PDF.
6
+
7
+ ## REQUIREMENTS
8
+
9
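+ HtmlsToPdf uses the PDFKit gem, which itself uses the [wkhtmltopdf](http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf-0.9.9-doc.html) program, which uses qtwebkit. HtmlsToPdf also shells out to the `wget` and `pdftk` command-line programs, so both must be installed and on your PATH.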
10
+
11
+ Dependency chain summary: HtmlsToPdf -> PDFKit -> wkhtmltopdf -> qtwebkit -> webkit
12
+
13
+ For information on qtwebkit:
14
+
15
+ - [Installing on Linux](http://trac.webkit.org/wiki/BuildingQtOnLinux)
16
+
17
+ - [Installing on MacOS](http://trac.webkit.org/wiki/BuildingQtOnOSX)
18
+
19
+ - [Installing on Windows](http://trac.webkit.org/wiki/BuildingQtOnWindows)
20
+
21
+ For information on wkhtmltopdf:
22
+
23
+ - [Installation guide from PDFKit author](https://github.com/jdpace/PDFKit/wiki/Installing-WKHTMLTOPDF)
24
+
25
+ - [code.google.com](http://code.google.com/p/wkhtmltopdf/)
26
+
27
+ For information on PDFKit:
28
+
29
+ - [Github](https://github.com/jdpace/PDFKit)
30
+
31
+ - [Railscasts](http://railscasts.com/episodes/220-pdfkit)
32
+
33
+ ## BASIC USAGE
34
+
35
+ You will find six example scripts in the /examples directory.
36
+
37
+ After you install HtmlsToPdf and its dependencies, you can write an ordinary Ruby script with the following features:
38
+
39
+ ### EXAMPLE 1
40
+
41
+ Annotated version of /examples/get_rubygems_user_guide.rb:
42
+
43
+ # require the gem
44
+ require 'rubygems'
45
+ require 'htmls_to_pdf'
46
+
47
+ # Get 'RubyGems User Guide' as pdf file
48
+ # Source: 'http://docs.rubygems.org/read/book/1'
49
+
50
+ # create an empty hash to hold your configuration options
51
+ config = {}
52
+
53
+ # set a :urls key with a value of an array containing all the
54
+ # urls you want in your PDF (in the order you want them)
55
+ config[:urls] = ['http://docs.rubygems.org/read/book/1']
56
+ # I have no idea why these chapters are numbered as they are!
57
+ [1,2,3,4,16,7,5,6,21].each do |val|
58
+ config[:urls] << 'http://docs.rubygems.org/read/chapter/' + val.to_s
59
+ end
60
+
61
+ # set a :savedir key with a string value indicating the directory to create
62
+ # your PDF file in. If the directory does not exist, it will be created
63
+ config[:savedir] = '~/Tech/Ruby/GEMS/DOCUMENTATION'
64
+
65
+ # set a :savename key with a string value indicating the name of the PDF file
66
+ config[:savename] = 'RubyGems_User_Guide.pdf'
67
+
68
+ # create a new HtmlsToPdf object, passing in your hash, and then call create_pdf
69
+ # on the new object
70
+ HtmlsToPdf.new(config).create_pdf
71
+
72
+ ### EXAMPLE 2
73
+
74
+ Annotated version of /examples/get_coffeescript_meet_backbone.rb:
75
+
76
+ require 'rubygems'
77
+ require 'htmls_to_pdf'
78
+
79
+ # Get 'CoffeeScript, Meet Backbone.js' as pdf file
80
+ # Source: 'http://adamjspooner.github.com/coffeescript-meet-backbonejs/'
81
+
82
+ config = {}
83
+ config[:urls] = ['http://adamjspooner.github.com/coffeescript-meet-backbonejs/']
84
+ (1..5).each do |val|
85
+ config[:urls] << 'http://adamjspooner.github.com/coffeescript-meet-backbonejs/0' + val.to_s + '/docs/script.html'
86
+ end
87
+ config[:savedir] = '~/Tech/Javascript/COFFEESCRIPT/BACKBONE.JS'
88
+ config[:savename] = 'CoffeeScript_Meet_Backbone.js.pdf'
89
+
90
+ # If a :css key is given with an array value, the CSS files in the array will be used to generate
91
+ # the PDF document. This allows you to modify the CSS file(s) to, for example, hide HTML headers,
92
+ # sidebars and footers you do not wish to appear in your PDF.
93
+ config[:css] = ['http://adamjspooner.github.com/coffeescript-meet-backbonejs/05/docs/docco.css']
94
+
95
+ # If an :options key is passed with a hash value, that hash will be passed to wkhtmltopdf.
96
+ # Many options are available through wkhtmltopdf; see: [the wkhtmltopdf documentation](http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf-0.9.9-doc.html).
97
+ config[:options] = {:page_size => 'Letter', :orientation => 'Landscape'}
98
+
99
+ HtmlsToPdf.new(config).create_pdf
100
+
101
+ ## LEGAL DISCLAIMER
102
+
103
+ Please use at your own risk. I do not guarantee anything about this program.
104
+
@@ -0,0 +1,14 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'CoffeeScript_documentation' as pdf file
5
+ # Source: 'http://jashkenas.github.com/coffee-script/'
6
+
7
# Describe the whole job in one hash literal: the page to fetch, where the
# PDF is saved, and the stylesheets applied while rendering.
config = {
  :urls     => ['http://jashkenas.github.com/coffee-script/'],
  :savedir  => '~/Tech/Javascript/COFFEESCRIPT/DOCUMENTATION',
  :savename => 'CoffeeScript_documentation.pdf',
  :css      => [
    'http://jashkenas.github.com/coffee-script/documentation/css/docs.css',
    'http://jashkenas.github.com/coffee-script/documentation/css/idle.css'
  ]
}

# Download, render and join everything into the final PDF.
HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'CoffeeScript, Meet Backbone.js' as pdf file
5
+ # Source: 'http://adamjspooner.github.com/coffeescript-meet-backbonejs/'
6
+
7
# One annotated-source page per chapter, 01 through 05.
chapter_urls = (1..5).map do |chapter|
  'http://adamjspooner.github.com/coffeescript-meet-backbonejs/0' + chapter.to_s + '/docs/script.html'
end

# Landing page first, then the chapters, rendered with the site's own
# stylesheet on landscape Letter pages.
config = {
  :urls     => ['http://adamjspooner.github.com/coffeescript-meet-backbonejs/'] + chapter_urls,
  :savedir  => '~/Tech/Javascript/COFFEESCRIPT/BACKBONE.JS',
  :savename => 'CoffeeScript_Meet_Backbone.js.pdf',
  :css      => ['http://adamjspooner.github.com/coffeescript-meet-backbonejs/05/docs/docco.css'],
  :options  => {:page_size => 'Letter', :orientation => 'Landscape'}
}

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'Exploring CoffeeScript' as pdf file
5
+ # Source: 'http://elegantcode.com'
6
+
7
# The six parts of the series, in the order they should appear in the PDF.
article_urls = [
  'http://elegantcode.com/2011/06/21/exploring-coffeescript-part-1-and-then-there-was-coffee/',
  'http://elegantcode.com/2011/06/30/exploring-coffeescript-part-2-variables-and-functions/',
  'http://elegantcode.com/2011/07/13/exploring-coffeescript-part-3-more-on-functions/',
  'http://elegantcode.com/2011/07/26/exploring-coffeescript-part-4-objects-and-classes/',
  'http://elegantcode.com/2011/08/02/exploring-coffeescript-part-5-ranges-loops-and-comprehensions/',
  'http://elegantcode.com/2011/08/09/exploring-coffeescript-part-6-show-me-the-goodies/'
]

config = {
  :urls     => article_urls,
  :savedir  => '~/Tech/Javascript/COFFEESCRIPT/DOCUMENTATION/Exploring_CoffeeScript',
  :savename => 'Exploring_CoffeeScript.pdf'
}

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,23 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'Learn Python the Hard Way' as pdf file
5
+ # Source: 'http://learnpythonthehardway.org/book/'
6
+
7
# Returns the ordered page URLs for 'Learn Python the Hard Way': the intro,
# exercises 0 through 52, then the two closing pages.
def python_hard_way_urls
  exercises = (0..52).map do |number|
    'http://learnpythonthehardway.org/book/ex' + number.to_s + '.html'
  end

  ['http://learnpythonthehardway.org/book/intro.html'] +
    exercises +
    ['http://learnpythonthehardway.org/book/next.html',
     'http://learnpythonthehardway.org/book/advice.html']
end
16
+
17
# Start from an empty options hash. Without this line the assignments below
# raise NameError, because `config` would never have been defined.
config = {}
config[:savedir] = '~/Tech/Python/Learn_Python_the_Hard_Way'
config[:savename] = 'Learn_Python_the_Hard_Way.pdf'
config[:urls] = python_hard_way_urls
config[:css] = ['http://learnpythonthehardway.org/book/_static/basic.css']
# Keep the downloaded HTML/CSS and the per-page PDFs after the run.
config[:remove_temp_files] = false

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,23 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'Learn Ruby the Hard Way' as a pdf
5
+ # Source: 'http://ruby.learncodethehardway.org/book/'
6
+
7
# Returns the ordered page URLs for 'Learn Ruby the Hard Way': the intro,
# exercises 01 through 52 (zero-padded to two digits), then the two closing
# pages.
def ruby_hard_way_urls
  exercises = (1..52).map do |number|
    'http://ruby.learncodethehardway.org/book/ex' + number.to_s.rjust(2,'0') + '.html'
  end

  ['http://ruby.learncodethehardway.org/book/intro.html'] +
    exercises +
    ['http://ruby.learncodethehardway.org/book/next.html',
     'http://ruby.learncodethehardway.org/book/advice.html']
end
16
+
17
# Start from an empty options hash. Without this line the assignments below
# raise NameError, because `config` would never have been defined.
config = {}
config[:savedir] = '~/Ruby_programs/Learn_Ruby_the_Hard_Way'
config[:savename] = 'Learn_Ruby_the_Hard_Way.pdf'
config[:urls] = ruby_hard_way_urls
config[:css] = ['http://ruby.learncodethehardway.org/book/css/syntax.css']
# Keep the downloaded HTML/CSS and the per-page PDFs after the run.
config[:remove_temp_files] = false

# The return value is not used, so it is no longer captured in a local.
HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'htmls_to_pdf'
3
+
4
+ # Get 'RubyGems User Guide' as pdf file
5
+ # Source: 'http://docs.rubygems.org/read/book/1'
6
+
7
# Chapter ids on the site are not sequential; they are listed here in the
# order the chapters should appear in the PDF.
chapter_ids = [1,2,3,4,16,7,5,6,21]
chapter_urls = chapter_ids.map do |id|
  'http://docs.rubygems.org/read/chapter/' + id.to_s
end

# Book landing page first, then each chapter.
config = {
  :urls     => ['http://docs.rubygems.org/read/book/1'] + chapter_urls,
  :savedir  => '~/Tech/Ruby/GEMS/DOCUMENTATION',
  :savename => 'RubyGems_User_Guide.pdf'
}

HtmlsToPdf.new(config).create_pdf
@@ -0,0 +1,18 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+ require 'htmls_to_pdf/version'
3
+
4
# Gem packaging metadata. The file lists are taken from git, so the gem must
# be built from inside a git checkout.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'htmls_to_pdf'
  spec.version     = HtmlsToPdf::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['James Lavin']
  spec.email       = ['htmls_to_pdf@futureresearch.com']
  spec.summary     = 'Creates single PDF file from 1+ HTML pages'
  spec.description = 'Creates single PDF file from 1+ HTML pages using PDFKit'

  # Dependencies
  spec.add_runtime_dependency 'pdfkit', '~> 0.5', '>= 0.5.2'
  spec.add_development_dependency 'rspec'

  # Packaged files
  spec.require_paths = ['lib']
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map do |path|
    File.basename(path)
  end
end
@@ -0,0 +1,144 @@
1
+ require 'rubygems'
2
+ require 'fileutils'
3
+ require 'pdfkit'
4
+ require 'uri'
5
+ include URI
6
+
7
# Downloads a list of web pages, renders each one to its own PDF with PDFKit
# and joins the results into a single PDF file.
#
# Two external programs are invoked: `wget` (downloads) and `pdftk` (joining).
#
# Side effects of construction: the save directory is created when missing,
# the process's working directory is changed to it, and the process exits if
# the target PDF already exists there.
class HtmlsToPdf

  attr_reader :htmlarray, :pdfarray, :cssarray, :urls, :savedir, :savename, :remove_temp_files

  TMP_HTML_PREFIX = 'tmp_html_file_'.freeze
  TMP_PDF_PREFIX = 'tmp_pdf_file_'.freeze

  # in_config is a hash with these keys:
  #   :urls              - URL or array of URLs, in the order they should appear
  #   :savedir           - directory the PDF is written to (created if missing)
  #   :savename          - file name of the resulting PDF
  #   :css               - stylesheet URL or array of URLs (default: [])
  #   :remove_temp_files - delete intermediate files afterwards (default: true)
  #   :options           - hash handed to PDFKit.new (default: {})
  def initialize(in_config = {})
    config = {
      :css => [],
      :remove_temp_files => true,
      :options => {}
    }.merge(in_config)
    set_dir(config[:savedir])
    @savename = config[:savename]
    exit_if_pdf_exists
    @urls = clean_urls(config[:urls])
    @pdfarray = create_pdfarray
    # Array() lets callers pass a single stylesheet URL instead of an array.
    @cssarray = Array(config[:css])
    @remove_temp_files = config[:remove_temp_files]
    @options = config[:options]
  end

  # Returns the last path segment of every configured URL.
  def get_htmlarray
    everything_after_last_slash(@urls)
  end

  # Coerces urls (nil, a single URL or an array) into an array and strips a
  # trailing slash from each entry.
  def clean_urls(urls)
    remove_trailing_url_slashes(Array(urls))
  end

  # Returns copies of the given URLs with one trailing slash removed.
  def remove_trailing_url_slashes(urls)
    urls.map { |url| url.sub(%r{/$}, '') }
  end

  # Returns, for each URL, the text after its last slash. An entry with no
  # such text (empty, or ending in a slash) yields '' instead of raising.
  def everything_after_last_slash(urls)
    urls.map { |url| url[%r{[^/]+$}].to_s }
  end

  # Appends '.html' to every entry that does not already end in .htm/.html.
  def add_dot_html(urls)
    urls.map { |url| url.match(/\.html?$/) ? url : url + '.html' }
  end

  # Returns one temporary PDF file name per configured URL.
  def create_pdfarray
    @urls.each_index.map { |idx| TMP_PDF_PREFIX + idx.to_s }
  end

  # Refuses to overwrite an existing PDF: prints a message and exits.
  def exit_if_pdf_exists
    # File.exist? rather than File.exists?, which Ruby 3.2 removed.
    if File.exist?(@savename)
      puts "File #{@savename} already exists. Please rename or delete and re-run this program."
      exit
    end
  end

  # Remembers savedir, creates it if needed and makes it the working
  # directory; every temporary file is then addressed by a relative name.
  def set_dir(savedir)
    @savedir = savedir
    save_to = File.expand_path(savedir)
    FileUtils.mkdir_p(save_to)
    Dir.chdir(save_to)
  end

  # Fetches the HTML pages and then the stylesheets.
  def download_files
    download_html_files
    download_css_files
  end

  # Downloads each URL to TMP_HTML_PREFIX + index unless that file is already
  # present, and records the file names in @htmlarray.
  def download_html_files
    @htmlarray = []
    @urls.each_with_index do |url, idx|
      savename = TMP_HTML_PREFIX + idx.to_s
      run_command('wget', url.to_s, '-O', savename) unless File.exist?(savename)
      @htmlarray << savename
    end
  end

  # Downloads each stylesheet into the working directory unless a file with
  # the same base name is already there.
  def download_css_files
    @cssarray.each do |css_url|
      run_command('wget', css_url.to_s) unless File.exist?(File.basename(css_url))
    end
  end

  # Renders every downloaded HTML file to its matching temporary PDF.
  def generate_pdfs
    @urls.each_index { |i| html_to_pdf(TMP_HTML_PREFIX + i.to_s, @pdfarray[i]) }
  end

  # Renders html_file to pdf_file with PDFKit, applying the downloaded
  # stylesheets. Does nothing beyond the progress message when pdf_file
  # already exists.
  def html_to_pdf(html_file, pdf_file)
    puts "creating #{pdf_file} from #{html_file}"
    return if File.exist?(pdf_file)

    kit = PDFKit.new(File.read(html_file), @options)
    @cssarray.each { |cssfile| kit.stylesheets << File.basename(cssfile) }
    kit.to_file(pdf_file)
  end

  # Concatenates the temporary PDFs into @savename unless it already exists.
  def join_pdfs
    return if File.exist?(@savename)

    run_command(*(['pdftk'] + @pdfarray + ['output', @savename]))
  end

  # Removes the temporary PDF, HTML and CSS files. Files that are already
  # gone are skipped rather than raising.
  def delete_temp_files
    FileUtils.rm_f(@pdfarray)
    FileUtils.rm_f(Array(@htmlarray))
    @cssarray.each { |cssfile| FileUtils.rm_f(File.basename(cssfile)) }
  end

  # Runs the whole pipeline: download, render, join and (optionally) clean up.
  def create_pdf
    download_files
    generate_pdfs
    join_pdfs
    delete_temp_files if @remove_temp_files
  end

  private

  # Runs an external program with each argument passed separately, so no
  # shell ever interprets characters such as '&', '?' or spaces in a URL.
  # Raises Errno::ENOENT when the program cannot be started and warns when it
  # exits with a failure status.
  def run_command(*args)
    succeeded = system(*args)
    raise Errno::ENOENT, args.first if succeeded.nil?
    warn "Command failed: #{args.join(' ')}" unless succeeded
    succeeded
  end

end
142
+
143
+
144
+
@@ -0,0 +1,6 @@
1
+ require 'pdfkit'
2
+
3
# Point PDFKit at /usr/bin/wkhtmltopdf, but only when an executable really
# exists there. On systems that install wkhtmltopdf elsewhere the setting is
# left untouched, so PDFKit keeps whatever path it would otherwise use
# (NOTE(review): presumably its own default lookup - confirm against the
# PDFKit version in use).
PDFKit.configure do |config|
  default_binary = '/usr/bin/wkhtmltopdf'
  config.wkhtmltopdf = default_binary if File.executable?(default_binary)
end
6
+
@@ -0,0 +1,3 @@
1
# Version constant for the gem.
#
# HtmlsToPdf is opened as a class, not a module, because the library itself
# defines `class HtmlsToPdf`. Declaring it as a module here raises TypeError
# as soon as this file and the library are loaded in the same process.
class HtmlsToPdf
  VERSION = "0.0.4"
end
@@ -0,0 +1,2 @@
1
+ require 'htmls_to_pdf/pdfkit_config'
2
+ require 'htmls_to_pdf/htmls_to_pdf'
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: htmls_to_pdf
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 4
10
+ version: 0.0.4
11
+ platform: ruby
12
+ authors:
13
+ - James Lavin
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-10-07 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: pdfkit
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 1
29
+ segments:
30
+ - 0
31
+ - 5
32
+ version: "0.5"
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ hash: 15
36
+ segments:
37
+ - 0
38
+ - 5
39
+ - 2
40
+ version: 0.5.2
41
+ type: :runtime
42
+ version_requirements: *id001
43
+ - !ruby/object:Gem::Dependency
44
+ name: rspec
45
+ prerelease: false
46
+ requirement: &id002 !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ hash: 3
52
+ segments:
53
+ - 0
54
+ version: "0"
55
+ type: :development
56
+ version_requirements: *id002
57
+ description: Creates single PDF file from 1+ HTML pages using PDFKit
58
+ email:
59
+ - htmls_to_pdf@futureresearch.com
60
+ executables: []
61
+
62
+ extensions: []
63
+
64
+ extra_rdoc_files: []
65
+
66
+ files:
67
+ - .gitignore
68
+ - README.markdown
69
+ - examples/get_coffeescript.rb
70
+ - examples/get_coffeescript_meet_backbone.rb
71
+ - examples/get_exploring_coffeescript.rb
72
+ - examples/get_python_book.rb
73
+ - examples/get_ruby_book.rb
74
+ - examples/get_rubygems_user_guide.rb
75
+ - htmls_to_pdf.gemspec
76
+ - lib/htmls_to_pdf.rb
77
+ - lib/htmls_to_pdf/htmls_to_pdf.rb
78
+ - lib/htmls_to_pdf/pdfkit_config.rb
79
+ - lib/htmls_to_pdf/version.rb
80
+ homepage:
81
+ licenses: []
82
+
83
+ post_install_message:
84
+ rdoc_options: []
85
+
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ hash: 3
103
+ segments:
104
+ - 0
105
+ version: "0"
106
+ requirements: []
107
+
108
+ rubyforge_project:
109
+ rubygems_version: 1.8.7
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: Creates single PDF file from 1+ HTML pages
113
+ test_files: []
114
+