jekyll-pandoc-exports 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a5364fb46cdd2204734b7da5c0839a52718720a21716e3e1f7ba44e4575793af
4
+ data.tar.gz: 54488d1f7b0cb3d922c270d1bd0073782532c4b9bc83613af65bab743e19b8a8
5
+ SHA512:
6
+ metadata.gz: 68956bc865a4f121db2383f76ee4da4b182738dca373a12170d303a1562c384bb8acee5fb08a7b345793eb5872cea2e1739b34603b360c436d8c61a7382da2af
7
+ data.tar.gz: f64ba616557c883f967557a6f933cc0e46ad289866c3deb57aac7c0ac5ee98a354c742319fda9e1fb0cf036b8f012838e345af0177929650e60d1df7accc6778
data/CHANGELOG.md ADDED
@@ -0,0 +1,17 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [1.0.0] - 2025-01-XX
6
+
7
+ ### Added
8
+ - Initial release of jekyll-pandoc-exports plugin
9
+ - DOCX generation using pandoc-ruby
10
+ - PDF generation with LaTeX support
11
+ - Configurable PDF options (margins, paper size, etc.)
12
+ - Unicode cleanup for LaTeX compatibility
13
+ - Automatic download link injection
14
+ - Configurable HTML cleanup patterns
15
+ - Image path fixing for pandoc conversion
16
+ - Print-friendly CSS class support
17
+ - Per-page PDF option overrides
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Michael McGarrah
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,196 @@
1
+ # Jekyll Pandoc Exports Plugin
2
+
3
+ A Jekyll plugin that automatically generates DOCX and PDF exports of your pages using Pandoc.
4
+
5
+ ## Features
6
+
7
+ - Generate Word documents (.docx) and PDFs from Jekyll pages, posts, and collections
8
+ - Configurable output directories for organized file management
9
+ - Incremental builds (only regenerate changed files)
10
+ - Automatic dependency validation (Pandoc/LaTeX)
11
+ - Configurable PDF options (margins, paper size, etc.)
12
+ - Automatic Unicode cleanup for LaTeX compatibility
13
+ - Configurable HTML cleanup patterns
14
+ - Auto-injection of download links
15
+ - Flexible image path fixing
16
+ - Print-friendly CSS class support
17
+
18
+ ## Installation
19
+
20
+ ### 1. Install Dependencies
21
+
22
+ First, install Pandoc and LaTeX (for PDF generation):
23
+
24
+ ```bash
25
+ # Ubuntu/Debian
26
+ sudo apt-get install pandoc texlive-latex-base texlive-fonts-recommended texlive-latex-extra
27
+
28
+ # macOS
29
+ brew install pandoc
30
+ brew install --cask mactex
31
+ ```
32
+
33
+ ### 2. Add to Gemfile
34
+
35
+ Add to your Jekyll site's Gemfile:
36
+
37
+ ```ruby
38
+ gem "jekyll-pandoc-exports"
39
+ ```
40
+
41
+ ### 3. Enable Plugin
42
+
43
+ Add to your `_config.yml`:
44
+
45
+ ```yaml
46
+ plugins:
47
+ - jekyll-pandoc-exports
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ ### Basic Usage
53
+
54
+ Add front matter to any page you want to export:
55
+
56
+ ```yaml
57
+ ---
58
+ title: My Document
59
+ docx: true # Generate Word document
60
+ pdf: true # Generate PDF
61
+ ---
62
+ ```
63
+
64
+ ### Configuration
65
+
66
+ Add configuration to your `_config.yml`:
67
+
68
+ ```yaml
69
+ pandoc_exports:
70
+ enabled: true
71
+ output_dir: 'downloads' # Custom output directory (optional)
72
+ collections: ['pages', 'posts'] # Collections to process
73
+ incremental: true # Only regenerate changed files
74
+ pdf_options:
75
+ variable: 'geometry:margin=0.75in'
76
+ unicode_cleanup: true
77
+ inject_downloads: true
78
+ download_class: 'pandoc-downloads no-print'
79
+ title_cleanup:
80
+ - '<title>.*?</title>'
81
+ - '<h1[^>]*>.*?Site Title.*?</h1>'
82
+ image_path_fixes:
83
+ - pattern: 'src="/assets/images/'
84
+ replacement: 'src="{{site.dest}}/assets/images/'
85
+ ```
86
+
87
+ ### Configuration Options
88
+
89
+ - `enabled`: Enable/disable the plugin (default: true)
90
+ - `output_dir`: Custom output directory for exports (default: site root)
91
+ - `collections`: Array of collections to process (default: ['pages', 'posts'])
92
+ - `incremental`: Only regenerate files when source changes (default: false)
93
+ - `pdf_options`: Pandoc options for PDF generation (default: 1in margins)
94
+ - `unicode_cleanup`: Remove problematic Unicode characters for LaTeX (default: true)
95
+ - `inject_downloads`: Auto-inject download links into pages (default: true)
96
+ - `download_class`: CSS class for download links (default: 'pandoc-downloads no-print')
97
+ - `title_cleanup`: Array of regex patterns to remove from PDF HTML
98
+ - `image_path_fixes`: Array of path replacements for images
99
+
100
+ ### Per-Page PDF Options
101
+
102
+ Override PDF options for specific pages:
103
+
104
+ ```yaml
105
+ ---
106
+ title: My Document
107
+ pdf: true
108
+ pdf_options:
109
+ variable: 'geometry:margin=0.5in'
110
+ ---
111
+ ```
112
+
113
+ ### CSS for Print Hiding
114
+
115
+ Add to your main CSS to hide download links when printing:
116
+
117
+ ```css
118
+ @media print {
119
+ .no-print {
120
+ display: none !important;
121
+ }
122
+ }
123
+ ```
124
+
125
+ ## Generated Files
126
+
127
+ The plugin generates files with the same name as your markdown file:
128
+
129
+ - `my-page.md` → `my-page.docx` and `my-page.pdf`
130
+ - Accessible at `/my-page.docx` and `/my-page.pdf`, or under `/<output_dir>/` (for example `/downloads/my-page.pdf`) when `output_dir` is set
131
+
132
+ ## Download Links
133
+
134
+ When `inject_downloads` is enabled, the plugin automatically adds download links to pages that generate exports. Links are inserted after the first heading or at the beginning of the body.
135
+
136
+ ## Publishing to RubyGems
137
+
138
+ If you want to publish this gem to RubyGems:
139
+
140
+ ### 1. Build the gem:
141
+ ```bash
142
+ gem build jekyll-pandoc-exports.gemspec
143
+ ```
144
+
145
+ ### 2. Test locally (optional):
146
+ ```bash
147
+ gem install ./jekyll-pandoc-exports-0.1.4.gem
148
+ ```
149
+
150
+ ### 3. Publish to RubyGems:
151
+ ```bash
152
+ # First time setup (if needed)
153
+ gem signin
154
+
155
+ # Publish the gem
156
+ gem push jekyll-pandoc-exports-0.1.4.gem
157
+ ```
158
+
159
+ ## Gem Structure
160
+
161
+ - **`jekyll-pandoc-exports.gemspec`** - Gem specification with dependencies
162
+ - **`lib/jekyll-pandoc-exports.rb`** - Main entry point
163
+ - **`lib/jekyll-pandoc-exports/version.rb`** - Version management
164
+ - **`lib/jekyll-pandoc-exports/generator.rb`** - Plugin code
165
+ - **`README.md`** - Complete documentation
166
+ - **`LICENSE`** - MIT license
167
+ - **`CHANGELOG.md`** - Version history
168
+ - **`Gemfile`** - Development dependencies
169
+ - **`Rakefile`** - Build tasks
170
+
171
+ ## Troubleshooting
172
+
173
+ ### LaTeX Errors
174
+ - Ensure LaTeX packages are installed
175
+ - Enable `unicode_cleanup` to remove problematic characters
176
+ - Add custom cleanup patterns to `title_cleanup`
177
+
178
+ ### Image Issues
179
+ - Configure `image_path_fixes` for your site's image paths
180
+ - Use absolute paths in the replacement patterns
181
+
182
+ ### Missing Files
183
+ - Check that Pandoc is installed and accessible
184
+ - Verify file permissions in the `_site` directory
185
+
186
+ ## Contributing
187
+
188
+ 1. Fork the repository
189
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
190
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
191
+ 4. Push to the branch (`git push origin my-new-feature`)
192
+ 5. Create a new Pull Request
193
+
194
+ ## License
195
+
196
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,232 @@
1
require 'fileutils'
require 'pandoc-ruby'
2
+
3
+ module Jekyll
4
+ module PandocExports
5
+
6
# Run the export pipeline from Jekyll's :site/:post_write hook. The pipeline
# reads rendered HTML files from the site destination, so it is registered on
# the hook that fires after the site has been written.
#
# `next` (not `return`) is used to leave the hook early: this block is not
# defined inside a method, so `return` would raise LocalJumpError when the
# plugin is disabled or Pandoc is missing.
Jekyll::Hooks.register :site, :post_write do |site|
  config = setup_configuration(site)

  # Plugin switched off in the site configuration: nothing to do.
  next unless config['enabled']

  unless validate_dependencies
    Jekyll.logger.error "Pandoc Exports:", "Missing required dependencies. Please install Pandoc and LaTeX."
    next
  end

  process_collections(site, config)
end
17
+
18
# Build the effective plugin configuration.
#
# Layers the `pandoc_exports` section of the site configuration over the
# built-in defaults so every supported option is present in the result.
#
# site - object whose `config` returns the site configuration Hash.
#
# Returns a Hash of option name (String) => value.
def self.setup_configuration(site)
  defaults = {
    'enabled' => true,
    'output_dir' => '',
    'collections' => ['pages', 'posts'],
    'incremental' => false,
    'pdf_options' => { 'variable' => 'geometry:margin=1in' },
    'unicode_cleanup' => true,
    'inject_downloads' => true,
    'download_class' => 'pandoc-downloads no-print',
    'download_style' => 'margin: 20px 0; padding: 15px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 5px;',
    'title_cleanup' => [],
    'image_path_fixes' => []
  }

  overrides = site.config['pandoc_exports']
  # A missing or malformed (non-mapping) section falls back to the defaults.
  overrides = {} unless overrides.is_a?(Hash)

  # A key written without a value (e.g. `title_cleanup:`) is nil; letting it
  # replace the default would break the later `.each` / `.empty?` calls.
  # Explicit `false` values are kept.
  defaults.merge(overrides.reject { |_key, value| value.nil? })
end
33
+
34
# Check that the external programs used for conversion can be run.
#
# Probes `pandoc` and `pdflatex` with `--version`. A warning is logged for
# each one that cannot be run. Only Pandoc decides the result; a missing
# `pdflatex` is reported but does not make the check fail.
#
# Returns true when `pandoc --version` ran successfully, false otherwise.
def self.validate_dependencies
  # Argument-list form with File::NULL avoids the shell and the
  # `> /dev/null` redirection, which does not exist on Windows.
  pandoc_available = system('pandoc', '--version', out: File::NULL, err: File::NULL) == true
  latex_available = system('pdflatex', '--version', out: File::NULL, err: File::NULL) == true

  unless pandoc_available
    Jekyll.logger.warn "Pandoc Exports:", "Pandoc not found. Install with: brew install pandoc (macOS) or apt-get install pandoc (Ubuntu)"
  end

  unless latex_available
    Jekyll.logger.warn "Pandoc Exports:", "LaTeX not found. Install with: brew install --cask mactex (macOS) or apt-get install texlive-latex-base (Ubuntu)"
  end

  pandoc_available
end
48
+
49
# Walk every configured collection and hand each of its items to
# `process_item`.
#
# 'pages' and 'posts' are read from `site.pages` and `site.posts.docs`; any
# other name is looked up in `site.collections`, and names with no matching
# collection are skipped.
#
# site   - the site object passed to the hook.
# config - effective plugin configuration; `config['collections']` lists the
#          collection names to visit.
def self.process_collections(site, config)
  config['collections'].each do |name|
    items =
      if name == 'pages'
        site.pages
      elsif name == 'posts'
        site.posts.docs
      else
        site.collections[name]&.docs
      end

    next if items.nil?

    items.each { |entry| process_item(site, entry, config) }
  end
end
62
+
63
# Export a single page/document when it asks for it.
#
# An item is exported only if its front matter sets `docx` or `pdf` to a
# truthy value and `skip_unchanged_file?` does not report it as skippable.
#
# Returns nil when the item is skipped, otherwise the result of
# `process_page`.
def self.process_item(site, item, config)
  wants_export = item.data['docx'] || item.data['pdf']

  if wants_export && !skip_unchanged_file?(site, item, config)
    process_page(site, item, config)
  end
end
71
+
72
# Decide whether an item's exports are current and can be skipped.
#
# Only applies when `config['incremental']` is truthy. A requested export is
# stale when its file is missing or older than the source file; if any
# requested export is stale the item must be processed again.
#
# site   - the site object (used to resolve the output directory).
# item   - page/document; must respond to `path` or `relative_path`, and `data`.
# config - effective plugin configuration.
#
# Returns true when every requested export exists and is not older than the
# source file, false otherwise.
def self.skip_unchanged_file?(site, item, config)
  return false unless config['incremental']

  source_file = item.respond_to?(:path) ? item.path : item.relative_path
  # Without a readable source there is no timestamp to compare against.
  return false unless File.exist?(source_file)

  filename = get_output_filename(item)
  output_dir = get_output_directory(site, config)
  source_mtime = File.mtime(source_file)

  stale = %w[docx pdf].any? do |format|
    next false unless item.data[format]

    export_file = File.join(output_dir, "#{filename}.#{format}")
    # A missing export counts as stale, so it is generated on the first build
    # and again whenever the destination has been cleaned.
    !File.exist?(export_file) || File.mtime(export_file) < source_mtime
  end

  !stale
end
96
+
97
# Derive the base name (no directory, no Markdown extension) used for the
# exported files.
#
# Uses `item.basename` when the item responds to it, otherwise `item.path`.
# Besides `.md`, the other extensions in Jekyll's default `markdown_ext`
# setting are stripped too, so `notes.markdown` exports as `notes.docx`
# rather than `notes.markdown.docx`. Other dots in the name are left alone.
#
# Returns the base name as a String.
def self.get_output_filename(item)
  source_name = item.respond_to?(:basename) ? item.basename : item.path
  File.basename(source_name.to_s).sub(/\.(?:md|markdown|mkdown|mkdn|mkd)\z/i, '')
end
104
+
105
# Resolve, and create if needed, the directory exports are written to.
#
# site   - object whose `dest` is the site's output directory.
# config - plugin configuration; `config['output_dir']` is an optional
#          sub-directory of `site.dest`.
#
# Returns `site.dest` when no sub-directory is configured (nil, empty or
# blank), otherwise the path of the sub-directory inside `site.dest`.
def self.get_output_directory(site, config)
  # `to_s` tolerates a missing/nil setting instead of failing on `nil.empty?`.
  subdirectory = config['output_dir'].to_s.strip
  return site.dest if subdirectory.empty?

  output_path = File.join(site.dest, subdirectory)
  # mkdir_p is a no-op for an existing directory, so no existence check is needed.
  FileUtils.mkdir_p(output_path)
  output_path
end
114
+
115
# Convert one rendered page into the export formats it requests.
#
# Reads the page's HTML file, applies the configured HTML fixes, writes the
# DOCX and/or PDF named in the page's front matter and, when
# `inject_downloads` is on and at least one export succeeded, rewrites the
# HTML file with download links added.
#
# Returns nil when the HTML file is missing or no links are injected,
# otherwise the result of `inject_download_links`.
def self.process_page(site, page, config)
  rendered_path = get_html_file_path(site, page)
  return nil unless File.exist?(rendered_path)

  original_html = File.read(rendered_path)
  export_html = process_html_content(original_html, site, config)
  base_name = get_output_filename(page)
  target_dir = get_output_directory(site, config)

  # The generators append an entry here for every file they manage to write.
  exports = []

  if page.data['docx']
    generate_docx(export_html, base_name, target_dir, site, exports)
  end

  if page.data['pdf']
    generate_pdf(export_html, base_name, target_dir, site, exports, page, config)
  end

  return nil unless config['inject_downloads'] && exports.any?

  # Links go into the untouched HTML, not the copy prepared for Pandoc.
  inject_download_links(original_html, exports, rendered_path, config)
end
132
+
133
# Locate the rendered HTML file for a page inside the site destination.
#
# Handles the URL shapes a page can have:
#   "/about/"          -> <dest>/about/index.html
#   "/docs/guide"      -> <dest>/docs/guide.html
#   "/docs/guide.html" -> <dest>/docs/guide.html
#
# Directory segments are preserved, so nested pages resolve correctly, and
# ".html" is appended only when the URL does not already carry an HTML
# extension.
#
# site - object whose `dest` is the site's output directory.
# page - object whose `url` is the page's site-relative URL.
#
# Returns the file path as a String.
def self.get_html_file_path(site, page)
  url = page.url.to_s

  relative_path =
    if url.end_with?('/')
      File.join(url, 'index.html')
    elsif url.end_with?('.html', '.htm')
      url
    else
      "#{url}.html"
    end

  File.join(site.dest, relative_path)
end
141
+
142
# Prepare a copy of the page HTML for Pandoc by rewriting image paths.
#
# Each entry of `config['image_path_fixes']` supplies a `pattern` (compiled
# as a regular expression) and a `replacement`, in which the placeholder
# `{{site.dest}}` is expanded to the site's output directory. The rules are
# applied in order; the string passed in is not modified.
#
# Returns the rewritten HTML as a new String.
def self.process_html_content(html_content, site, config)
  config['image_path_fixes'].inject(html_content.dup) do |html, rule|
    matcher = Regexp.new(rule['pattern'])
    substitute = rule['replacement'].gsub('{{site.dest}}', site.dest)
    html.gsub(matcher, substitute)
  end
end
152
+
153
# Convert HTML to a Word document and record it for download-link injection.
#
# html_content    - HTML to convert.
# filename        - base name of the export (no extension).
# output_dir      - directory the .docx file is written to.
# site            - site object; `dest` and `baseurl` are used to build the URL.
# generated_files - Array that receives a `{ type:, url: }` Hash on success.
#
# Any StandardError raised along the way is logged and swallowed, so a failed
# export does not stop the remaining exports.
def self.generate_docx(html_content, filename, output_dir, site, generated_files)
  docx_content = PandocRuby.convert(html_content, from: :html, to: :docx)
  docx_file = File.join(output_dir, "#{filename}.docx")

  File.binwrite(docx_file, docx_content)

  # Build the URL from where the file was written, so the link still points
  # at the export when an `output_dir` below the site destination is used.
  dest_root = site.dest.to_s
  target_dir = output_dir.to_s
  relative_dir = target_dir.start_with?(dest_root) ? target_dir.delete_prefix(dest_root) : ''
  relative_dir = relative_dir.gsub(%r{\A/+|/+\z}, '')
  url_path = [relative_dir, "#{filename}.docx"].reject(&:empty?).join('/')

  generated_files << {
    type: 'Word Document (.docx)',
    url: "#{site.baseurl}/#{url_path}"
  }
  Jekyll.logger.info "Pandoc Exports:", "Generated #{filename}.docx"
rescue StandardError => e
  Jekyll.logger.error "Pandoc Exports:", "Failed to generate #{filename}.docx: #{e.message}"
end
169
+
170
# Convert HTML to a PDF and record it for download-link injection.
#
# Before conversion the HTML copy is optionally stripped of characters matched
# by `clean_unicode_characters` (`unicode_cleanup`) and of every pattern listed
# in `title_cleanup`. Pandoc options come from the page's `pdf_options` front
# matter when present, otherwise from the plugin configuration.
#
# html_content    - HTML to convert (not modified).
# filename        - base name of the export (no extension).
# output_dir      - directory the .pdf file is written to.
# site            - site object; `dest` and `baseurl` are used to build the URL.
# generated_files - Array that receives a `{ type:, url: }` Hash on success.
# page            - page/document whose front matter may override `pdf_options`.
# config          - effective plugin configuration.
#
# Any StandardError raised along the way is logged and swallowed, so a failed
# export does not stop the remaining exports.
def self.generate_pdf(html_content, filename, output_dir, site, generated_files, page, config)
  pdf_html = html_content.dup

  # Apply Unicode cleanup if enabled
  pdf_html = clean_unicode_characters(pdf_html) if config['unicode_cleanup']

  # Apply title cleanup patterns from config
  config['title_cleanup'].each do |pattern|
    pdf_html.gsub!(Regexp.new(pattern), '')
  end

  # Page front matter wins over the site-wide options
  pdf_options = page.data['pdf_options'] || config['pdf_options']

  pdf_content = PandocRuby.new(pdf_html, from: :html, to: :pdf).convert(pdf_options)
  pdf_file = File.join(output_dir, "#{filename}.pdf")

  File.binwrite(pdf_file, pdf_content)

  # Build the URL from where the file was written, so the link still points
  # at the export when an `output_dir` below the site destination is used.
  dest_root = site.dest.to_s
  target_dir = output_dir.to_s
  relative_dir = target_dir.start_with?(dest_root) ? target_dir.delete_prefix(dest_root) : ''
  relative_dir = relative_dir.gsub(%r{\A/+|/+\z}, '')
  url_path = [relative_dir, "#{filename}.pdf"].reject(&:empty?).join('/')

  generated_files << {
    type: 'PDF Document (.pdf)',
    url: "#{site.baseurl}/#{url_path}"
  }
  Jekyll.logger.info "Pandoc Exports:", "Generated #{filename}.pdf"
rescue StandardError => e
  Jekyll.logger.error "Pandoc Exports:", "Failed to generate #{filename}.pdf: #{e.message}"
end
201
+
202
# Strip emoji and symbol characters from HTML headed for LaTeX.
#
# Removes every character in the code point ranges U+1F000-U+1F9FF,
# U+2600-U+26FF and U+2700-U+27BF; all other text is left as is.
#
# Returns a new String; the argument is not modified.
def self.clean_unicode_characters(html)
  latex_hostile = /[\u{1F000}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/
  html.gsub(latex_hostile, '')
end
206
+
207
# Insert the download-links block into a page's HTML and write it back.
#
# The block is placed right after the first closing heading tag
# (`</h1>`..`</h6>`). When the page has none, it is placed right after the
# opening `<body>` tag. If neither is found the HTML is written back as is.
#
# html_content    - page HTML; modified in place.
# generated_files - Array of `{ type:, url: }` Hashes describing the exports.
# html_file       - path of the HTML file to overwrite.
# config          - effective plugin configuration.
#
# Returns the result of `File.write`.
def self.inject_download_links(html_content, generated_files, html_file, config)
  download_html = build_download_html(generated_files, config)

  # The block form inserts `download_html` verbatim; a replacement *string*
  # would treat backslashes coming from the configured class/style as
  # back-references. `sub!` returns nil when nothing matched, which triggers
  # the <body> fallback.
  html_content.sub!(%r{</h[1-6]>}) { |closing_tag| "#{closing_tag}\n#{download_html}" } ||
    html_content.sub!(/<body[^>]*>/) { |body_tag| "#{body_tag}\n#{download_html}" }

  File.write(html_file, html_content)
end
219
+
220
# Render the HTML snippet listing the generated downloads.
#
# generated_files - Array of Hashes with `:url` and `:type` keys, one per
#                   export.
# config          - plugin configuration; `download_class` and
#                   `download_style` become the wrapper's class and style
#                   attributes.
#
# Returns the snippet as a single-line String.
def self.build_download_html(generated_files, config)
  wrapper_open = %(<div class="#{config['download_class']}" style="#{config['download_style']}">)
  caption = '<p><strong>Download Options:</strong></p>'
  list_open = '<ul style="margin: 5px 0; padding-left: 20px;">'

  entries = generated_files.map do |export|
    %(<li><a href="#{export[:url]}" style="color: #007bff; text-decoration: none; font-weight: bold;">#{export[:type]}</a></li>)
  end

  [wrapper_open, caption, list_open, *entries, '</ul></div>'].join
end
231
+ end
232
+ end
@@ -0,0 +1,5 @@
1
module Jekyll
  module PandocExports
    # Version of the jekyll-pandoc-exports gem. Frozen so the shared constant
    # cannot be mutated in place by code that reads it.
    VERSION = "0.1.4".freeze
  end
end
@@ -0,0 +1,2 @@
1
+ require "jekyll-pandoc-exports/version"
2
+ require "jekyll-pandoc-exports/generator"
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll-pandoc-exports
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - Michael McGarrah
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-09-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jekyll
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pandoc-ruby
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '13.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '13.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '5.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '5.0'
83
+ description: Automatically generate Word documents and PDFs from Jekyll pages with
84
+ configurable options, Unicode cleanup, and auto-injected download links.
85
+ email:
86
+ - mcgarrah@gmail.com
87
+ executables: []
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - CHANGELOG.md
92
+ - LICENSE
93
+ - README.md
94
+ - lib/jekyll-pandoc-exports.rb
95
+ - lib/jekyll-pandoc-exports/generator.rb
96
+ - lib/jekyll-pandoc-exports/version.rb
97
+ homepage: https://github.com/mcgarrah/jekyll-pandoc-exports
98
+ licenses:
99
+ - MIT
100
+ metadata: {}
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: 2.6.0
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubygems_version: 3.2.33
117
+ signing_key:
118
+ specification_version: 4
119
+ summary: Jekyll plugin to generate DOCX and PDF exports using Pandoc
120
+ test_files: []