pdftailor 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/pdftailor ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

# Launcher for the PdfTailor command-line tool. Builds a classpath from every
# .jar file found in the sibling "jars" directory and runs the Java entry
# point with the arguments this script was given.

here     = File.dirname(__FILE__)
jar_path = File.expand_path(File.join(here, "..", "jars"))

# Dir.entries returns the names without leaving a directory handle open.
# The jars are joined with the platform's classpath separator rather than a
# hard-coded ":" so the launcher also works where the separator is ";".
jar_names = Dir.entries(jar_path).select { |name| name =~ /\.jar\z/ }
jars      = jar_names.map { |jar| File.join(jar_path, jar) }.join(File::PATH_SEPARATOR)

# Every argument is handed to java as its own argv entry. The multi-argument
# form of Kernel#system does not go through a shell, so file names containing
# spaces, quotes or shell metacharacters reach PdfTailor unchanged and cannot
# be interpreted as shell syntax.
# NOTE(review): the JVM option is normally spelled "-verbose:class"; the
# original "-verbose:classes" is kept as-is -- confirm what was intended.
system("java", "-verbose:classes", "-cp", jars, "org.documentcloud.pdftailor.PdfTailor", *ARGV)

# Report java's exit status to the caller; fall back to 1 when no status is
# available (for example when the process was terminated by a signal).
exit(($? && $?.exitstatus) || 1)
Binary file
Binary file
Binary file
@@ -0,0 +1 @@
1
+ Main-Class: org.documentcloud.pdftailor.PdfTailor
@@ -0,0 +1,160 @@
1
+ package org.documentcloud.pdftailor;
2
+
3
+ import java.io.FileOutputStream;
4
+ import java.io.IOException;
5
+ import java.util.regex.Pattern;
6
+ import java.util.Arrays;
7
+ import java.util.List;
8
+ import java.util.Iterator;
9
+
10
+ import com.itextpdf.text.Document;
11
+ import com.itextpdf.text.DocumentException;
12
+ import com.itextpdf.text.pdf.PdfCopy;
13
+ import com.itextpdf.text.pdf.PdfReader;
14
+ import com.itextpdf.text.pdf.PdfWriter;
15
+
16
+ import com.beust.jcommander.Parameter;
17
+ import com.beust.jcommander.JCommander;
18
+ import com.beust.jcommander.MissingCommandException;
19
+
20
+ public class PdfTailor {
21
+
22
+ public static void main( String[] args ) throws IOException, DocumentException {
23
+
24
+ // Initialize the JCommander parsers
25
+ JCommander cli = new JCommander();
26
+ StitchCommand stitch = new StitchCommand();
27
+ UnstitchCommand unstitch = new UnstitchCommand();
28
+
29
+ // Add our Stitch and Unstitch commands to the parser
30
+ cli.addCommand("stitch", stitch);
31
+ cli.addCommand("unstitch", unstitch);
32
+
33
+ // When called with no arguments display the usage information.
34
+ if (args.length == 0) {
35
+ usage();
36
+ } else {
37
+ try {
38
+ // Parse the provided arguments.
39
+ cli.parse(args);
40
+ String command = cli.getParsedCommand();
41
+ if (command.equals("stitch")) { stitch(stitch); }
42
+ else if (command.equals("unstitch")) { unstitch(unstitch); }
43
+ else { usage(); }
44
+
45
+ } catch (MissingCommandException unrecognizedCommand) {
46
+ // and if unrecognized, display the usage information.
47
+ usage();
48
+ }
49
+ }
50
+ }
51
+
52
+ public static String VERSION = "0.0.1";
53
+ public static String USAGE_MESSAGE = "pdftailor stitches and unstitches pdfs.\n\n" +
54
+ "Version: " + VERSION + "\n\n" +
55
+ "Usage:\n" +
56
+ " pdftailor COMMAND [OPTIONS] <pdf(s)>\n" +
57
+ " Main commands:\n" +
58
+ " stitch, unstitch\n\n" +
59
+ "Options:\n" +
60
+ " -o, --output\n" +
61
+ " The file name or file pattern to which output is written.\n" +
62
+ " For commands like unstitch which will write multiple files\n" +
63
+ " a pattern including \"%d\" can be used to specify a template\n" +
64
+ " for where files should be written (e.g. ./foo/bar_%d.pdf).\n\n" +
65
+ "Example:\n" +
66
+ " pdftailor stitch --output merged.pdf a.pdf b.pdf\n" +
67
+ " pdftailor unstitch --output merged_page_%d.pdf merged.pdf\n";
68
+
69
+ public static void usage() {
70
+ System.out.println(USAGE_MESSAGE);
71
+ }
72
+
73
+ // Stitch together an ordered list of pdfs into a single pdf.
74
+ public static void stitch( StitchCommand cli ) throws IOException, DocumentException {
75
+ String outputName = cli.output;
76
+
77
+ Document document = new Document();
78
+ PdfCopy writer = new PdfCopy(document, new FileOutputStream(outputName));
79
+ document.open();
80
+
81
+ // Loop over all of the pdfs specified on the commandline
82
+ // read out their contents and concatenate them
83
+ // in the order specified
84
+ PdfReader reader;
85
+ Iterator<String> pdfPaths = cli.files.iterator();
86
+ while (pdfPaths.hasNext()) {
87
+ String path = pdfPaths.next();
88
+ reader = new PdfReader(path);
89
+ for ( int pageNumber = 0; pageNumber < reader.getNumberOfPages(); ) {
90
+ writer.addPage(writer.getImportedPage( reader, ++pageNumber ));
91
+ }
92
+ // make sure to free the reader to prevent memory leak
93
+ // especially for very large PDFs.
94
+ writer.freeReader(reader);
95
+ }
96
+ document.close();
97
+ }
98
+
99
+ // The stitch command parser.
100
+ static class StitchCommand {
101
+ @Parameter(description = "The list of files to stitch together.")
102
+ private List<String> files;
103
+
104
+ @Parameter(names = {"--output", "-o"}, description = "The filename to write to.", required = true)
105
+ private String output;
106
+ }
107
+
108
+ // unstitch a pdf into its constitutent pages.
109
+ public static void unstitch( UnstitchCommand cli ) throws IOException, DocumentException {
110
+ // use JCommander's default file list to get the file to split.
111
+ String readerPath = cli.files.get(0);
112
+ PdfReader reader = new PdfReader(readerPath);
113
+
114
+ // Loop over the document's pages by page number.
115
+ for ( int pageNumber = 0; pageNumber < reader.getNumberOfPages(); ) {
116
+ pageNumber++;
117
+
118
+ Document document = new Document();
119
+ String outputName;
120
+ if (cli.output != null && cli.output.length() > 0) { outputName = cli.output; } else { outputName = readerPath; }
121
+ PdfCopy writer = new PdfCopy(document, new FileOutputStream(outputPath(outputName, pageNumber)));
122
+
123
+ document.open();
124
+ writer.addPage(writer.getImportedPage(reader, pageNumber));
125
+ document.close();
126
+ writer.close();
127
+ }
128
+ }
129
+
130
+ // The unstitch command parser.
131
+ static class UnstitchCommand {
132
+ @Parameter(description = "The file to unstitch.")
133
+ private List<String> files;
134
+
135
+ @Parameter(names = {"--output", "-o"}, description = "The filename pattern to write to. (e.g. mydir/file_%d_name.pdf)")
136
+ private String output;
137
+ }
138
+
139
+ // Return a file path for page when provided with a page number and
140
+ // either an existing file path, or a path template.
141
+ protected static String outputPath( String outputName, int pageNumber ) {
142
+ Pattern templatePattern = Pattern.compile("%d");
143
+ Pattern filePattern = Pattern.compile("^(.+)\\.pdf$");
144
+ String path;
145
+
146
+ // if outputName contains %d, we assume it's a template.
147
+ if ( templatePattern.matcher(outputName).find() ){
148
+ // replace %d with the page number.
149
+ path = String.format(outputName, pageNumber);
150
+ } else if ( filePattern.matcher(outputName).find() ) {
151
+ // if outputName is a pdf, chop off the file ending and insert the page number before reattaching.
152
+ path = filePattern.matcher(outputName).replaceFirst("$1_" + pageNumber + ".pdf");
153
+ } else {
154
+ // otherwise we'll just append the page number and add the pdf file extension.
155
+ path = outputName + "_" + pageNumber + ".pdf";
156
+ }
157
+ return path;
158
+ }
159
+
160
+ }
data/lib/pdftailor.rb ADDED
@@ -0,0 +1,7 @@
1
# Ruby-side namespace for pdftailor. Both module-level methods are empty
# placeholders: they accept their arguments, do nothing, and return nil.
module PDFTailor
  class << self
    # Placeholder for stitching; takes a collection of pdfs and an options
    # value, and currently ignores both.
    def stitch(pdfs, options)
    end

    # Placeholder for unstitching; takes a pdf and an options value, and
    # currently ignores both.
    def unstitch(pdf, options)
    end
  end
end
data/pdftailor.gemspec ADDED
@@ -0,0 +1,20 @@
1
# Gem specification for pdftailor: declares the gem's identity, its launcher
# executable, and the files that are packaged into the gem.
+ Gem::Specification.new do |s|
2
+ s.name = 'pdftailor'
3
# The bundled Java source declares the same version string ("0.0.1") in
# PdfTailor.VERSION; update both together.
+ s.version = '0.0.1'
4
+ s.date = '2013-09-09'
5
+
6
+ s.summary = "Stitching and unstitching for PDFs"
7
+ s.description = <<-EOS
8
+ Stitching and unstitching for PDFs. A java library delivered via ruby out of convenience.
9
+ EOS
10
+
11
+ s.authors = ['Ted Han']
12
+ s.email = 'opensource@documentcloud.org'
13
+ s.homepage = 'http://documentcloud.github.io/pdftailor'
14
+
15
+ s.require_paths = ['lib']
16
# Names the launcher script shipped as bin/pdftailor.
+ s.executables = ['pdftailor']
17
+
18
# Package the jars, the Ruby library, the launcher script, everything under
# java/ and this gemspec itself.
+ s.files = Dir['jars/*', 'lib/**/*', 'bin/*', 'java/**/*', 'pdftailor.gemspec']
19
+ s.license = "MIT"
20
+ end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdftailor
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Ted Han
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-09 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: |2
15
+ Stitching and unstitching for PDFs. A java library delivered via ruby out of convenience.
16
+ email: opensource@documentcloud.org
17
+ executables:
18
+ - pdftailor
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - jars/itextpdf-5.3.4.jar
23
+ - jars/jcommander-1.31-SNAPSHOT.jar
24
+ - jars/pdftailor.jar
25
+ - lib/pdftailor.rb
26
+ - bin/pdftailor
27
+ - java/src/META-INF/MANIFEST.MF
28
+ - java/src/org/documentcloud/pdftailor/PdfTailor$StitchCommand.class
29
+ - java/src/org/documentcloud/pdftailor/PdfTailor$UnstitchCommand.class
30
+ - java/src/org/documentcloud/pdftailor/PdfTailor.class
31
+ - java/src/org/documentcloud/pdftailor/PdfTailor.java
32
+ - pdftailor.gemspec
33
+ homepage: http://documentcloud.github.io/pdftailor
34
+ licenses:
35
+ - MIT
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ none: false
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ none: false
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Stitching and unstitching for PDFs
58
+ test_files: []