pdftailor 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/pdftailor ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

# Launcher for the bundled PdfTailor Java program.
#
# Builds a classpath from every .jar in ../jars (relative to this script)
# and runs org.documentcloud.pdftailor.PdfTailor with the arguments given
# to this script.

here     = File.dirname(__FILE__)
jar_path = File.expand_path(File.join(here, "..", "jars"))

# Sort for a deterministic classpath order and use the platform's own
# classpath separator (":" on Unix, ";" on Windows).
jar_names = Dir.entries(jar_path).select { |name| name.end_with?(".jar") }.sort
classpath = jar_names.map { |name| File.join(jar_path, name) }.join(File::PATH_SEPARATOR)

# Pass each argument as its own argv entry. The multi-argument form of
# Kernel#system does not go through a shell, so file names containing spaces
# or shell metacharacters reach Java unchanged and are never interpreted.
# NOTE(review): "-verbose:classes" is kept exactly as it was; the documented
# JVM flag is "-verbose:class" -- confirm which one is intended.
launched = system("java", "-verbose:classes", "-cp", classpath,
                  "org.documentcloud.pdftailor.PdfTailor", *ARGV)

# system returns nil when java could not be started and false when it exited
# with a non-zero status; surface that to the caller instead of always
# exiting 0.
unless launched
  exit(launched.nil? ? 127 : ($?.exitstatus || 1))
end
Binary file
Binary file
Binary file
@@ -0,0 +1 @@
1
+ Main-Class: org.documentcloud.pdftailor.PdfTailor
@@ -0,0 +1,160 @@
1
+ package org.documentcloud.pdftailor;
2
+
3
+ import java.io.FileOutputStream;
4
+ import java.io.IOException;
5
+ import java.util.regex.Pattern;
6
+ import java.util.Arrays;
7
+ import java.util.List;
8
+ import java.util.Iterator;
9
+
10
+ import com.itextpdf.text.Document;
11
+ import com.itextpdf.text.DocumentException;
12
+ import com.itextpdf.text.pdf.PdfCopy;
13
+ import com.itextpdf.text.pdf.PdfReader;
14
+ import com.itextpdf.text.pdf.PdfWriter;
15
+
16
+ import com.beust.jcommander.Parameter;
17
+ import com.beust.jcommander.JCommander;
18
+ import com.beust.jcommander.MissingCommandException;
19
+
20
+ public class PdfTailor {
21
+
22
+ public static void main( String[] args ) throws IOException, DocumentException {
23
+
24
+ // Initialize the JCommander parsers
25
+ JCommander cli = new JCommander();
26
+ StitchCommand stitch = new StitchCommand();
27
+ UnstitchCommand unstitch = new UnstitchCommand();
28
+
29
+ // Add our Stitch and Unstitch commands to the parser
30
+ cli.addCommand("stitch", stitch);
31
+ cli.addCommand("unstitch", unstitch);
32
+
33
+ // When called with no arguments display the usage information.
34
+ if (args.length == 0) {
35
+ usage();
36
+ } else {
37
+ try {
38
+ // Parse the provided arguments.
39
+ cli.parse(args);
40
+ String command = cli.getParsedCommand();
41
+ if (command.equals("stitch")) { stitch(stitch); }
42
+ else if (command.equals("unstitch")) { unstitch(unstitch); }
43
+ else { usage(); }
44
+
45
+ } catch (MissingCommandException unrecognizedCommand) {
46
+ // and if unrecognized, display the usage information.
47
+ usage();
48
+ }
49
+ }
50
+ }
51
+
52
+ public static String VERSION = "0.0.1";
53
+ public static String USAGE_MESSAGE = "pdftailor stitches and unstitches pdfs.\n\n" +
54
+ "Version: " + VERSION + "\n\n" +
55
+ "Usage:\n" +
56
+ " pdftailor COMMAND [OPTIONS] <pdf(s)>\n" +
57
+ " Main commands:\n" +
58
+ " stitch, unstitch\n\n" +
59
+ "Options:\n" +
60
+ " -o, --output\n" +
61
+ " The file name or file pattern to which output is written.\n" +
62
+ " For commands like unstitch which will write multiple files\n" +
63
+ " a pattern including \"%d\" can be used to specify a template\n" +
64
+ " for where files should be written (e.g. ./foo/bar_%d.pdf).\n\n" +
65
+ "Example:\n" +
66
+ " pdftailor stitch --output merged.pdf a.pdf b.pdf\n" +
67
+ " pdftailor unstitch --output merged_page_%d.pdf merged.pdf\n";
68
+
69
+ public static void usage() {
70
+ System.out.println(USAGE_MESSAGE);
71
+ }
72
+
73
+ // Stitch together an ordered list of pdfs into a single pdf.
74
+ public static void stitch( StitchCommand cli ) throws IOException, DocumentException {
75
+ String outputName = cli.output;
76
+
77
+ Document document = new Document();
78
+ PdfCopy writer = new PdfCopy(document, new FileOutputStream(outputName));
79
+ document.open();
80
+
81
+ // Loop over all of the pdfs specified on the commandline
82
+ // read out their contents and concatenate them
83
+ // in the order specified
84
+ PdfReader reader;
85
+ Iterator<String> pdfPaths = cli.files.iterator();
86
+ while (pdfPaths.hasNext()) {
87
+ String path = pdfPaths.next();
88
+ reader = new PdfReader(path);
89
+ for ( int pageNumber = 0; pageNumber < reader.getNumberOfPages(); ) {
90
+ writer.addPage(writer.getImportedPage( reader, ++pageNumber ));
91
+ }
92
+ // make sure to free the reader to prevent memory leak
93
+ // especially for very large PDFs.
94
+ writer.freeReader(reader);
95
+ }
96
+ document.close();
97
+ }
98
+
99
+ // The stitch command parser.
100
+ static class StitchCommand {
101
+ @Parameter(description = "The list of files to stitch together.")
102
+ private List<String> files;
103
+
104
+ @Parameter(names = {"--output", "-o"}, description = "The filename to write to.", required = true)
105
+ private String output;
106
+ }
107
+
108
+ // Unstitch a pdf into its constituent pages.
109
+ public static void unstitch( UnstitchCommand cli ) throws IOException, DocumentException {
110
+ // use JCommander's default file list to get the file to split.
111
+ String readerPath = cli.files.get(0);
112
+ PdfReader reader = new PdfReader(readerPath);
113
+
114
+ // Loop over the document's pages by page number.
115
+ for ( int pageNumber = 0; pageNumber < reader.getNumberOfPages(); ) {
116
+ pageNumber++;
117
+
118
+ Document document = new Document();
119
+ String outputName;
120
+ if (cli.output != null && cli.output.length() > 0) { outputName = cli.output; } else { outputName = readerPath; }
121
+ PdfCopy writer = new PdfCopy(document, new FileOutputStream(outputPath(outputName, pageNumber)));
122
+
123
+ document.open();
124
+ writer.addPage(writer.getImportedPage(reader, pageNumber));
125
+ document.close();
126
+ writer.close();
127
+ }
128
+ }
129
+
130
+ // The unstitch command parser.
131
+ static class UnstitchCommand {
132
+ @Parameter(description = "The file to unstitch.")
133
+ private List<String> files;
134
+
135
+ @Parameter(names = {"--output", "-o"}, description = "The filename pattern to write to. (e.g. mydir/file_%d_name.pdf)")
136
+ private String output;
137
+ }
138
+
139
+ // Return a file path for page when provided with a page number and
140
+ // either an existing file path, or a path template.
141
+ protected static String outputPath( String outputName, int pageNumber ) {
142
+ Pattern templatePattern = Pattern.compile("%d");
143
+ Pattern filePattern = Pattern.compile("^(.+)\\.pdf$");
144
+ String path;
145
+
146
+ // if outputName contains %d, we assume it's a template.
147
+ if ( templatePattern.matcher(outputName).find() ){
148
+ // replace %d with the page number.
149
+ path = String.format(outputName, pageNumber);
150
+ } else if ( filePattern.matcher(outputName).find() ) {
151
+ // if outputName is a pdf, chop off the file ending and insert the page number before reattaching.
152
+ path = filePattern.matcher(outputName).replaceFirst("$1_" + pageNumber + ".pdf");
153
+ } else {
154
+ // otherwise we'll just append the page number and add the pdf file extension.
155
+ path = outputName + "_" + pageNumber + ".pdf";
156
+ }
157
+ return path;
158
+ }
159
+
160
+ }
data/lib/pdftailor.rb ADDED
@@ -0,0 +1,7 @@
1
# Ruby-side namespace for pdftailor. Both module-level methods are empty
# placeholders at this point: they accept their arguments and return nil.
module PDFTailor
  class << self
    # Placeholder for stitching; takes a list of pdfs and an options value.
    def stitch(pdfs, options)
    end

    # Placeholder for unstitching; takes one pdf and an options value.
    def unstitch(pdf, options)
    end
  end
end
data/pdftailor.gemspec ADDED
@@ -0,0 +1,20 @@
1
# Gem specification for pdftailor.
# NOTE(review): the leading whitespace of the description heredoc line is not
# recoverable from the flattened source; confirm against the repository.
Gem::Specification.new do |spec|
  spec.name    = 'pdftailor'
  spec.version = '0.0.1'
  spec.date    = '2013-09-09'

  spec.summary     = "Stitching and unstitching for PDFs"
  spec.description = <<-EOS
    Stitching and unstitching for PDFs. A java library delivered via ruby out of convenience.
  EOS

  spec.authors  = ['Ted Han']
  spec.email    = 'opensource@documentcloud.org'
  spec.homepage = 'http://documentcloud.github.io/pdftailor'

  spec.require_paths = ['lib']
  spec.executables   = ['pdftailor']

  # Everything that ships in the gem: jars, Ruby library, launcher, Java
  # sources/classes and this specification itself.
  packaged = ['jars/*', 'lib/**/*', 'bin/*', 'java/**/*', 'pdftailor.gemspec']
  spec.files   = Dir.glob(packaged)
  spec.license = "MIT"
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdftailor
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Ted Han
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-09 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: |2
15
+ Stitching and unstitching for PDFs. A java library delivered via ruby out of convenience.
16
+ email: opensource@documentcloud.org
17
+ executables:
18
+ - pdftailor
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - jars/itextpdf-5.3.4.jar
23
+ - jars/jcommander-1.31-SNAPSHOT.jar
24
+ - jars/pdftailor.jar
25
+ - lib/pdftailor.rb
26
+ - bin/pdftailor
27
+ - java/src/META-INF/MANIFEST.MF
28
+ - java/src/org/documentcloud/pdftailor/PdfTailor$StitchCommand.class
29
+ - java/src/org/documentcloud/pdftailor/PdfTailor$UnstitchCommand.class
30
+ - java/src/org/documentcloud/pdftailor/PdfTailor.class
31
+ - java/src/org/documentcloud/pdftailor/PdfTailor.java
32
+ - pdftailor.gemspec
33
+ homepage: http://documentcloud.github.io/pdftailor
34
+ licenses:
35
+ - MIT
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ none: false
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ none: false
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Stitching and unstitching for PDFs
58
+ test_files: []