pdftailor 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/pdftailor +6 -0
- data/jars/itextpdf-5.3.4.jar +0 -0
- data/jars/jcommander-1.31-SNAPSHOT.jar +0 -0
- data/jars/pdftailor.jar +0 -0
- data/java/src/META-INF/MANIFEST.MF +1 -0
- data/java/src/org/documentcloud/pdftailor/PdfTailor$StitchCommand.class +0 -0
- data/java/src/org/documentcloud/pdftailor/PdfTailor$UnstitchCommand.class +0 -0
- data/java/src/org/documentcloud/pdftailor/PdfTailor.class +0 -0
- data/java/src/org/documentcloud/pdftailor/PdfTailor.java +160 -0
- data/lib/pdftailor.rb +7 -0
- data/pdftailor.gemspec +20 -0
- metadata +58 -0
data/bin/pdftailor
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Launcher for the PdfTailor Java program bundled with this gem.
#
# Builds a classpath from every .jar in ../jars (relative to this script) and
# runs org.documentcloud.pdftailor.PdfTailor, forwarding the command-line
# arguments unchanged.

here     = File.dirname(__FILE__)
jar_path = File.expand_path(File.join(here, "..", "jars"))

# Dir.entries returns the names without leaving a Dir handle open.
# File::PATH_SEPARATOR is ":" on Unix-like systems and ";" on Windows.
jars = Dir.entries(jar_path).
  select { |name| name =~ /\.jar\z/ }.
  map    { |jar| File.join(jar_path, jar) }.
  join(File::PATH_SEPARATOR)

# The multi-argument form of exec does not go through a shell, so file names
# containing spaces or shell metacharacters reach Java as single, literal
# arguments. Java's output is streamed as it is produced and its exit status
# becomes this script's exit status.
# NOTE(review): the JVM option is usually spelled -verbose:class; the original
# spelling is kept here unchanged -- confirm whether the flag is wanted at all.
exec("java", "-verbose:classes", "-cp", jars,
     "org.documentcloud.pdftailor.PdfTailor", *ARGV)
|
Binary file
|
Binary file
|
data/jars/pdftailor.jar
ADDED
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
Main-Class: org.documentcloud.pdftailor.PdfTailor
|
Binary file
|
Binary file
|
@@ -0,0 +1,160 @@
|
|
1
|
+
package org.documentcloud.pdftailor;
|
2
|
+
|
3
|
+
import java.io.FileOutputStream;
|
4
|
+
import java.io.IOException;
|
5
|
+
import java.util.regex.Pattern;
|
6
|
+
import java.util.Arrays;
|
7
|
+
import java.util.List;
|
8
|
+
import java.util.Iterator;
|
9
|
+
|
10
|
+
import com.itextpdf.text.Document;
|
11
|
+
import com.itextpdf.text.DocumentException;
|
12
|
+
import com.itextpdf.text.pdf.PdfCopy;
|
13
|
+
import com.itextpdf.text.pdf.PdfReader;
|
14
|
+
import com.itextpdf.text.pdf.PdfWriter;
|
15
|
+
|
16
|
+
import com.beust.jcommander.Parameter;
|
17
|
+
import com.beust.jcommander.JCommander;
|
18
|
+
import com.beust.jcommander.MissingCommandException;
|
19
|
+
|
20
|
+
public class PdfTailor {
|
21
|
+
|
22
|
+
public static void main( String[] args ) throws IOException, DocumentException {
|
23
|
+
|
24
|
+
// Initialize the JCommander parsers
|
25
|
+
JCommander cli = new JCommander();
|
26
|
+
StitchCommand stitch = new StitchCommand();
|
27
|
+
UnstitchCommand unstitch = new UnstitchCommand();
|
28
|
+
|
29
|
+
// Add our Stitch and Unstitch commands to the parser
|
30
|
+
cli.addCommand("stitch", stitch);
|
31
|
+
cli.addCommand("unstitch", unstitch);
|
32
|
+
|
33
|
+
// When called with no arguments display the usage information.
|
34
|
+
if (args.length == 0) {
|
35
|
+
usage();
|
36
|
+
} else {
|
37
|
+
try {
|
38
|
+
// Parse the provided arguments.
|
39
|
+
cli.parse(args);
|
40
|
+
String command = cli.getParsedCommand();
|
41
|
+
if (command.equals("stitch")) { stitch(stitch); }
|
42
|
+
else if (command.equals("unstitch")) { unstitch(unstitch); }
|
43
|
+
else { usage(); }
|
44
|
+
|
45
|
+
} catch (MissingCommandException unrecognizedCommand) {
|
46
|
+
// and if unrecognized, display the usage information.
|
47
|
+
usage();
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
public static String VERSION = "0.0.1";
|
53
|
+
public static String USAGE_MESSAGE = "pdftailor stitches and unstitches pdfs.\n\n" +
|
54
|
+
"Version: " + VERSION + "\n\n" +
|
55
|
+
"Usage:\n" +
|
56
|
+
" pdftailor COMMAND [OPTIONS] <pdf(s)>\n" +
|
57
|
+
" Main commands:\n" +
|
58
|
+
" stitch, unstitch\n\n" +
|
59
|
+
"Options:\n" +
|
60
|
+
" -o, --output\n" +
|
61
|
+
" The file name or file pattern to which output is written.\n" +
|
62
|
+
" For commands like unstitch which will write multiple files\n" +
|
63
|
+
" a pattern including \"%d\" can be used to specify a template\n" +
|
64
|
+
" for where files should be written (e.g. ./foo/bar_%d.pdf).\n\n" +
|
65
|
+
"Example:\n" +
|
66
|
+
" pdftailor stitch --output merged.pdf a.pdf b.pdf\n" +
|
67
|
+
" pdftailor unstitch --output merged_page_%d.pdf merged.pdf\n";
|
68
|
+
|
69
|
+
public static void usage() {
|
70
|
+
System.out.println(USAGE_MESSAGE);
|
71
|
+
}
|
72
|
+
|
73
|
+
// Stitch together an ordered list of pdfs into a single pdf.
|
74
|
+
public static void stitch( StitchCommand cli ) throws IOException, DocumentException {
|
75
|
+
String outputName = cli.output;
|
76
|
+
|
77
|
+
Document document = new Document();
|
78
|
+
PdfCopy writer = new PdfCopy(document, new FileOutputStream(outputName));
|
79
|
+
document.open();
|
80
|
+
|
81
|
+
// Loop over all of the pdfs specified on the commandline
|
82
|
+
// read out their contents and concatenate them
|
83
|
+
// in the order specified
|
84
|
+
PdfReader reader;
|
85
|
+
Iterator<String> pdfPaths = cli.files.iterator();
|
86
|
+
while (pdfPaths.hasNext()) {
|
87
|
+
String path = pdfPaths.next();
|
88
|
+
reader = new PdfReader(path);
|
89
|
+
for ( int pageNumber = 0; pageNumber < reader.getNumberOfPages(); ) {
|
90
|
+
writer.addPage(writer.getImportedPage( reader, ++pageNumber ));
|
91
|
+
}
|
92
|
+
// make sure to free the reader to prevent memory leak
|
93
|
+
// especially for very large PDFs.
|
94
|
+
writer.freeReader(reader);
|
95
|
+
}
|
96
|
+
document.close();
|
97
|
+
}
|
98
|
+
|
99
|
+
// The stitch command parser.
|
100
|
+
static class StitchCommand {
|
101
|
+
@Parameter(description = "The list of files to stitch together.")
|
102
|
+
private List<String> files;
|
103
|
+
|
104
|
+
@Parameter(names = {"--output", "-o"}, description = "The filename to write to.", required = true)
|
105
|
+
private String output;
|
106
|
+
}
|
107
|
+
|
108
|
+
// unstitch a pdf into its constitutent pages.
|
109
|
+
public static void unstitch( UnstitchCommand cli ) throws IOException, DocumentException {
|
110
|
+
// use JCommander's default file list to get the file to split.
|
111
|
+
String readerPath = cli.files.get(0);
|
112
|
+
PdfReader reader = new PdfReader(readerPath);
|
113
|
+
|
114
|
+
// Loop over the document's pages by page number.
|
115
|
+
for ( int pageNumber = 0; pageNumber < reader.getNumberOfPages(); ) {
|
116
|
+
pageNumber++;
|
117
|
+
|
118
|
+
Document document = new Document();
|
119
|
+
String outputName;
|
120
|
+
if (cli.output != null && cli.output.length() > 0) { outputName = cli.output; } else { outputName = readerPath; }
|
121
|
+
PdfCopy writer = new PdfCopy(document, new FileOutputStream(outputPath(outputName, pageNumber)));
|
122
|
+
|
123
|
+
document.open();
|
124
|
+
writer.addPage(writer.getImportedPage(reader, pageNumber));
|
125
|
+
document.close();
|
126
|
+
writer.close();
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
// The unstitch command parser.
|
131
|
+
static class UnstitchCommand {
|
132
|
+
@Parameter(description = "The file to unstitch.")
|
133
|
+
private List<String> files;
|
134
|
+
|
135
|
+
@Parameter(names = {"--output", "-o"}, description = "The filename pattern to write to. (e.g. mydir/file_%d_name.pdf)")
|
136
|
+
private String output;
|
137
|
+
}
|
138
|
+
|
139
|
+
// Return a file path for page when provided with a page number and
|
140
|
+
// either an existing file path, or a path template.
|
141
|
+
protected static String outputPath( String outputName, int pageNumber ) {
|
142
|
+
Pattern templatePattern = Pattern.compile("%d");
|
143
|
+
Pattern filePattern = Pattern.compile("^(.+)\\.pdf$");
|
144
|
+
String path;
|
145
|
+
|
146
|
+
// if outputName contains %d, we assume it's a template.
|
147
|
+
if ( templatePattern.matcher(outputName).find() ){
|
148
|
+
// replace %d with the page number.
|
149
|
+
path = String.format(outputName, pageNumber);
|
150
|
+
} else if ( filePattern.matcher(outputName).find() ) {
|
151
|
+
// if outputName is a pdf, chop off the file ending and insert the page number before reattaching.
|
152
|
+
path = filePattern.matcher(outputName).replaceFirst("$1_" + pageNumber + ".pdf");
|
153
|
+
} else {
|
154
|
+
// otherwise we'll just append the page number and add the pdf file extension.
|
155
|
+
path = outputName + "_" + pageNumber + ".pdf";
|
156
|
+
}
|
157
|
+
return path;
|
158
|
+
}
|
159
|
+
|
160
|
+
}
|
data/lib/pdftailor.rb
ADDED
data/pdftailor.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Gem specification for pdftailor 0.0.1: name, version, descriptive metadata,
# the executable to expose, and the list of files to package.
Gem::Specification.new do |s|
|
2
|
+
s.name = 'pdftailor'
|
3
|
+
s.version = '0.0.1'
|
4
|
+
s.date = '2013-09-09'
|
5
|
+
|
6
|
+
s.summary = "Stitching and unstitching for PDFs"
|
7
|
+
# A <<- heredoc keeps the body's leading whitespace as part of the string.
s.description = <<-EOS
|
8
|
+
Stitching and unstitching for PDFs. A java library delivered via ruby out of convenience.
|
9
|
+
EOS
|
10
|
+
|
11
|
+
s.authors = ['Ted Han']
|
12
|
+
s.email = 'opensource@documentcloud.org'
|
13
|
+
s.homepage = 'http://documentcloud.github.io/pdftailor'
|
14
|
+
|
15
|
+
s.require_paths = ['lib']
|
16
|
+
s.executables = ['pdftailor']
|
17
|
+
|
18
|
+
# Package everything under jars/, lib/, bin/ and java/, plus this gemspec.
# The globs are resolved against the current working directory.
s.files = Dir['jars/*', 'lib/**/*', 'bin/*', 'java/**/*', 'pdftailor.gemspec']
|
19
|
+
s.license = "MIT"
|
20
|
+
end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pdftailor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ted Han
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-09 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: |2
|
15
|
+
Stitching and unstitching for PDFs. A java library delivered via ruby out of convenience.
|
16
|
+
email: opensource@documentcloud.org
|
17
|
+
executables:
|
18
|
+
- pdftailor
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- jars/itextpdf-5.3.4.jar
|
23
|
+
- jars/jcommander-1.31-SNAPSHOT.jar
|
24
|
+
- jars/pdftailor.jar
|
25
|
+
- lib/pdftailor.rb
|
26
|
+
- bin/pdftailor
|
27
|
+
- java/src/META-INF/MANIFEST.MF
|
28
|
+
- java/src/org/documentcloud/pdftailor/PdfTailor$StitchCommand.class
|
29
|
+
- java/src/org/documentcloud/pdftailor/PdfTailor$UnstitchCommand.class
|
30
|
+
- java/src/org/documentcloud/pdftailor/PdfTailor.class
|
31
|
+
- java/src/org/documentcloud/pdftailor/PdfTailor.java
|
32
|
+
- pdftailor.gemspec
|
33
|
+
homepage: http://documentcloud.github.io/pdftailor
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
none: false
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
none: false
|
52
|
+
requirements: []
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 1.8.24
|
55
|
+
signing_key:
|
56
|
+
specification_version: 3
|
57
|
+
summary: Stitching and unstitching for PDFs
|
58
|
+
test_files: []
|