gjman 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/.document +5 -0
  2. data/.gitignore +23 -0
  3. data/HISTORY.txt +8 -0
  4. data/LICENSE +20 -0
  5. data/README.rdoc +65 -0
  6. data/Rakefile +83 -0
  7. data/VERSION +1 -0
  8. data/gjman.gemspec +116 -0
  9. data/lib/ext/multivalent/Multivalent20060102.jar +0 -0
  10. data/lib/ext/pdfc/CCLib.jar +0 -0
  11. data/lib/ext/pdfc/CREDIT +2 -0
  12. data/lib/ext/pdfc/PDFC.bat +1 -0
  13. data/lib/ext/pdfc/PDFC.jar +0 -0
  14. data/lib/ext/pdfc/PDFC.sh +3 -0
  15. data/lib/ext/pdfc/PDFParser.jar +0 -0
  16. data/lib/ext/pdfc/config.xml +24 -0
  17. data/lib/ext/pdfc/license/LICENSE.log4j +48 -0
  18. data/lib/ext/pdfc/license/lgpl-3.0.txt +165 -0
  19. data/lib/ext/pdfc/license/overview.txt +9 -0
  20. data/lib/ext/pdfc/log4j-1.2.15.jar +0 -0
  21. data/lib/ext/pdfc/readme.txt +89 -0
  22. data/lib/gjman.rb +29 -0
  23. data/lib/gjman/file_system.rb +46 -0
  24. data/lib/gjman/java_hacks/ForbidSystemExit$1.class +0 -0
  25. data/lib/gjman/java_hacks/ForbidSystemExit$Exception.class +0 -0
  26. data/lib/gjman/java_hacks/ForbidSystemExit.class +0 -0
  27. data/lib/gjman/java_hacks/ForbidSystemExit.java +23 -0
  28. data/lib/gjman/jruby.rb +34 -0
  29. data/lib/gjman/pdf.rb +30 -0
  30. data/lib/gjman/pdf/base.rb +31 -0
  31. data/lib/gjman/pdf/compressor.rb +50 -0
  32. data/lib/gjman/pdf/matcher.rb +28 -0
  33. data/lib/gjman/pdf/merger.rb +25 -0
  34. data/lib/gjman/pdf/utils.rb +2 -0
  35. data/lib/gjman/pdf/utils/multivalent.rb +58 -0
  36. data/lib/gjman/pdf/utils/pdfc.rb +52 -0
  37. data/lib/gjman/rjb.rb +32 -0
  38. data/spec/generic/file_system_spec.rb +100 -0
  39. data/spec/generic/spec_helper.rb +2 -0
  40. data/spec/pdf/compressor_spec.rb +114 -0
  41. data/spec/pdf/data/compressed.pdf +0 -0
  42. data/spec/pdf/data/merged_pages.pdf +0 -0
  43. data/spec/pdf/data/page1.pdf +0 -0
  44. data/spec/pdf/data/page2.pdf +0 -0
  45. data/spec/pdf/data/page3.pdf +0 -0
  46. data/spec/pdf/data/picture_x1.pdf +0 -0
  47. data/spec/pdf/data/picture_x2.pdf +0 -0
  48. data/spec/pdf/data/picture_x3_diff_pos.pdf +0 -0
  49. data/spec/pdf/data/picture_x4_diff_size.pdf +0 -0
  50. data/spec/pdf/data/picture_y1.pdf +0 -0
  51. data/spec/pdf/data/text_x1.pdf +0 -0
  52. data/spec/pdf/data/text_x2.pdf +0 -0
  53. data/spec/pdf/data/text_y1.pdf +0 -0
  54. data/spec/pdf/data/text_y2_diff_pos.pdf +0 -0
  55. data/spec/pdf/data/text_y3_diff_size.pdf +0 -0
  56. data/spec/pdf/data/text_y4_diff_font.pdf +0 -0
  57. data/spec/pdf/data/text_y5_diff_style.pdf +0 -0
  58. data/spec/pdf/data/text_y6_diff_color.pdf +0 -0
  59. data/spec/pdf/data/text_y7_diff_bg.pdf +0 -0
  60. data/spec/pdf/data/uncompressed.pdf +0 -0
  61. data/spec/pdf/matcher_spec.rb +65 -0
  62. data/spec/pdf/merger_spec.rb +27 -0
  63. data/spec/pdf/spec_helper.rb +13 -0
  64. data/spec/spec_helper.rb +42 -0
  65. metadata +150 -0
@@ -0,0 +1,9 @@
1
+ Here is an overview of the licenses in the various jar files provided with PDFC.
2
+
3
+
4
+ JAR file License Info
5
+ ---------------------------------------------------------------------------------------------
6
+ log4j-1.2.15.jar See license/LICENSE.log4j
7
+ PDFParser.jar LGPL license - see lgpl-3.0.txt
8
+ PDFC.jar Copyright 2009-2010, i-net software. All rights reserved.
9
+ CCLib.jar Copyright 2009-2010, i-net software. All rights reserved.
@@ -0,0 +1,89 @@
1
+ i-net PDF Comparer v1.01
2
+ -------------------------
3
+ Copyright i-net software GmbH 2009-2010
4
+ All rights reserved
5
+
6
+ 1. Introduction
7
+ ---------------
8
+ The PDF Comparer is a tool specifically for comparing two PDF files (or folders containing PDF files)
9
+ for differences.
10
+ It is useful for comparing the PDF output of a Crystal Reports report with the PDF output of this same
11
+ report as exported by i-net Crystal-Clear, or for comparing the PDF output of two different versions
12
+ of i-net Crystal-Clear for any differences or behavioral changes. The following elements are compared
13
+ and any differences logged:
14
+
15
+ * Text differences (letters or words missing)
16
+ * Line/Arc/Box differences (lines or boxes missing or with different styles)
17
+ * Image differences (images missing)
18
+ * Margin differences (page margins different)
19
+
20
+ These differences each have a configurable tolerance value so that minor differences can be
21
+ ignored if necessary. (See point 3 - Configuration)
22
+
23
+ 2. Parameters
24
+ -------------
25
+ Usage:
26
+ PDFC [-c <config file>] [-[i][o]] [<Folder1> <Folder2> | <File1> <File2>]
27
+
28
+ -c Specify a configuration file (config.xml) for PDFC. If none is specified, the default "config.xml" is taken
29
+ -i Creates diff images in <Folder1>/differences for any differences found (recommended for a graphical comparison)
30
+ -o Creates images for each page of each version (need only be used for debug purposes)
31
+
32
+ Note that if using two folders, the PDF files must have the same names in each folder.
33
+
34
+ Any differences found between the PDFs being compared are printed to the console.
35
+
36
+ Example usage:
37
+
38
+ PDFC -i CRFolder CCFolder
39
+
40
+ This would compare all PDF files in the folder "CRFolder" with the PDF files of the same name in the folder "CCFolder".
41
+
42
+ 3. Configuration
43
+ ----------------
44
+ The following tolerance values can be set in the config.xml file:
45
+
46
+ CHART_DENSITY_THRESHOLD
47
+ (Decimal) density threshold: ((number of shapes)^3 / area size)
48
+ CHART_REMOVAL_MARGIN
49
+ (Decimal) percent of shape height to use as margin for removing PDF elements above and below detected charts
50
+ CREATE_DIFFIMAGES
51
+ True to create png files with the marked difference of the compared pages
52
+ CREATE_ORIGIMAGES
53
+ True to create a png file for each page that is compared
54
+ LOG_LEVEL
55
+ Level for Logging (OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE, ALL). The default is set to WARN
56
+ MAX_ERRORS_PER_REPORT
57
+ maximum number of errors that can occur before the comparison is canceled for the current pdf file.
58
+ MAX_WORD_DIFFERENCES
59
+ maximum number of differences that can occur before the comparison is canceled
60
+ MODULES
61
+ comma separated list of modules to be executed for each page
62
+ NORMALIZERS
63
+ comma separated list of normalizers to be executed before and after each page
64
+ TOLERANCE_BOX_ROUND_EDGES
65
+ (Integer) maximum number of pixels that a curve control point may differ in total
66
+ TOLERANCE_IMAGE_DISTANCE
67
+ maximum number of pixels that the position of an image can differ
68
+ TOLERANCE_IMAGE_SIZE
69
+ maximum difference in percent, that the area spanned by an image may differ
70
+ TOLERANCE_LINE_POSITION
71
+ (Decimal) maximum number of pixels that the position of a line or curves can differ per axis
72
+ TOLERANCE_LINE_SIZE
73
+ (Integer) maximum number of pixels that the length of a line can differ in total
74
+ TOLERANCE_LINE_STYLE
75
+ (Boolean) if true, different stroke styles will be an error
76
+ TOLERANCE_LINE_THICKNESS
77
+ (Integer) maximum difference in stroke thickness of two lines or curves
78
+ TOLERANCE_PAGE_LEFTCORNER
79
+ maximum number of pixels that the left or top margin of a page can differ (is the upper left corner of all elements)
80
+ TOLERANCE_PAGE_RATIO
81
+ tolerance for the aspect ratio of the pdf page
82
+ TOLERANCE_PAGE_SIZE
83
+ maximum number of pixels that the width or height of a page can differ
84
+ TOLERANCE_UNDERLINE_LENGTH
85
+ (Decimal) the maximum difference in percent, which the length of underlines may differ
86
+
87
+ 4. Support
88
+
89
+ If you have any questions or problems, please do not hesitate to contact tools@inetsoftware.de for technical support.
@@ -0,0 +1,29 @@
1
+ require 'gjman/file_system'
2
+ require 'gjman/jruby'
3
+ require 'gjman/rjb'
4
+
5
# Top-level namespace of the gem. It records where the library lives on disk
# and which Java bridge is used to talk to the bundled jar files.
module Gjman

  # Absolute path of the directory containing this file.
  ROOT = File.expand_path(File.dirname(__FILE__))

  # Jar paths; other parts of the library append to this list when loaded.
  JAVA_LIBS = []

  # Name of the Java bridge in use: :JRuby when RUBY_PLATFORM mentions java,
  # :Rjb when the 'rjb' library can be required, :Shell otherwise.
  JAVA_MODE =
    if RUBY_PLATFORM =~ /java/i
      :JRuby
    else
      begin
        require 'rjb'
        :Rjb
      rescue LoadError
        :Shell
      end
    end

  # Returns ROOT itself when called without arguments, otherwise ROOT joined
  # with the given path segments.
  def self.root(*segments)
    return ROOT if segments.empty?
    File.join(ROOT, *segments)
  end

  # Returns a path below ROOT/ext; nested arrays of segments are flattened.
  def self.ext(*segments)
    root('ext', *segments.flatten)
  end

end
@@ -0,0 +1,46 @@
1
require 'tempfile'
require 'digest/md5'

# ftools was dropped from the standard library in Ruby 1.9. Load it only where
# it still exists so that this file keeps loading on newer interpreters.
begin
  require 'ftools'
rescue LoadError
  nil
end

module Gjman

  # Raised when a file that must exist cannot be found.
  # Inherits from StandardError so that a plain `rescue` can handle it.
  class FileNotFoundError < StandardError ; end

  # Helpers for temporary files/directories and for file existence checks.
  module FileSystem
    class << self

      # Creates a temporary directory via Dir.mktmpdir, forwarding the block.
      def tmp_dir(&block)
        Dir.mktmpdir(&block)
      end

      # Ensures that +path+ exists.
      #
      # path    - path of the file to look for.
      # timeout - Integer number of seconds to keep waiting for the file
      #           (default 0: check once, do not wait).
      #
      # Returns true as soon as the file exists.
      # Raises FileNotFoundError if it still does not exist after the wait.
      def file_must_exist!(path, timeout=0)
        timeout.times do
          return true if File.exist?(path)
          sleep(1)
        end
        # Final check, so a file that appeared during the last sleep is honoured.
        File.exist?(path) || raise_file_not_found_error(path)
      end

      # Closes and deletes every temp file handed out by tmp_file, then
      # forgets about them.
      def trash_tmp_files
        (@trashable_tmp_files || []).each { |f| f.close! }
        @trashable_tmp_files = nil
      end

      # Creates a Tempfile and remembers it for trash_tmp_files.
      #
      # basename - name passed on to Tempfile.new; defaults to an MD5 digest
      #            of the current time.
      #
      # Returns the new Tempfile.
      def tmp_file(basename = nil)
        basename ||= Digest::MD5.hexdigest(Time.now.to_s)
        file = Tempfile.new(basename)
        (@trashable_tmp_files ||= []) << file
        file
      end

      protected

      # Raises FileNotFoundError with a message naming +path+.
      def raise_file_not_found_error(path)
        raise FileNotFoundError, "File '#{path}' not found."
      end

    end
  end

end
@@ -0,0 +1,23 @@
1
+ import java.security.Permission;
2
+
3
/**
 * Installs and removes a SecurityManager that turns attempts to exit the VM
 * into an exception.
 *
 * Copy-and-pasted (almost) from
 * http://www.jroller.com/ethdsy/entry/disabling_system_exit
 */
class ForbidSystemExit
{
    /** Thrown by the installed security manager instead of letting the VM exit. */
    public static class Exception extends SecurityException { }

    /**
     * Installs a security manager that rejects every permission whose name
     * starts with "exitVM" and lets every other permission check pass.
     */
    public static void apply() {
        System.setSecurityManager( new SecurityManager() {
            @Override
            public void checkPermission( Permission requested ) {
                final String name = requested.getName();
                if( name.startsWith("exitVM") ) {
                    throw new Exception();
                }
            }
        } );
    }

    /** Removes the security manager by resetting it to null. */
    public static void unapply() {
        System.setSecurityManager( null );
    }
}
@@ -0,0 +1,34 @@
1
module Gjman
  # Java bridge for running on JRuby: loads the collected jars and runs Java
  # code with ForbidSystemExit applied.
  module JRuby
    class << self

      # One-time setup: includes Java, requires every jar listed in
      # Gjman::JAVA_LIBS (entries may themselves be ':'-separated lists),
      # appends the java_hacks directory to $CLASSPATH and imports
      # ForbidSystemExit. Later calls are no-ops and return true.
      def initialize
        @initialized ||= (
          include Java
          Gjman::JAVA_LIBS.join(':').split(':').each { |jar| require jar }
          $CLASSPATH << Gjman.root('gjman', 'java_hacks')
          java_import 'ForbidSystemExit'
          true
        )
      end

      # Imports the Java class named +klass+ and returns the result of
      # sending that name to the Java module.
      def classify(klass)
        java_import klass
        Java.send(klass)
      end

      # Runs the block with ForbidSystemExit applied and always unapplies it
      # afterwards.
      #
      # Returns the block's value, or nil when the block was cut short by a
      # ForbidSystemExit::Exception. (It used to return whatever an earlier
      # call had stored in an instance variable, i.e. a stale value.)
      def sandbox(&block)
        initialize
        begin
          ForbidSystemExit.apply
          yield
        rescue ForbidSystemExit::Exception
          nil
        ensure
          ForbidSystemExit.unapply
        end
      end

    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'gjman'
2
+ require 'gjman/pdf/utils'
3
+ require 'gjman/pdf/base'
4
+ require 'gjman/pdf/matcher'
5
+ require 'gjman/pdf/merger'
6
+ require 'gjman/pdf/compressor'
7
+
8
module Gjman
  # Entry points for the PDF operations; each one forwards to the class that
  # implements it.
  module PDF

    # Forwards to Matcher.test with the two given PDFs.
    def self.match?(x, y)
      Matcher.test(x, y)
    end

    # Forwards all arguments to Merger.do.
    def self.merge(*args)
      Merger.do(*args)
    end

    # Forwards to Compressor.do.
    def self.compress(src, opts={})
      Compressor.do(src, opts)
    end

    # Forwards to Compressor.undo.
    def self.uncompress(src, opts={})
      Compressor.undo(src, opts)
    end

  end
end
@@ -0,0 +1,31 @@
1
+ require 'forwardable'
2
+
3
module Gjman
  module PDF
    # Common base of the PDF workers: class-level delegators to the Java
    # utilities plus three comparison predicates built on their text output.
    class Base
      class << self

        extend Forwardable

        def_delegators Utils::PDFC, :diff
        def_delegators Utils::Multivalent, :merge, :compress, :uncompress, :fonts, :images

        # True unless the diff output contains a "# of Differences" table
        # followed by a cell that starts with a non-zero digit.
        def same_contents?(pdf_x, pdf_y)
          report = diff(pdf_x, pdf_y)
          !(report =~ %r{\| # of Differences.*\-+.*(\| [1-9]+)}m)
        end

        # True when both font listings are equal, ignoring the last line
        # (it shows processing stats, which we don't need).
        def same_fonts?(pdf_x, pdf_y)
          x_lines, y_lines = [pdf_x, pdf_y].map { |pdf| fonts(pdf).split("\n")[0..-2] }
          x_lines == y_lines
        end

        # True when both image listings are equal, ignoring the first line
        # (it shows the file name, which we don't need).
        def same_images?(pdf_x, pdf_y)
          x_lines, y_lines = [pdf_x, pdf_y].map { |pdf| images(pdf).split("\n")[1..-1] }
          x_lines == y_lines
        end

      end
    end
  end
end
31
+
@@ -0,0 +1,50 @@
1
require 'fileutils'

module Gjman
  module PDF
    # Compresses and uncompresses PDF files through the inherited
    # compress/uncompress actions.
    class Compressor < Base
      class << self

        # Compresses +src+.
        #
        # src  - path of the PDF to compress.
        # opts - Hash; :to names the destination path. Without it the
        #        destination is +src+ with a trailing ".pdf" replaced by
        #        "-o.pdf". The hash is not modified.
        #
        # Returns the destination path.
        def do(src, opts={})
          default_dest = src.sub(/\.pdf$/, '-o.pdf')
          work(:compress, src, opts[:to] || default_dest, default_dest)
        end

        # Uncompresses +src+. Same contract as #do, with a default
        # destination suffix of "-u.pdf".
        def undo(src, opts={})
          default_dest = src.sub(/\.pdf$/, '-u.pdf')
          work(:uncompress, src, opts[:to] || default_dest, default_dest)
        end

        private

        # Runs +action+ on +src+ and makes sure the result ends up at +dest+.
        #
        # When the tool output says the file is already compressed, or shows
        # an ArrayIndexOutOfBoundsException, +src+ itself is copied to +dest+.
        # Otherwise the file at +tmp_dest+ (presumably where the tool writes
        # its output - confirm against the tool) is moved to +dest+.
        #
        # FileUtils replaces File.copy/File.move, which came from the ftools
        # library that no longer exists in Ruby >= 1.9. Copying or moving a
        # file onto itself is skipped.
        def work(action, src, dest, tmp_dest)
          case send(action, src)
          when /Already compressed\. \(Force recompression with \-force\.\)/,
               /java\.lang\.ArrayIndexOutOfBoundsException/
            FileUtils.cp(src, dest) unless dest == src
          else
            FileUtils.mv(tmp_dest, dest) unless dest == tmp_dest
          end
          dest
        end

        # NOTE: Since pdftk is almost as dead as multivalent (last updated in 2006),
        # there is really no good reason to choose it over multivalent. Moreover,
        # since pdf comparison requires java solution, it makes sense to be consistent,
        # which is essentially sticking to java.
        #
        # def do(src, dest)
        #   pdftk(:compress, src, dest)
        # end
        #
        # def undo(src, dest)
        #   pdftk(:uncompress, src, dest)
        # end
        #
        # def pdftk(mode, src, dest)
        #   shell(:pdftk, [src, :output, dest, mode, :verbose])
        #   dest
        # end

      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Gjman
  module PDF
    # Decides whether two PDFs match by comparing uncompressed temporary
    # copies of them.
    class Matcher < Base
      class << self

        # Returns true when the uncompressed copies of +pdf_x+ and +pdf_y+
        # pass same_fonts?, same_images? and same_contents? (checked in that
        # order, stopping at the first failure), false otherwise.
        # The temporary files are trashed whether or not the checks succeed.
        def test(pdf_x, pdf_y)
          tmp_x, tmp_y = uncompress(pdf_x, pdf_y)
          [:same_fonts?, :same_images?, :same_contents?].all? do |check|
            send(check, tmp_x, tmp_y)
          end
        ensure
          FileSystem.trash_tmp_files
        end

        private

        # Uncompresses each given PDF into a fresh temp file (named after the
        # MD5 digest of its path, with a ".pdf" suffix) and returns whatever
        # PDF.uncompress returns for each of them.
        def uncompress(*pdfs)
          pdfs.flatten.map do |pdf|
            scratch = FileSystem.tmp_file([Digest::MD5.hexdigest(pdf), '.pdf'])
            PDF.uncompress(pdf, :to => scratch.path)
          end
        end

      end
    end
  end
end
@@ -0,0 +1,25 @@
1
require 'fileutils'

module Gjman
  module PDF
    # Merges several PDF files into one through the inherited merge action.
    class Merger < Base
      class << self

        # Merges the given PDFs.
        #
        # args - source paths (individually or as arrays), optionally
        #        followed by an options Hash; :to names the destination
        #        path. Without it the destination is the first source with a
        #        trailing ".pdf" replaced by "-m.pdf". The hash is not
        #        modified.
        #
        # Returns the destination path.
        # Raises ArgumentError when no source is given.
        def do(*args)
          opts = args.last.is_a?(Hash) ? args.pop : {}
          srcs = args.flatten
          raise ArgumentError, 'at least one source PDF is required' if srcs.empty?
          default_dest = srcs.first.sub(/\.pdf$/, '-m.pdf')
          work(srcs, opts[:to] || default_dest, default_dest)
        end

        private

        # Runs the merge and moves the file at +tmp_dest+ (presumably where
        # the tool writes its output - confirm against the tool) to +dest+
        # when the two differ.
        #
        # FileUtils.mv replaces File.move, which came from the ftools library
        # that no longer exists in Ruby >= 1.9.
        def work(srcs, dest, tmp_dest)
          merge(srcs)
          FileUtils.mv(tmp_dest, dest) unless dest == tmp_dest
          dest
        end

      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'gjman/pdf/utils/pdfc'
2
+ require 'gjman/pdf/utils/multivalent'
@@ -0,0 +1,58 @@
1
module Gjman
  module PDF
    module Utils
      # Front end for the bundled Multivalent jar. Every key of SERVICES is
      # callable as a module method (e.g. Multivalent.compress(path)); the
      # call is routed through method_missing of whichever bridge module
      # matches Gjman::JAVA_MODE.
      module Multivalent

        # Raised when a method that is not a key of SERVICES is called.
        # Inherits from StandardError so that a plain `rescue` can handle it.
        class NotSupportedServiceError < StandardError ; end

        # Path of the Multivalent jar; also registered in Gjman::JAVA_LIBS.
        JARS = Gjman.ext('multivalent', 'Multivalent20060102.jar')
        Gjman::JAVA_LIBS << JARS

        # Service name => [Java class name, fixed leading arguments...].
        SERVICES = {
          :compress   => %w{tool.pdf.Compress},
          :uncompress => %w{tool.pdf.Uncompress},
          :merge      => %w{tool.pdf.Merge},
          :fonts      => %w{tool.doc.ExtractText --output xml --style},
          :images     => %w{tool.pdf.Info --images},
        }

        # Bridge used when JAVA_MODE is :JRuby.
        module JRuby
          # Calls the service class' main inside Gjman::JRuby.sandbox.
          # NOTE(review): the joined argument string is split on spaces
          # again, so a path containing a space becomes several arguments.
          def method_missing(mode, *args)
            Gjman::JRuby.sandbox do
              service, args = extract_args(mode, args)
              Gjman::JRuby.classify(service).main(args.split(' '))
            end
          end
        end

        # Bridge used when JAVA_MODE is :Rjb.
        module Rjb
          # Calls the service class' main inside Gjman::Rjb.sandbox.
          # NOTE(review): same space-splitting caveat as the JRuby bridge.
          def method_missing(mode, *args)
            Gjman::Rjb.sandbox do
              service, args = extract_args(mode, args)
              Gjman::Rjb.classify(service).main(args.split(' '))
            end
          end
        end

        # Bridge used when JAVA_MODE is :Shell.
        module Shell
          # Runs `java -cp <jar> <service> <args>` and returns its combined
          # stdout/stderr output.
          # NOTE(review): the arguments are interpolated into the shell
          # command without escaping; paths with spaces or shell
          # metacharacters will break or alter the command. Confirm which
          # inputs callers may pass before relying on this.
          def method_missing(mode, *args)
            service, args = extract_args(mode, args)
            @cmd ||= 'java -cp %s' % JARS
            %x|#{@cmd} #{service} #{args} 2>&1|
          end
        end

        # Looks up +mode+ in SERVICES.
        #
        # Returns [Java class name, argument string]; the argument string is
        # the service's fixed arguments followed by +args+ (flattened, nils
        # dropped), joined with single spaces.
        # Raises NotSupportedServiceError when +mode+ is not a key of SERVICES.
        def self.extract_args(mode, args)
          (service_args = SERVICES[mode]) or raise NotSupportedServiceError
          [
            service_args[0],
            [service_args[1..-1], args].flatten.compact.join(' ')
          ]
        end

        # Keeps respond_to? in line with what method_missing can serve.
        def self.respond_to_missing?(mode, include_private = false)
          SERVICES.key?(mode) || super
        end

        extend const_get(Gjman::JAVA_MODE)

      end
    end
  end
end