gjman 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data/.document +5 -0
  2. data/.gitignore +23 -0
  3. data/HISTORY.txt +8 -0
  4. data/LICENSE +20 -0
  5. data/README.rdoc +65 -0
  6. data/Rakefile +83 -0
  7. data/VERSION +1 -0
  8. data/gjman.gemspec +116 -0
  9. data/lib/ext/multivalent/Multivalent20060102.jar +0 -0
  10. data/lib/ext/pdfc/CCLib.jar +0 -0
  11. data/lib/ext/pdfc/CREDIT +2 -0
  12. data/lib/ext/pdfc/PDFC.bat +1 -0
  13. data/lib/ext/pdfc/PDFC.jar +0 -0
  14. data/lib/ext/pdfc/PDFC.sh +3 -0
  15. data/lib/ext/pdfc/PDFParser.jar +0 -0
  16. data/lib/ext/pdfc/config.xml +24 -0
  17. data/lib/ext/pdfc/license/LICENSE.log4j +48 -0
  18. data/lib/ext/pdfc/license/lgpl-3.0.txt +165 -0
  19. data/lib/ext/pdfc/license/overview.txt +9 -0
  20. data/lib/ext/pdfc/log4j-1.2.15.jar +0 -0
  21. data/lib/ext/pdfc/readme.txt +89 -0
  22. data/lib/gjman.rb +29 -0
  23. data/lib/gjman/file_system.rb +46 -0
  24. data/lib/gjman/java_hacks/ForbidSystemExit$1.class +0 -0
  25. data/lib/gjman/java_hacks/ForbidSystemExit$Exception.class +0 -0
  26. data/lib/gjman/java_hacks/ForbidSystemExit.class +0 -0
  27. data/lib/gjman/java_hacks/ForbidSystemExit.java +23 -0
  28. data/lib/gjman/jruby.rb +34 -0
  29. data/lib/gjman/pdf.rb +30 -0
  30. data/lib/gjman/pdf/base.rb +31 -0
  31. data/lib/gjman/pdf/compressor.rb +50 -0
  32. data/lib/gjman/pdf/matcher.rb +28 -0
  33. data/lib/gjman/pdf/merger.rb +25 -0
  34. data/lib/gjman/pdf/utils.rb +2 -0
  35. data/lib/gjman/pdf/utils/multivalent.rb +58 -0
  36. data/lib/gjman/pdf/utils/pdfc.rb +52 -0
  37. data/lib/gjman/rjb.rb +32 -0
  38. data/spec/generic/file_system_spec.rb +100 -0
  39. data/spec/generic/spec_helper.rb +2 -0
  40. data/spec/pdf/compressor_spec.rb +114 -0
  41. data/spec/pdf/data/compressed.pdf +0 -0
  42. data/spec/pdf/data/merged_pages.pdf +0 -0
  43. data/spec/pdf/data/page1.pdf +0 -0
  44. data/spec/pdf/data/page2.pdf +0 -0
  45. data/spec/pdf/data/page3.pdf +0 -0
  46. data/spec/pdf/data/picture_x1.pdf +0 -0
  47. data/spec/pdf/data/picture_x2.pdf +0 -0
  48. data/spec/pdf/data/picture_x3_diff_pos.pdf +0 -0
  49. data/spec/pdf/data/picture_x4_diff_size.pdf +0 -0
  50. data/spec/pdf/data/picture_y1.pdf +0 -0
  51. data/spec/pdf/data/text_x1.pdf +0 -0
  52. data/spec/pdf/data/text_x2.pdf +0 -0
  53. data/spec/pdf/data/text_y1.pdf +0 -0
  54. data/spec/pdf/data/text_y2_diff_pos.pdf +0 -0
  55. data/spec/pdf/data/text_y3_diff_size.pdf +0 -0
  56. data/spec/pdf/data/text_y4_diff_font.pdf +0 -0
  57. data/spec/pdf/data/text_y5_diff_style.pdf +0 -0
  58. data/spec/pdf/data/text_y6_diff_color.pdf +0 -0
  59. data/spec/pdf/data/text_y7_diff_bg.pdf +0 -0
  60. data/spec/pdf/data/uncompressed.pdf +0 -0
  61. data/spec/pdf/matcher_spec.rb +65 -0
  62. data/spec/pdf/merger_spec.rb +27 -0
  63. data/spec/pdf/spec_helper.rb +13 -0
  64. data/spec/spec_helper.rb +42 -0
  65. metadata +150 -0
@@ -0,0 +1,9 @@
1
+ Here is an overview of the licenses in the various jar files provided with PDFC.
2
+
3
+
4
+ JAR file License Info
5
+ ---------------------------------------------------------------------------------------------
6
+ log4j-1.2.15.jar See license/LICENSE.log4j
7
+ PDFParser.jar LGPL license - see lgpl-3.0.txt
8
+ PDFC.jar Copyright 2009-2010, i-net software. All rights reserved.
9
+ CCLib.jar Copyright 2009-2010, i-net software. All rights reserved.
@@ -0,0 +1,89 @@
1
+ i-net PDF Comparer v1.01
2
+ -------------------------
3
+ Copyright i-net software GmbH 2009-2010
4
+ All rights reserved
5
+
6
+ 1. Introduction
7
+ ---------------
8
+ The PDF Comparer is a tool specifically for comparing two PDF files (or folders containing PDF files)
9
+ for differences.
10
+ It is useful for comparing the PDF output of a Crystal Reports report with the PDF output of this same
11
+ report as exported by i-net Crystal-Clear, or for comparing the PDF output of two different versions
12
+ of i-net Crystal-Clear for any differences or behavioral changes. The following elements are compared
13
+ and any differences logged:
14
+
15
+ * Text differences (letters or words missing)
16
+ * Line/Arc/Box differences (lines or boxes missing or with different styles)
17
+ * Image differences (images missing)
18
+ * Margin differences (page margins different)
19
+
20
+ These differences each have a configurable tolerance value so that minor differences can be
21
+ ignored if necessary. (See point 3 - Configuration)
22
+
23
+ 2. Parameters
24
+ -------------
25
+ Usage:
26
+ PDFC [-c <config file>] [-[i][o]] [<Folder1> <Folder2> | <File1> <File2>]
27
+
28
+ -c Specify a configuration file (config.xml) for PDFC. If none is specified, the default "config.xml" is taken
29
+ -i Creates diff images in <Folder1>/differences for any differences found (recommended for a graphical comparison)
30
+ -o Creates images for each page of each version (need only be used for debug purposes)
31
+
32
+ Note that if using two folders, the PDF files must have the same names in each folder.
33
+
34
+ PDFC prints any differences found between the PDFs being compared to the console.
35
+
36
+ Example usage:
37
+
38
+ PDFC -i CRFolder CCFolder
39
+
40
+ This would compare all PDF files in the folder "CRFolder" with the PDF files of the same name in the folder "CCFolder".
41
+
42
+ 3. Configuration
43
+ ----------------
44
+ The following tolerance values can be set in the config.xml file:
45
+
46
+ CHART_DENSITY_THRESHOLD
47
+ (Decimal) density threshold: ((number of shapes)^3 / area size)
48
+ CHART_REMOVAL_MARGIN
49
+ (Decimal) percent of shape height to use as margin for removing PDF elements above and below detected charts
50
+ CREATE_DIFFIMAGES
51
+ True to create png files with the marked difference of the compared pages
52
+ CREATE_ORIGIMAGES
53
+ True to create a png file for each page that is compared
54
+ LOG_LEVEL
55
+ Level for Logging (OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE, ALL). The default is set to WARN
56
+ MAX_ERRORS_PER_REPORT
57
+ maximum number of errors that can occur before the comparison is canceled for the current pdf file.
58
+ MAX_WORD_DIFFERENCES
59
+ maximum number of differences that can occur before the comparison is canceled
60
+ MODULES
61
+ comma separated list of modules to be executed for each page
62
+ NORMALIZERS
63
+ comma separated list of normalizers to be executed before and after each page
64
+ TOLERANCE_BOX_ROUND_EDGES
65
+ (Integer) maximum number of pixels that a curve control point may differ in total
66
+ TOLERANCE_IMAGE_DISTANCE
67
+ maximum number of pixels that the position of an image can differ
68
+ TOLERANCE_IMAGE_SIZE
69
+ maximum difference, in percent, by which the area spanned by an image may differ
70
+ TOLERANCE_LINE_POSITION
71
+ (Decimal) maximum number of pixels that the position of a line or curve can differ per axis
72
+ TOLERANCE_LINE_SIZE
73
+ (Integer) maximum number of pixels that the length of a line can differ in total
74
+ TOLERANCE_LINE_STYLE
75
+ (Boolean) if true, different stroke styles will be an error
76
+ TOLERANCE_LINE_THICKNESS
77
+ (Integer) maximum difference in stroke thickness of two lines or curves
78
+ TOLERANCE_PAGE_LEFTCORNER
79
+ maximum number of pixels that the left or top margin of a page can differ (is the upper left corner of all elements)
80
+ TOLERANCE_PAGE_RATIO
81
+ tolerance for the aspect ratio of the pdf page
82
+ TOLERANCE_PAGE_SIZE
83
+ maximum number of pixels that the width or height of a page can differ
84
+ TOLERANCE_UNDERLINE_LENGTH
85
+ (Decimal) maximum difference, in percent, by which the length of underlines may differ
86
+
87
+ 4. Support
88
+
89
+ If you have any questions or problems, please do not hesitate to contact tools@inetsoftware.de for technical support.
@@ -0,0 +1,29 @@
1
+ require 'gjman/file_system'
2
+ require 'gjman/jruby'
3
+ require 'gjman/rjb'
4
+
5
# Top-level namespace of the gem. Holds the library root path, the list of
# Java libraries registered by the utility modules, and the Java bridge that
# was detected at load time.
module Gjman

  # Absolute path of the directory containing this file. Frozen so that no
  # caller can mutate the shared path in place.
  ROOT = File.expand_path(File.dirname(__FILE__)).freeze

  # Java libraries (jar paths) to load. Deliberately NOT frozen: other files
  # of the gem append to this array with `<<` when they are required.
  JAVA_LIBS = []

  # Which Java bridge to use:
  #   :JRuby - the interpreter itself runs on the JVM,
  #   :Rjb   - the `rjb` library could be required,
  #   :Shell - neither of the above; `rjb` raised LoadError.
  JAVA_MODE =
    if RUBY_PLATFORM =~ /java/i
      :JRuby
    else
      begin
        require 'rjb'
        :Rjb
      rescue LoadError
        :Shell
      end
    end

  class << self

    # Builds a path below the library root.
    #
    # @param args [Array<String>] path segments to append to ROOT
    # @return [String] ROOT joined with +args+; with no arguments, a fresh
    #   copy of ROOT (never the constant itself, so callers may safely
    #   modify the returned string)
    def root(*args)
      File.join(ROOT, *args)
    end

    # Builds a path below the bundled "ext" directory.
    #
    # @param args [Array<String, Array>] path segments; nested arrays are
    #   flattened before joining
    # @return [String] path under "<ROOT>/ext"
    def ext(*args)
      root('ext', *args.flatten)
    end
  end

end
@@ -0,0 +1,46 @@
1
+ require 'tempfile'
2
+ require 'ftools'
3
+ require 'digest/md5'
4
+
5
# Dir.mktmpdir lives in 'tmpdir'; required here explicitly rather than
# relying on 'tempfile' to pull it in.
require 'tmpdir'

module Gjman

  # Raised when a file that must exist cannot be found. Derives from
  # StandardError so that it is handled by ordinary `rescue` clauses.
  class FileNotFoundError < StandardError ; end

  # File-system helpers: temporary directories/files and existence checks.
  # (The original bare `private` that preceded this module had no effect on
  # the constant and has been dropped.)
  module FileSystem
    class << self

      # Creates a temporary directory by delegating to Dir.mktmpdir.
      #
      # @yield [String] forwarded unchanged to Dir.mktmpdir
      # @return whatever Dir.mktmpdir returns for the given block
      def tmp_dir(&block)
        Dir.mktmpdir(&block)
      end

      # Ensures that +path+ exists, optionally waiting for it to appear.
      #
      # @param path [String] file to look for
      # @param timeout [Integer] maximum number of seconds to wait; 0 checks
      #   exactly once without sleeping
      # @return [true] as soon as the file exists
      # @raise [FileNotFoundError] if the file is still missing after the
      #   timeout has elapsed
      def file_must_exist!(path, timeout = 0)
        # File.exist? instead of File.exists?: the latter was removed from
        # current Ruby versions.
        return true if File.exist?(path)
        timeout.times do
          sleep(1)
          # Re-check AFTER every sleep so a file that shows up during the
          # final second of the wait is not reported as missing.
          return true if File.exist?(path)
        end
        raise_file_not_found_error(path)
      end

      # Closes and deletes every temp file handed out by #tmp_file since the
      # last call, then forgets them.
      #
      # @return [nil]
      def trash_tmp_files
        # close! closes the handle and unlinks the file; files whose path is
        # already nil are skipped, as before.
        (@trashable_tmp_files || []).each { |f| f.close! if f.path }
        @trashable_tmp_files = nil
      end

      # Creates a Tempfile and remembers it for #trash_tmp_files.
      #
      # @param basename [String, Array, nil] passed to Tempfile.new; defaults
      #   to the MD5 hex digest of the current time
      # @return [Tempfile] the newly created temp file
      def tmp_file(basename = nil)
        basename ||= Digest::MD5.hexdigest(Time.now.to_s)
        file = Tempfile.new(basename)
        (@trashable_tmp_files ||= []) << file
        file
      end

      private

      # @raise [FileNotFoundError] always, naming +path+ in the message
      def raise_file_not_found_error(path)
        raise FileNotFoundError, "File '#{path}' not found."
      end

    end
  end

end
@@ -0,0 +1,23 @@
1
+ import java.security.Permission;
2
+
3
+ // Copy-&-pasted (almost) from
4
+ // http://www.jroller.com/ethdsy/entry/disabling_system_exit
5
/**
 * Temporarily prevents code running in this JVM from terminating it.
 * {@link #apply()} installs a security manager that rejects "exitVM"
 * permission requests; {@link #unapply()} removes the security manager.
 */
class ForbidSystemExit {

    /** Thrown by the installed security manager instead of letting the JVM exit. */
    public static class Exception extends SecurityException { }

    /**
     * Installs a security manager whose permission check throws
     * {@link ForbidSystemExit.Exception} for any permission whose name starts
     * with "exitVM" and lets every other permission request through.
     */
    public static void apply() {
        System.setSecurityManager(new SecurityManager() {
            @Override
            public void checkPermission(Permission requested) {
                boolean isExitRequest = requested.getName().startsWith("exitVM");
                if (isExitRequest) {
                    throw new ForbidSystemExit.Exception();
                }
            }
        });
    }

    /** Removes the security manager by setting it to {@code null}. */
    public static void unapply() {
        System.setSecurityManager(null);
    }
}
@@ -0,0 +1,34 @@
1
module Gjman
  # Bridge used when the interpreter runs on the JVM: loads the registered
  # jars once and runs Java code with System.exit intercepted.
  module JRuby
    class << self

      # One-time setup: mixes in Java, requires every jar listed in
      # Gjman::JAVA_LIBS (entries may themselves be ':'-joined lists),
      # puts the java_hacks directory on the classpath and imports
      # ForbidSystemExit. Later calls are no-ops.
      #
      # @return [true]
      def initialize
        return true if @initialized
        include Java
        Gjman::JAVA_LIBS.join(':').split(':').each { |jar| require jar }
        $CLASSPATH << Gjman.root('gjman', 'java_hacks')
        java_import 'ForbidSystemExit'
        @initialized = true
      end

      # Imports the given Java class and returns it.
      #
      # @param klass [String] Java class name
      # @return the object returned by sending +klass+ to Java
      def classify(klass)
        java_import klass
        Java.send(klass)
      end

      # Runs the block with exit attempts forbidden.
      #
      # @yield the code to run inside the sandbox
      # @return the block's value, or nil when the block was cut short by
      #   ForbidSystemExit::Exception. The previous implementation kept the
      #   value in an instance variable and therefore returned the result of
      #   an EARLIER sandbox call whenever the exit was intercepted.
      # @raise any other exception raised by the block (after unapplying)
      def sandbox(&block)
        initialize
        begin
          ForbidSystemExit.apply
          yield
        rescue ForbidSystemExit::Exception
          nil
        ensure
          # Always lift the restriction again, whatever happened above.
          ForbidSystemExit.unapply
        end
      end

    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'gjman'
2
+ require 'gjman/pdf/utils'
3
+ require 'gjman/pdf/base'
4
+ require 'gjman/pdf/matcher'
5
+ require 'gjman/pdf/merger'
6
+ require 'gjman/pdf/compressor'
7
+
8
module Gjman
  # Public entry points for PDF operations. Every method simply forwards to
  # the class that implements the operation.
  module PDF

    # Compares two PDFs.
    # @return the result of Matcher.test(x, y)
    def self.match?(x, y)
      Matcher.test(x, y)
    end

    # Merges PDFs.
    # @param args forwarded unchanged to Merger.do
    # @return the result of Merger.do
    def self.merge(*args)
      Merger.do(*args)
    end

    # Compresses a PDF.
    # @param src  source passed to Compressor.do
    # @param opts options hash passed to Compressor.do
    # @return the result of Compressor.do
    def self.compress(src, opts={})
      Compressor.do(src, opts)
    end

    # Uncompresses a PDF.
    # @param src  source passed to Compressor.undo
    # @param opts options hash passed to Compressor.undo
    # @return the result of Compressor.undo
    def self.uncompress(src, opts={})
      Compressor.undo(src, opts)
    end

  end
end
@@ -0,0 +1,31 @@
1
+ require 'forwardable'
2
+
3
module Gjman
  module PDF
    # Common base of the PDF operation classes. Exposes the tool wrappers as
    # class-level methods and offers three pairwise comparisons built on them.
    class Base
      class << self

        extend Forwardable

        # diff is served by Utils::PDFC; everything else by Utils::Multivalent.
        def_delegators Utils::PDFC, :diff
        def_delegators Utils::Multivalent, :merge, :compress, :uncompress, :fonts, :images

        # @return [Boolean] true unless the diff report matches the pattern
        #   below, i.e. a "| # of Differences" heading followed by dashes and
        #   later a "| " cell that starts with a digit from 1 to 9
        def same_contents?(pdf_x, pdf_y)
          report = diff(pdf_x, pdf_y)
          !(report =~ %r{\| # of Differences.*\-+.*(\| [1-9]+)}m)
        end

        # @return [Boolean] true when the font listings of both PDFs are
        #   equal, ignoring the last line of each
        def same_fonts?(pdf_x, pdf_y)
          listing_x = fonts(pdf_x).split("\n")
          listing_y = fonts(pdf_y).split("\n")
          # The last line shows processing stats (which we don't need)
          listing_x[0..-2] == listing_y[0..-2]
        end

        # @return [Boolean] true when the image listings of both PDFs are
        #   equal, ignoring the first line of each
        def same_images?(pdf_x, pdf_y)
          listing_x = images(pdf_x).split("\n")
          listing_y = images(pdf_y).split("\n")
          # The first line shows the file name (which we don't need)
          listing_x[1..-1] == listing_y[1..-1]
        end

      end
    end
  end
end
31
+
@@ -0,0 +1,50 @@
1
# File.copy / File.move came from 'ftools', which no longer ships with Ruby;
# FileUtils provides the equivalents.
require 'fileutils'

module Gjman
  module PDF
    # Compresses / uncompresses a PDF through the delegated :compress and
    # :uncompress tools and puts the result at the requested destination.
    class Compressor < Base
      class << self

        # Compresses +src+.
        #
        # @param src [String] path of the source PDF
        # @param opts [Hash] :to => destination path (optional). The hash is
        #   only read, never modified.
        # @return [String] the destination path; defaults to +src+ with a
        #   trailing ".pdf" replaced by "-o.pdf"
        def do(src, opts={})
          default_dest = src.sub(/\.pdf$/, '-o.pdf')
          work(:compress, src, opts[:to] || default_dest, default_dest)
        end

        # Uncompresses +src+.
        #
        # @param src [String] path of the source PDF
        # @param opts [Hash] :to => destination path (optional). The hash is
        #   only read, never modified.
        # @return [String] the destination path; defaults to +src+ with a
        #   trailing ".pdf" replaced by "-u.pdf"
        def undo(src, opts={})
          default_dest = src.sub(/\.pdf$/, '-u.pdf')
          work(:uncompress, src, opts[:to] || default_dest, default_dest)
        end

        private

        # Runs +action+ on +src+ and makes sure +dest+ holds the result.
        #
        # When the tool output says the file is already compressed, or shows
        # an ArrayIndexOutOfBoundsException, the source is copied as-is.
        # Otherwise the file at +tmp_dest+ is moved to +dest+.
        # NOTE(review): presumably +tmp_dest+ is where the tool writes its
        # output by default - confirm against the Multivalent tools.
        #
        # @return [String] +dest+
        def work(action, src, dest, tmp_dest)
          case send(action, src)
          when /Already compressed\. \(Force recompression with \-force\.\)/,
               /java\.lang\.ArrayIndexOutOfBoundsException/
            # Copying a file onto itself would be pointless (and an error for
            # FileUtils), so skip it.
            FileUtils.cp(src, dest) unless src == dest
          else
            # Nothing to move when the tool already wrote to the destination.
            FileUtils.mv(tmp_dest, dest) unless dest == tmp_dest
          end
          dest
        end

        # NOTE: Since pdftk is almost as dead as multivalent (last updated in 2006),
        # there is really no good reason to choose it over multivalent. Moreover,
        # since pdf comparison requires java solution, it makes sense to be consistent,
        # which is essentially sticking to java.
        #
        #   def do(src, dest)
        #     pdftk(:compress, src, dest)
        #   end
        #
        #   def undo(src, dest)
        #     pdftk(:uncompress, src, dest)
        #   end
        #
        #   def pdftk(mode, src, dest)
        #     shell(:pdftk, [src, :output, dest, mode, :verbose])
        #     dest
        #   end

      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Gjman
  module PDF
    # Decides whether two PDFs match by comparing uncompressed copies of them.
    class Matcher < Base
      class << self

        # Uncompresses both PDFs into temp files and runs the font, image and
        # content comparisons in that order, stopping at the first failure.
        # Temp files are trashed afterwards in every case.
        #
        # @return [Boolean] true only when all three comparisons succeed
        def test(pdf_x, pdf_y)
          tmp_x, tmp_y = uncompress(pdf_x, pdf_y)
          [:same_fonts?, :same_images?, :same_contents?].all? do |check|
            send(check, tmp_x, tmp_y)
          end
        ensure
          FileSystem.trash_tmp_files
        end

        private

        # Uncompresses each given PDF into its own temp file, named after the
        # MD5 hex digest of the PDF argument with a ".pdf" suffix.
        #
        # @param pdfs [Array] PDFs, possibly nested in arrays
        # @return [Array] one PDF.uncompress result per input
        def uncompress(*pdfs)
          pdfs.flatten.map do |pdf|
            scratch = FileSystem.tmp_file([Digest::MD5.hexdigest(pdf), '.pdf'])
            PDF.uncompress(pdf, :to => scratch.path)
          end
        end

      end
    end
  end
end
@@ -0,0 +1,25 @@
1
# File.move came from 'ftools', which no longer ships with Ruby; FileUtils
# provides the equivalent.
require 'fileutils'

module Gjman
  module PDF
    # Merges several PDFs through the delegated :merge tool and puts the
    # result at the requested destination.
    class Merger < Base
      class << self

        # Merges the given source PDFs.
        #
        # @param args [Array] source paths (nested arrays are flattened),
        #   optionally followed by an options hash with :to => destination.
        #   The options hash is only read, never modified.
        # @return [String] the destination path; defaults to the first source
        #   with a trailing ".pdf" replaced by "-m.pdf"
        # @raise [ArgumentError] when no source is given
        def do(*args)
          opts = args.last.is_a?(Hash) ? args.pop : {}
          srcs = args.flatten
          raise ArgumentError, 'at least one source PDF is required' if srcs.empty?
          default_dest = srcs.first.sub(/\.pdf$/, '-m.pdf')
          work(srcs, opts[:to] || default_dest, default_dest)
        end

        private

        # Runs the merge and moves the file at +tmp_dest+ to +dest+ unless
        # they are the same path.
        # NOTE(review): presumably +tmp_dest+ is where the merge tool writes
        # its output by default - confirm against the Multivalent tools.
        #
        # @return [String] +dest+
        def work(srcs, dest, tmp_dest)
          merge(srcs)
          FileUtils.mv(tmp_dest, dest) unless dest == tmp_dest
          dest
        end

      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'gjman/pdf/utils/pdfc'
2
+ require 'gjman/pdf/utils/multivalent'
@@ -0,0 +1,58 @@
1
# Shellwords is used to quote arguments for the shell-based mode.
require 'shellwords'

module Gjman
  module PDF
    module Utils
      # Wrapper around the bundled Multivalent jar. Each key of SERVICES is
      # callable as a module method (e.g. Multivalent.compress(path)); the
      # call is dispatched through method_missing of the mode module selected
      # by Gjman::JAVA_MODE.
      module Multivalent

        # Raised when a method name is not a key of SERVICES. Derives from
        # StandardError so that ordinary `rescue` clauses handle it.
        class NotSupportedServiceError < StandardError ; end

        JARS = Gjman.ext('multivalent', 'Multivalent20060102.jar')
        Gjman::JAVA_LIBS << JARS

        # Service name => [Java class, fixed leading arguments...]
        SERVICES = {
          :compress   => %w{tool.pdf.Compress},
          :uncompress => %w{tool.pdf.Uncompress},
          :merge      => %w{tool.pdf.Merge},
          :fonts      => %w{tool.doc.ExtractText --output xml --style},
          :images     => %w{tool.pdf.Info --images},
        }.freeze

        # Runs the service's main class inside the JRuby sandbox.
        module JRuby
          def method_missing(mode, *args)
            Gjman::JRuby.sandbox do
              service, argv = extract_argv(mode, args)
              # Pass the argument list as-is: joining and re-splitting on
              # spaces would tear apart paths that contain spaces.
              Gjman::JRuby.classify(service).main(argv)
            end
          end
        end

        # Runs the service's main class inside the Rjb sandbox.
        module Rjb
          def method_missing(mode, *args)
            Gjman::Rjb.sandbox do
              service, argv = extract_argv(mode, args)
              Gjman::Rjb.classify(service).main(argv)
            end
          end
        end

        # Runs the service with the `java` command and returns its combined
        # stdout/stderr output.
        module Shell
          def method_missing(mode, *args)
            service, argv = extract_argv(mode, args)
            @cmd ||= 'java -cp %s' % Shellwords.escape(JARS)
            # Every argument is shell-escaped so that spaces or shell
            # metacharacters in file names cannot break or alter the command.
            %x|#{@cmd} #{Shellwords.join([service] + argv)} 2>&1|
          end
        end

        # Resolves a service name into its Java class and argument list.
        #
        # @param mode [Symbol] key of SERVICES
        # @param args [Array] caller-supplied arguments (nested arrays are
        #   flattened, nils dropped)
        # @return [Array(String, Array<String>)] class name and the fixed
        #   service arguments followed by the caller's, each as a String
        # @raise [NotSupportedServiceError] when +mode+ is not in SERVICES
        def self.extract_argv(mode, args)
          service_args = SERVICES[mode] or
            raise NotSupportedServiceError, "Unsupported Multivalent service: #{mode.inspect}"
          [
            service_args[0],
            [service_args[1..-1], args].flatten.compact.map { |arg| arg.to_s }
          ]
        end

        # Same as extract_argv, but with the arguments joined into a single
        # space-separated string. Kept for existing callers.
        #
        # @return [Array(String, String)]
        # @raise [NotSupportedServiceError] when +mode+ is not in SERVICES
        def self.extract_args(mode, args)
          service, argv = extract_argv(mode, args)
          [service, argv.join(' ')]
        end

        # Makes respond_to? report the dynamically dispatched services.
        def self.respond_to_missing?(name, include_private = false)
          SERVICES.key?(name) || super
        end

        extend const_get(Gjman::JAVA_MODE)

      end
    end
  end
end