identikal 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: dc02eff57d85a4e3890315c538f03acb43eb7bae0b21023374ddb36ea6170cb1
4
+ data.tar.gz: 4f4cbd925ec53da1fdf24ca45ed46310e5674c0aaf69b92e041d48dae17f7972
5
+ SHA512:
6
+ metadata.gz: 0c07ef817e71ddcbb81644f1fa7b09cb0f61dceecf6d082144eaba13f05b6dc6924a71e8d37664b2134bf091a0d49871551c6e301d1fedbd12b88a572d65cf8e
7
+ data.tar.gz: 6d94e772be5b1ac8219901ef34190f130b23eefcc2e5f7b55c02596bba78d108723f1d00dcf49c04a0c68b4aeabfde96911b2c63a543a9cac6ba7e9c893f218d
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+
13
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.4
3
+
4
+ Metrics/BlockLength:
5
+ ExcludedMethods: ['describe', 'context']
6
+
7
+ Style/Documentation:
8
+ Enabled: false
@@ -0,0 +1,7 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.5.1
7
+ before_install: gem install bundler -v 2.0.1
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in identikal.gemspec
6
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2019 Jahangir Anwari
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,82 @@
1
+ # Identikal
2
+
3
+ A no frills tiny gem that does one thing and only one thing. It compares two unencrypted PDF files and returns **true** if they are identical and **false** otherwise. PDF comparison is done in pure Ruby with the help of [HexaPDF](https://github.com/gettalong/hexapdf) and [PDF::Reader](https://github.com/yob/pdf-reader).
4
+
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'identikal'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install identikal
21
+
22
+ ## Usage
23
+
24
+ ### CLI
25
+ Comparing two PDFs from command line can be done by running the below command:
26
+
27
+ ```bash
28
+ $ identikal file_a.pdf file_b.pdf
29
+ true
30
+ ```
31
+
32
+ **Text Only**
33
+
34
+ If you would like to compare only the "text" content of the PDFs add the `-t` flag:
35
+
36
+ ```bash
37
+ $ identikal -t file_a.pdf file_c.pdf
38
+ false
39
+ ```
40
+
41
+ ### Ruby Code
42
+ Besides the command line tool Identikal can also be used in a Ruby application. Identikal supports the following `compare_method`:
43
+
44
+ * `:all` compare text along with formatting
45
+ * `:text` compare only text content
46
+
47
+ `:all` is the default comparison method.
48
+
49
+
50
+ **Text with formatting**
51
+ ```ruby
52
+ require 'identikal'
53
+
54
+ base_path = File.expand_path(File.join(__dir__) + '/pdfs')
55
+ pdf_a = File.join(base_path, 'report_a.pdf')
56
+ pdf_b = File.join(base_path, 'report_b.pdf')
57
+
58
+ if Identikal.files_same?(pdf_a, pdf_b)
59
+ # some action when files are identical
60
+ else
61
+ # another action when files are different
62
+ end
63
+ ```
64
+
65
+ **Text Only**
66
+ ```ruby
67
+ require 'identikal'
68
+
69
+ base_path = File.expand_path(File.join(__dir__) + '/pdfs')
70
+ pdf_a = File.join(base_path, 'report_a.pdf')
71
+ pdf_b = File.join(base_path, 'report_b.pdf')
72
+
73
+ if Identikal.files_same?(pdf_a, pdf_b, compare_method: :text)
74
+ # some action when files are identical
75
+ else
76
+ # another action when files are different
77
+ end
78
+ ```
79
+
80
+ ## License
81
+
82
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec) # defines the `spec` Rake task
7
+
8
+ task default: :spec # a bare `rake` invocation runs `spec`
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH.unshift(File.expand_path('../lib', __dir__)) # run against the sibling lib/ directory
5
+ require 'optparse'
6
+ require 'identikal'
7
+
8
+ ARGV << '-h' if ARGV.empty? # no arguments at all: show the help text
9
+ compare_method = :all
10
+ verbose = false
11
+ OptionParser.new do |parser|
12
+ parser.banner = 'Usage: identikal [options] PDF_1 PDF_2'
13
+ parser.on('-t', '--text', 'Only compare text') do
14
+ compare_method = :text
15
+ end
16
+
17
+ parser.on('-v', '--verbose', 'Verbose output') do
18
+ verbose = true
19
+ end
20
+
21
+ parser.on('-h', '--help', 'Print this help') do
22
+ puts parser
23
+ exit
24
+ end
25
+ end.parse! # parse! removes the recognised options, leaving only the operands in ARGV
26
+
27
+ file_a = ARGV[0].to_s # to_s: a missing operand becomes '' and is reported as FileNotFound
28
+ file_b = ARGV[1].to_s # (a nil path would make File.file? raise an unrescued TypeError)
29
+
30
+ begin
31
+ result = Identikal.files_same? file_a, file_b, compare_method: compare_method
32
+ rescue ArgumentError => e # both Identikal::Error classes subclass ArgumentError
33
+ warn "Error: #{e.message}"
34
+ exit(1)
35
+ end
36
+
37
+ if verbose
38
+ puts 'Files are identical' if result
39
+ puts 'Files are different' unless result
40
+ else
41
+ puts result # prints the bare boolean: true or false
42
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'identikal/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = 'identikal'
9
+ spec.version = Identikal::VERSION
10
+ spec.authors = ['Jahangir Anwari']
11
+ spec.email = ['jahangir.rubygems@gmail.com']
12
+
13
+ spec.summary = 'Verify if two given PDF files are identical or not.'
14
+ spec.description = <<-DESCRIPTION
15
+ A no frills gem that does one thing and only one thing. Checks whether two given PDF files are identical or not.
16
+ PDF comparison done in pure Ruby.
17
+ DESCRIPTION
18
+ spec.homepage = 'https://github.com/jahangiranwari/identikal'
19
+ spec.license = 'MIT'
20
+
21
+ if spec.respond_to?(:metadata)
22
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
23
+ spec.metadata['homepage_uri'] = spec.homepage
24
+ spec.metadata['source_code_uri'] = spec.homepage
25
+ else
26
+ raise 'RubyGems 2.0 or newer is required to protect against ' \
27
+ 'public gem pushes.'
28
+ end
29
+
30
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
31
+ `git ls-files -z`.split("\x0").reject do |f|
32
+ f.match(%r{^(test|spec|features)/})
33
+ end
34
+ end
35
+
36
+ spec.required_ruby_version = '>= 2.4.0'
37
+ spec.bindir = 'bin'
38
+ spec.executables = 'identikal'
39
+ spec.require_paths = ['lib']
40
+
41
+ spec.add_development_dependency 'bundler'
42
+ spec.add_development_dependency 'rake', '~> 10.0'
43
+ spec.add_development_dependency 'rspec', '~> 3.0'
44
+ spec.add_development_dependency 'rubocop', '~> 0.71.0'
45
+ spec.add_dependency 'hexapdf', '~> 0.6.0'
46
+ spec.add_dependency 'pdf-reader', '~> 2.2.0'
47
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'identikal/version'
4
+ require 'identikal/compare'
5
+
6
+ module Identikal
7
+ class << self
8
+ def files_same?(*args)
9
+ Identikal::Compare.files_same?(*args)
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: false
2
+
3
+ require 'identikal/error'
4
+
5
+ module Identikal
6
+ autoload(:HexaPDF, 'hexapdf') # required lazily, on first reference
7
+ autoload(:PDF, 'pdf-reader') # required lazily, on first reference
8
+
9
+ class Compare
10
+ COMPARE_METHODS = %i[text all].freeze # accepted compare_method values
11
+
12
+ class << self
13
+ def files_same?(file_a, file_b, compare_method: :all) # true/false; raises on bad paths or method
14
+ validate_arguments(file_a, file_b, compare_method)
15
+ if compare_method == :text
16
+ text_only(file_a, file_b)
17
+ else
18
+ with_formatting(file_a, file_b)
19
+ end
20
+ end
21
+
22
+ private
23
+
24
+ def text_only(file_a, file_b) # PDF::Reader path: compares extracted page text only
25
+ reader_a = PDF::Reader.new(file_a)
26
+ reader_b = PDF::Reader.new(file_b)
27
+ return false unless reader_a.page_count == reader_b.page_count
28
+
29
+ text_compare(reader_a, reader_b)
30
+ end
31
+
32
+ def with_formatting(file_a, file_b) # HexaPDF path: compares each page's `contents`
33
+ reader_a = HexaPDF::Document.open(file_a)
34
+ reader_b = HexaPDF::Document.open(file_b)
35
+ return false unless reader_a.pages.count == reader_b.pages.count
36
+
37
+ format_compare(reader_a, reader_b)
38
+ end
39
+
40
+ def format_compare(reader_a, reader_b) # caller has already checked the page counts match
41
+ reader_a.pages.count.times do |i|
42
+ text_a = reader_a.pages[i].contents
43
+ text_b = reader_b.pages[i].contents
44
+ return false unless text_a == text_b
45
+ end
46
+ true
47
+ end
48
+
49
+ def text_compare(reader_a, reader_b) # whitespace-insensitive, page by page
50
+ reader_a.page_count.times do |i|
51
+ text_a = reader_a.pages[i].text.gsub(/\s+/, '') # gsub, not gsub!: the bang form returns nil when nothing matches
52
+ text_b = reader_b.pages[i].text.gsub(/\s+/, '') # \s already covers \n, so one pattern suffices
53
+ return false unless text_a == text_b
54
+ end
55
+ true
56
+ end
57
+
58
+ def validate_arguments(file_a, file_b, compare_method) # raises; return value is unused
59
+ raise Identikal::Error::FileNotFound unless
60
+ [file_a, file_b].all? { |file| file && File.file?(file) } # nil path => FileNotFound, not TypeError
61
+
62
+ raise Identikal::Error::InvalidComparisonMethod unless
63
+ COMPARE_METHODS.include?(compare_method)
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'identikal/compare'
4
+
5
+ module Identikal
6
+ module Error
7
+ class FileNotFound < ArgumentError
8
+ def to_s
9
+ 'PDF file(s) could not be found. ' \
10
+ 'Please provide two PDF files to compare'
11
+ end
12
+ end
13
+
14
+ class InvalidComparisonMethod < ArgumentError
15
+ def to_s
16
+ 'Invalid comparision method. ' \
17
+ 'Please choose one of these methods: ' \
18
+ "#{Identikal::Compare::COMPARE_METHODS}"
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Identikal
4
+ VERSION = '0.1.2' # gem release number; identikal.gemspec reads it for spec.version
5
+ end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: identikal
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - Jahangir Anwari
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-06-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.71.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.71.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: hexapdf
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.6.0
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.6.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: pdf-reader
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 2.2.0
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 2.2.0
97
+ description: |2
98
+ A no frills gem that does one thing and only one thing. Checks whether two given PDF files are identical or not.
99
+ PDF comparison done in pure Ruby.
100
+ email:
101
+ - jahangir.rubygems@gmail.com
102
+ executables:
103
+ - identikal
104
+ extensions: []
105
+ extra_rdoc_files: []
106
+ files:
107
+ - ".gitignore"
108
+ - ".rspec"
109
+ - ".rubocop.yml"
110
+ - ".travis.yml"
111
+ - Gemfile
112
+ - LICENSE.txt
113
+ - README.md
114
+ - Rakefile
115
+ - bin/identikal
116
+ - identikal.gemspec
117
+ - lib/identikal.rb
118
+ - lib/identikal/compare.rb
119
+ - lib/identikal/error.rb
120
+ - lib/identikal/version.rb
121
+ homepage: https://github.com/jahangiranwari/identikal
122
+ licenses:
123
+ - MIT
124
+ metadata:
125
+ allowed_push_host: https://rubygems.org
126
+ homepage_uri: https://github.com/jahangiranwari/identikal
127
+ source_code_uri: https://github.com/jahangiranwari/identikal
128
+ post_install_message:
129
+ rdoc_options: []
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: 2.4.0
137
+ required_rubygems_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ requirements: []
143
+ rubyforge_project:
144
+ rubygems_version: 2.7.6
145
+ signing_key:
146
+ specification_version: 4
147
+ summary: Verify if two given PDF files are identical or not.
148
+ test_files: []