pdf-reader 1.1.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +87 -2
  3. data/{README.rdoc → README.md} +43 -31
  4. data/Rakefile +21 -16
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -3
  8. data/examples/callbacks.rb +2 -1
  9. data/examples/extract_images.rb +11 -6
  10. data/examples/fuzzy_paragraphs.rb +24 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  14. data/lib/pdf/reader/afm/Courier.afm +342 -0
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  26. data/lib/pdf/reader/buffer.rb +90 -63
  27. data/lib/pdf/reader/cid_widths.rb +63 -0
  28. data/lib/pdf/reader/cmap.rb +69 -38
  29. data/lib/pdf/reader/encoding.rb +74 -48
  30. data/lib/pdf/reader/error.rb +24 -4
  31. data/lib/pdf/reader/filter/ascii85.rb +28 -0
  32. data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
  33. data/lib/pdf/reader/filter/depredict.rb +141 -0
  34. data/lib/pdf/reader/filter/flate.rb +53 -0
  35. data/lib/pdf/reader/filter/lzw.rb +21 -0
  36. data/lib/pdf/reader/filter/null.rb +18 -0
  37. data/lib/pdf/reader/filter/run_length.rb +45 -0
  38. data/lib/pdf/reader/filter.rb +15 -234
  39. data/lib/pdf/reader/font.rb +107 -43
  40. data/lib/pdf/reader/font_descriptor.rb +80 -0
  41. data/lib/pdf/reader/form_xobject.rb +26 -4
  42. data/lib/pdf/reader/glyph_hash.rb +56 -18
  43. data/lib/pdf/reader/lzw.rb +6 -4
  44. data/lib/pdf/reader/null_security_handler.rb +17 -0
  45. data/lib/pdf/reader/object_cache.rb +40 -16
  46. data/lib/pdf/reader/object_hash.rb +94 -40
  47. data/lib/pdf/reader/object_stream.rb +1 -0
  48. data/lib/pdf/reader/orientation_detector.rb +34 -0
  49. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  50. data/lib/pdf/reader/page.rb +48 -3
  51. data/lib/pdf/reader/page_layout.rb +125 -0
  52. data/lib/pdf/reader/page_state.rb +185 -70
  53. data/lib/pdf/reader/page_text_receiver.rb +70 -20
  54. data/lib/pdf/reader/pages_strategy.rb +4 -293
  55. data/lib/pdf/reader/parser.rb +37 -61
  56. data/lib/pdf/reader/print_receiver.rb +6 -0
  57. data/lib/pdf/reader/reference.rb +4 -1
  58. data/lib/pdf/reader/register_receiver.rb +17 -31
  59. data/lib/pdf/reader/resource_methods.rb +1 -0
  60. data/lib/pdf/reader/standard_security_handler.rb +82 -42
  61. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  62. data/lib/pdf/reader/stream.rb +5 -2
  63. data/lib/pdf/reader/synchronized_cache.rb +33 -0
  64. data/lib/pdf/reader/text_run.rb +99 -0
  65. data/lib/pdf/reader/token.rb +4 -1
  66. data/lib/pdf/reader/transformation_matrix.rb +195 -0
  67. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  68. data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
  69. data/lib/pdf/reader/width_calculator/composite.rb +28 -0
  70. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  71. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
  72. data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
  73. data/lib/pdf/reader/width_calculator.rb +12 -0
  74. data/lib/pdf/reader/xref.rb +41 -9
  75. data/lib/pdf/reader.rb +45 -104
  76. data/lib/pdf-reader.rb +4 -1
  77. metadata +220 -101
  78. data/bin/pdf_list_callbacks +0 -17
  79. data/lib/pdf/hash.rb +0 -15
  80. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  81. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  82. data/lib/pdf/reader/text_receiver.rb +0 -264
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 652d05cf6a22fad5ecb4b92de1e27ba60cafc6525c5ca524e24c7f9796fe1b83
4
+ data.tar.gz: 2c7448e97890a9fcbd10ec2cd5bafb9025db2fb75dabaf71a4074c542b1065a1
5
+ SHA512:
6
+ metadata.gz: ac82452924cf46af98ee15f2a20642b1d06d5b9c22104fe171b5b4612665e482f341e12473805016ccb9d921fc15324ba51675170b369adeace8b278cd1279fb
7
+ data.tar.gz: b1dc1c4422b0e6bf01092cf724630ba7424fdef1fdaf34f33aaa3a31397caf6ef5a73185a98e6e2828a9e082d87cbca311565397cb064cac20d86e72be27626f
data/CHANGELOG CHANGED
@@ -1,3 +1,88 @@
1
+ v2.5.0 (6th June 2021)
2
+ - bump minimum ruby version to 2.0
3
+ - Correctly handle transcoding to UTF-8 from some fonts that use a difference table [#344](https://github.com/yob/pdf-reader/pull/344/)
4
+ - Fix some character spacing issues with the TJ operator [#343](https://github.com/yob/pdf-reader/pull/343)
5
+ - Fix crash with some encrypted PDFs [#348](https://github.com/yob/pdf-reader/pull/348/)
6
+ - Fix positions of text on some PDFs with pages rotated 90° [#350](https://github.com/yob/pdf-reader/pull/350/)
7
+
8
+ v2.4.2 (28th January 2021)
9
+ - relax ASCII85 dependency to allow 1.x
10
+ - improved support for decompressing objects with slightly malformed zlib data
11
+
12
+ v2.4.1 (24th September 2020)
13
+ - Re-vendor font metrics from Adobe to clarify their license
14
+
15
+ v2.4.0 (21st November 2019)
16
+ - Optimise overlapping characters code introduced in 2.3.0. Text extraction of pages with
17
+ thousands of characters is still slower than it was in 2.2.1, but it might be tolerable
18
+ for now. See https://github.com/yob/pdf-reader/pull/308 for details.
19
+ - Implement very basic font substitution for Type1 and TrueType fonts that aren't embedded
20
+ - Remove PDF::Hash class. It's been deprecated since 2010, and it's hard to believe anyone
21
+ is still using it.
22
+ - Several small bug fixes
23
+
24
+ v2.3.0 (7th November 2019)
25
+ - Text extraction now makes an effort to skip duplicate characters that overlap, a
26
+ common approach used for a fake "bold" effect. This will make text extraction a bit
27
+ slower - if that turns out to be an issue I'll look into further optimisations or
28
+ provide a toggle to turn it off
29
+ - Several small bug fixes
30
+
31
+ v2.2.1 (27th July 2019)
32
+ - Improve utf8 text extraction from CMaps that contain surrogate pair ligatures
33
+
34
+ v2.2.0 (18th December 2018)
35
+ - Support additional XRef Stream variants (thanks Stefan Wienert)
36
+ - Add frozen_strings pragma to reduce object allocations on ruby 2.3+
37
+ - various bug fixes
38
+
39
+ v2.1.0 (15th February 2018)
40
+ - Support extra encrypted PDF variants (thanks to Gyuchang Jun)
41
+ - various bug fixes
42
+
43
+ v2.0.0 (25th February 2017)
44
+ - various bug fixes
45
+
46
+ v2.0.0.beta1 (15th February 2017)
47
+ - BREAKING CHANGE: remove all methods that were deprecated in 1.0.0
48
+ - Bug: Support extra encrypted PDF variants (thanks to Gyuchang Jun)
49
+ - various bug fixes
50
+
51
+ v1.4.1 (2nd January 2017)
52
+ - improve compatibility with ruby 2.4 (thanks Akira Matsuda)
53
+ - various bug fixes
54
+
55
+ v1.4.0 (22nd February 2016)
56
+ - raise minimum ruby version to 1.9.3
57
+ - print warnings to stderr when deprecated methods are used. These methods have been
58
+ deprecated for 4 years, so hopefully few people are depending on them
59
+ - Fix exception when a non-breaking space (character 160) is used with a
60
+ built-in font (helvetica, etc)
61
+ - various bug fixes
62
+
63
+ v1.3.3 (7th April 2013)
64
+ - various bug fixes
65
+
66
+ v1.3.2 (26th February 2013)
67
+ - various bug fixes
68
+
69
+ v1.3.1 (12th February 2013)
70
+ - various bug fixes
71
+
72
+ v1.3.0 (30th December 2012)
73
+ - Numerous performance optimisations (thanks Alex Dowad)
74
+ - Improved text extraction (thanks Nathaniel Madura)
75
+ - Load less of the hashery gem to reduce core monkey patches
76
+ - various bug fixes
77
+
78
+ v1.2.0 (28th August 2012)
79
+ - Feature: correctly extract text using surrogate pairs and ligatures
80
+ (thanks Nathaniel Madura)
81
+ - Speed optimisation: cache tokenised Form XObjects to avoid re-parsing them
82
+ - Feature: support opening documents with some junk bytes prepended to file
83
+ (thanks Paul Gallagher)
84
+ - Acrobat does this, so it seemed reasonable to add support
85
+
1
86
  v1.1.1 (9th May 2012)
2
87
  - bugfix release to improve parsing of some PDFs
3
88
 
@@ -56,10 +141,10 @@ v0.9.2 (24th April 2011)
56
141
 
57
142
  v0.9.1 (21st December 2010)
58
143
  - force gem to only install on ruby 1.8.7 or higher
59
- - maintaining supprot for earlier versions takes more time than I have
144
+ - maintaining support for earlier versions takes more time than I have
60
145
  available at the moment
61
146
  - bug: fix parsing of obscure pdf name format
62
- - bug: fix behaviour when loaded in confunction with htmldoc gem
147
+ - bug: fix behaviour when loaded in conjunction with htmldoc gem
63
148
 
64
149
  v0.9.0 (19th November 2010)
65
150
  - support for pdf 1.5+ files that use object and xref streams
@@ -1,4 +1,4 @@
1
- = Release Notes
1
+ # pdf-reader
2
2
 
3
3
  The PDF::Reader library implements a PDF parser conforming as much as possible
4
4
  to the PDF specification from Adobe.
@@ -15,46 +15,55 @@ higher level functionality - it's not going to render a PDF for you. There are
15
15
  a few exceptions to support very common use cases like extracting text from a
16
16
  page.
17
17
 
18
- = Installation
18
+ # Installation
19
19
 
20
20
  The recommended installation method is via Rubygems.
21
21
 
22
+ ```ruby
22
23
  gem install pdf-reader
24
+ ```
23
25
 
24
- = Usage
26
+ # Usage
25
27
 
26
28
  Begin by creating a PDF::Reader instance that points to a PDF file. Document
27
29
  level information (metadata, page count, bookmarks, etc) is available via
28
30
  this object.
29
31
 
32
+ ```ruby
30
33
  reader = PDF::Reader.new("somefile.pdf")
31
34
 
32
35
  puts reader.pdf_version
33
36
  puts reader.info
34
37
  puts reader.metadata
35
38
  puts reader.page_count
39
+ ```
36
40
 
37
41
  PDF::Reader.new accepts an IO stream or a filename. Here's an example with
38
42
  an IO stream:
39
43
 
44
+ ```ruby
40
45
  require 'open-uri'
41
46
 
42
47
  io = open('http://example.com/somefile.pdf')
43
48
  reader = PDF::Reader.new(io)
44
49
  puts reader.info
50
+ ```
45
51
 
46
52
  If you open a PDF with File#open or IO#open, I strongly recommend using "rb"
47
53
  mode to ensure the file isn't mangled by ruby being 'helpful'. This is
48
54
  particularly important on windows and MRI >= 1.9.2.
49
55
 
56
+ ```ruby
50
57
  File.open("somefile.pdf", "rb") do |io|
51
58
  reader = PDF::Reader.new(io)
52
59
  puts reader.info
53
60
  end
61
+ ```
54
62
 
55
63
  PDF is a page based file format, so most visible information is available via
56
64
  page-based iteration
57
65
 
66
+ ```ruby
58
67
  reader = PDF::Reader.new("somefile.pdf")
59
68
 
60
69
  reader.pages.each do |page|
@@ -62,10 +71,12 @@ page-based iteration
62
71
  puts page.text
63
72
  puts page.raw_content
64
73
  end
74
+ ```
65
75
 
66
76
  If you need to access the full program for rendering a page, use the walk() method
67
77
  of PDF::Reader::Page.
68
78
 
79
+ ```ruby
69
80
  class RedGreenBlue
70
81
  def set_rgb_color_for_nonstroking(r, g, b)
71
82
  puts "R: #{r}, G: #{g}, B: #{b}"
@@ -76,37 +87,32 @@ of PDF::Reader::Page.
76
87
  page = reader.page(1)
77
88
  receiver = RedGreenBlue.new
78
89
  page.walk(receiver)
90
+ ```
79
91
 
80
- For low level access to the objects in a PDF file, use the ObjectHash class. You can
81
- build an ObjectHash instance directly:
82
-
83
- puts PDF::Reader::ObjectHash.new("somefile.pdf")
84
-
85
- or via a PDF::Reader instance:
92
+ For low level access to the objects in a PDF file, use the ObjectHash class like
93
+ so:
86
94
 
95
+ ```ruby
87
96
  reader = PDF::Reader.new("somefile.pdf")
88
- puts reader.objects
89
-
90
- The second method is preferred to increase the effectiveness of internal caching.
97
+ puts reader.objects.inspect
98
+ ```
91
99
 
92
- = Text Encoding
100
+ # Text Encoding
93
101
 
94
102
  Regardless of the internal encoding used in the PDF all text will be converted
95
103
  to UTF-8 before it is passed back from PDF::Reader.
96
104
 
97
- Strings that contain binary data (like font blobs) will be marked as such on
98
- M17N aware VMs.
105
+ Strings that contain binary data (like font blobs) will be marked as such.
99
106
 
100
- = Former API
107
+ # Former API
101
108
 
102
109
  Version 1.0.0 of PDF::Reader introduced a new page-based API that provides
103
110
  efficient and easy access to any page.
104
111
 
105
- The previous API is marked as deprecated but will continue to work for the
106
- time being. Eventually calls to the old API will begin triggering deprecation
107
- warnings before it is completely removed in version 2.0.0.
112
+ The pre-1.0 API was deprecated during the 1.x release series, and has been
113
+ removed from 2.0.0.
108
114
 
109
- = Exceptions
115
+ # Exceptions
110
116
 
111
117
  There are two key exceptions that you will need to watch out for when processing a
112
118
  PDF file:
@@ -126,7 +132,7 @@ don't, 'rescue MalformedPDFError' will catch all the subclassed errors as well.
126
132
  Any other exceptions should be considered bugs in PDF::Reader (please
127
133
  report it!).
128
134
 
129
- = PDF Integrity
135
+ # PDF Integrity
130
136
 
131
137
  Windows developers may run into problems when running specs due to MalformedPDFErrors.
132
138
  This is usually because CRLF characters are automatically added to some of the PDF's in
@@ -134,18 +140,20 @@ the spec folder when you checkout a branch from Git.
134
140
 
135
141
  To remove any invalid CRLF characters added while checking out a branch from Git, run:
136
142
 
143
+ ```ruby
137
144
  rake fix_integrity
145
+ ```
138
146
 
139
- = Maintainers
147
+ # Maintainers
140
148
 
141
- - James Healy <mailto:jimmy@deefa.com>
149
+ * James Healy <mailto:jimmy@deefa.com>
142
150
 
143
- = Licensing
151
+ # Licensing
144
152
 
145
153
  This library is distributed under the terms of the MIT License. See the included file for
146
154
  more detail.
147
155
 
148
- = Mailing List
156
+ # Mailing List
149
157
 
150
158
  Any questions or feedback should be sent to the PDF::Reader google group. It's
151
159
  better that any answers be available for others instead of hiding in someone's
@@ -153,19 +161,23 @@ inbox.
153
161
 
154
162
  http://groups.google.com/group/pdf-reader
155
163
 
156
- = Examples
164
+ # Examples
157
165
 
158
166
  The easiest way to explain how this works in practice is to show some examples.
159
167
  Check out the examples/ directory for a few files.
160
168
 
161
- = Known Limitations
169
+ # Known Limitations
162
170
 
163
171
  Occasionally some text cannot be extracted properly due to the way it has been
164
172
  stored, or the use of invalid bytes. In these cases PDF::Reader will output a
165
173
  little UTF-8 friendly box to indicate an unrecognisable character.
166
174
 
167
- = Resources
175
+ # Resources
176
+
177
+ * PDF::Reader Code Repository: http://github.com/yob/pdf-reader
178
+
179
+ * PDF Specification: http://www.adobe.com/devnet/pdf/pdf_reference.html
180
+
181
+ * PDF Tutorial Slide Presentations: https://web.archive.org/web/20150110042057/http://home.comcast.net/~jk05/presentations/PDFTutorials.html
168
182
 
169
- - PDF::Reader Code Repository: http://github.com/yob/pdf-reader
170
- - PDF Specification: http://www.adobe.com/devnet/pdf/pdf_reference.html
171
- - PDF Tutorial Slide Presentations: http://home.comcast.net/~jk05/presentations/PDFTutorials.html
183
+ * Developing with PDF (book): http://shop.oreilly.com/product/0636920025269.do
data/Rakefile CHANGED
@@ -1,19 +1,26 @@
1
- require "rubygems"
2
- require "bundler"
3
- Bundler.setup
4
-
5
- require 'rake'
6
- require 'rake/rdoctask'
7
- require 'rspec/core/rake_task'
8
- require 'roodi'
9
- require 'roodi_task'
1
+ require "bundler/gem_tasks"
2
+ require "digest/md5"
3
+ require "rdoc/task"
4
+ require "rspec/core/rake_task"
5
+ require "yaml"
10
6
 
11
7
  desc "Default Task"
12
- task :default => [ :spec ]
8
+ task :default => [ :quality, :spec ]
9
+
10
+ require 'cane/rake_task'
11
+ require 'morecane'
12
+
13
+ desc "Run cane to check quality metrics"
14
+ Cane::RakeTask.new(:quality) do |cane|
15
+ cane.abc_max = 20
16
+ cane.style_measure = 100
17
+ cane.max_violations = 31
18
+
19
+ cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
20
+ end
13
21
 
14
- # run all rspecs
15
22
  desc "Run all rspec files"
16
- RSpec::Core::RakeTask.new("spec") do |t|
23
+ RSpec::Core::RakeTask.new(:spec) do |t|
17
24
  t.rspec_opts = ["--color", "--format progress"]
18
25
  t.ruby_opts = "-w"
19
26
  end
@@ -31,16 +38,14 @@ Rake::RDocTask.new("doc") do |rdoc|
31
38
  rdoc.options << "--inline-source"
32
39
  end
33
40
 
34
- RoodiTask.new 'roodi', ['lib/**/*.rb']
35
-
36
41
  desc "Create a YAML file of integrity info for PDFs in the spec suite"
37
42
  task :integrity_yaml do
38
43
  data = {}
39
- Dir.glob("spec/data/**/*.*").each do |path|
44
+ Dir.glob("spec/data/**/*.*").sort.each do |path|
40
45
  path_without_spec = path.gsub("spec/","")
41
46
  data[path_without_spec] = {
42
47
  :bytes => File.size(path),
43
- :md5 => `md5sum "#{path}"`.split.first
48
+ :md5 => Digest::MD5.hexdigest(File.read(path))
44
49
  } if File.file?(path)
45
50
  end
46
51
  File.open("spec/integrity.yml","wb") { |f| f.write YAML.dump(data)}
data/bin/pdf_callbacks CHANGED
@@ -9,7 +9,7 @@ require 'pdf/reader'
9
9
  receiver = PDF::Reader::PrintReceiver.new
10
10
 
11
11
  if ARGV.empty?
12
- browser = PDF::Reader.new($stdin)
12
+ browser = PDF::Reader.new(StringIO.new(ARGF.read))
13
13
  else
14
14
  browser = PDF::Reader.new(ARGV[0])
15
15
  end
data/bin/pdf_object CHANGED
@@ -25,7 +25,10 @@ gen = gen.to_i
25
25
 
26
26
  # make magic happen
27
27
  begin
28
- obj = PDF::Reader.object_file(filename, id, gen)
28
+ obj = nil
29
+ PDF::Reader.open(filename) do |pdf|
30
+ obj = pdf.objects[PDF::Reader::Reference.new(id, gen)]
31
+ end
29
32
 
30
33
  case obj
31
34
  when Hash, Array
data/bin/pdf_text CHANGED
@@ -1,12 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rubygems'
4
- $LOAD_PATH.unshift(File.dirname(__FILE__) + "/../lib")
5
-
6
4
  require 'pdf/reader'
7
5
 
8
6
  if ARGV.empty?
9
- browser = PDF::Reader.new($stdin)
7
+ browser = PDF::Reader.new(StringIO.new(ARGF.read))
10
8
  else
11
9
  browser = PDF::Reader.new(ARGV[0])
12
10
  end
@@ -9,12 +9,13 @@
9
9
  require 'rubygems'
10
10
  require 'pdf/reader'
11
11
 
12
- receiver = PDF::Reader::RegisterReceiver.new
13
12
  filename = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-basic.pdf"
14
13
 
15
14
  PDF::Reader.open(filename) do |reader|
16
15
  reader.pages.each do |page|
16
+ receiver = PDF::Reader::RegisterReceiver.new
17
17
  page.walk(receiver)
18
+
18
19
  receiver.callbacks.each do |cb|
19
20
  puts cb
20
21
  end
@@ -86,14 +86,15 @@ module ExtractImages
86
86
  tiff = header.dup
87
87
  tiff << short_tag.call( 256, 1, w ) # image width
88
88
  tiff << short_tag.call( 257, 1, h ) # image height
89
- tiff << long_tag.call( 258, 4, (header.size + (tag_count*12))) # bits per pixel
89
+ tiff << long_tag.call( 258, 4, (header.size + (tag_count*12) + 4)) # bits per pixel
90
90
  tiff << short_tag.call( 259, 1, 1 ) # compression
91
91
  tiff << short_tag.call( 262, 1, 5 ) # colorspace - separation
92
- tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 16) ) # data offset
92
+ tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 20) ) # data offset
93
93
  tiff << short_tag.call( 277, 1, 4 ) # samples per pixel
94
94
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
95
95
  tiff << short_tag.call( 284, 1, 1 ) # planar config
96
96
  tiff << long_tag.call( 332, 1, 1) # inkset - CMYK
97
+ tiff << [0].pack("I") # next IFD pointer
97
98
  tiff << [bpc, bpc, bpc, bpc].pack("IIII")
98
99
  tiff << stream.unfiltered_data
99
100
  File.open(filename, "wb") { |file| file.write tiff }
@@ -119,10 +120,12 @@ module ExtractImages
119
120
  tiff << short_tag.call( 258, 1, 8 ) # bits per pixel
120
121
  tiff << short_tag.call( 259, 1, 1 ) # compression
121
122
  tiff << short_tag.call( 262, 1, 1 ) # colorspace - grayscale
122
- tiff << long_tag.call( 273, 1, (10 + (tag_count*12)) ) # data offset
123
+ tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 4) ) # data offset
123
124
  tiff << short_tag.call( 277, 1, 1 ) # samples per pixel
124
125
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
125
126
  tiff << short_tag.call( 284, 1, 1 ) # planar config
127
+ tiff << [0].pack("I") # next IFD pointer
128
+ p stream.unfiltered_data.size
126
129
  tiff << stream.unfiltered_data
127
130
  File.open(filename, "wb") { |file| file.write tiff }
128
131
  end
@@ -144,12 +147,13 @@ module ExtractImages
144
147
  tiff = header.dup
145
148
  tiff << short_tag.call( 256, 1, w ) # image width
146
149
  tiff << short_tag.call( 257, 1, h ) # image height
147
- tiff << long_tag.call( 258, 3, (header.size + (tag_count*12))) # bits per pixel
150
+ tiff << long_tag.call( 258, 3, (header.size + (tag_count*12) + 4)) # bits per pixel
148
151
  tiff << short_tag.call( 259, 1, 1 ) # compression
149
152
  tiff << short_tag.call( 262, 1, 2 ) # colorspace - RGB
150
- tiff << long_tag.call( 273, 1, (header.size + (tag_count*12) + 12) ) # data offset
153
+ tiff << long_tag.call( 273, 1, (header.size + (tag_count*12) + 16) ) # data offset
151
154
  tiff << short_tag.call( 277, 1, 3 ) # samples per pixel
152
155
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
156
+ tiff << [0].pack("I") # next IFD pointer
153
157
  tiff << [bpc, bpc, bpc].pack("III")
154
158
  tiff << stream.unfiltered_data
155
159
  File.open(filename, "wb") { |file| file.write tiff }
@@ -209,8 +213,9 @@ module ExtractImages
209
213
  + short_tag.call( 256, cols ) \
210
214
  + short_tag.call( 257, h ) \
211
215
  + short_tag.call( 259, 4 ) \
212
- + long_tag.call( 273, (10 + (5*12)) ) \
216
+ + long_tag.call( 273, (10 + (5*12) + 4) ) \
213
217
  + long_tag.call( 279, len) \
218
+ + [0].pack("I") \
214
219
  + stream.data
215
220
  File.open(filename, "wb") { |file| file.write tiff }
216
221
  end