pdf-reader 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ # A mapping of WinAnsi (win-1252) characters to unicode. Anything
2
+ # not specified is left unchanged
3
+ 80;20AC
4
+ 82;201A
5
+ 83;0192
6
+ 84;201E
7
+ 85;2026
8
+ 86;2020
9
+ 87;2021
10
+ 88;02C6
11
+ 89;2030
12
+ 8A;0160
13
+ 8B;2039
14
+ 8C;0152
15
+ 8E;017D
16
+ 91;2018
17
+ 92;2019
18
+ 93;201C
19
+ 94;201D
20
+ 95;2022
21
+ 96;2013
22
+ 97;2014
23
+ 98;02DC
24
+ 99;2122
25
+ 9A;0161
26
+ 9B;203A
27
+ 9C;0153
28
+ 9E;017E
29
+ 9F;0178
@@ -0,0 +1,201 @@
1
+ 21;2701
2
+ 22;2702
3
+ 23;2703
4
+ 24;2704
5
+ 25;260E
6
+ 26;2706
7
+ 27;2707
8
+ 28;2708
9
+ 29;2709
10
+ 2A;261B
11
+ 2B;261E
12
+ 2C;270C
13
+ 2D;270D
14
+ 2E;270E
15
+ 2F;270F
16
+ 30;2710
17
+ 31;2711
18
+ 32;2712
19
+ 33;2713
20
+ 34;2714
21
+ 35;2715
22
+ 36;2716
23
+ 37;2717
24
+ 38;2718
25
+ 39;2719
26
+ 3A;271A
27
+ 3B;271B
28
+ 3C;271C
29
+ 3D;271D
30
+ 3E;271E
31
+ 3F;271F
32
+ 40;2720
33
+ 41;2721
34
+ 42;2722
35
+ 43;2723
36
+ 44;2724
37
+ 45;2725
38
+ 46;2726
39
+ 47;2727
40
+ 48;2605
41
+ 49;2729
42
+ 4A;272A
43
+ 4B;272B
44
+ 4C;272C
45
+ 4D;272D
46
+ 4E;272E
47
+ 4F;272F
48
+ 50;2730
49
+ 51;2731
50
+ 52;2732
51
+ 53;2733
52
+ 54;2734
53
+ 55;2735
54
+ 56;2736
55
+ 57;2737
56
+ 58;2738
57
+ 59;2739
58
+ 5A;273A
59
+ 5B;273B
60
+ 5C;273C
61
+ 5D;273D
62
+ 5E;273E
63
+ 5F;273F
64
+ 60;2740
65
+ 61;2741
66
+ 62;2742
67
+ 63;2743
68
+ 64;2744
69
+ 65;2745
70
+ 66;2746
71
+ 67;2747
72
+ 68;2748
73
+ 69;2749
74
+ 6A;274A
75
+ 6B;274B
76
+ 6C;25CF
77
+ 6D;274D
78
+ 6E;25A0
79
+ 6F;274F
80
+ 70;2750
81
+ 71;2751
82
+ 72;2752
83
+ 73;2753
84
+ 74;2754
85
+ 75;2755
86
+ 76;2756
87
+ 77;2757
88
+ 78;2758
89
+ 79;2759
90
+ 7A;275A
91
+ 7B;275B
92
+ 7C;275C
93
+ 7D;275D
94
+ 7E;275E
95
+ 80;F8D7
96
+ 81;F8D8
97
+ 82;F8D9
98
+ 83;F8DA
99
+ 84;F8DB
100
+ 85;F8DC
101
+ 86;F8DD
102
+ 87;F8DE
103
+ 88;F8DF
104
+ 89;F8E0
105
+ 8A;F8E1
106
+ 8B;F8E2
107
+ 8C;F8E3
108
+ 8D;F8E4
109
+ A1;2761
110
+ A2;2762
111
+ A3;2763
112
+ A4;2764
113
+ A5;2765
114
+ A6;2766
115
+ A7;2767
116
+ A8;2663
117
+ A9;2666
118
+ AA;2665
119
+ AB;2660
120
+ AC;2460
121
+ AD;2461
122
+ AE;2462
123
+ AF;2463
124
+ B0;2464
125
+ B1;2465
126
+ B2;2466
127
+ B3;2467
128
+ B4;2468
129
+ B5;2469
130
+ B6;2776
131
+ B7;2777
132
+ B8;2778
133
+ B9;2779
134
+ BA;277A
135
+ BB;277B
136
+ BC;277C
137
+ BD;277D
138
+ BE;277E
139
+ BF;277F
140
+ C0;2780
141
+ C1;2781
142
+ C2;2782
143
+ C3;2783
144
+ C4;2784
145
+ C5;2785
146
+ C6;2786
147
+ C7;2787
148
+ C8;2788
149
+ C9;2789
150
+ CA;278A
151
+ CB;278B
152
+ CC;278C
153
+ CD;278D
154
+ CE;278E
155
+ CF;278F
156
+ D0;2790
157
+ D1;2791
158
+ D2;2792
159
+ D3;2793
160
+ D4;2794
161
+ D5;2795
162
+ D6;2796
163
+ D7;2797
164
+ D8;2798
165
+ D9;2799
166
+ DA;279A
167
+ DB;279B
168
+ DC;279C
169
+ DD;279D
170
+ DE;279E
171
+ DF;279F
172
+ E0;27A0
173
+ E1;27A1
174
+ E2;27A2
175
+ E3;27A3
176
+ E4;27A4
177
+ E5;27A5
178
+ E6;27A6
179
+ E7;27A7
180
+ E8;27A8
181
+ E9;27A9
182
+ EA;27AA
183
+ EB;27AB
184
+ EC;27AC
185
+ ED;27AD
186
+ EE;27AE
187
+ EF;27AF
188
+ F1;27B1
189
+ F2;27B2
190
+ F3;27B3
191
+ F4;27B4
192
+ F5;27B5
193
+ F6;27B6
194
+ F7;27B7
195
+ F8;27B8
196
+ F9;27B9
197
+ FA;27BA
198
+ FB;27BB
199
+ FC;27BC
200
+ FD;27BD
201
+ FE;27BE
@@ -48,6 +48,7 @@ class PDF::Reader
48
48
  end
49
49
  ################################################################################
50
50
  class MalformedPDFError < RuntimeError; end
51
+ class InvalidObjectError < MalformedPDFError; end
51
52
  class UnsupportedFeatureError < RuntimeError; end
52
53
  end
53
54
  ################################################################################
@@ -52,10 +52,11 @@ class PDF::Reader
52
52
  # with encoding= if required
53
53
  case font
54
54
  when "Symbol" then
55
- self.encoding = PDF::Reader::Encoding.factory("SymbolEncoding")
55
+ self.encoding = PDF::Reader::Encoding.new("SymbolEncoding")
56
56
  when "ZapfDingbats" then
57
- self.encoding = PDF::Reader::Encoding.factory("ZapfDingbatsEncoding")
57
+ self.encoding = PDF::Reader::Encoding.new("ZapfDingbatsEncoding")
58
58
  end
59
+ @basefont = font
59
60
  end
60
61
 
61
62
  def to_utf8(params)
@@ -65,7 +66,7 @@ class PDF::Reader
65
66
  # translate the bytestream into a UTF-8 string.
66
67
  # If an encoding hasn't been specified, assume the text using this
67
68
  # font is in Adobe Standard Encoding.
68
- (encoding || PDF::Reader::Encoding::StandardEncoding.new).to_utf8(params, tounicode)
69
+ (encoding || PDF::Reader::Encoding.new(:StandardEncoding)).to_utf8(params, tounicode)
69
70
  elsif params.class == Array
70
71
  params.collect { |param| self.to_utf8(param) }
71
72
  else
@@ -47,6 +47,7 @@ class PDF::Reader
47
47
  token = @buffer.token
48
48
 
49
49
  case token
50
+ when nil then return nil
50
51
  when "/" then return @buffer.token.to_sym
51
52
  when "<<" then return dictionary()
52
53
  when "[" then return array()
@@ -0,0 +1,19 @@
1
+ class PDF::Reader
2
+ class PrintReceiver
3
+
4
+ attr_accessor :callbacks
5
+
6
+ def initialize
7
+ @callbacks = []
8
+ end
9
+
10
+ def respond_to?(meth)
11
+ return false if [:begin_inline_image_data].include?(meth)
12
+ true
13
+ end
14
+
15
+ def method_missing(methodname, *args)
16
+ puts "#{methodname} => #{args.inspect}"
17
+ end
18
+ end
19
+ end
@@ -37,6 +37,16 @@ class PDF::Reader
37
37
  @xref = {}
38
38
  end
39
39
  ################################################################################
40
+ # returns the PDF version of the current document. Technically this isn't part of the XRef
41
+ # table, but it is one of the lowest level data items in the file, so we've lumped it in
42
+ # with the cross reference code.
43
+ def pdf_version
44
+ @buffer.seek(0)
45
+ m, version = *@buffer.read(8).match(/%PDF-(\d.\d)/)
46
+ raise MalformedPDFError, 'invalid PDF version' if version.nil?
47
+ return version.to_f
48
+ end
49
+ ################################################################################
40
50
  # Read the xref table from the underlying buffer. If offset is specified the table
41
51
  # will be loaded from there, otherwise the default offset will be located and used.
42
52
  #
@@ -122,6 +132,8 @@ class PDF::Reader
122
132
  # ref - a PDF::Reader::Reference object containing an object ID and revision number
123
133
  def offset_for (ref)
124
134
  @xref[ref.id][ref.gen]
135
+ rescue
136
+ raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
125
137
  end
126
138
  ################################################################################
127
139
  # Stores an offset value for a particular PDF object ID and revision number
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Jones
@@ -9,44 +9,53 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-05-20 00:00:00 +10:00
12
+ date: 2008-06-11 00:00:00 +10:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
16
  description: The PDF::Reader library implements a PDF parser conforming as much as possible to the PDF specification from Adobe
17
17
  email: pjones@pmade.com
18
18
  executables:
19
+ - pdf_object
19
20
  - pdf_text
20
21
  - pdf_list_callbacks
21
22
  extensions: []
22
23
 
23
24
  extra_rdoc_files:
24
- - README
25
+ - README.rdoc
25
26
  - TODO
26
27
  - CHANGELOG
27
28
  files:
28
29
  - lib/pdf
30
+ - lib/pdf/reader.rb
29
31
  - lib/pdf/reader
30
- - lib/pdf/reader/explore.rb
31
- - lib/pdf/reader/reference.rb
32
- - lib/pdf/reader/xref.rb
33
- - lib/pdf/reader/token.rb
34
- - lib/pdf/reader/filter.rb
35
- - lib/pdf/reader/text_receiver.rb
36
32
  - lib/pdf/reader/buffer.rb
37
- - lib/pdf/reader/error.rb
38
- - lib/pdf/reader/content.rb
39
- - lib/pdf/reader/parser.rb
40
33
  - lib/pdf/reader/cmap.rb
34
+ - lib/pdf/reader/content.rb
41
35
  - lib/pdf/reader/encoding.rb
42
- - lib/pdf/reader/register_receiver.rb
36
+ - lib/pdf/reader/error.rb
37
+ - lib/pdf/reader/explore.rb
38
+ - lib/pdf/reader/filter.rb
43
39
  - lib/pdf/reader/font.rb
44
40
  - lib/pdf/reader/glyphlist.txt
41
+ - lib/pdf/reader/parser.rb
42
+ - lib/pdf/reader/xref.rb
43
+ - lib/pdf/reader/reference.rb
44
+ - lib/pdf/reader/register_receiver.rb
45
+ - lib/pdf/reader/text_receiver.rb
46
+ - lib/pdf/reader/token.rb
47
+ - lib/pdf/reader/encodings
48
+ - lib/pdf/reader/encodings/mac_expert.txt
49
+ - lib/pdf/reader/encodings/mac_roman.txt
50
+ - lib/pdf/reader/encodings/pdf_doc.txt
51
+ - lib/pdf/reader/encodings/standard.txt
52
+ - lib/pdf/reader/encodings/symbol.txt
53
+ - lib/pdf/reader/encodings/win_ansi.txt
54
+ - lib/pdf/reader/encodings/zapf_dingbats.txt
45
55
  - lib/pdf/reader/stream.rb
46
- - lib/pdf/reader/parser.rb.rej
47
- - lib/pdf/reader.rb
56
+ - lib/pdf/reader/print_receiver.rb
48
57
  - Rakefile
49
- - README
58
+ - README.rdoc
50
59
  - TODO
51
60
  - CHANGELOG
52
61
  has_rdoc: true
@@ -56,7 +65,7 @@ rdoc_options:
56
65
  - --title
57
66
  - PDF::Reader Documentation
58
67
  - --main
59
- - README
68
+ - README.rdoc
60
69
  - -q
61
70
  require_paths:
62
71
  - lib
@@ -1,29 +0,0 @@
1
- ***************
2
- *** 173,178 ****
3
-
4
- obj = parse_token
5
- post_obj = parse_token
6
- case post_obj
7
- when "endobj" then return [obj,nil]
8
- when "stream" then return [obj, stream(obj)]
9
- --- 173,192 ----
10
-
11
- obj = parse_token
12
- post_obj = parse_token
13
- +
14
- + if obj.class == Array
15
- + newobj = Array.new
16
- + obj.each_index {|idx|
17
- + if obj[idx].class == PDF::Reader::Reference
18
- + xo, xs = @xref.object(obj[idx])
19
- + if xs
20
- + newobj << xs
21
- + end
22
- + end
23
- + }
24
- + return newobj.flatten
25
- + end
26
- +
27
- case post_obj
28
- when "endobj" then return [obj,nil]
29
- when "stream" then return [obj, stream(obj)]