pdf-reader 0.7.2 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,29 @@
1
+ # A mapping of WinAnsi (win-1252) characters to Unicode. Anything
2
+ # not specified is left unchanged
3
+ 80;20AC
4
+ 82;201A
5
+ 83;0192
6
+ 84;201E
7
+ 85;2026
8
+ 86;2020
9
+ 87;2021
10
+ 88;02C6
11
+ 89;2030
12
+ 8A;0160
13
+ 8B;2039
14
+ 8C;0152
15
+ 8E;017D
16
+ 91;2018
17
+ 92;2019
18
+ 93;201C
19
+ 94;201D
20
+ 95;2022
21
+ 96;2013
22
+ 97;2014
23
+ 98;02DC
24
+ 99;2122
25
+ 9A;0161
26
+ 9B;203A
27
+ 9C;0153
28
+ 9E;017E
29
+ 9F;0178
@@ -0,0 +1,201 @@
1
+ 21;2701
2
+ 22;2702
3
+ 23;2703
4
+ 24;2704
5
+ 25;260E
6
+ 26;2706
7
+ 27;2707
8
+ 28;2708
9
+ 29;2709
10
+ 2A;261B
11
+ 2B;261E
12
+ 2C;270C
13
+ 2D;270D
14
+ 2E;270E
15
+ 2F;270F
16
+ 30;2710
17
+ 31;2711
18
+ 32;2712
19
+ 33;2713
20
+ 34;2714
21
+ 35;2715
22
+ 36;2716
23
+ 37;2717
24
+ 38;2718
25
+ 39;2719
26
+ 3A;271A
27
+ 3B;271B
28
+ 3C;271C
29
+ 3D;271D
30
+ 3E;271E
31
+ 3F;271F
32
+ 40;2720
33
+ 41;2721
34
+ 42;2722
35
+ 43;2723
36
+ 44;2724
37
+ 45;2725
38
+ 46;2726
39
+ 47;2727
40
+ 48;2605
41
+ 49;2729
42
+ 4A;272A
43
+ 4B;272B
44
+ 4C;272C
45
+ 4D;272D
46
+ 4E;272E
47
+ 4F;272F
48
+ 50;2730
49
+ 51;2731
50
+ 52;2732
51
+ 53;2733
52
+ 54;2734
53
+ 55;2735
54
+ 56;2736
55
+ 57;2737
56
+ 58;2738
57
+ 59;2739
58
+ 5A;273A
59
+ 5B;273B
60
+ 5C;273C
61
+ 5D;273D
62
+ 5E;273E
63
+ 5F;273F
64
+ 60;2740
65
+ 61;2741
66
+ 62;2742
67
+ 63;2743
68
+ 64;2744
69
+ 65;2745
70
+ 66;2746
71
+ 67;2747
72
+ 68;2748
73
+ 69;2749
74
+ 6A;274A
75
+ 6B;274B
76
+ 6C;25CF
77
+ 6D;274D
78
+ 6E;25A0
79
+ 6F;274F
80
+ 70;2750
81
+ 71;2751
82
+ 72;2752
83
+ 73;2753
84
+ 74;2754
85
+ 75;2755
86
+ 76;2756
87
+ 77;2757
88
+ 78;2758
89
+ 79;2759
90
+ 7A;275A
91
+ 7B;275B
92
+ 7C;275C
93
+ 7D;275D
94
+ 7E;275E
95
+ 80;F8D7
96
+ 81;F8D8
97
+ 82;F8D9
98
+ 83;F8DA
99
+ 84;F8DB
100
+ 85;F8DC
101
+ 86;F8DD
102
+ 87;F8DE
103
+ 88;F8DF
104
+ 89;F8E0
105
+ 8A;F8E1
106
+ 8B;F8E2
107
+ 8C;F8E3
108
+ 8D;F8E4
109
+ A1;2761
110
+ A2;2762
111
+ A3;2763
112
+ A4;2764
113
+ A5;2765
114
+ A6;2766
115
+ A7;2767
116
+ A8;2663
117
+ A9;2666
118
+ AA;2665
119
+ AB;2660
120
+ AC;2460
121
+ AD;2461
122
+ AE;2462
123
+ AF;2463
124
+ B0;2464
125
+ B1;2465
126
+ B2;2466
127
+ B3;2467
128
+ B4;2468
129
+ B5;2469
130
+ B6;2776
131
+ B7;2777
132
+ B8;2778
133
+ B9;2779
134
+ BA;277A
135
+ BB;277B
136
+ BC;277C
137
+ BD;277D
138
+ BE;277E
139
+ BF;277F
140
+ C0;2780
141
+ C1;2781
142
+ C2;2782
143
+ C3;2783
144
+ C4;2784
145
+ C5;2785
146
+ C6;2786
147
+ C7;2787
148
+ C8;2788
149
+ C9;2789
150
+ CA;278A
151
+ CB;278B
152
+ CC;278C
153
+ CD;278D
154
+ CE;278E
155
+ CF;278F
156
+ D0;2790
157
+ D1;2791
158
+ D2;2792
159
+ D3;2793
160
+ D4;2794
161
+ D5;2795
162
+ D6;2796
163
+ D7;2797
164
+ D8;2798
165
+ D9;2799
166
+ DA;279A
167
+ DB;279B
168
+ DC;279C
169
+ DD;279D
170
+ DE;279E
171
+ DF;279F
172
+ E0;27A0
173
+ E1;27A1
174
+ E2;27A2
175
+ E3;27A3
176
+ E4;27A4
177
+ E5;27A5
178
+ E6;27A6
179
+ E7;27A7
180
+ E8;27A8
181
+ E9;27A9
182
+ EA;27AA
183
+ EB;27AB
184
+ EC;27AC
185
+ ED;27AD
186
+ EE;27AE
187
+ EF;27AF
188
+ F1;27B1
189
+ F2;27B2
190
+ F3;27B3
191
+ F4;27B4
192
+ F5;27B5
193
+ F6;27B6
194
+ F7;27B7
195
+ F8;27B8
196
+ F9;27B9
197
+ FA;27BA
198
+ FB;27BB
199
+ FC;27BC
200
+ FD;27BD
201
+ FE;27BE
@@ -48,6 +48,7 @@ class PDF::Reader
48
48
  end
49
49
  ################################################################################
50
50
  class MalformedPDFError < RuntimeError; end
51
+ class InvalidObjectError < MalformedPDFError; end
51
52
  class UnsupportedFeatureError < RuntimeError; end
52
53
  end
53
54
  ################################################################################
@@ -52,10 +52,11 @@ class PDF::Reader
52
52
  # with encoding= if required
53
53
  case font
54
54
  when "Symbol" then
55
- self.encoding = PDF::Reader::Encoding.factory("SymbolEncoding")
55
+ self.encoding = PDF::Reader::Encoding.new("SymbolEncoding")
56
56
  when "ZapfDingbats" then
57
- self.encoding = PDF::Reader::Encoding.factory("ZapfDingbatsEncoding")
57
+ self.encoding = PDF::Reader::Encoding.new("ZapfDingbatsEncoding")
58
58
  end
59
+ @basefont = font
59
60
  end
60
61
 
61
62
  def to_utf8(params)
@@ -65,7 +66,7 @@ class PDF::Reader
65
66
  # translate the bytestream into a UTF-8 string.
66
67
  # If an encoding hasn't been specified, assume the text using this
67
68
  # font is in Adobe Standard Encoding.
68
- (encoding || PDF::Reader::Encoding::StandardEncoding.new).to_utf8(params, tounicode)
69
+ (encoding || PDF::Reader::Encoding.new(:StandardEncoding)).to_utf8(params, tounicode)
69
70
  elsif params.class == Array
70
71
  params.collect { |param| self.to_utf8(param) }
71
72
  else
@@ -47,6 +47,7 @@ class PDF::Reader
47
47
  token = @buffer.token
48
48
 
49
49
  case token
50
+ when nil then return nil
50
51
  when "/" then return @buffer.token.to_sym
51
52
  when "<<" then return dictionary()
52
53
  when "[" then return array()
@@ -0,0 +1,19 @@
1
+ class PDF::Reader
2
+ class PrintReceiver
3
+
4
+ attr_accessor :callbacks
5
+
6
+ def initialize
7
+ @callbacks = []
8
+ end
9
+
10
+ def respond_to?(meth)
11
+ return false if [:begin_inline_image_data].include?(meth)
12
+ true
13
+ end
14
+
15
+ def method_missing(methodname, *args)
16
+ puts "#{methodname} => #{args.inspect}"
17
+ end
18
+ end
19
+ end
@@ -37,6 +37,16 @@ class PDF::Reader
37
37
  @xref = {}
38
38
  end
39
39
  ################################################################################
40
+ # returns the PDF version of the current document. Technically this isn't part of the XRef
41
+ # table, but it is one of the lowest level data items in the file, so we've lumped it in
42
+ # with the cross reference code.
43
+ def pdf_version
44
+ @buffer.seek(0)
45
+ m, version = *@buffer.read(8).match(/%PDF-(\d.\d)/)
46
+ raise MalformedPDFError, 'invalid PDF version' if version.nil?
47
+ return version.to_f
48
+ end
49
+ ################################################################################
40
50
  # Read the xref table from the underlying buffer. If offset is specified the table
41
51
  # will be loaded from there, otherwise the default offset will be located and used.
42
52
  #
@@ -122,6 +132,8 @@ class PDF::Reader
122
132
  # ref - a PDF::Reader::Reference object containing an object ID and revision number
123
133
  def offset_for (ref)
124
134
  @xref[ref.id][ref.gen]
135
+ rescue
136
+ raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
125
137
  end
126
138
  ################################################################################
127
139
  # Stores an offset value for a particular PDF object ID and revision number
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Jones
@@ -9,44 +9,53 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-05-20 00:00:00 +10:00
12
+ date: 2008-06-11 00:00:00 +10:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
16
  description: The PDF::Reader library implements a PDF parser conforming as much as possible to the PDF specification from Adobe
17
17
  email: pjones@pmade.com
18
18
  executables:
19
+ - pdf_object
19
20
  - pdf_text
20
21
  - pdf_list_callbacks
21
22
  extensions: []
22
23
 
23
24
  extra_rdoc_files:
24
- - README
25
+ - README.rdoc
25
26
  - TODO
26
27
  - CHANGELOG
27
28
  files:
28
29
  - lib/pdf
30
+ - lib/pdf/reader.rb
29
31
  - lib/pdf/reader
30
- - lib/pdf/reader/explore.rb
31
- - lib/pdf/reader/reference.rb
32
- - lib/pdf/reader/xref.rb
33
- - lib/pdf/reader/token.rb
34
- - lib/pdf/reader/filter.rb
35
- - lib/pdf/reader/text_receiver.rb
36
32
  - lib/pdf/reader/buffer.rb
37
- - lib/pdf/reader/error.rb
38
- - lib/pdf/reader/content.rb
39
- - lib/pdf/reader/parser.rb
40
33
  - lib/pdf/reader/cmap.rb
34
+ - lib/pdf/reader/content.rb
41
35
  - lib/pdf/reader/encoding.rb
42
- - lib/pdf/reader/register_receiver.rb
36
+ - lib/pdf/reader/error.rb
37
+ - lib/pdf/reader/explore.rb
38
+ - lib/pdf/reader/filter.rb
43
39
  - lib/pdf/reader/font.rb
44
40
  - lib/pdf/reader/glyphlist.txt
41
+ - lib/pdf/reader/parser.rb
42
+ - lib/pdf/reader/xref.rb
43
+ - lib/pdf/reader/reference.rb
44
+ - lib/pdf/reader/register_receiver.rb
45
+ - lib/pdf/reader/text_receiver.rb
46
+ - lib/pdf/reader/token.rb
47
+ - lib/pdf/reader/encodings
48
+ - lib/pdf/reader/encodings/mac_expert.txt
49
+ - lib/pdf/reader/encodings/mac_roman.txt
50
+ - lib/pdf/reader/encodings/pdf_doc.txt
51
+ - lib/pdf/reader/encodings/standard.txt
52
+ - lib/pdf/reader/encodings/symbol.txt
53
+ - lib/pdf/reader/encodings/win_ansi.txt
54
+ - lib/pdf/reader/encodings/zapf_dingbats.txt
45
55
  - lib/pdf/reader/stream.rb
46
- - lib/pdf/reader/parser.rb.rej
47
- - lib/pdf/reader.rb
56
+ - lib/pdf/reader/print_receiver.rb
48
57
  - Rakefile
49
- - README
58
+ - README.rdoc
50
59
  - TODO
51
60
  - CHANGELOG
52
61
  has_rdoc: true
@@ -56,7 +65,7 @@ rdoc_options:
56
65
  - --title
57
66
  - PDF::Reader Documentation
58
67
  - --main
59
- - README
68
+ - README.rdoc
60
69
  - -q
61
70
  require_paths:
62
71
  - lib
@@ -1,29 +0,0 @@
1
- ***************
2
- *** 173,178 ****
3
-
4
- obj = parse_token
5
- post_obj = parse_token
6
- case post_obj
7
- when "endobj" then return [obj,nil]
8
- when "stream" then return [obj, stream(obj)]
9
- --- 173,192 ----
10
-
11
- obj = parse_token
12
- post_obj = parse_token
13
- +
14
- + if obj.class == Array
15
- + newobj = Array.new
16
- + obj.each_index {|idx|
17
- + if obj[idx].class == PDF::Reader::Reference
18
- + xo, xs = @xref.object(obj[idx])
19
- + if xs
20
- + newobj << xs
21
- + end
22
- + end
23
- + }
24
- + return newobj.flatten
25
- + end
26
- +
27
- case post_obj
28
- when "endobj" then return [obj,nil]
29
- when "stream" then return [obj, stream(obj)]