ocrsdk 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,311 +0,0 @@
1
- <html>
2
- <head>
3
- <title>lib/ocrsdk/pdf.rb</title>
4
-
5
- <link href="../../report.css?0.4158445434855237" media="screen" rel="stylesheet" type="text/css" />
6
- <script src='../../jquery.js?0.4247975075509465'></script>
7
- <script>
8
- $(function() {
9
-
10
- $('#toggle_test_file').click(function(e) {
11
- e.preventDefault();
12
- if ($('#main_file').hasClass('side_by_side')) {
13
- $('#main_file').removeClass('side_by_side');
14
- } else {
15
- $('#main_file').addClass('side_by_side');
16
- }
17
-
18
- $('#test_file').toggle();
19
- });
20
-
21
- $('#test_file').hide();
22
-
23
- function cleanString(str) {
24
- return $.trim(str.replace(/<[^(<|>)]+>/gi, ''));
25
- }
26
-
27
- $('.line_number').hover(function() {
28
- $(this).attr('title', 'Line: ' + cleanString($(this).html()));
29
- });
30
-
31
- $('.hit_number').hover(function() {
32
- $(this).attr('title', 'Execution Count: ' + cleanString($(this).html()));
33
- });
34
-
35
- $('.code_line').hover(function() {
36
- $(this).attr('title', cleanString($(this).html()));
37
- });
38
-
39
- });
40
- </script>
41
- </head>
42
- <body>
43
- <h1><a href="../../index.html">Coverage Report</a></h1>
44
-
45
- <table cellpadding='0' cellspacing='1'>
46
- <tr class='header'>
47
- <th>File</th>
48
- <th>Lines</th>
49
- <th>Lines Of Code</th>
50
- <th>Untested Lines of Code</th>
51
- <th>Tested %</th>
52
- </tr>
53
- <tr valign='top'>
54
- <td>
55
- lib/ocrsdk/pdf.rb
56
-
57
- </td>
58
- <td>27</td>
59
- <td>12</td>
60
- <td>0</td>
61
- <td>100.0%</td>
62
- </tr>
63
- </table>
64
-
65
- <div>&nbsp;</div>
66
-
67
- <table cellpadding='0' cellspacing='1'>
68
- <tr>
69
- <th>Legend</th>
70
- </tr>
71
- <tr>
72
- <td class='hit'>This line was executed.</td>
73
- </tr>
74
- <tr>
75
- <td class='miss'>This line was not executed!</td>
76
- </tr>
77
- <tr>
78
- <td class='never'>This line doesn't matter.</td>
79
- </tr>
80
- </table>
81
-
82
- <div>&nbsp;</div>
83
-
84
- <div id='content'>
85
- <div id='main_file'>
86
- <table id='main'>
87
-
88
- <tr class="hit" data-hits='#{cov}'"">
89
- <td class='line_number'>1</td>
90
- <td class='code_line'>
91
- <pre>class OCRSDK::PDF < OCRSDK::Image</pre>
92
- </td>
93
- <td class='hit_number'>23</td>
94
- </tr>
95
-
96
- <tr class="never" "">
97
- <td class='line_number'>2</td>
98
- <td class='code_line'>
99
- <pre> # We're on a shaky ground regarding what kind of pdfs</pre>
100
- </td>
101
- <td class='hit_number'></td>
102
- </tr>
103
-
104
- <tr class="never" "">
105
- <td class='line_number'>3</td>
106
- <td class='code_line'>
107
- <pre> # should be recognized and what shouldn't.</pre>
108
- </td>
109
- <td class='hit_number'></td>
110
- </tr>
111
-
112
- <tr class="never" "">
113
- <td class='line_number'>4</td>
114
- <td class='code_line'>
115
- <pre> # Currently we count that if there are</pre>
116
- </td>
117
- <td class='hit_number'></td>
118
- </tr>
119
-
120
- <tr class="never" "">
121
- <td class='line_number'>5</td>
122
- <td class='code_line'>
123
- <pre> # images * 20 > length of text</pre>
124
- </td>
125
- <td class='hit_number'></td>
126
- </tr>
127
-
128
- <tr class="never" "">
129
- <td class='line_number'>6</td>
130
- <td class='code_line'>
131
- <pre> # then this document might need recognition.</pre>
132
- </td>
133
- <td class='hit_number'></td>
134
- </tr>
135
-
136
- <tr class="never" "">
137
- <td class='line_number'>7</td>
138
- <td class='code_line'>
139
- <pre> # Assumption is that there might be a title,</pre>
140
- </td>
141
- <td class='hit_number'></td>
142
- </tr>
143
-
144
- <tr class="never" "">
145
- <td class='line_number'>8</td>
146
- <td class='code_line'>
147
- <pre> # page numbers or credits along with images.</pre>
148
- </td>
149
- <td class='hit_number'></td>
150
- </tr>
151
-
152
- <tr class="hit" data-hits='#{cov}'"">
153
- <td class='line_number'>9</td>
154
- <td class='code_line'>
155
- <pre> def recognizeable?</pre>
156
- </td>
157
- <td class='hit_number'>23</td>
158
- </tr>
159
-
160
- <tr class="hit" data-hits='#{cov}'"">
161
- <td class='line_number'>10</td>
162
- <td class='code_line'>
163
- <pre> reader = PDF::Reader.new @image_path</pre>
164
- </td>
165
- <td class='hit_number'>20</td>
166
- </tr>
167
-
168
- <tr class="never" "">
169
- <td class='line_number'>11</td>
170
- <td class='code_line'>
171
- <pre></pre>
172
- </td>
173
- <td class='hit_number'></td>
174
- </tr>
175
-
176
- <tr class="hit" data-hits='#{cov}'"">
177
- <td class='line_number'>12</td>
178
- <td class='code_line'>
179
- <pre> images = 0</pre>
180
- </td>
181
- <td class='hit_number'>12</td>
182
- </tr>
183
-
184
- <tr class="hit" data-hits='#{cov}'"">
185
- <td class='line_number'>13</td>
186
- <td class='code_line'>
187
- <pre> text = 0</pre>
188
- </td>
189
- <td class='hit_number'>12</td>
190
- </tr>
191
-
192
- <tr class="hit" data-hits='#{cov}'"">
193
- <td class='line_number'>14</td>
194
- <td class='code_line'>
195
- <pre> chars = Set.new</pre>
196
- </td>
197
- <td class='hit_number'>12</td>
198
- </tr>
199
-
200
- <tr class="hit" data-hits='#{cov}'"">
201
- <td class='line_number'>15</td>
202
- <td class='code_line'>
203
- <pre> reader.pages.each do |page|</pre>
204
- </td>
205
- <td class='hit_number'>12</td>
206
- </tr>
207
-
208
- <tr class="hit" data-hits='#{cov}'"">
209
- <td class='line_number'>16</td>
210
- <td class='code_line'>
211
- <pre> text += page.text.length</pre>
212
- </td>
213
- <td class='hit_number'>51</td>
214
- </tr>
215
-
216
- <tr class="hit" data-hits='#{cov}'"">
217
- <td class='line_number'>17</td>
218
- <td class='code_line'>
219
- <pre> chars += page.text.split('').map(&:ord).uniq</pre>
220
- </td>
221
- <td class='hit_number'>51</td>
222
- </tr>
223
-
224
- <tr class="hit" data-hits='#{cov}'"">
225
- <td class='line_number'>18</td>
226
- <td class='code_line'>
227
- <pre> images += page.xobjects.map {|k, v| v.hash[:Subtype]}.count(:Image)</pre>
228
- </td>
229
- <td class='hit_number'>99</td>
230
- </tr>
231
-
232
- <tr class="never" "">
233
- <td class='line_number'>19</td>
234
- <td class='code_line'>
235
- <pre> end</pre>
236
- </td>
237
- <td class='hit_number'></td>
238
- </tr>
239
-
240
- <tr class="never" "">
241
- <td class='line_number'>20</td>
242
- <td class='code_line'>
243
- <pre></pre>
244
- </td>
245
- <td class='hit_number'></td>
246
- </tr>
247
-
248
- <tr class="never" "">
249
- <td class='line_number'>21</td>
250
- <td class='code_line'>
251
- <pre> # count number of distinct characters</pre>
252
- </td>
253
- <td class='hit_number'></td>
254
- </tr>
255
-
256
- <tr class="never" "">
257
- <td class='line_number'>22</td>
258
- <td class='code_line'>
259
- <pre> # in case of "searchable", but incorrectly recognized document</pre>
260
- </td>
261
- <td class='hit_number'></td>
262
- </tr>
263
-
264
- <tr class="hit" data-hits='#{cov}'"">
265
- <td class='line_number'>23</td>
266
- <td class='code_line'>
267
- <pre> images * 20 > text || chars.length < 10</pre>
268
- </td>
269
- <td class='hit_number'>12</td>
270
- </tr>
271
-
272
- <tr class="never" "">
273
- <td class='line_number'>24</td>
274
- <td class='code_line'>
275
- <pre> rescue PDF::Reader::MalformedPDFError, PDF::Reader::UnsupportedFeatureError</pre>
276
- </td>
277
- <td class='hit_number'></td>
278
- </tr>
279
-
280
- <tr class="hit" data-hits='#{cov}'"">
281
- <td class='line_number'>25</td>
282
- <td class='code_line'>
283
- <pre> false</pre>
284
- </td>
285
- <td class='hit_number'>3</td>
286
- </tr>
287
-
288
- <tr class="never" "">
289
- <td class='line_number'>26</td>
290
- <td class='code_line'>
291
- <pre> end</pre>
292
- </td>
293
- <td class='hit_number'></td>
294
- </tr>
295
-
296
- <tr class="never" "">
297
- <td class='line_number'>27</td>
298
- <td class='code_line'>
299
- <pre>end</pre>
300
- </td>
301
- <td class='hit_number'></td>
302
- </tr>
303
-
304
- </table>
305
- </div>
306
-
307
- </div>
308
-
309
- <p>Generated on: 2012-12-01 01:40:30 +0400</p>
310
- </body>
311
- </html>