rhocr 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/lib/ocr_page.rb +3 -3
- data/rhocr.gemspec +1 -1
- metadata +1 -1
data/Rakefile
CHANGED
data/lib/ocr_page.rb
CHANGED
@@ -10,10 +10,10 @@ class OCRPage < OCRBox
 
   def hocr_lines( hocr_contents)
     hocr_array = []
-    for line in hocr_contents.split(/<span class="ocr_line"/) do
+    for line in hocr_contents.split(/<span class=['"]ocr_line['"]/) do
       line_array = []
-      for ocrx_word in line.scan(/<span class="ocrx_word"[^>]+>[^<]+<\/span>/) do
-        ocrx_word =~ /title="bbox (\d+) (\d+) (\d+) (\d+)">([^<]+)</
+      for ocrx_word in line.scan(/<span class=['"]ocrx_word['"][^>]+>[^<]+<\/span>/) do
+        ocrx_word =~ /title=['"]bbox (\d+) (\d+) (\d+) (\d+)['"]>([^<]+)</
         current_word = OCRXWord.new($1,$2,$3,$4,$5)
         line_array << current_word
       end
data/rhocr.gemspec
CHANGED