rhocr 0.0.2 → 0.0.3
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/lib/ocr_page.rb +3 -3
- data/rhocr.gemspec +1 -1
- metadata +1 -1
data/Rakefile
CHANGED
data/lib/ocr_page.rb
CHANGED
@@ -10,10 +10,10 @@ class OCRPage < OCRBox
|
|
10
10
|
|
11
11
|
def hocr_lines( hocr_contents)
|
12
12
|
hocr_array = []
|
13
|
-
for line in hocr_contents.split(/<span class="ocr_line"/) do
|
13
|
+
for line in hocr_contents.split(/<span class=['"]ocr_line['"]/) do
|
14
14
|
line_array = []
|
15
|
-
for ocrx_word in line.scan(/<span class="ocrx_word"[^>]+>[^<]+<\/span>/) do
|
16
|
-
ocrx_word =~ /title="bbox (\d+) (\d+) (\d+) (\d+)">([^<]+)</
|
15
|
+
for ocrx_word in line.scan(/<span class=['"]ocrx_word['"][^>]+>[^<]+<\/span>/) do
|
16
|
+
ocrx_word =~ /title=['"]bbox (\d+) (\d+) (\d+) (\d+)['"]>([^<]+)</
|
17
17
|
current_word = OCRXWord.new($1,$2,$3,$4,$5)
|
18
18
|
line_array << current_word
|
19
19
|
end
|
data/rhocr.gemspec
CHANGED