oorb 0.2.12 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/doc/OORB.html +212 -51
- data/doc/_index.html +1 -1
- data/doc/file.CODE_OF_CONDUCT.html +1 -1
- data/doc/file.LICENSE.html +1 -1
- data/doc/file.README.html +4 -1
- data/doc/index.html +4 -1
- data/doc/method_list.html +8 -2
- data/doc/top-level-namespace.html +1 -1
- data/lib/oorb.rb +54 -18
- data/lib/oorb/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0ed182c9ec1317cce1cd3cabc6b80168bd501fd9
|
|
4
|
+
data.tar.gz: 42d91e754f59f21f9875ec769c3a13c93ffe937f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d270cfbd8d1602e9aa8a318fec1bbd3ffc2ca07c6b7de6bbfc4bf27ee8eeeed3727d9ae3f5457f40edc396ca6e015fde9a882fe7b1674c626c9bc5cc258d6bb7
|
|
7
|
+
data.tar.gz: 23117c474e77dd27a6d53b95c5f04514cb200af03f6737a05f89df6f5d633b3929f2119aeeaa28c1afec54e91dbb4e546121947a92349c6c821815e68b1aecec
|
data/README.md
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
[![Build Status](https://travis-ci.org/Calvyn82/oorb.svg?branch=master)](https://travis-ci.org/Calvyn82/oorb)
|
|
2
|
+
|
|
1
3
|
# Oorb
|
|
2
4
|
|
|
3
5
|
OCR Optimized Regex Builder is a command line tool for taking user input and converting it to regular expressions optimized for capturing characters that are commonly mistaken by optical character recognition engines.
|
data/doc/OORB.html
CHANGED
|
@@ -129,27 +129,45 @@
|
|
|
129
129
|
|
|
130
130
|
</div>
|
|
131
131
|
</dt>
|
|
132
|
-
<dd><pre class="code"><span class='lbrace'>{</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>
|
|
133
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>h</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>b</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
132
|
+
<dd><pre class="code"><span class='lbrace'>{</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>a</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>9</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
134
133
|
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>b</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>h</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
135
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>y</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>v</span><span class='words_sep'> </span><span class='tstring_content'>j</span><span class='words_sep'> </span><span class='tstring_content'>7</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
136
134
|
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>c</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>e</span><span class='words_sep'> </span><span class='tstring_content'>f</span><span class='words_sep'> </span><span class='tstring_content'>d</span><span class='words_sep'> </span><span class='tstring_content'>o</span><span class='words_sep'> </span><span class='tstring_content'>6</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
137
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>
|
|
135
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>d</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>3</span><span class='words_sep'> </span><span class='tstring_content'>0</span><span class='words_sep'> </span><span class='tstring_content'>o</span><span class='words_sep'> </span><span class='tstring_content'>7</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
138
136
|
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>e</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>6</span><span class='words_sep'> </span><span class='tstring_content'>c</span><span class='words_sep'> </span><span class='tstring_content'>d</span><span class='words_sep'> </span><span class='tstring_content'>f</span><span class='words_sep'> </span><span class='tstring_content'>4</span><span class='words_sep'> </span><span class='tstring_content'>3</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
139
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>o</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>c</span><span class='words_sep'> </span><span class='tstring_content'>6</span><span class='words_sep'> </span><span class='tstring_content'>0</span><span class='words_sep'> </span><span class='tstring_content'>3</span><span class='words_sep'> </span><span class='tstring_content'>d</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
140
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>t</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>i</span><span class='words_sep'> </span><span class='tstring_content'>l</span><span class='words_sep'> </span><span class='tstring_content'>4</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
141
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>a</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>9</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
142
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>l</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>1</span><span class='words_sep'> </span><span class='tstring_content'>i</span><span class='words_sep'> </span><span class='tstring_content'>t</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
143
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>v</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>yu</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
144
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>u</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>v</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
145
137
|
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>f</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>c</span><span class='words_sep'> </span><span class='tstring_content'>s</span><span class='words_sep'> </span><span class='tstring_content'>p</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
146
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>d</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>3</span><span class='words_sep'> </span><span class='tstring_content'>0</span><span class='words_sep'> </span><span class='tstring_content'>o</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
147
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>z</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>2</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
148
138
|
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>g</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>9</span><span class='words_sep'> </span><span class='tstring_content'>8</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
139
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>h</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>b</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
140
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>i</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>l</span><span class='words_sep'> </span><span class='tstring_content'>1</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
149
141
|
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>j</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>y</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
150
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>
|
|
142
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>l</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>1</span><span class='words_sep'> </span><span class='tstring_content'>i</span><span class='words_sep'> </span><span class='tstring_content'>t</span><span class='words_sep'> </span><span class='tstring_content'>7</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
151
143
|
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>n</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>r</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
152
|
-
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>
|
|
144
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>o</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>c</span><span class='words_sep'> </span><span class='tstring_content'>6</span><span class='words_sep'> </span><span class='tstring_content'>0</span><span class='words_sep'> </span><span class='tstring_content'>3</span><span class='words_sep'> </span><span class='tstring_content'>d</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
145
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>p</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>fr</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
146
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>r</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>np</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
147
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>s</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>f</span><span class='words_sep'> </span><span class='tstring_content'>l</span><span class='words_sep'> </span><span class='tstring_content'>j</span><span class='words_sep'> </span><span class='tstring_content'>i</span><span class='words_sep'> </span><span class='tstring_content'>3</span><span class='words_sep'> </span><span class='tstring_content'>8</span><span class='words_sep'> </span><span class='tstring_content'>5</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
148
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>t</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>i</span><span class='words_sep'> </span><span class='tstring_content'>l</span><span class='words_sep'> </span><span class='tstring_content'>4</span><span class='words_sep'> </span><span class='tstring_content'>7</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
149
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>u</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>v</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
150
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>v</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>yu</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
151
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>y</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>v</span><span class='words_sep'> </span><span class='tstring_content'>j</span><span class='words_sep'> </span><span class='tstring_content'>7</span><span class='words_sep'>)</span><span class='comma'>,</span>
|
|
152
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>z</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='qwords_beg'>%w(</span><span class='tstring_content'>2</span><span class='words_sep'>)</span>
|
|
153
|
+
<span class='rbrace'>}</span></pre></dd>
|
|
154
|
+
|
|
155
|
+
<dt id="SECTIONS-constant" class="">SECTIONS =
|
|
156
|
+
<div class="docstring">
|
|
157
|
+
<div class="discussion">
|
|
158
|
+
|
|
159
|
+
<p>Letters that are commonly mistakenly split up and their replacements</p>
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
</div>
|
|
163
|
+
</div>
|
|
164
|
+
<div class="tags">
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
</div>
|
|
168
|
+
</dt>
|
|
169
|
+
<dd><pre class="code"><span class='lbrace'>{</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>m</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>[mnr][nr]?</span><span class='tstring_end'>'</span></span><span class='comma'>,</span>
|
|
170
|
+
<span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>w</span><span class='tstring_end'>'</span></span> <span class='op'>=></span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>[wvu][vu]?</span><span class='tstring_end'>'</span></span>
|
|
153
171
|
<span class='rbrace'>}</span></pre></dd>
|
|
154
172
|
|
|
155
173
|
</dl>
|
|
@@ -214,6 +232,30 @@
|
|
|
214
232
|
<p>Builds an OCR optimized regular expression from a string.</p>
|
|
215
233
|
</div></span>
|
|
216
234
|
|
|
235
|
+
</li>
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
<li class="public ">
|
|
239
|
+
<span class="summary_signature">
|
|
240
|
+
|
|
241
|
+
<a href="#build_section-instance_method" title="#build_section (instance method)">- (String) <strong>build_section</strong>(character) </a>
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
</span>
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
<span class="summary_desc"><div class='inline'>
|
|
256
|
+
<p>Builds a section from an input letter.</p>
|
|
257
|
+
</div></span>
|
|
258
|
+
|
|
217
259
|
</li>
|
|
218
260
|
|
|
219
261
|
|
|
@@ -368,7 +410,7 @@ whitespace character.</p>
|
|
|
368
410
|
|
|
369
411
|
—
|
|
370
412
|
<div class='inline'>
|
|
371
|
-
<p>if the argument isn't a single character string</p>
|
|
413
|
+
<p>if the argument isn't a single character string from OORB::LETTERS</p>
|
|
372
414
|
</div>
|
|
373
415
|
|
|
374
416
|
</li>
|
|
@@ -381,20 +423,22 @@ whitespace character.</p>
|
|
|
381
423
|
<pre class="lines">
|
|
382
424
|
|
|
383
425
|
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
426
|
+
77
|
|
427
|
+
78
|
|
428
|
+
79
|
|
429
|
+
80
|
|
430
|
+
81
|
|
431
|
+
82
|
|
432
|
+
83
|
|
433
|
+
84</pre>
|
|
391
434
|
</td>
|
|
392
435
|
<td>
|
|
393
|
-
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line
|
|
436
|
+
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line 77</span>
|
|
394
437
|
|
|
395
438
|
<span class='kw'>def</span> <span class='id identifier rubyid_build_collection'>build_collection</span><span class='lparen'>(</span><span class='id identifier rubyid_character'>character</span><span class='rparen'>)</span>
|
|
396
439
|
<span class='kw'>unless</span> <span class='const'>LETTERS</span><span class='lbracket'>[</span><span class='id identifier rubyid_character'>character</span><span class='rbracket'>]</span>
|
|
397
|
-
<span class='id identifier rubyid_raise'>raise</span> <span class='const'>ArgumentError</span><span class='comma'>,</span>
|
|
440
|
+
<span class='id identifier rubyid_raise'>raise</span> <span class='const'>ArgumentError</span><span class='comma'>,</span>
|
|
441
|
+
<span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>Valid arguments are a single character from </span><span class='embexpr_beg'>#{</span><span class='const'>LETTERS</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span><span class='period'>.</span><span class='id identifier rubyid_join'>join</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>, </span><span class='tstring_end'>"</span></span><span class='rparen'>)</span><span class='embexpr_end'>}</span><span class='tstring_content'>.</span><span class='tstring_end'>"</span></span>
|
|
398
442
|
<span class='kw'>end</span>
|
|
399
443
|
<span class='const'>LETTERS</span><span class='lbracket'>[</span><span class='id identifier rubyid_character'>character</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_x'>x</span><span class='op'>|</span> <span class='id identifier rubyid_character'>character</span> <span class='op'><<</span> <span class='id identifier rubyid_x'>x</span> <span class='rbrace'>}</span>
|
|
400
444
|
<span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>[</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_character'>character</span><span class='embexpr_end'>}</span><span class='tstring_content'>]</span><span class='tstring_end'>"</span></span>
|
|
@@ -469,23 +513,140 @@ mistakes</p>
|
|
|
469
513
|
<pre class="lines">
|
|
470
514
|
|
|
471
515
|
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
516
|
+
52
|
|
517
|
+
53
|
|
518
|
+
54
|
|
519
|
+
55
|
|
520
|
+
56
|
|
521
|
+
57
|
|
522
|
+
58
|
|
523
|
+
59
|
|
524
|
+
60
|
|
525
|
+
61
|
|
526
|
+
62</pre>
|
|
477
527
|
</td>
|
|
478
528
|
<td>
|
|
479
|
-
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line
|
|
529
|
+
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line 52</span>
|
|
480
530
|
|
|
481
531
|
<span class='kw'>def</span> <span class='id identifier rubyid_build_regex'>build_regex</span><span class='lparen'>(</span><span class='id identifier rubyid_input'>input</span><span class='rparen'>)</span>
|
|
482
532
|
<span class='id identifier rubyid_input'>input</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='period'>.</span><span class='id identifier rubyid_chars'>chars</span><span class='period'>.</span><span class='id identifier rubyid_map'>map</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_char'>char</span><span class='op'>|</span>
|
|
483
|
-
<span class='
|
|
533
|
+
<span class='kw'>if</span> <span class='const'>LETTERS</span><span class='period'>.</span><span class='id identifier rubyid_has_key?'>has_key?</span><span class='lparen'>(</span><span class='id identifier rubyid_char'>char</span><span class='rparen'>)</span>
|
|
534
|
+
<span class='id identifier rubyid_build_collection'>build_collection</span><span class='lparen'>(</span><span class='id identifier rubyid_char'>char</span><span class='rparen'>)</span>
|
|
535
|
+
<span class='kw'>elsif</span> <span class='const'>SECTIONS</span><span class='period'>.</span><span class='id identifier rubyid_has_key?'>has_key?</span><span class='lparen'>(</span><span class='id identifier rubyid_char'>char</span><span class='rparen'>)</span>
|
|
536
|
+
<span class='id identifier rubyid_build_section'>build_section</span><span class='lparen'>(</span><span class='id identifier rubyid_char'>char</span><span class='rparen'>)</span>
|
|
537
|
+
<span class='kw'>else</span>
|
|
538
|
+
<span class='id identifier rubyid_escape'>escape</span><span class='lparen'>(</span><span class='id identifier rubyid_char'>char</span><span class='rparen'>)</span>
|
|
539
|
+
<span class='kw'>end</span>
|
|
484
540
|
<span class='kw'>end</span><span class='period'>.</span><span class='id identifier rubyid_join'>join</span>
|
|
485
541
|
<span class='kw'>end</span></pre>
|
|
486
542
|
</td>
|
|
487
543
|
</tr>
|
|
488
544
|
</table>
|
|
545
|
+
</div>
|
|
546
|
+
|
|
547
|
+
<div class="method_details ">
|
|
548
|
+
<h3 class="signature " id="build_section-instance_method">
|
|
549
|
+
|
|
550
|
+
- (<tt>String</tt>) <strong>build_section</strong>(character)
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
</h3><div class="docstring">
|
|
557
|
+
<div class="discussion">
|
|
558
|
+
|
|
559
|
+
<p>Builds a section from an input letter.</p>
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
</div>
|
|
563
|
+
</div>
|
|
564
|
+
<div class="tags">
|
|
565
|
+
<p class="tag_title">Parameters:</p>
|
|
566
|
+
<ul class="param">
|
|
567
|
+
|
|
568
|
+
<li>
|
|
569
|
+
|
|
570
|
+
<span class='name'>character</span>
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
<span class='type'>(<tt>String</tt>)</span>
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
—
|
|
578
|
+
<div class='inline'>
|
|
579
|
+
<p>made of a single character</p>
|
|
580
|
+
</div>
|
|
581
|
+
|
|
582
|
+
</li>
|
|
583
|
+
|
|
584
|
+
</ul>
|
|
585
|
+
|
|
586
|
+
<p class="tag_title">Returns:</p>
|
|
587
|
+
<ul class="return">
|
|
588
|
+
|
|
589
|
+
<li>
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
<span class='type'>(<tt>String</tt>)</span>
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
—
|
|
597
|
+
<div class='inline'>
|
|
598
|
+
<p>section of commonly split characters with optional second character</p>
|
|
599
|
+
</div>
|
|
600
|
+
|
|
601
|
+
</li>
|
|
602
|
+
|
|
603
|
+
</ul>
|
|
604
|
+
<p class="tag_title">Raises:</p>
|
|
605
|
+
<ul class="raise">
|
|
606
|
+
|
|
607
|
+
<li>
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
<span class='type'>(<tt>ArgumentError</tt>)</span>
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
—
|
|
615
|
+
<div class='inline'>
|
|
616
|
+
<p>if the argument isn't a single character string from OORB::SECTIONS</p>
|
|
617
|
+
</div>
|
|
618
|
+
|
|
619
|
+
</li>
|
|
620
|
+
|
|
621
|
+
</ul>
|
|
622
|
+
|
|
623
|
+
</div><table class="source_code">
|
|
624
|
+
<tr>
|
|
625
|
+
<td>
|
|
626
|
+
<pre class="lines">
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
91
|
|
630
|
+
92
|
|
631
|
+
93
|
|
632
|
+
94
|
|
633
|
+
95
|
|
634
|
+
96
|
|
635
|
+
97</pre>
|
|
636
|
+
</td>
|
|
637
|
+
<td>
|
|
638
|
+
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line 91</span>
|
|
639
|
+
|
|
640
|
+
<span class='kw'>def</span> <span class='id identifier rubyid_build_section'>build_section</span><span class='lparen'>(</span><span class='id identifier rubyid_character'>character</span><span class='rparen'>)</span>
|
|
641
|
+
<span class='kw'>unless</span> <span class='const'>SECTIONS</span><span class='lbracket'>[</span><span class='id identifier rubyid_character'>character</span><span class='rbracket'>]</span>
|
|
642
|
+
<span class='id identifier rubyid_raise'>raise</span> <span class='const'>ArgumentError</span><span class='comma'>,</span>
|
|
643
|
+
<span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>Valid arguments are a single character from </span><span class='embexpr_beg'>#{</span><span class='const'>SECTIONS</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span><span class='period'>.</span><span class='id identifier rubyid_join'>join</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>, </span><span class='tstring_end'>"</span></span><span class='rparen'>)</span><span class='embexpr_end'>}</span><span class='tstring_content'>.</span><span class='tstring_end'>"</span></span>
|
|
644
|
+
<span class='kw'>end</span>
|
|
645
|
+
<span class='const'>SECTIONS</span><span class='lbracket'>[</span><span class='id identifier rubyid_character'>character</span><span class='rbracket'>]</span>
|
|
646
|
+
<span class='kw'>end</span></pre>
|
|
647
|
+
</td>
|
|
648
|
+
</tr>
|
|
649
|
+
</table>
|
|
489
650
|
</div>
|
|
490
651
|
|
|
491
652
|
<div class="method_details ">
|
|
@@ -553,12 +714,12 @@ whitespace character</p>
|
|
|
553
714
|
<pre class="lines">
|
|
554
715
|
|
|
555
716
|
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
717
|
+
68
|
|
718
|
+
69
|
|
719
|
+
70</pre>
|
|
559
720
|
</td>
|
|
560
721
|
<td>
|
|
561
|
-
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line
|
|
722
|
+
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line 68</span>
|
|
562
723
|
|
|
563
724
|
<span class='kw'>def</span> <span class='id identifier rubyid_combine_whitespace'>combine_whitespace</span><span class='lparen'>(</span><span class='id identifier rubyid_string'>string</span><span class='rparen'>)</span>
|
|
564
725
|
<span class='id identifier rubyid_string'>string</span><span class='period'>.</span><span class='id identifier rubyid_gsub'>gsub</span><span class='lparen'>(</span><span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>\s+</span><span class='regexp_end'>/</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>\s</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span>
|
|
@@ -650,15 +811,15 @@ whitespace character</p>
|
|
|
650
811
|
<pre class="lines">
|
|
651
812
|
|
|
652
813
|
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
814
|
+
104
|
|
815
|
+
105
|
|
816
|
+
106
|
|
817
|
+
107
|
|
818
|
+
108
|
|
819
|
+
109</pre>
|
|
659
820
|
</td>
|
|
660
821
|
<td>
|
|
661
|
-
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line
|
|
822
|
+
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line 104</span>
|
|
662
823
|
|
|
663
824
|
<span class='kw'>def</span> <span class='id identifier rubyid_escape'>escape</span><span class='lparen'>(</span><span class='id identifier rubyid_character'>character</span><span class='rparen'>)</span>
|
|
664
825
|
<span class='kw'>if</span> <span class='id identifier rubyid_character'>character</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>></span> <span class='int'>1</span>
|
|
@@ -697,16 +858,16 @@ whitespace character</p>
|
|
|
697
858
|
<pre class="lines">
|
|
698
859
|
|
|
699
860
|
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
861
|
+
40
|
|
862
|
+
41
|
|
863
|
+
42
|
|
864
|
+
43
|
|
865
|
+
44
|
|
866
|
+
45
|
|
867
|
+
46</pre>
|
|
707
868
|
</td>
|
|
708
869
|
<td>
|
|
709
|
-
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line
|
|
870
|
+
<pre class="code"><span class="info file"># File 'lib/oorb.rb', line 40</span>
|
|
710
871
|
|
|
711
872
|
<span class='kw'>def</span> <span class='id identifier rubyid_run'>run</span>
|
|
712
873
|
<span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>Waiting for a statement.</span><span class='tstring_end'>"</span></span>
|
|
@@ -725,7 +886,7 @@ whitespace character</p>
|
|
|
725
886
|
</div>
|
|
726
887
|
|
|
727
888
|
<div id="footer">
|
|
728
|
-
Generated on
|
|
889
|
+
Generated on Thu Jun 16 09:56:22 2016 by
|
|
729
890
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
|
730
891
|
0.8.7.6 (ruby-2.3.0).
|
|
731
892
|
</div>
|
data/doc/_index.html
CHANGED
|
@@ -107,7 +107,7 @@
|
|
|
107
107
|
</div>
|
|
108
108
|
|
|
109
109
|
<div id="footer">
|
|
110
|
-
Generated on
|
|
110
|
+
Generated on Thu Jun 16 09:56:21 2016 by
|
|
111
111
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
|
112
112
|
0.8.7.6 (ruby-2.3.0).
|
|
113
113
|
</div>
|
|
@@ -118,7 +118,7 @@ href="http://contributor-covenant.org/version/1/3/0/">contributor-covenant.org/v
|
|
|
118
118
|
</div></div>
|
|
119
119
|
|
|
120
120
|
<div id="footer">
|
|
121
|
-
Generated on
|
|
121
|
+
Generated on Thu Jun 16 09:56:22 2016 by
|
|
122
122
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
|
123
123
|
0.8.7.6 (ruby-2.3.0).
|
|
124
124
|
</div>
|
data/doc/file.LICENSE.html
CHANGED
|
@@ -64,7 +64,7 @@
|
|
|
64
64
|
<div id="content"><div id='filecontents'>The MIT License (MIT)<br/><br/>Copyright (c) 2016 Calvyn82<br/><br/>Permission is hereby granted, free of charge, to any person obtaining a copy<br/>of this software and associated documentation files (the "Software"), to deal<br/>in the Software without restriction, including without limitation the rights<br/>to use, copy, modify, merge, publish, distribute, sublicense, and/or sell<br/>copies of the Software, and to permit persons to whom the Software is<br/>furnished to do so, subject to the following conditions:<br/><br/>The above copyright notice and this permission notice shall be included in<br/>all copies or substantial portions of the Software.<br/><br/>THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br/>IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br/>FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE<br/>AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br/>LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,<br/>OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN<br/>THE SOFTWARE.</div></div>
|
|
65
65
|
|
|
66
66
|
<div id="footer">
|
|
67
|
-
Generated on
|
|
67
|
+
Generated on Thu Jun 16 09:56:22 2016 by
|
|
68
68
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
|
69
69
|
0.8.7.6 (ruby-2.3.0).
|
|
70
70
|
</div>
|
data/doc/file.README.html
CHANGED
|
@@ -62,6 +62,9 @@
|
|
|
62
62
|
<iframe id="search_frame"></iframe>
|
|
63
63
|
|
|
64
64
|
<div id="content"><div id='filecontents'>
|
|
65
|
+
<p><a href="https://travis-ci.org/Calvyn82/oorb"><img
|
|
66
|
+
src="https://travis-ci.org/Calvyn82/oorb.svg?branch=master"></a></p>
|
|
67
|
+
|
|
65
68
|
<h1 id="label-Oorb">Oorb</h1>
|
|
66
69
|
|
|
67
70
|
<p>OCR Optimized Regex Builder is a command line tool for taking user input
|
|
@@ -124,7 +127,7 @@ href="http://opensource.org/licenses/MIT">MIT License</a>.</p>
|
|
|
124
127
|
</div></div>
|
|
125
128
|
|
|
126
129
|
<div id="footer">
|
|
127
|
-
Generated on
|
|
130
|
+
Generated on Thu Jun 16 09:56:22 2016 by
|
|
128
131
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
|
129
132
|
0.8.7.6 (ruby-2.3.0).
|
|
130
133
|
</div>
|
data/doc/index.html
CHANGED
|
@@ -62,6 +62,9 @@
|
|
|
62
62
|
<iframe id="search_frame"></iframe>
|
|
63
63
|
|
|
64
64
|
<div id="content"><div id='filecontents'>
|
|
65
|
+
<p><a href="https://travis-ci.org/Calvyn82/oorb"><img
|
|
66
|
+
src="https://travis-ci.org/Calvyn82/oorb.svg?branch=master"></a></p>
|
|
67
|
+
|
|
65
68
|
<h1 id="label-Oorb">Oorb</h1>
|
|
66
69
|
|
|
67
70
|
<p>OCR Optimized Regex Builder is a command line tool for taking user input
|
|
@@ -124,7 +127,7 @@ href="http://opensource.org/licenses/MIT">MIT License</a>.</p>
|
|
|
124
127
|
</div></div>
|
|
125
128
|
|
|
126
129
|
<div id="footer">
|
|
127
|
-
Generated on
|
|
130
|
+
Generated on Thu Jun 16 09:56:22 2016 by
|
|
128
131
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
|
129
132
|
0.8.7.6 (ruby-2.3.0).
|
|
130
133
|
</div>
|
data/doc/method_list.html
CHANGED
|
@@ -64,18 +64,24 @@
|
|
|
64
64
|
|
|
65
65
|
|
|
66
66
|
<li class="r1 ">
|
|
67
|
-
<span class='object_link'><a href="OORB.html#
|
|
67
|
+
<span class='object_link'><a href="OORB.html#build_section-instance_method" title="OORB#build_section (method)">#build_section</a></span>
|
|
68
68
|
<small>OORB</small>
|
|
69
69
|
</li>
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
<li class="r2 ">
|
|
73
|
-
<span class='object_link'><a href="OORB.html#
|
|
73
|
+
<span class='object_link'><a href="OORB.html#combine_whitespace-instance_method" title="OORB#combine_whitespace (method)">#combine_whitespace</a></span>
|
|
74
74
|
<small>OORB</small>
|
|
75
75
|
</li>
|
|
76
76
|
|
|
77
77
|
|
|
78
78
|
<li class="r1 ">
|
|
79
|
+
<span class='object_link'><a href="OORB.html#escape-instance_method" title="OORB#escape (method)">#escape</a></span>
|
|
80
|
+
<small>OORB</small>
|
|
81
|
+
</li>
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
<li class="r2 ">
|
|
79
85
|
<span class='object_link'><a href="OORB.html#run-instance_method" title="OORB#run (method)">#run</a></span>
|
|
80
86
|
<small>OORB</small>
|
|
81
87
|
</li>
|
|
@@ -103,7 +103,7 @@
|
|
|
103
103
|
</div>
|
|
104
104
|
|
|
105
105
|
<div id="footer">
|
|
106
|
-
Generated on
|
|
106
|
+
Generated on Thu Jun 16 09:56:22 2016 by
|
|
107
107
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
|
108
108
|
0.8.7.6 (ruby-2.3.0).
|
|
109
109
|
</div>
|
data/lib/oorb.rb
CHANGED
|
@@ -6,27 +6,33 @@ class OORB
|
|
|
6
6
|
|
|
7
7
|
##
|
|
8
8
|
# Letters that regularly are mistaken in OCR and their common replacements
|
|
9
|
-
LETTERS = {'
|
|
10
|
-
'h' => %w(b),
|
|
9
|
+
LETTERS = {'a' => %w(9),
|
|
11
10
|
'b' => %w(h),
|
|
12
|
-
'y' => %w(v j 7),
|
|
13
11
|
'c' => %w(e f d o 6),
|
|
14
|
-
'
|
|
12
|
+
'd' => %w(3 0 o 7),
|
|
15
13
|
'e' => %w(6 c d f 4 3),
|
|
16
|
-
'o' => %w(c 6 0 3 d),
|
|
17
|
-
't' => %w(i l 4),
|
|
18
|
-
'a' => %w(9),
|
|
19
|
-
'l' => %w(1 i t),
|
|
20
|
-
'v' => %w(yu),
|
|
21
|
-
'u' => %w(v),
|
|
22
14
|
'f' => %w(c s p),
|
|
23
|
-
'd' => %w(3 0 o),
|
|
24
|
-
'z' => %w(2),
|
|
25
15
|
'g' => %w(9 8),
|
|
16
|
+
'h' => %w(b),
|
|
17
|
+
'i' => %w(l 1),
|
|
26
18
|
'j' => %w(y),
|
|
27
|
-
'
|
|
19
|
+
'l' => %w(1 i t 7),
|
|
28
20
|
'n' => %w(r),
|
|
29
|
-
'
|
|
21
|
+
'o' => %w(c 6 0 3 d),
|
|
22
|
+
'p' => %w(fr),
|
|
23
|
+
'r' => %w(np),
|
|
24
|
+
's' => %w(f l j i 3 8 5),
|
|
25
|
+
't' => %w(i l 4 7),
|
|
26
|
+
'u' => %w(v),
|
|
27
|
+
'v' => %w(yu),
|
|
28
|
+
'y' => %w(v j 7),
|
|
29
|
+
'z' => %w(2)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
##
|
|
33
|
+
# Letters that are commonly mistakenly split up and their replacements
|
|
34
|
+
SECTIONS = {'m' => '[mnr][nr]?',
|
|
35
|
+
'w' => '[wvu][vu]?'
|
|
30
36
|
}
|
|
31
37
|
|
|
32
38
|
##
|
|
@@ -45,7 +51,13 @@ class OORB
|
|
|
45
51
|
# @return [String] formatted as a valid regular expression optimized for capturing OCR mistakes
|
|
46
52
|
def build_regex(input)
|
|
47
53
|
input.downcase.chars.map do |char|
|
|
48
|
-
LETTERS.has_key?(char)
|
|
54
|
+
if LETTERS.has_key?(char)
|
|
55
|
+
build_collection(char)
|
|
56
|
+
elsif SECTIONS.has_key?(char)
|
|
57
|
+
build_section(char)
|
|
58
|
+
else
|
|
59
|
+
escape(char)
|
|
60
|
+
end
|
|
49
61
|
end.join
|
|
50
62
|
end
|
|
51
63
|
|
|
@@ -58,18 +70,32 @@ class OORB
|
|
|
58
70
|
end
|
|
59
71
|
|
|
60
72
|
##
|
|
61
|
-
# Builds a group match from an input letter.
|
|
62
|
-
# @raise [ArgumentError] if the argument isn't a single character string
|
|
73
|
+
# Builds a group match from an input letter.
|
|
74
|
+
# @raise [ArgumentError] if the argument isn't a single character string from OORB::LETTERS
|
|
63
75
|
# @param character [String] made of a single character
|
|
64
76
|
# @return [String] collection of commonly mis-ocr'd characters bounded by square brackets
|
|
65
77
|
def build_collection(character)
|
|
66
78
|
unless LETTERS[character]
|
|
67
|
-
raise ArgumentError,
|
|
79
|
+
raise ArgumentError,
|
|
80
|
+
"Valid arguments are a single character from #{LETTERS.keys.join(", ")}."
|
|
68
81
|
end
|
|
69
82
|
LETTERS[character].each { |x| character << x }
|
|
70
83
|
"[#{character}]"
|
|
71
84
|
end
|
|
72
85
|
|
|
86
|
+
##
|
|
87
|
+
# Builds a section from an input letter.
|
|
88
|
+
# @raise [ArgumentError] if the argument isn't a single character string from OORB::SECTIONS
|
|
89
|
+
# @param character [String] made of a single character
|
|
90
|
+
# @return [String] section of commonly split characters with optional second character
|
|
91
|
+
def build_section(character)
|
|
92
|
+
unless SECTIONS[character]
|
|
93
|
+
raise ArgumentError,
|
|
94
|
+
"Valid arguments are a single character from #{SECTIONS.keys.join(", ")}."
|
|
95
|
+
end
|
|
96
|
+
SECTIONS[character]
|
|
97
|
+
end
|
|
98
|
+
|
|
73
99
|
##
|
|
74
100
|
# Escapes a single-character string and makes whitespace characters optional
|
|
75
101
|
# @param character [String] made of a single character
|
|
@@ -81,4 +107,14 @@ class OORB
|
|
|
81
107
|
end
|
|
82
108
|
character == "\s" ? "\\s?" : Regexp.escape(character)
|
|
83
109
|
end
|
|
110
|
+
|
|
111
|
+
private
|
|
112
|
+
|
|
113
|
+
def format(character)
|
|
114
|
+
if character =~ /[wm]/
|
|
115
|
+
build_
|
|
116
|
+
else
|
|
117
|
+
Regexp.escape(character)
|
|
118
|
+
end
|
|
119
|
+
end
|
|
84
120
|
end
|
data/lib/oorb/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: oorb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Calvyn82
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-06-
|
|
11
|
+
date: 2016-06-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|