text_detector 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -4
- data/examples/regexp.rb +15 -0
- data/examples/simple.rb +15 -0
- data/lib/text_detector/dictionary/file.rb +7 -8
- data/lib/text_detector/dictionary.rb +2 -1
- data/lib/text_detector/version.rb +1 -1
- data/text_detector.gemspec +2 -2
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 315dfa048ec3b6dd195976900bb573407cbfdfc9
|
4
|
+
data.tar.gz: 2441e251ec922dc7900907fab919a068f1dbd614
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e31c2fe0028629483e7dc6079c16314217ef15c471289b9ffedc8030c3ea807db09fc55905080eec18f196de2009f1b9615c0e7770e9e946bd802d4ea7b38d18
|
7
|
+
data.tar.gz: f1134bc00b6cd5261d566b545c0ea9b2c108f6c36c1b76c860f2ecab935f5e1d4aaf384db662d9deb65149b09781a50709e60bbdaa231bf608a0d37ace52a547
|
data/README.md
CHANGED
@@ -3,9 +3,7 @@
|
|
3
3
|
|
4
4
|
# TextDetector
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
TODO: Delete this and the text above, and describe your gem
|
6
|
+
This is an experimental implementation for detecting text from document.
|
9
7
|
|
10
8
|
## Installation
|
11
9
|
|
@@ -25,7 +23,23 @@ Or install it yourself as:
|
|
25
23
|
|
26
24
|
## Usage
|
27
25
|
|
28
|
-
|
26
|
+
See also `bin/benchmark` and `examples/`.
|
27
|
+
|
28
|
+
### Regexp
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
require 'text_detector'
|
32
|
+
regexp_detector = TextDetector.factory(:regexp, open('dictionary.txt'))
|
33
|
+
regexp_detector.detect('The detector detects NG word from this document. The dictionary has many NG words.')
|
34
|
+
```
|
35
|
+
|
36
|
+
### Like Boyer-Moore String Search Algorithm
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
require 'text_detector'
|
40
|
+
simple_detector = TextDetector.factory(:simple, open('dictionary.txt'))
|
41
|
+
simple_detector.detect('The detector detects NG word from this document. The dictionary has many NG words.')
|
42
|
+
```
|
29
43
|
|
30
44
|
## Development
|
31
45
|
|
data/examples/regexp.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
require 'text_detector'
|
5
|
+
|
6
|
+
dictionary = StringIO.new(<<EOF)
|
7
|
+
NG_A
|
8
|
+
NG_B
|
9
|
+
NG_C
|
10
|
+
EOF
|
11
|
+
|
12
|
+
regexp_detector = TextDetector.factory(:regexp, dictionary)
|
13
|
+
|
14
|
+
p regexp_detector.detect('This document not include NG words.')
|
15
|
+
p regexp_detector.detect('This document include NG words: NG_A')
|
data/examples/simple.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
require 'text_detector'
|
5
|
+
|
6
|
+
dictionary = StringIO.new(<<EOF)
|
7
|
+
NG_A
|
8
|
+
NG_B
|
9
|
+
NG_C
|
10
|
+
EOF
|
11
|
+
|
12
|
+
simple_detector = TextDetector.factory(:simple, dictionary)
|
13
|
+
|
14
|
+
p simple_detector.detect('This document not include NG words.')
|
15
|
+
p simple_detector.detect('This document include NG words: NG_A')
|
@@ -20,15 +20,14 @@ module TextDetector
|
|
20
20
|
@dictionary = Set.new
|
21
21
|
@depth = []
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
@depth << text.size
|
30
|
-
end
|
23
|
+
dictionary.each_line do |line|
|
24
|
+
text = TextDetector.normalize(line.chomp)
|
25
|
+
next if text.size == 0
|
26
|
+
|
27
|
+
@dictionary << text
|
28
|
+
@depth << text.size
|
31
29
|
end
|
30
|
+
|
32
31
|
@depth = @depth.sort.uniq
|
33
32
|
end
|
34
33
|
end
|
data/text_detector.gemspec
CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["koshigoe"]
|
10
10
|
spec.email = ["koshigoeb@gmail.com"]
|
11
11
|
|
12
|
-
spec.summary = %q{
|
13
|
-
spec.description = %q{
|
12
|
+
spec.summary = %q{This is an experimental implementation for detecting text from document.}
|
13
|
+
spec.description = %q{This is an experimental implementation for detecting text from document.}
|
14
14
|
spec.homepage = "https://github.com/koshigoe/text_detector"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_detector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- koshigoe
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description:
|
55
|
+
description: This is an experimental implementation for detecting text from document.
|
56
56
|
email:
|
57
57
|
- koshigoeb@gmail.com
|
58
58
|
executables: []
|
@@ -68,6 +68,8 @@ files:
|
|
68
68
|
- bin/benchmark
|
69
69
|
- bin/console
|
70
70
|
- bin/setup
|
71
|
+
- examples/regexp.rb
|
72
|
+
- examples/simple.rb
|
71
73
|
- lib/text_detector.rb
|
72
74
|
- lib/text_detector/detector.rb
|
73
75
|
- lib/text_detector/detector/base.rb
|
@@ -102,5 +104,5 @@ rubyforge_project:
|
|
102
104
|
rubygems_version: 2.4.5
|
103
105
|
signing_key:
|
104
106
|
specification_version: 4
|
105
|
-
summary:
|
107
|
+
summary: This is an experimental implementation for detecting text from document.
|
106
108
|
test_files: []
|